summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--.gitignore1
-rw-r--r--Makefile2
-rw-r--r--block.c316
-rw-r--r--block/cow.c1
-rw-r--r--block/mirror.c71
-rw-r--r--block/nfs.c22
-rw-r--r--block/qapi.c6
-rw-r--r--block/qcow.c1
-rw-r--r--block/qcow2.c1
-rw-r--r--block/qed.c1
-rw-r--r--block/quorum.c97
-rw-r--r--block/vmdk.c1
-rw-r--r--blockdev.c47
-rw-r--r--blockjob.c22
-rwxr-xr-xconfigure12
-rw-r--r--docs/qapi-code-gen.txt8
-rw-r--r--docs/qmp/qmp-events.txt559
-rw-r--r--hmp.c1
-rw-r--r--hw/block/dataplane/virtio-blk.c246
-rw-r--r--hw/block/virtio-blk.c139
-rw-r--r--hw/char/virtio-console.c12
-rw-r--r--hw/core/qdev-properties-system.c3
-rw-r--r--hw/intc/xics.c182
-rw-r--r--hw/intc/xics_kvm.c34
-rw-r--r--hw/misc/vfio.c78
-rw-r--r--hw/net/eepro100.c4
-rw-r--r--hw/net/virtio-net.c2
-rw-r--r--hw/nvram/spapr_nvram.c4
-rw-r--r--hw/pci-host/uninorth.c2
-rw-r--r--hw/ppc/Makefile.objs3
-rw-r--r--hw/ppc/e500.c13
-rw-r--r--hw/ppc/mac_newworld.c18
-rw-r--r--hw/ppc/spapr.c104
-rw-r--r--hw/ppc/spapr_events.c5
-rw-r--r--hw/ppc/spapr_iommu.c7
-rw-r--r--hw/ppc/spapr_pci.c217
-rw-r--r--hw/ppc/spapr_pci_vfio.c102
-rw-r--r--hw/ppc/spapr_rtas.c97
-rw-r--r--hw/ppc/spapr_vio.c9
-rw-r--r--hw/watchdog/watchdog.c2
-rw-r--r--include/block/block.h6
-rw-r--r--include/block/block_int.h6
-rw-r--r--include/block/blockjob.h10
-rw-r--r--include/block/qapi.h1
-rw-r--r--include/hw/misc/vfio.h9
-rw-r--r--include/hw/pci-host/spapr.h32
-rw-r--r--include/hw/ppc/spapr.h82
-rw-r--r--include/hw/ppc/xics.h9
-rw-r--r--include/hw/virtio/virtio-blk.h23
-rw-r--r--include/migration/vmstate.h11
-rw-r--r--include/net/net.h2
-rw-r--r--linux-user/elfload.c44
-rw-r--r--monitor.c3
-rw-r--r--net/Makefile.objs1
-rw-r--r--net/clients.h2
-rw-r--r--net/l2tpv3.c757
-rw-r--r--net/net.c10
-rw-r--r--pc-bios/s390-ccw.imgbin9432 -> 17624 bytes
-rw-r--r--pc-bios/s390-ccw/bootmap.c445
-rw-r--r--pc-bios/s390-ccw/bootmap.h344
-rw-r--r--pc-bios/s390-ccw/main.c13
-rw-r--r--pc-bios/s390-ccw/s390-ccw.h38
-rw-r--r--pc-bios/s390-ccw/sclp-ascii.c4
-rw-r--r--pc-bios/s390-ccw/virtio.c122
-rw-r--r--pc-bios/s390-ccw/virtio.h50
-rw-r--r--qapi-schema.json80
-rw-r--r--qapi/block-core.json33
-rw-r--r--qapi/event.json (renamed from qapi-event.json)18
-rw-r--r--qemu-bridge-helper.c9
-rw-r--r--qemu-char.c1
-rw-r--r--qemu-options.hx82
-rw-r--r--qmp-commands.hx24
-rw-r--r--scripts/qapi-event.py5
-rw-r--r--scripts/qapi.py2
-rw-r--r--target-ppc/cpu-models.c3
-rw-r--r--target-ppc/cpu-models.h7
-rw-r--r--target-ppc/cpu-qom.h2
-rw-r--r--target-ppc/cpu.h2
-rw-r--r--target-ppc/gdbstub.c137
-rw-r--r--target-ppc/kvm.c7
-rw-r--r--target-ppc/kvm_ppc.h6
-rw-r--r--target-ppc/translate.c13
-rw-r--r--target-ppc/translate_init.c28
-rw-r--r--tcg/ppc/tcg-target.c6
-rw-r--r--tests/qapi-schema/event-nest-struct.err2
-rwxr-xr-xtests/qemu-iotests/0318
-rwxr-xr-xtests/qemu-iotests/0366
-rwxr-xr-xtests/qemu-iotests/03918
-rwxr-xr-xtests/qemu-iotests/04012
-rw-r--r--tests/qemu-iotests/040.out4
-rwxr-xr-xtests/qemu-iotests/041207
-rw-r--r--tests/qemu-iotests/041.out4
-rwxr-xr-xtests/qemu-iotests/0516
-rw-r--r--tests/qemu-iotests/051.out14
-rwxr-xr-xtests/qemu-iotests/0542
-rwxr-xr-xtests/qemu-iotests/06020
-rwxr-xr-xtests/qemu-iotests/06124
-rwxr-xr-xtests/qemu-iotests/0652
-rwxr-xr-xtests/qemu-iotests/08115
-rw-r--r--tests/qemu-iotests/081.out10
-rwxr-xr-xtests/qemu-iotests/08310
-rwxr-xr-xtests/qemu-iotests/09586
-rw-r--r--tests/qemu-iotests/095.out31
-rwxr-xr-xtests/qemu-iotests/check110
-rw-r--r--tests/qemu-iotests/common11
-rw-r--r--tests/qemu-iotests/common.config2
-rw-r--r--tests/qemu-iotests/common.rc8
-rw-r--r--tests/qemu-iotests/group1
-rw-r--r--tests/qemu-iotests/iotests.py3
-rw-r--r--trace-events11
-rw-r--r--util/qemu-option.c11
-rw-r--r--vmstate.c13
112 files changed, 4494 insertions, 1098 deletions
diff --git a/.gitignore b/.gitignore
index c658613560..2286d0a109 100644
--- a/.gitignore
+++ b/.gitignore
@@ -26,6 +26,7 @@
 /qapi-generated
 /qapi-types.[ch]
 /qapi-visit.[ch]
+/qapi-event.[ch]
 /qmp-commands.h
 /qmp-marshal.c
 /qemu-doc.html
diff --git a/Makefile b/Makefile
index 145adb68a2..1eea0c418f 100644
--- a/Makefile
+++ b/Makefile
@@ -248,7 +248,7 @@ $(SRC_PATH)/qga/qapi-schema.json $(SRC_PATH)/scripts/qapi-commands.py $(qapi-py)
 
 qapi-modules = $(SRC_PATH)/qapi-schema.json $(SRC_PATH)/qapi/common.json \
                $(SRC_PATH)/qapi/block.json $(SRC_PATH)/qapi/block-core.json \
-               $(SRC_PATH)/qapi-event.json
+               $(SRC_PATH)/qapi/event.json
 
 qapi-types.c qapi-types.h :\
 $(qapi-modules) $(SRC_PATH)/scripts/qapi-types.py $(qapi-py)
diff --git a/block.c b/block.c
index 217f523dd0..6856c18aa8 100644
--- a/block.c
+++ b/block.c
@@ -831,7 +831,7 @@ static int bdrv_open_flags(BlockDriverState *bs, int flags)
      * Clear flags that are internal to the block layer before opening the
      * image.
      */
-    open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
+    open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL);
 
     /*
      * Snapshots should be writable.
@@ -928,6 +928,7 @@ static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
     bs->zero_beyond_eof = true;
     open_flags = bdrv_open_flags(bs, flags);
     bs->read_only = !(open_flags & BDRV_O_RDWR);
+    bs->growable = !!(flags & BDRV_O_PROTOCOL);
 
     if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
         error_setg(errp,
@@ -1005,94 +1006,124 @@ free_and_fail:
     return ret;
 }
 
+static QDict *parse_json_filename(const char *filename, Error **errp)
+{
+    QObject *options_obj;
+    QDict *options;
+    int ret;
+
+    ret = strstart(filename, "json:", &filename);
+    assert(ret);
+
+    options_obj = qobject_from_json(filename);
+    if (!options_obj) {
+        error_setg(errp, "Could not parse the JSON options");
+        return NULL;
+    }
+
+    if (qobject_type(options_obj) != QTYPE_QDICT) {
+        qobject_decref(options_obj);
+        error_setg(errp, "Invalid JSON object given");
+        return NULL;
+    }
+
+    options = qobject_to_qdict(options_obj);
+    qdict_flatten(options);
+
+    return options;
+}
+
 /*
- * Opens a file using a protocol (file, host_device, nbd, ...)
- *
- * options is an indirect pointer to a QDict of options to pass to the block
- * drivers, or pointer to NULL for an empty set of options. If this function
- * takes ownership of the QDict reference, it will set *options to NULL;
- * otherwise, it will contain unused/unrecognized options after this function
- * returns. Then, the caller is responsible for freeing it. If it intends to
- * reuse the QDict, QINCREF() should be called beforehand.
+ * Fills in default options for opening images and converts the legacy
+ * filename/flags pair to option QDict entries.
  */
-static int bdrv_file_open(BlockDriverState *bs, const char *filename,
-                          QDict **options, int flags, Error **errp)
+static int bdrv_fill_options(QDict **options, const char **pfilename, int flags,
+                             BlockDriver *drv, Error **errp)
 {
-    BlockDriver *drv;
+    const char *filename = *pfilename;
     const char *drvname;
+    bool protocol = flags & BDRV_O_PROTOCOL;
     bool parse_filename = false;
     Error *local_err = NULL;
-    int ret;
+
+    /* Parse json: pseudo-protocol */
+    if (filename && g_str_has_prefix(filename, "json:")) {
+        QDict *json_options = parse_json_filename(filename, &local_err);
+        if (local_err) {
+            error_propagate(errp, local_err);
+            return -EINVAL;
+        }
+
+        /* Options given in the filename have lower priority than options
+         * specified directly */
+        qdict_join(*options, json_options, false);
+        QDECREF(json_options);
+        *pfilename = filename = NULL;
+    }
 
     /* Fetch the file name from the options QDict if necessary */
-    if (!filename) {
-        filename = qdict_get_try_str(*options, "filename");
-    } else if (filename && !qdict_haskey(*options, "filename")) {
-        qdict_put(*options, "filename", qstring_from_str(filename));
-        parse_filename = true;
-    } else {
-        error_setg(errp, "Can't specify 'file' and 'filename' options at the "
-                   "same time");
-        ret = -EINVAL;
-        goto fail;
+    if (protocol && filename) {
+        if (!qdict_haskey(*options, "filename")) {
+            qdict_put(*options, "filename", qstring_from_str(filename));
+            parse_filename = true;
+        } else {
+            error_setg(errp, "Can't specify 'file' and 'filename' options at "
+                             "the same time");
+            return -EINVAL;
+        }
     }
 
     /* Find the right block driver */
+    filename = qdict_get_try_str(*options, "filename");
     drvname = qdict_get_try_str(*options, "driver");
-    if (drvname) {
-        drv = bdrv_find_format(drvname);
-        if (!drv) {
-            error_setg(errp, "Unknown driver '%s'", drvname);
-        }
-        qdict_del(*options, "driver");
-    } else if (filename) {
-        drv = bdrv_find_protocol(filename, parse_filename);
-        if (!drv) {
-            error_setg(errp, "Unknown protocol");
+
+    if (drv) {
+        if (drvname) {
+            error_setg(errp, "Driver specified twice");
+            return -EINVAL;
         }
+        drvname = drv->format_name;
+        qdict_put(*options, "driver", qstring_from_str(drvname));
     } else {
-        error_setg(errp, "Must specify either driver or file");
-        drv = NULL;
-    }
+        if (!drvname && protocol) {
+            if (filename) {
+                drv = bdrv_find_protocol(filename, parse_filename);
+                if (!drv) {
+                    error_setg(errp, "Unknown protocol");
+                    return -EINVAL;
+                }
 
-    if (!drv) {
-        /* errp has been set already */
-        ret = -ENOENT;
-        goto fail;
+                drvname = drv->format_name;
+                qdict_put(*options, "driver", qstring_from_str(drvname));
+            } else {
+                error_setg(errp, "Must specify either driver or file");
+                return -EINVAL;
+            }
+        } else if (drvname) {
+            drv = bdrv_find_format(drvname);
+            if (!drv) {
+                error_setg(errp, "Unknown driver '%s'", drvname);
+                return -ENOENT;
+            }
+        }
     }
 
-    /* Parse the filename and open it */
-    if (drv->bdrv_parse_filename && parse_filename) {
+    assert(drv || !protocol);
+
+    /* Driver-specific filename parsing */
+    if (drv && drv->bdrv_parse_filename && parse_filename) {
         drv->bdrv_parse_filename(filename, *options, &local_err);
         if (local_err) {
             error_propagate(errp, local_err);
-            ret = -EINVAL;
-            goto fail;
+            return -EINVAL;
         }
 
         if (!drv->bdrv_needs_filename) {
             qdict_del(*options, "filename");
-        } else {
-            filename = qdict_get_str(*options, "filename");
         }
     }
 
-    if (!drv->bdrv_file_open) {
-        ret = bdrv_open(&bs, filename, NULL, *options, flags, drv, &local_err);
-        *options = NULL;
-    } else {
-        ret = bdrv_open_common(bs, NULL, *options, flags, drv, &local_err);
-    }
-    if (ret < 0) {
-        error_propagate(errp, local_err);
-        goto fail;
-    }
-
-    bs->growable = 1;
     return 0;
-
-fail:
-    return ret;
 }
 
 void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
@@ -1162,6 +1193,13 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp)
         bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX);
     }
 
+    if (!bs->drv || !bs->drv->supports_backing) {
+        ret = -EINVAL;
+        error_setg(errp, "Driver doesn't support backing files");
+        QDECREF(options);
+        goto free_exit;
+    }
+
     backing_hd = bdrv_new("", errp);
 
     if (bs->backing_format[0] != '\0') {
@@ -1240,7 +1278,7 @@ done:
     return ret;
 }
 
-void bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
+int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
 {
     /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
     char *tmp_filename = g_malloc0(PATH_MAX + 1);
@@ -1258,6 +1296,7 @@ void bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
     /* Get the required size from the image */
     total_size = bdrv_getlength(bs);
     if (total_size < 0) {
+        ret = total_size;
         error_setg_errno(errp, -total_size, "Could not get image size");
         goto out;
     }
@@ -1304,33 +1343,7 @@ void bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
 
 out:
     g_free(tmp_filename);
-}
-
-static QDict *parse_json_filename(const char *filename, Error **errp)
-{
-    QObject *options_obj;
-    QDict *options;
-    int ret;
-
-    ret = strstart(filename, "json:", &filename);
-    assert(ret);
-
-    options_obj = qobject_from_json(filename);
-    if (!options_obj) {
-        error_setg(errp, "Could not parse the JSON options");
-        return NULL;
-    }
-
-    if (qobject_type(options_obj) != QTYPE_QDICT) {
-        qobject_decref(options_obj);
-        error_setg(errp, "Invalid JSON object given");
-        return NULL;
-    }
-
-    options = qobject_to_qdict(options_obj);
-    qdict_flatten(options);
-
-    return options;
+    return ret;
 }
 
 /*
@@ -1396,77 +1409,62 @@ int bdrv_open(BlockDriverState **pbs, const char *filename,
         options = qdict_new();
     }
 
-    if (filename && g_str_has_prefix(filename, "json:")) {
-        QDict *json_options = parse_json_filename(filename, &local_err);
-        if (local_err) {
-            ret = -EINVAL;
-            goto fail;
-        }
-
-        /* Options given in the filename have lower priority than options
-         * specified directly */
-        qdict_join(options, json_options, false);
-        QDECREF(json_options);
-        filename = NULL;
-    }
-
-    bs->options = options;
-    options = qdict_clone_shallow(options);
-
-    if (flags & BDRV_O_PROTOCOL) {
-        assert(!drv);
-        ret = bdrv_file_open(bs, filename, &options, flags & ~BDRV_O_PROTOCOL,
-                             &local_err);
-        if (!ret) {
-            drv = bs->drv;
-            goto done;
-        } else if (bs->drv) {
-            goto close_and_fail;
-        } else {
-            goto fail;
-        }
-    }
-
-    /* Open image file without format layer */
-    if (flags & BDRV_O_RDWR) {
-        flags |= BDRV_O_ALLOW_RDWR;
-    }
-    if (flags & BDRV_O_SNAPSHOT) {
-        snapshot_flags = bdrv_temp_snapshot_flags(flags);
-        flags = bdrv_backing_flags(flags);
-    }
-
-    assert(file == NULL);
-    ret = bdrv_open_image(&file, filename, options, "file",
-                          bdrv_inherited_flags(flags),
-                          true, &local_err);
-    if (ret < 0) {
+    ret = bdrv_fill_options(&options, &filename, flags, drv, &local_err);
+    if (local_err) {
         goto fail;
     }
 
     /* Find the right image format driver */
+    drv = NULL;
     drvname = qdict_get_try_str(options, "driver");
     if (drvname) {
         drv = bdrv_find_format(drvname);
         qdict_del(options, "driver");
         if (!drv) {
-            error_setg(errp, "Invalid driver: '%s'", drvname);
+            error_setg(errp, "Unknown driver: '%s'", drvname);
             ret = -EINVAL;
             goto fail;
         }
     }
 
-    if (!drv) {
-        if (file) {
-            ret = find_image_format(file, filename, &drv, &local_err);
-        } else {
-            error_setg(errp, "Must specify either driver or file");
-            ret = -EINVAL;
+    assert(drvname || !(flags & BDRV_O_PROTOCOL));
+    if (drv && !drv->bdrv_file_open) {
+        /* If the user explicitly wants a format driver here, we'll need to add
+         * another layer for the protocol in bs->file */
+        flags &= ~BDRV_O_PROTOCOL;
+    }
+
+    bs->options = options;
+    options = qdict_clone_shallow(options);
+
+    /* Open image file without format layer */
+    if ((flags & BDRV_O_PROTOCOL) == 0) {
+        if (flags & BDRV_O_RDWR) {
+            flags |= BDRV_O_ALLOW_RDWR;
+        }
+        if (flags & BDRV_O_SNAPSHOT) {
+            snapshot_flags = bdrv_temp_snapshot_flags(flags);
+            flags = bdrv_backing_flags(flags);
+        }
+
+        assert(file == NULL);
+        ret = bdrv_open_image(&file, filename, options, "file",
+                              bdrv_inherited_flags(flags),
+                              true, &local_err);
+        if (ret < 0) {
             goto fail;
         }
     }
 
-    if (!drv) {
+    /* Image format probing */
+    if (!drv && file) {
+        ret = find_image_format(file, filename, &drv, &local_err);
+        if (ret < 0) {
+            goto fail;
+        }
+    } else if (!drv) {
+        error_setg(errp, "Must specify either driver or file");
+        ret = -EINVAL;
         goto fail;
     }
 
@@ -1495,15 +1493,12 @@ int bdrv_open(BlockDriverState **pbs, const char *filename,
     /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
      * temporary snapshot afterwards. */
     if (snapshot_flags) {
-        bdrv_append_temp_snapshot(bs, snapshot_flags, &local_err);
+        ret = bdrv_append_temp_snapshot(bs, snapshot_flags, &local_err);
         if (local_err) {
-            error_propagate(errp, local_err);
             goto close_and_fail;
         }
     }
 
-
-done:
     /* Check if any unknown options were used */
     if (options && (qdict_size(options) != 0)) {
         const QDictEntry *entry = qdict_first(options);
@@ -3489,9 +3484,7 @@ int bdrv_truncate(BlockDriverState *bs, int64_t offset)
         return -ENOTSUP;
     if (bs->read_only)
         return -EACCES;
-    if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_RESIZE, NULL)) {
-        return -EBUSY;
-    }
+
     ret = drv->bdrv_truncate(bs, offset);
     if (ret == 0) {
         ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
@@ -5774,3 +5767,28 @@ bool bdrv_is_first_non_filter(BlockDriverState *candidate)
 
     return false;
 }
+
+BlockDriverState *check_to_replace_node(const char *node_name, Error **errp)
+{
+    BlockDriverState *to_replace_bs = bdrv_find_node(node_name);
+    if (!to_replace_bs) {
+        error_setg(errp, "Node name '%s' not found", node_name);
+        return NULL;
+    }
+
+    if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) {
+        return NULL;
+    }
+
+    /* We don't want arbitrary node of the BDS chain to be replaced only the top
+     * most non filter in order to prevent data corruption.
+     * Another benefit is that this tests exclude backing files which are
+     * blocked by the backing blockers.
+     */
+    if (!bdrv_is_first_non_filter(to_replace_bs)) {
+        error_setg(errp, "Only top most non filter can be replaced");
+        return NULL;
+    }
+
+    return to_replace_bs;
+}
diff --git a/block/cow.c b/block/cow.c
index a05a92cada..8f81ee6d56 100644
--- a/block/cow.c
+++ b/block/cow.c
@@ -414,6 +414,7 @@ static BlockDriver bdrv_cow = {
     .bdrv_close     = cow_close,
     .bdrv_create    = cow_create,
     .bdrv_has_zero_init     = bdrv_has_zero_init_1,
+    .supports_backing       = true,
 
     .bdrv_read              = cow_co_read,
     .bdrv_write             = cow_co_write,
diff --git a/block/mirror.c b/block/mirror.c
index 301a04de8e..6c3ee7041c 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -32,6 +32,12 @@ typedef struct MirrorBlockJob {
     RateLimit limit;
     BlockDriverState *target;
     BlockDriverState *base;
+    /* The name of the graph node to replace */
+    char *replaces;
+    /* The BDS to replace */
+    BlockDriverState *to_replace;
+    /* Used to block operations on the drive-mirror-replace target */
+    Error *replace_blocker;
     bool is_none_mode;
     BlockdevOnError on_source_error, on_target_error;
     bool synced;
@@ -324,9 +330,18 @@ static void coroutine_fn mirror_run(void *opaque)
     }
 
     s->common.len = bdrv_getlength(bs);
-    if (s->common.len <= 0) {
+    if (s->common.len < 0) {
         ret = s->common.len;
         goto immediate_exit;
+    } else if (s->common.len == 0) {
+        /* Report BLOCK_JOB_READY and wait for complete. */
+        block_job_event_ready(&s->common);
+        s->synced = true;
+        while (!block_job_is_cancelled(&s->common) && !s->should_complete) {
+            block_job_yield(&s->common);
+        }
+        s->common.cancelled = false;
+        goto immediate_exit;
     }
 
     length = DIV_ROUND_UP(s->common.len, s->granularity);
@@ -491,10 +506,14 @@ immediate_exit:
     bdrv_release_dirty_bitmap(bs, s->dirty_bitmap);
     bdrv_iostatus_disable(s->target);
     if (s->should_complete && ret == 0) {
-        if (bdrv_get_flags(s->target) != bdrv_get_flags(s->common.bs)) {
-            bdrv_reopen(s->target, bdrv_get_flags(s->common.bs), NULL);
+        BlockDriverState *to_replace = s->common.bs;
+        if (s->to_replace) {
+            to_replace = s->to_replace;
         }
-        bdrv_swap(s->target, s->common.bs);
+        if (bdrv_get_flags(s->target) != bdrv_get_flags(to_replace)) {
+            bdrv_reopen(s->target, bdrv_get_flags(to_replace), NULL);
+        }
+        bdrv_swap(s->target, to_replace);
         if (s->common.driver->job_type == BLOCK_JOB_TYPE_COMMIT) {
             /* drop the bs loop chain formed by the swap: break the loop then
              * trigger the unref from the top one */
@@ -503,6 +522,12 @@ immediate_exit:
             bdrv_unref(p);
         }
     }
+    if (s->to_replace) {
+        bdrv_op_unblock_all(s->to_replace, s->replace_blocker);
+        error_free(s->replace_blocker);
+        bdrv_unref(s->to_replace);
+    }
+    g_free(s->replaces);
     bdrv_unref(s->target);
     block_job_completed(&s->common, ret);
 }
@@ -541,6 +566,20 @@ static void mirror_complete(BlockJob *job, Error **errp)
         return;
     }
 
+    /* check the target bs is not blocked and block all operations on it */
+    if (s->replaces) {
+        s->to_replace = check_to_replace_node(s->replaces, &local_err);
+        if (!s->to_replace) {
+            error_propagate(errp, local_err);
+            return;
+        }
+
+        error_setg(&s->replace_blocker,
+                   "block device is in use by block-job-complete");
+        bdrv_op_block_all(s->to_replace, s->replace_blocker);
+        bdrv_ref(s->to_replace);
+    }
+
     s->should_complete = true;
     block_job_resume(job);
 }
@@ -563,14 +602,15 @@ static const BlockJobDriver commit_active_job_driver = {
 };
 
 static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
-                            int64_t speed, int64_t granularity,
-                            int64_t buf_size,
-                            BlockdevOnError on_source_error,
-                            BlockdevOnError on_target_error,
-                            BlockDriverCompletionFunc *cb,
-                            void *opaque, Error **errp,
-                            const BlockJobDriver *driver,
-                            bool is_none_mode, BlockDriverState *base)
+                             const char *replaces,
+                             int64_t speed, int64_t granularity,
+                             int64_t buf_size,
+                             BlockdevOnError on_source_error,
+                             BlockdevOnError on_target_error,
+                             BlockDriverCompletionFunc *cb,
+                             void *opaque, Error **errp,
+                             const BlockJobDriver *driver,
+                             bool is_none_mode, BlockDriverState *base)
 {
     MirrorBlockJob *s;
 
@@ -601,6 +641,7 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
         return;
     }
 
+    s->replaces = g_strdup(replaces);
     s->on_source_error = on_source_error;
     s->on_target_error = on_target_error;
     s->target = target;
@@ -622,6 +663,7 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
 }
 
 void mirror_start(BlockDriverState *bs, BlockDriverState *target,
+                  const char *replaces,
                   int64_t speed, int64_t granularity, int64_t buf_size,
                   MirrorSyncMode mode, BlockdevOnError on_source_error,
                   BlockdevOnError on_target_error,
@@ -633,7 +675,8 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target,
 
     is_none_mode = mode == MIRROR_SYNC_MODE_NONE;
     base = mode == MIRROR_SYNC_MODE_TOP ? bs->backing_hd : NULL;
-    mirror_start_job(bs, target, speed, granularity, buf_size,
+    mirror_start_job(bs, target, replaces,
+                     speed, granularity, buf_size,
                      on_source_error, on_target_error, cb, opaque, errp,
                      &mirror_job_driver, is_none_mode, base);
 }
@@ -681,7 +724,7 @@ void commit_active_start(BlockDriverState *bs, BlockDriverState *base,
     }
 
     bdrv_ref(base);
-    mirror_start_job(bs, base, speed, 0, 0,
+    mirror_start_job(bs, base, NULL, speed, 0, 0,
                      on_error, on_error, cb, opaque, &local_err,
                      &commit_active_job_driver, false, base);
     if (local_err) {
diff --git a/block/nfs.c b/block/nfs.c
index ec43201817..8439e0d389 100644
--- a/block/nfs.c
+++ b/block/nfs.c
@@ -304,17 +304,27 @@ static int64_t nfs_client_open(NFSClient *client, const char *filename,
 
     qp = query_params_parse(uri->query);
     for (i = 0; i < qp->n; i++) {
+        unsigned long long val;
         if (!qp->p[i].value) {
             error_setg(errp, "Value for NFS parameter expected: %s",
                        qp->p[i].name);
             goto fail;
         }
-        if (!strncmp(qp->p[i].name, "uid", 3)) {
-            nfs_set_uid(client->context, atoi(qp->p[i].value));
-        } else if (!strncmp(qp->p[i].name, "gid", 3)) {
-            nfs_set_gid(client->context, atoi(qp->p[i].value));
-        } else if (!strncmp(qp->p[i].name, "tcp-syncnt", 10)) {
-            nfs_set_tcp_syncnt(client->context, atoi(qp->p[i].value));
+        if (parse_uint_full(qp->p[i].value, &val, 0)) {
+            error_setg(errp, "Illegal value for NFS parameter: %s",
+                       qp->p[i].name);
+            goto fail;
+        }
+        if (!strcmp(qp->p[i].name, "uid")) {
+            nfs_set_uid(client->context, val);
+        } else if (!strcmp(qp->p[i].name, "gid")) {
+            nfs_set_gid(client->context, val);
+        } else if (!strcmp(qp->p[i].name, "tcp-syncnt")) {
+            nfs_set_tcp_syncnt(client->context, val);
+#ifdef LIBNFS_FEATURE_READAHEAD
+        } else if (!strcmp(qp->p[i].name, "readahead")) {
+            nfs_set_readahead(client->context, val);
+#endif
         } else {
             error_setg(errp, "Unknown NFS parameter name: %s",
                        qp->p[i].name);
diff --git a/block/qapi.c b/block/qapi.c
index 97e16418ef..f44f6b4012 100644
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -293,7 +293,7 @@ void bdrv_query_info(BlockDriverState *bs,
     qapi_free_BlockInfo(info);
 }
 
-BlockStats *bdrv_query_stats(const BlockDriverState *bs)
+static BlockStats *bdrv_query_stats(const BlockDriverState *bs)
 {
     BlockStats *s;
 
@@ -360,7 +360,11 @@ BlockStatsList *qmp_query_blockstats(Error **errp)
 
      while ((bs = bdrv_next(bs))) {
         BlockStatsList *info = g_malloc0(sizeof(*info));
+        AioContext *ctx = bdrv_get_aio_context(bs);
+
+        aio_context_acquire(ctx);
         info->value = bdrv_query_stats(bs);
+        aio_context_release(ctx);
 
         *p_next = info;
         p_next = &info->next;
diff --git a/block/qcow.c b/block/qcow.c
index 1f2bac8a5f..a874056cf3 100644
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -941,6 +941,7 @@ static BlockDriver bdrv_qcow = {
     .bdrv_reopen_prepare    = qcow_reopen_prepare,
     .bdrv_create            = qcow_create,
     .bdrv_has_zero_init     = bdrv_has_zero_init_1,
+    .supports_backing       = true,
 
     .bdrv_co_readv          = qcow_co_readv,
     .bdrv_co_writev         = qcow_co_writev,
diff --git a/block/qcow2.c b/block/qcow2.c
index b9d2fa6632..67e55c9667 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -2418,6 +2418,7 @@ static BlockDriver bdrv_qcow2 = {
     .bdrv_save_vmstate    = qcow2_save_vmstate,
     .bdrv_load_vmstate    = qcow2_load_vmstate,
 
+    .supports_backing           = true,
     .bdrv_change_backing_file   = qcow2_change_backing_file,
 
     .bdrv_refresh_limits        = qcow2_refresh_limits,
diff --git a/block/qed.c b/block/qed.c
index 092e6fb1d2..eddae929eb 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -1652,6 +1652,7 @@ static BlockDriver bdrv_qed = {
     .format_name              = "qed",
     .instance_size            = sizeof(BDRVQEDState),
     .create_opts              = &qed_create_opts,
+    .supports_backing         = true,
 
     .bdrv_probe               = bdrv_qed_probe,
     .bdrv_rebind              = bdrv_qed_rebind,
diff --git a/block/quorum.c b/block/quorum.c
index 86802d306b..d5ee9c0059 100644
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -23,6 +23,7 @@
 
 #define QUORUM_OPT_VOTE_THRESHOLD "vote-threshold"
 #define QUORUM_OPT_BLKVERIFY      "blkverify"
+#define QUORUM_OPT_REWRITE        "rewrite-corrupted"
 
 /* This union holds a vote hash value */
 typedef union QuorumVoteValue {
@@ -70,6 +71,9 @@ typedef struct BDRVQuorumState {
                             * It is useful to debug other block drivers by
                             * comparing them with a reference one.
                             */
+    bool rewrite_corrupted;/* true if the driver must rewrite-on-read corrupted
+                            * block if Quorum is reached.
+                            */
 } BDRVQuorumState;
 
 typedef struct QuorumAIOCB QuorumAIOCB;
@@ -105,13 +109,17 @@ struct QuorumAIOCB {
     int count;                  /* number of completed AIOCB */
     int success_count;          /* number of successfully completed AIOCB */
 
+    int rewrite_count;          /* number of replica to rewrite: count down to
+                                 * zero once writes are fired
+                                 */
+
     QuorumVotes votes;
 
     bool is_read;
     int vote_ret;
 };
 
-static void quorum_vote(QuorumAIOCB *acb);
+static bool quorum_vote(QuorumAIOCB *acb);
 
 static void quorum_aio_cancel(BlockDriverAIOCB *blockacb)
 {
@@ -183,6 +191,7 @@ static QuorumAIOCB *quorum_aio_get(BDRVQuorumState *s,
     acb->qcrs = g_new0(QuorumChildRequest, s->num_children);
     acb->count = 0;
     acb->success_count = 0;
+    acb->rewrite_count = 0;
     acb->votes.compare = quorum_sha256_compare;
     QLIST_INIT(&acb->votes.vote_list);
     acb->is_read = false;
@@ -232,11 +241,27 @@ static bool quorum_has_too_much_io_failed(QuorumAIOCB *acb)
     return false;
 }
 
+static void quorum_rewrite_aio_cb(void *opaque, int ret)
+{
+    QuorumAIOCB *acb = opaque;
+
+    /* one less rewrite to do */
+    acb->rewrite_count--;
+
+    /* wait until all rewrite callbacks have completed */
+    if (acb->rewrite_count) {
+        return;
+    }
+
+    quorum_aio_finalize(acb);
+}
+
 static void quorum_aio_cb(void *opaque, int ret)
 {
     QuorumChildRequest *sacb = opaque;
     QuorumAIOCB *acb = sacb->parent;
     BDRVQuorumState *s = acb->common.bs->opaque;
+    bool rewrite = false;
 
     sacb->ret = ret;
     acb->count++;
@@ -253,12 +278,15 @@ static void quorum_aio_cb(void *opaque, int ret)
 
     /* Do the vote on read */
     if (acb->is_read) {
-        quorum_vote(acb);
+        rewrite = quorum_vote(acb);
     } else {
         quorum_has_too_much_io_failed(acb);
     }
 
-    quorum_aio_finalize(acb);
+    /* if no rewrite is done the code will finish right away */
+    if (!rewrite) {
+        quorum_aio_finalize(acb);
+    }
 }
 
 static void quorum_report_bad_versions(BDRVQuorumState *s,
@@ -278,6 +306,43 @@ static void quorum_report_bad_versions(BDRVQuorumState *s,
     }
 }
 
+static bool quorum_rewrite_bad_versions(BDRVQuorumState *s, QuorumAIOCB *acb,
+                                        QuorumVoteValue *value)
+{
+    QuorumVoteVersion *version;
+    QuorumVoteItem *item;
+    int count = 0;
+
+    /* first count the number of bad versions: done first to avoid concurrency
+     * issues.
+     */
+    QLIST_FOREACH(version, &acb->votes.vote_list, next) {
+        if (acb->votes.compare(&version->value, value)) {
+            continue;
+        }
+        QLIST_FOREACH(item, &version->items, next) {
+            count++;
+        }
+    }
+
+    /* quorum_rewrite_aio_cb will count down this to zero */
+    acb->rewrite_count = count;
+
+    /* now fire the correcting rewrites */
+    QLIST_FOREACH(version, &acb->votes.vote_list, next) {
+        if (acb->votes.compare(&version->value, value)) {
+            continue;
+        }
+        QLIST_FOREACH(item, &version->items, next) {
+            bdrv_aio_writev(s->bs[item->index], acb->sector_num, acb->qiov,
+                            acb->nb_sectors, quorum_rewrite_aio_cb, acb);
+        }
+    }
+
+    /* return true if any rewrite is done else false */
+    return count;
+}
+
 static void quorum_copy_qiov(QEMUIOVector *dest, QEMUIOVector *source)
 {
     int i;
@@ -468,16 +533,17 @@ static int quorum_vote_error(QuorumAIOCB *acb)
     return ret;
 }
 
-static void quorum_vote(QuorumAIOCB *acb)
+static bool quorum_vote(QuorumAIOCB *acb)
 {
     bool quorum = true;
+    bool rewrite = false;
     int i, j, ret;
     QuorumVoteValue hash;
     BDRVQuorumState *s = acb->common.bs->opaque;
     QuorumVoteVersion *winner;
 
     if (quorum_has_too_much_io_failed(acb)) {
-        return;
+        return false;
     }
 
     /* get the index of the first successful read */
@@ -505,7 +571,7 @@ static void quorum_vote(QuorumAIOCB *acb)
     /* Every successful read agrees */
     if (quorum) {
         quorum_copy_qiov(acb->qiov, &acb->qcrs[i].qiov);
-        return;
+        return false;
     }
 
     /* compute hashes for each successful read, also store indexes */
@@ -538,9 +604,15 @@ static void quorum_vote(QuorumAIOCB *acb)
     /* some versions are bad print them */
     quorum_report_bad_versions(s, acb, &winner->value);
 
+    /* corruption correction is enabled */
+    if (s->rewrite_corrupted) {
+        rewrite = quorum_rewrite_bad_versions(s, acb, &winner->value);
+    }
+
 free_exit:
     /* free lists */
     quorum_free_vote_list(&acb->votes);
+    return rewrite;
 }
 
 static BlockDriverAIOCB *quorum_aio_readv(BlockDriverState *bs,
@@ -705,6 +777,11 @@ static QemuOptsList quorum_runtime_opts = {
             .type = QEMU_OPT_BOOL,
             .help = "Trigger block verify mode if set",
         },
+        {
+            .name = QUORUM_OPT_REWRITE,
+            .type = QEMU_OPT_BOOL,
+            .help = "Rewrite corrupted block on read quorum",
+        },
         { /* end of list */ }
     },
 };
@@ -766,6 +843,14 @@ static int quorum_open(BlockDriverState *bs, QDict *options, int flags,
                 "and using two files with vote_threshold=2\n");
     }
 
+    s->rewrite_corrupted = qemu_opt_get_bool(opts, QUORUM_OPT_REWRITE, false);
+    if (s->rewrite_corrupted && s->is_blkverify) {
+        error_setg(&local_err,
+                   "rewrite-corrupted=on cannot be used with blkverify=on");
+        ret = -EINVAL;
+        goto exit;
+    }
+
     /* allocate the children BlockDriverState array */
     s->bs = g_new0(BlockDriverState *, s->num_children);
     opened = g_new0(bool, s->num_children);
diff --git a/block/vmdk.c b/block/vmdk.c
index 83dd6fe4fb..d0de0193fc 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -2180,6 +2180,7 @@ static BlockDriver bdrv_vmdk = {
     .bdrv_detach_aio_context      = vmdk_detach_aio_context,
     .bdrv_attach_aio_context      = vmdk_attach_aio_context,
 
+    .supports_backing             = true,
     .create_opts                  = &vmdk_create_opts,
 };
 
diff --git a/blockdev.c b/blockdev.c
index 03ab153d01..69b7c2a8c5 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1819,6 +1819,11 @@ void qmp_block_resize(bool has_device, const char *device,
         return;
     }
 
+    if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_RESIZE, NULL)) {
+        error_set(errp, QERR_DEVICE_IN_USE, device);
+        return;
+    }
+
     /* complete all in-flight operations before resizing the device */
     bdrv_drain_all();
 
@@ -2094,6 +2099,8 @@ BlockDeviceInfoList *qmp_query_named_block_nodes(Error **errp)
 
 void qmp_drive_mirror(const char *device, const char *target,
                       bool has_format, const char *format,
+                      bool has_node_name, const char *node_name,
+                      bool has_replaces, const char *replaces,
                       enum MirrorSyncMode sync,
                       bool has_mode, enum NewImageMode mode,
                       bool has_speed, int64_t speed,
@@ -2107,6 +2114,7 @@ void qmp_drive_mirror(const char *device, const char *target,
     BlockDriverState *source, *target_bs;
     BlockDriver *drv = NULL;
     Error *local_err = NULL;
+    QDict *options = NULL;
     int flags;
     int64_t size;
     int ret;
@@ -2180,6 +2188,29 @@ void qmp_drive_mirror(const char *device, const char *target,
         return;
     }
 
+    if (has_replaces) {
+        BlockDriverState *to_replace_bs;
+
+        if (!has_node_name) {
+            error_setg(errp, "a node-name must be provided when replacing a"
+                             " named node of the graph");
+            return;
+        }
+
+        to_replace_bs = check_to_replace_node(replaces, &local_err);
+
+        if (!to_replace_bs) {
+            error_propagate(errp, local_err);
+            return;
+        }
+
+        if (size != bdrv_getlength(to_replace_bs)) {
+            error_setg(errp, "cannot replace image with a mirror image of "
+                             "different size");
+            return;
+        }
+    }
+
     if ((sync == MIRROR_SYNC_MODE_FULL || !source)
         && mode != NEW_IMAGE_MODE_EXISTING)
     {
@@ -2208,18 +2239,28 @@ void qmp_drive_mirror(const char *device, const char *target,
         return;
     }
 
+    if (has_node_name) {
+        options = qdict_new();
+        qdict_put(options, "node-name", qstring_from_str(node_name));
+    }
+
     /* Mirroring takes care of copy-on-write using the source's backing
      * file.
      */
     target_bs = NULL;
-    ret = bdrv_open(&target_bs, target, NULL, NULL, flags | BDRV_O_NO_BACKING,
-                    drv, &local_err);
+    ret = bdrv_open(&target_bs, target, NULL, options,
+                    flags | BDRV_O_NO_BACKING, drv, &local_err);
     if (ret < 0) {
         error_propagate(errp, local_err);
         return;
     }
 
-    mirror_start(bs, target_bs, speed, granularity, buf_size, sync,
+    /* pass the node name to replace to mirror start since it's loose coupling
+     * and will allow to check whether the node still exist at mirror completion
+     */
+    mirror_start(bs, target_bs,
+                 has_replaces ? replaces : NULL,
+                 speed, granularity, buf_size, sync,
                  on_source_error, on_target_error,
                  block_job_cb, bs, &local_err);
     if (local_err != NULL) {
diff --git a/blockjob.c b/blockjob.c
index 4da86cdfcd..67a64ea318 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -210,6 +210,20 @@ void block_job_sleep_ns(BlockJob *job, QEMUClockType type, int64_t ns)
     job->busy = true;
 }
 
+void block_job_yield(BlockJob *job)
+{
+    assert(job->busy);
+
+    /* Check cancellation *before* setting busy = false, too!  */
+    if (block_job_is_cancelled(job)) {
+        return;
+    }
+
+    job->busy = false;
+    qemu_coroutine_yield();
+    job->busy = true;
+}
+
 BlockJobInfo *block_job_query(BlockJob *job)
 {
     BlockJobInfo *info = g_new0(BlockJobInfo, 1);
@@ -256,7 +270,11 @@ void block_job_event_completed(BlockJob *job, const char *msg)
 
 void block_job_event_ready(BlockJob *job)
 {
-    qapi_event_send_block_job_ready(bdrv_get_device_name(job->bs), &error_abort);
+    qapi_event_send_block_job_ready(job->driver->job_type,
+                                    bdrv_get_device_name(job->bs),
+                                    job->len,
+                                    job->offset,
+                                    job->speed, &error_abort);
 }
 
 BlockErrorAction block_job_error_action(BlockJob *job, BlockDriverState *bs,
@@ -282,7 +300,7 @@ BlockErrorAction block_job_error_action(BlockJob *job, BlockDriverState *bs,
     default:
         abort();
     }
-    qapi_event_send_block_job_error(bdrv_get_device_name(bs),
+    qapi_event_send_block_job_error(bdrv_get_device_name(job->bs),
                                     is_read ? IO_OPERATION_TYPE_READ :
                                     IO_OPERATION_TYPE_WRITE,
                                     action, &error_abort);
diff --git a/configure b/configure
index 71029643cc..23ecb37c43 100755
--- a/configure
+++ b/configure
@@ -5273,6 +5273,18 @@ if test "$docs" = "yes" ; then
   mkdir -p QMP
 fi
 
+# set up qemu-iotests in this build directory
+iotests_common_env="tests/qemu-iotests/common.env"
+iotests_check="tests/qemu-iotests/check"
+
+echo "# Automatically generated by configure - do not modify" > "$iotests_common_env"
+echo >> "$iotests_common_env"
+echo "export PYTHON='$python'" >> "$iotests_common_env"
+
+if [ ! -e "$iotests_check" ]; then
+    symlink "$source_path/$iotests_check" "$iotests_check"
+fi
+
 # Save the configure command line for later reuse.
 cat <<EOD >config.status
 #!/bin/sh
diff --git a/docs/qapi-code-gen.txt b/docs/qapi-code-gen.txt
index 3a0c99e1da..a6197a9133 100644
--- a/docs/qapi-code-gen.txt
+++ b/docs/qapi-code-gen.txt
@@ -218,10 +218,10 @@ An example command is:
 === Events ===
 
 Events are defined with the keyword 'event'.  When 'data' is also specified,
-additional info will be carried on.  Finally there will be C API generated
-in qapi-event.h; when called by QEMU code, a message with timestamp will
-be emitted on the wire.  If timestamp is -1, it means failure to retrieve host
-time.
+additional info will be included in the event.  Finally there will be C API
+generated in qapi-event.h; when called by QEMU code, a message with timestamp
+will be emitted on the wire.  If timestamp is -1, it means failure to retrieve
+host time.
 
 An example event is:
 
diff --git a/docs/qmp/qmp-events.txt b/docs/qmp/qmp-events.txt
new file mode 100644
index 0000000000..44be891261
--- /dev/null
+++ b/docs/qmp/qmp-events.txt
@@ -0,0 +1,559 @@
+                   QEMU Machine Protocol Events
+                   ============================
+
+ACPI_DEVICE_OST
+---------------
+
+Emitted when guest executes ACPI _OST method.
+
+ - data: ACPIOSTInfo type as described in qapi-schema.json
+
+{ "event": "ACPI_DEVICE_OST",
+     "data": { "device": "d1", "slot": "0", "slot-type": "DIMM", "source": 1, "status": 0 } }
+
+BALLOON_CHANGE
+--------------
+
+Emitted when the guest changes the actual BALLOON level. This
+value is equivalent to the 'actual' field return by the
+'query-balloon' command
+
+Data:
+
+- "actual": actual level of the guest memory balloon in bytes (json-number)
+
+Example:
+
+{ "event": "BALLOON_CHANGE",
+    "data": { "actual": 944766976 },
+    "timestamp": { "seconds": 1267020223, "microseconds": 435656 } }
+
+BLOCK_IMAGE_CORRUPTED
+---------------------
+
+Emitted when a disk image is being marked corrupt.
+
+Data:
+
+- "device": Device name (json-string)
+- "msg":    Informative message (e.g., reason for the corruption) (json-string)
+- "offset": If the corruption resulted from an image access, this is the access
+            offset into the image (json-int)
+- "size":   If the corruption resulted from an image access, this is the access
+            size (json-int)
+
+Example:
+
+{ "event": "BLOCK_IMAGE_CORRUPTED",
+    "data": { "device": "ide0-hd0",
+        "msg": "Prevented active L1 table overwrite", "offset": 196608,
+        "size": 65536 },
+    "timestamp": { "seconds": 1378126126, "microseconds": 966463 } }
+
+BLOCK_IO_ERROR
+--------------
+
+Emitted when a disk I/O error occurs.
+
+Data:
+
+- "device": device name (json-string)
+- "operation": I/O operation (json-string, "read" or "write")
+- "action": action that has been taken, it's one of the following (json-string):
+    "ignore": error has been ignored
+    "report": error has been reported to the device
+    "stop": the VM is going to stop because of the error
+
+Example:
+
+{ "event": "BLOCK_IO_ERROR",
+    "data": { "device": "ide0-hd1",
+              "operation": "write",
+              "action": "stop" },
+    "timestamp": { "seconds": 1265044230, "microseconds": 450486 } }
+
+Note: If action is "stop", a STOP event will eventually follow the
+BLOCK_IO_ERROR event.
+
+BLOCK_JOB_CANCELLED
+-------------------
+
+Emitted when a block job has been cancelled.
+
+Data:
+
+- "type":     Job type (json-string; "stream" for image streaming
+                                     "commit" for block commit)
+- "device":   Device name (json-string)
+- "len":      Maximum progress value (json-int)
+- "offset":   Current progress value (json-int)
+              On success this is equal to len.
+              On failure this is less than len.
+- "speed":    Rate limit, bytes per second (json-int)
+
+Example:
+
+{ "event": "BLOCK_JOB_CANCELLED",
+     "data": { "type": "stream", "device": "virtio-disk0",
+               "len": 10737418240, "offset": 134217728,
+               "speed": 0 },
+     "timestamp": { "seconds": 1267061043, "microseconds": 959568 } }
+
+BLOCK_JOB_COMPLETED
+-------------------
+
+Emitted when a block job has completed.
+
+Data:
+
+- "type":     Job type (json-string; "stream" for image streaming
+                                     "commit" for block commit)
+- "device":   Device name (json-string)
+- "len":      Maximum progress value (json-int)
+- "offset":   Current progress value (json-int)
+              On success this is equal to len.
+              On failure this is less than len.
+- "speed":    Rate limit, bytes per second (json-int)
+- "error":    Error message (json-string, optional)
+              Only present on failure.  This field contains a human-readable
+              error message.  There are no semantics other than that streaming
+              has failed and clients should not try to interpret the error
+              string.
+
+Example:
+
+{ "event": "BLOCK_JOB_COMPLETED",
+     "data": { "type": "stream", "device": "virtio-disk0",
+               "len": 10737418240, "offset": 10737418240,
+               "speed": 0 },
+     "timestamp": { "seconds": 1267061043, "microseconds": 959568 } }
+
+BLOCK_JOB_ERROR
+---------------
+
+Emitted when a block job encounters an error.
+
+Data:
+
+- "device": device name (json-string)
+- "operation": I/O operation (json-string, "read" or "write")
+- "action": action that has been taken, it's one of the following (json-string):
+    "ignore": error has been ignored, the job may fail later
+    "report": error will be reported and the job canceled
+    "stop": error caused job to be paused
+
+Example:
+
+{ "event": "BLOCK_JOB_ERROR",
+    "data": { "device": "ide0-hd1",
+              "operation": "write",
+              "action": "stop" },
+    "timestamp": { "seconds": 1265044230, "microseconds": 450486 } }
+
+BLOCK_JOB_READY
+---------------
+
+Emitted when a block job is ready to complete.
+
+Data:
+
+- "type":     Job type (json-string; "stream" for image streaming
+                                     "commit" for block commit)
+- "device":   Device name (json-string)
+- "len":      Maximum progress value (json-int)
+- "offset":   Current progress value (json-int)
+              On success this is equal to len.
+              On failure this is less than len.
+- "speed":    Rate limit, bytes per second (json-int)
+
+Example:
+
+{ "event": "BLOCK_JOB_READY",
+    "data": { "device": "drive0", "type": "mirror", "speed": 0,
+              "len": 2097152, "offset": 2097152 }
+    "timestamp": { "seconds": 1265044230, "microseconds": 450486 } }
+
+Note: The "ready to complete" status is always reset by a BLOCK_JOB_ERROR
+event.
+
+DEVICE_DELETED
+--------------
+
+Emitted whenever the device removal completion is acknowledged
+by the guest.
+At this point, it's safe to reuse the specified device ID.
+Device removal can be initiated by the guest or by HMP/QMP commands.
+
+Data:
+
+- "device": device name (json-string, optional)
+- "path": device path (json-string)
+
+{ "event": "DEVICE_DELETED",
+  "data": { "device": "virtio-net-pci-0",
+            "path": "/machine/peripheral/virtio-net-pci-0" },
+  "timestamp": { "seconds": 1265044230, "microseconds": 450486 } }
+
+DEVICE_TRAY_MOVED
+-----------------
+
+It's emitted whenever the tray of a removable device is moved by the guest
+or by HMP/QMP commands.
+
+Data:
+
+- "device": device name (json-string)
+- "tray-open": true if the tray has been opened or false if it has been closed
+               (json-bool)
+
+{ "event": "DEVICE_TRAY_MOVED",
+  "data": { "device": "ide1-cd0",
+            "tray-open": true
+  },
+  "timestamp": { "seconds": 1265044230, "microseconds": 450486 } }
+
+GUEST_PANICKED
+--------------
+
+Emitted when guest OS panic is detected.
+
+Data:
+
+- "action": Action that has been taken (json-string, currently always "pause").
+
+Example:
+
+{ "event": "GUEST_PANICKED",
+     "data": { "action": "pause" } }
+
+NIC_RX_FILTER_CHANGED
+---------------------
+
+The event is emitted once until the query command is executed,
+the first event will always be emitted.
+
+Data:
+
+- "name": net client name (json-string)
+- "path": device path (json-string)
+
+{ "event": "NIC_RX_FILTER_CHANGED",
+  "data": { "name": "vnet0",
+            "path": "/machine/peripheral/vnet0/virtio-backend" },
+  "timestamp": { "seconds": 1368697518, "microseconds": 326866 } }
+}
+
+QUORUM_FAILURE
+--------------
+
+Emitted by the Quorum block driver if it fails to establish a quorum.
+
+Data:
+
+- "reference":    device name if defined else node name.
+- "sector-num":   Number of the first sector of the failed read operation.
+- "sector-count": Failed read operation sector count.
+
+Example:
+
+{ "event": "QUORUM_FAILURE",
+     "data": { "reference": "usr1", "sector-num": 345435, "sector-count": 5 },
+     "timestamp": { "seconds": 1344522075, "microseconds": 745528 } }
+
+QUORUM_REPORT_BAD
+-----------------
+
+Emitted to report a corruption of a Quorum file.
+
+Data:
+
+- "error":        Error message (json-string, optional)
+                  Only present on failure.  This field contains a human-readable
+                  error message.  There are no semantics other than that the
+                  block layer reported an error and clients should not try to
+                  interpret the error string.
+- "node-name":    The graph node name of the block driver state.
+- "sector-num":   Number of the first sector of the failed read operation.
+- "sector-count": Failed read operation sector count.
+
+Example:
+
+{ "event": "QUORUM_REPORT_BAD",
+     "data": { "node-name": "1.raw", "sector-num": 345435, "sector-count": 5 },
+     "timestamp": { "seconds": 1344522075, "microseconds": 745528 } }
+
+RESET
+-----
+
+Emitted when the Virtual Machine is reseted.
+
+Data: None.
+
+Example:
+
+{ "event": "RESET",
+    "timestamp": { "seconds": 1267041653, "microseconds": 9518 } }
+
+RESUME
+------
+
+Emitted when the Virtual Machine resumes execution.
+
+Data: None.
+
+Example:
+
+{ "event": "RESUME",
+    "timestamp": { "seconds": 1271770767, "microseconds": 582542 } }
+
+RTC_CHANGE
+----------
+
+Emitted when the guest changes the RTC time.
+
+Data:
+
+- "offset": Offset between base RTC clock (as specified by -rtc base), and
+new RTC clock value (json-number)
+
+Example:
+
+{ "event": "RTC_CHANGE",
+    "data": { "offset": 78 },
+    "timestamp": { "seconds": 1267020223, "microseconds": 435656 } }
+
+SHUTDOWN
+--------
+
+Emitted when the Virtual Machine is powered down.
+
+Data: None.
+
+Example:
+
+{ "event": "SHUTDOWN",
+    "timestamp": { "seconds": 1267040730, "microseconds": 682951 } }
+
+Note: If the command-line option "-no-shutdown" has been specified, a STOP
+event will eventually follow the SHUTDOWN event.
+
+SPICE_CONNECTED, SPICE_DISCONNECTED
+-----------------------------------
+
+Emitted when a SPICE client connects or disconnects.
+
+Data:
+
+- "server": Server information (json-object)
+  - "host": IP address (json-string)
+  - "port": port number (json-string)
+  - "family": address family (json-string, "ipv4" or "ipv6")
+- "client": Client information (json-object)
+  - "host": IP address (json-string)
+  - "port": port number (json-string)
+  - "family": address family (json-string, "ipv4" or "ipv6")
+
+Example:
+
+{ "timestamp": {"seconds": 1290688046, "microseconds": 388707},
+  "event": "SPICE_CONNECTED",
+  "data": {
+    "server": { "port": "5920", "family": "ipv4", "host": "127.0.0.1"},
+    "client": {"port": "52873", "family": "ipv4", "host": "127.0.0.1"}
+}}
+
+SPICE_INITIALIZED
+-----------------
+
+Emitted after initial handshake and authentication takes place (if any)
+and the SPICE channel is up'n'running
+
+Data:
+
+- "server": Server information (json-object)
+  - "host": IP address (json-string)
+  - "port": port number (json-string)
+  - "family": address family (json-string, "ipv4" or "ipv6")
+  - "auth": authentication method (json-string, optional)
+- "client": Client information (json-object)
+  - "host": IP address (json-string)
+  - "port": port number (json-string)
+  - "family": address family (json-string, "ipv4" or "ipv6")
+  - "connection-id": spice connection id.  All channels with the same id
+                     belong to the same spice session (json-int)
+  - "channel-type": channel type.  "1" is the main control channel, filter for
+                    this one if you want track spice sessions only (json-int)
+  - "channel-id": channel id.  Usually "0", might be different needed when
+                  multiple channels of the same type exist, such as multiple
+                  display channels in a multihead setup (json-int)
+  - "tls": whevener the channel is encrypted (json-bool)
+
+Example:
+
+{ "timestamp": {"seconds": 1290688046, "microseconds": 417172},
+  "event": "SPICE_INITIALIZED",
+  "data": {"server": {"auth": "spice", "port": "5921",
+                      "family": "ipv4", "host": "127.0.0.1"},
+           "client": {"port": "49004", "family": "ipv4", "channel-type": 3,
+                      "connection-id": 1804289383, "host": "127.0.0.1",
+                      "channel-id": 0, "tls": true}
+}}
+
+STOP
+----
+
+Emitted when the Virtual Machine is stopped.
+
+Data: None.
+
+Example:
+
+{ "event": "STOP",
+    "timestamp": { "seconds": 1267041730, "microseconds": 281295 } }
+
+SUSPEND
+-------
+
+Emitted when guest enters S3 state.
+
+Data: None.
+
+Example:
+
+{ "event": "SUSPEND",
+     "timestamp": { "seconds": 1344456160, "microseconds": 309119 } }
+
+SUSPEND_DISK
+------------
+
+Emitted when the guest makes a request to enter S4 state.
+
+Data: None.
+
+Example:
+
+{ "event": "SUSPEND_DISK",
+     "timestamp": { "seconds": 1344456160, "microseconds": 309119 } }
+
+Note: QEMU shuts down when entering S4 state.
+
+VNC_CONNECTED
+-------------
+
+Emitted when a VNC client establishes a connection.
+
+Data:
+
+- "server": Server information (json-object)
+  - "host": IP address (json-string)
+  - "service": port number (json-string)
+  - "family": address family (json-string, "ipv4" or "ipv6")
+  - "auth": authentication method (json-string, optional)
+- "client": Client information (json-object)
+  - "host": IP address (json-string)
+  - "service": port number (json-string)
+  - "family": address family (json-string, "ipv4" or "ipv6")
+
+Example:
+
+{ "event": "VNC_CONNECTED",
+    "data": {
+        "server": { "auth": "sasl", "family": "ipv4",
+                    "service": "5901", "host": "0.0.0.0" },
+        "client": { "family": "ipv4", "service": "58425",
+                    "host": "127.0.0.1" } },
+    "timestamp": { "seconds": 1262976601, "microseconds": 975795 } }
+
+
+Note: This event is emitted before any authentication takes place, thus
+the authentication ID is not provided.
+
+VNC_DISCONNECTED
+----------------
+
+Emitted when the connection is closed.
+
+Data:
+
+- "server": Server information (json-object)
+  - "host": IP address (json-string)
+  - "service": port number (json-string)
+  - "family": address family (json-string, "ipv4" or "ipv6")
+  - "auth": authentication method (json-string, optional)
+- "client": Client information (json-object)
+  - "host": IP address (json-string)
+  - "service": port number (json-string)
+  - "family": address family (json-string, "ipv4" or "ipv6")
+  - "x509_dname": TLS dname (json-string, optional)
+  - "sasl_username": SASL username (json-string, optional)
+
+Example:
+
+{ "event": "VNC_DISCONNECTED",
+    "data": {
+        "server": { "auth": "sasl", "family": "ipv4",
+                    "service": "5901", "host": "0.0.0.0" },
+        "client": { "family": "ipv4", "service": "58425",
+                    "host": "127.0.0.1", "sasl_username": "luiz" } },
+    "timestamp": { "seconds": 1262976601, "microseconds": 975795 } }
+
+VNC_INITIALIZED
+---------------
+
+Emitted after authentication takes place (if any) and the VNC session is
+made active.
+
+Data:
+
+- "server": Server information (json-object)
+  - "host": IP address (json-string)
+  - "service": port number (json-string)
+  - "family": address family (json-string, "ipv4" or "ipv6")
+  - "auth": authentication method (json-string, optional)
+- "client": Client information (json-object)
+  - "host": IP address (json-string)
+  - "service": port number (json-string)
+  - "family": address family (json-string, "ipv4" or "ipv6")
+  - "x509_dname": TLS dname (json-string, optional)
+  - "sasl_username": SASL username (json-string, optional)
+
+Example:
+
+{ "event": "VNC_INITIALIZED",
+    "data": {
+        "server": { "auth": "sasl", "family": "ipv4",
+                    "service": "5901", "host": "0.0.0.0"},
+        "client": { "family": "ipv4", "service": "46089",
+                    "host": "127.0.0.1", "sasl_username": "luiz" } },
+        "timestamp": { "seconds": 1263475302, "microseconds": 150772 } }
+
+WAKEUP
+------
+
+Emitted when the guest has woken up from S3 and is running.
+
+Data: None.
+
+Example:
+
+{ "event": "WAKEUP",
+     "timestamp": { "seconds": 1344522075, "microseconds": 745528 } }
+
+WATCHDOG
+--------
+
+Emitted when the watchdog device's timer is expired.
+
+Data:
+
+- "action": Action that has been taken, it's one of the following (json-string):
+            "reset", "shutdown", "poweroff", "pause", "debug", or "none"
+
+Example:
+
+{ "event": "WATCHDOG",
+     "data": { "action": "reset" },
+     "timestamp": { "seconds": 1267061043, "microseconds": 959568 } }
+
+Note: If action is "reset", "shutdown", or "pause" the WATCHDOG event is
+followed respectively by the RESET, SHUTDOWN, or STOP events.
diff --git a/hmp.c b/hmp.c
index dc3d2795d2..6429e6b447 100644
--- a/hmp.c
+++ b/hmp.c
@@ -933,6 +933,7 @@ void hmp_drive_mirror(Monitor *mon, const QDict *qdict)
     }
 
     qmp_drive_mirror(device, filename, !!format, format,
+                     false, NULL, false, NULL,
                      full ? MIRROR_SYNC_MODE_FULL : MIRROR_SYNC_MODE_TOP,
                      true, mode, false, 0, false, 0, false, 0,
                      false, 0, false, 0, &err);
diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
index c10b7b70fb..09bd2c70ab 100644
--- a/hw/block/dataplane/virtio-blk.c
+++ b/hw/block/dataplane/virtio-blk.c
@@ -24,16 +24,6 @@
 #include "hw/virtio/virtio-bus.h"
 #include "qom/object_interfaces.h"
 
-typedef struct {
-    VirtIOBlockDataPlane *s;
-    QEMUIOVector *inhdr;            /* iovecs for virtio_blk_inhdr */
-    VirtQueueElement *elem;         /* saved data from the virtqueue */
-    QEMUIOVector qiov;              /* original request iovecs */
-    struct iovec bounce_iov;        /* used if guest buffers are unaligned */
-    QEMUIOVector bounce_qiov;       /* bounce buffer iovecs */
-    bool read;                      /* read or write? */
-} VirtIOBlockRequest;
-
 struct VirtIOBlockDataPlane {
     bool started;
     bool starting;
@@ -57,6 +47,8 @@ struct VirtIOBlockDataPlane {
 
     /* Operation blocker on BDS */
     Error *blocker;
+    void (*saved_complete_request)(struct VirtIOBlockReq *req,
+                                   unsigned char status);
 };
 
 /* Raise an interrupt to signal guest, if necessary */
@@ -69,215 +61,14 @@ static void notify_guest(VirtIOBlockDataPlane *s)
     event_notifier_set(s->guest_notifier);
 }
 
-static void complete_rdwr(void *opaque, int ret)
-{
-    VirtIOBlockRequest *req = opaque;
-    struct virtio_blk_inhdr hdr;
-    int len;
-
-    if (likely(ret == 0)) {
-        hdr.status = VIRTIO_BLK_S_OK;
-        len = req->qiov.size;
-    } else {
-        hdr.status = VIRTIO_BLK_S_IOERR;
-        len = 0;
-    }
-
-    trace_virtio_blk_data_plane_complete_request(req->s, req->elem->index, ret);
-
-    if (req->read && req->bounce_iov.iov_base) {
-        qemu_iovec_from_buf(&req->qiov, 0, req->bounce_iov.iov_base, len);
-    }
-
-    if (req->bounce_iov.iov_base) {
-        qemu_vfree(req->bounce_iov.iov_base);
-    }
-
-    qemu_iovec_from_buf(req->inhdr, 0, &hdr, sizeof(hdr));
-    qemu_iovec_destroy(req->inhdr);
-    g_slice_free(QEMUIOVector, req->inhdr);
-
-    /* According to the virtio specification len should be the number of bytes
-     * written to, but for virtio-blk it seems to be the number of bytes
-     * transferred plus the status bytes.
-     */
-    vring_push(&req->s->vring, req->elem, len + sizeof(hdr));
-    notify_guest(req->s);
-    g_slice_free(VirtIOBlockRequest, req);
-}
-
-static void complete_request_early(VirtIOBlockDataPlane *s, VirtQueueElement *elem,
-                                   QEMUIOVector *inhdr, unsigned char status)
-{
-    struct virtio_blk_inhdr hdr = {
-        .status = status,
-    };
-
-    qemu_iovec_from_buf(inhdr, 0, &hdr, sizeof(hdr));
-    qemu_iovec_destroy(inhdr);
-    g_slice_free(QEMUIOVector, inhdr);
-
-    vring_push(&s->vring, elem, sizeof(hdr));
-    notify_guest(s);
-}
-
-/* Get disk serial number */
-static void do_get_id_cmd(VirtIOBlockDataPlane *s,
-                          struct iovec *iov, unsigned int iov_cnt,
-                          VirtQueueElement *elem, QEMUIOVector *inhdr)
-{
-    char id[VIRTIO_BLK_ID_BYTES];
-
-    /* Serial number not NUL-terminated when longer than buffer */
-    strncpy(id, s->blk->serial ? s->blk->serial : "", sizeof(id));
-    iov_from_buf(iov, iov_cnt, 0, id, sizeof(id));
-    complete_request_early(s, elem, inhdr, VIRTIO_BLK_S_OK);
-}
-
-static void do_rdwr_cmd(VirtIOBlockDataPlane *s, bool read,
-                        struct iovec *iov, unsigned iov_cnt,
-                        int64_t sector_num, VirtQueueElement *elem,
-                        QEMUIOVector *inhdr)
-{
-    VirtIOBlockRequest *req = g_slice_new0(VirtIOBlockRequest);
-    QEMUIOVector *qiov;
-    int nb_sectors;
-
-    /* Fill in virtio block metadata needed for completion */
-    req->s = s;
-    req->elem = elem;
-    req->inhdr = inhdr;
-    req->read = read;
-    qemu_iovec_init_external(&req->qiov, iov, iov_cnt);
-
-    qiov = &req->qiov;
-
-    if (!bdrv_qiov_is_aligned(s->blk->conf.bs, qiov)) {
-        void *bounce_buffer = qemu_blockalign(s->blk->conf.bs, qiov->size);
-
-        /* Populate bounce buffer with data for writes */
-        if (!read) {
-            qemu_iovec_to_buf(qiov, 0, bounce_buffer, qiov->size);
-        }
-
-        /* Redirect I/O to aligned bounce buffer */
-        req->bounce_iov.iov_base = bounce_buffer;
-        req->bounce_iov.iov_len = qiov->size;
-        qemu_iovec_init_external(&req->bounce_qiov, &req->bounce_iov, 1);
-        qiov = &req->bounce_qiov;
-    }
-
-    nb_sectors = qiov->size / BDRV_SECTOR_SIZE;
-
-    if (read) {
-        bdrv_aio_readv(s->blk->conf.bs, sector_num, qiov, nb_sectors,
-                       complete_rdwr, req);
-    } else {
-        bdrv_aio_writev(s->blk->conf.bs, sector_num, qiov, nb_sectors,
-                        complete_rdwr, req);
-    }
-}
-
-static void complete_flush(void *opaque, int ret)
-{
-    VirtIOBlockRequest *req = opaque;
-    unsigned char status;
-
-    if (ret == 0) {
-        status = VIRTIO_BLK_S_OK;
-    } else {
-        status = VIRTIO_BLK_S_IOERR;
-    }
-
-    complete_request_early(req->s, req->elem, req->inhdr, status);
-    g_slice_free(VirtIOBlockRequest, req);
-}
-
-static void do_flush_cmd(VirtIOBlockDataPlane *s, VirtQueueElement *elem,
-                         QEMUIOVector *inhdr)
+static void complete_request_vring(VirtIOBlockReq *req, unsigned char status)
 {
-    VirtIOBlockRequest *req = g_slice_new(VirtIOBlockRequest);
-    req->s = s;
-    req->elem = elem;
-    req->inhdr = inhdr;
+    stb_p(&req->in->status, status);
 
-    bdrv_aio_flush(s->blk->conf.bs, complete_flush, req);
-}
-
-static void do_scsi_cmd(VirtIOBlockDataPlane *s, VirtQueueElement *elem,
-                        QEMUIOVector *inhdr)
-{
-    int status;
-
-    status = virtio_blk_handle_scsi_req(VIRTIO_BLK(s->vdev), elem);
-    complete_request_early(s, elem, inhdr, status);
-}
-
-static int process_request(VirtIOBlockDataPlane *s, VirtQueueElement *elem)
-{
-    struct iovec *iov = elem->out_sg;
-    struct iovec *in_iov = elem->in_sg;
-    unsigned out_num = elem->out_num;
-    unsigned in_num = elem->in_num;
-    struct virtio_blk_outhdr outhdr;
-    QEMUIOVector *inhdr;
-    size_t in_size;
-
-    /* Copy in outhdr */
-    if (unlikely(iov_to_buf(iov, out_num, 0, &outhdr,
-                            sizeof(outhdr)) != sizeof(outhdr))) {
-        error_report("virtio-blk request outhdr too short");
-        return -EFAULT;
-    }
-    iov_discard_front(&iov, &out_num, sizeof(outhdr));
-
-    /* Grab inhdr for later */
-    in_size = iov_size(in_iov, in_num);
-    if (in_size < sizeof(struct virtio_blk_inhdr)) {
-        error_report("virtio_blk request inhdr too short");
-        return -EFAULT;
-    }
-    inhdr = g_slice_new(QEMUIOVector);
-    qemu_iovec_init(inhdr, 1);
-    qemu_iovec_concat_iov(inhdr, in_iov, in_num,
-            in_size - sizeof(struct virtio_blk_inhdr),
-            sizeof(struct virtio_blk_inhdr));
-    iov_discard_back(in_iov, &in_num, sizeof(struct virtio_blk_inhdr));
-
-    /* TODO Linux sets the barrier bit even when not advertised! */
-    outhdr.type &= ~VIRTIO_BLK_T_BARRIER;
-
-    switch (outhdr.type) {
-    case VIRTIO_BLK_T_IN:
-        do_rdwr_cmd(s, true, in_iov, in_num,
-                    outhdr.sector * 512 / BDRV_SECTOR_SIZE,
-                    elem, inhdr);
-        return 0;
-
-    case VIRTIO_BLK_T_OUT:
-        do_rdwr_cmd(s, false, iov, out_num,
-                    outhdr.sector * 512 / BDRV_SECTOR_SIZE,
-                    elem, inhdr);
-        return 0;
-
-    case VIRTIO_BLK_T_SCSI_CMD:
-        do_scsi_cmd(s, elem, inhdr);
-        return 0;
-
-    case VIRTIO_BLK_T_FLUSH:
-        do_flush_cmd(s, elem, inhdr);
-        return 0;
-
-    case VIRTIO_BLK_T_GET_ID:
-        do_get_id_cmd(s, in_iov, in_num, elem, inhdr);
-        return 0;
-
-    default:
-        error_report("virtio-blk unsupported request type %#x", outhdr.type);
-        qemu_iovec_destroy(inhdr);
-        g_slice_free(QEMUIOVector, inhdr);
-        return -EFAULT;
-    }
+    vring_push(&req->dev->dataplane->vring, req->elem,
+               req->qiov.size + sizeof(*req->in));
+    notify_guest(req->dev->dataplane);
+    g_slice_free(VirtIOBlockReq, req);
 }
 
 static void handle_notify(EventNotifier *e)
@@ -286,7 +77,11 @@ static void handle_notify(EventNotifier *e)
                                            host_notifier);
 
     VirtQueueElement *elem;
+    VirtIOBlockReq *req;
     int ret;
+    MultiReqBuffer mrb = {
+        .num_writes = 0,
+    };
 
     event_notifier_test_and_clear(&s->host_notifier);
     for (;;) {
@@ -303,14 +98,14 @@ static void handle_notify(EventNotifier *e)
             trace_virtio_blk_data_plane_process_request(s, elem->out_num,
                                                         elem->in_num, elem->index);
 
-            if (process_request(s, elem) < 0) {
-                vring_set_broken(&s->vring);
-                vring_free_element(elem);
-                ret = -EFAULT;
-                break;
-            }
+            req = g_slice_new(VirtIOBlockReq);
+            req->dev = VIRTIO_BLK(s->vdev);
+            req->elem = elem;
+            virtio_blk_handle_request(req, &mrb);
         }
 
+        virtio_submit_multiwrite(s->blk->conf.bs, &mrb);
+
         if (likely(ret == -EAGAIN)) { /* vring emptied */
             /* Re-enable guest->host notifies and stop processing the vring.
              * But if the guest has snuck in more descriptors, keep processing.
@@ -330,6 +125,7 @@ void virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *blk,
                                   Error **errp)
 {
     VirtIOBlockDataPlane *s;
+    VirtIOBlock *vblk = VIRTIO_BLK(vdev);
     Error *local_err = NULL;
 
     *dataplane = NULL;
@@ -372,6 +168,8 @@ void virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *blk,
     bdrv_op_block_all(blk->conf.bs, s->blocker);
 
     *dataplane = s;
+    s->saved_complete_request = vblk->complete_request;
+    vblk->complete_request = complete_request_vring;
 }
 
 /* Context: QEMU global mutex held */
@@ -446,10 +244,12 @@ void virtio_blk_data_plane_stop(VirtIOBlockDataPlane *s)
 {
     BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(s->vdev)));
     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
+    VirtIOBlock *vblk = VIRTIO_BLK(s->vdev);
     if (!s->started || s->stopping) {
         return;
     }
     s->stopping = true;
+    vblk->complete_request = s->saved_complete_request;
     trace_virtio_blk_data_plane_stop(s);
 
     aio_context_acquire(s->ctx);
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index 08562ea390..a222e3f9a4 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -12,6 +12,7 @@
  */
 
 #include "qemu-common.h"
+#include "qemu/iov.h"
 #include "qemu/error-report.h"
 #include "trace.h"
 #include "hw/block/block.h"
@@ -27,18 +28,24 @@
 #endif
 #include "hw/virtio/virtio-bus.h"
 
-typedef struct VirtIOBlockReq
+static VirtIOBlockReq *virtio_blk_alloc_request(VirtIOBlock *s)
+{
+    VirtIOBlockReq *req = g_slice_new0(VirtIOBlockReq);
+    req->dev = s;
+    req->elem = g_slice_new0(VirtQueueElement);
+    return req;
+}
+
+static void virtio_blk_free_request(VirtIOBlockReq *req)
 {
-    VirtIOBlock *dev;
-    VirtQueueElement elem;
-    struct virtio_blk_inhdr *in;
-    struct virtio_blk_outhdr *out;
-    QEMUIOVector qiov;
-    struct VirtIOBlockReq *next;
-    BlockAcctCookie acct;
-} VirtIOBlockReq;
+    if (req) {
+        g_slice_free(VirtQueueElement, req->elem);
+        g_slice_free(VirtIOBlockReq, req);
+    }
+}
 
-static void virtio_blk_req_complete(VirtIOBlockReq *req, int status)
+static void virtio_blk_complete_request(VirtIOBlockReq *req,
+                                        unsigned char status)
 {
     VirtIOBlock *s = req->dev;
     VirtIODevice *vdev = VIRTIO_DEVICE(s);
@@ -46,10 +53,15 @@ static void virtio_blk_req_complete(VirtIOBlockReq *req, int status)
     trace_virtio_blk_req_complete(req, status);
 
     stb_p(&req->in->status, status);
-    virtqueue_push(s->vq, &req->elem, req->qiov.size + sizeof(*req->in));
+    virtqueue_push(s->vq, req->elem, req->qiov.size + sizeof(*req->in));
     virtio_notify(vdev, s->vq);
 }
 
+static void virtio_blk_req_complete(VirtIOBlockReq *req, unsigned char status)
+{
+    req->dev->complete_request(req, status);
+}
+
 static int virtio_blk_handle_rw_error(VirtIOBlockReq *req, int error,
     bool is_read)
 {
@@ -62,7 +74,7 @@ static int virtio_blk_handle_rw_error(VirtIOBlockReq *req, int error,
     } else if (action == BLOCK_ERROR_ACTION_REPORT) {
         virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR);
         bdrv_acct_done(s->bs, &req->acct);
-        g_free(req);
+        virtio_blk_free_request(req);
     }
 
     bdrv_error_action(s->bs, action, is_read, error);
@@ -76,14 +88,14 @@ static void virtio_blk_rw_complete(void *opaque, int ret)
     trace_virtio_blk_rw_complete(req, ret);
 
     if (ret) {
-        bool is_read = !(ldl_p(&req->out->type) & VIRTIO_BLK_T_OUT);
+        bool is_read = !(ldl_p(&req->out.type) & VIRTIO_BLK_T_OUT);
         if (virtio_blk_handle_rw_error(req, -ret, is_read))
             return;
     }
 
     virtio_blk_req_complete(req, VIRTIO_BLK_S_OK);
     bdrv_acct_done(req->dev->bs, &req->acct);
-    g_free(req);
+    virtio_blk_free_request(req);
 }
 
 static void virtio_blk_flush_complete(void *opaque, int ret)
@@ -98,27 +110,16 @@ static void virtio_blk_flush_complete(void *opaque, int ret)
 
     virtio_blk_req_complete(req, VIRTIO_BLK_S_OK);
     bdrv_acct_done(req->dev->bs, &req->acct);
-    g_free(req);
-}
-
-static VirtIOBlockReq *virtio_blk_alloc_request(VirtIOBlock *s)
-{
-    VirtIOBlockReq *req = g_malloc(sizeof(*req));
-    req->dev = s;
-    req->qiov.size = 0;
-    req->next = NULL;
-    return req;
+    virtio_blk_free_request(req);
 }
 
 static VirtIOBlockReq *virtio_blk_get_request(VirtIOBlock *s)
 {
     VirtIOBlockReq *req = virtio_blk_alloc_request(s);
 
-    if (req != NULL) {
-        if (!virtqueue_pop(s->vq, &req->elem)) {
-            g_free(req);
-            return NULL;
-        }
+    if (!virtqueue_pop(s->vq, req->elem)) {
+        virtio_blk_free_request(req);
+        return NULL;
     }
 
     return req;
@@ -247,17 +248,12 @@ static void virtio_blk_handle_scsi(VirtIOBlockReq *req)
 {
     int status;
 
-    status = virtio_blk_handle_scsi_req(req->dev, &req->elem);
+    status = virtio_blk_handle_scsi_req(req->dev, req->elem);
     virtio_blk_req_complete(req, status);
-    g_free(req);
+    virtio_blk_free_request(req);
 }
 
-typedef struct MultiReqBuffer {
-    BlockRequest        blkreq[32];
-    unsigned int        num_writes;
-} MultiReqBuffer;
-
-static void virtio_submit_multiwrite(BlockDriverState *bs, MultiReqBuffer *mrb)
+void virtio_submit_multiwrite(BlockDriverState *bs, MultiReqBuffer *mrb)
 {
     int i, ret;
 
@@ -293,7 +289,7 @@ static void virtio_blk_handle_write(VirtIOBlockReq *req, MultiReqBuffer *mrb)
     BlockRequest *blkreq;
     uint64_t sector;
 
-    sector = ldq_p(&req->out->sector);
+    sector = ldq_p(&req->out.sector);
 
     bdrv_acct_start(req->dev->bs, &req->acct, req->qiov.size, BDRV_ACCT_WRITE);
 
@@ -327,7 +323,7 @@ static void virtio_blk_handle_read(VirtIOBlockReq *req)
 {
     uint64_t sector;
 
-    sector = ldq_p(&req->out->sector);
+    sector = ldq_p(&req->out.sector);
 
     bdrv_acct_start(req->dev->bs, &req->acct, req->qiov.size, BDRV_ACCT_READ);
 
@@ -346,26 +342,39 @@ static void virtio_blk_handle_read(VirtIOBlockReq *req)
                    virtio_blk_rw_complete, req);
 }
 
-static void virtio_blk_handle_request(VirtIOBlockReq *req,
-    MultiReqBuffer *mrb)
+void virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb)
 {
     uint32_t type;
+    struct iovec *in_iov = req->elem->in_sg;
+    struct iovec *iov = req->elem->out_sg;
+    unsigned in_num = req->elem->in_num;
+    unsigned out_num = req->elem->out_num;
 
-    if (req->elem.out_num < 1 || req->elem.in_num < 1) {
+    if (req->elem->out_num < 1 || req->elem->in_num < 1) {
         error_report("virtio-blk missing headers");
         exit(1);
     }
 
-    if (req->elem.out_sg[0].iov_len < sizeof(*req->out) ||
-        req->elem.in_sg[req->elem.in_num - 1].iov_len < sizeof(*req->in)) {
-        error_report("virtio-blk header not in correct element");
+    if (unlikely(iov_to_buf(iov, out_num, 0, &req->out,
+                            sizeof(req->out)) != sizeof(req->out))) {
+        error_report("virtio-blk request outhdr too short");
+        exit(1);
+    }
+
+    iov_discard_front(&iov, &out_num, sizeof(req->out));
+
+    if (in_num < 1 ||
+        in_iov[in_num - 1].iov_len < sizeof(struct virtio_blk_inhdr)) {
+        error_report("virtio-blk request inhdr too short");
         exit(1);
     }
 
-    req->out = (void *)req->elem.out_sg[0].iov_base;
-    req->in = (void *)req->elem.in_sg[req->elem.in_num - 1].iov_base;
+    req->in = (void *)in_iov[in_num - 1].iov_base
+              + in_iov[in_num - 1].iov_len
+              - sizeof(struct virtio_blk_inhdr);
+    iov_discard_back(in_iov, &in_num, sizeof(struct virtio_blk_inhdr));
 
-    type = ldl_p(&req->out->type);
+    type = ldl_p(&req->out.type);
 
     if (type & VIRTIO_BLK_T_FLUSH) {
         virtio_blk_handle_flush(req, mrb);
@@ -378,23 +387,23 @@ static void virtio_blk_handle_request(VirtIOBlockReq *req,
          * NB: per existing s/n string convention the string is
          * terminated by '\0' only when shorter than buffer.
          */
-        strncpy(req->elem.in_sg[0].iov_base,
+        strncpy(req->elem->in_sg[0].iov_base,
                 s->blk.serial ? s->blk.serial : "",
-                MIN(req->elem.in_sg[0].iov_len, VIRTIO_BLK_ID_BYTES));
+                MIN(req->elem->in_sg[0].iov_len, VIRTIO_BLK_ID_BYTES));
         virtio_blk_req_complete(req, VIRTIO_BLK_S_OK);
-        g_free(req);
+        virtio_blk_free_request(req);
     } else if (type & VIRTIO_BLK_T_OUT) {
-        qemu_iovec_init_external(&req->qiov, &req->elem.out_sg[1],
-                                 req->elem.out_num - 1);
+        qemu_iovec_init_external(&req->qiov, &req->elem->out_sg[1],
+                                 req->elem->out_num - 1);
         virtio_blk_handle_write(req, mrb);
     } else if (type == VIRTIO_BLK_T_IN || type == VIRTIO_BLK_T_BARRIER) {
         /* VIRTIO_BLK_T_IN is 0, so we can't just & it. */
-        qemu_iovec_init_external(&req->qiov, &req->elem.in_sg[0],
-                                 req->elem.in_num - 1);
+        qemu_iovec_init_external(&req->qiov, &req->elem->in_sg[0],
+                                 req->elem->in_num - 1);
         virtio_blk_handle_read(req);
     } else {
         virtio_blk_req_complete(req, VIRTIO_BLK_S_UNSUPP);
-        g_free(req);
+        virtio_blk_free_request(req);
     }
 }
 
@@ -460,7 +469,8 @@ static void virtio_blk_dma_restart_cb(void *opaque, int running,
     }
 
     if (!s->bh) {
-        s->bh = qemu_bh_new(virtio_blk_dma_restart_bh, s);
+        s->bh = aio_bh_new(bdrv_get_aio_context(s->blk.conf.bs),
+                           virtio_blk_dma_restart_bh, s);
         qemu_bh_schedule(s->bh);
     }
 }
@@ -609,7 +619,8 @@ static void virtio_blk_save(QEMUFile *f, void *opaque)
     
     while (req) {
         qemu_put_sbyte(f, 1);
-        qemu_put_buffer(f, (unsigned char*)&req->elem, sizeof(req->elem));
+        qemu_put_buffer(f, (unsigned char *)req->elem,
+                        sizeof(VirtQueueElement));
         req = req->next;
     }
     qemu_put_sbyte(f, 0);
@@ -631,14 +642,15 @@ static int virtio_blk_load(QEMUFile *f, void *opaque, int version_id)
 
     while (qemu_get_sbyte(f)) {
         VirtIOBlockReq *req = virtio_blk_alloc_request(s);
-        qemu_get_buffer(f, (unsigned char*)&req->elem, sizeof(req->elem));
+        qemu_get_buffer(f, (unsigned char *)req->elem,
+                        sizeof(VirtQueueElement));
         req->next = s->rq;
         s->rq = req;
 
-        virtqueue_map_sg(req->elem.in_sg, req->elem.in_addr,
-            req->elem.in_num, 1);
-        virtqueue_map_sg(req->elem.out_sg, req->elem.out_addr,
-            req->elem.out_num, 0);
+        virtqueue_map_sg(req->elem->in_sg, req->elem->in_addr,
+            req->elem->in_num, 1);
+        virtqueue_map_sg(req->elem->out_sg, req->elem->out_addr,
+            req->elem->out_num, 0);
     }
 
     return 0;
@@ -729,6 +741,7 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp)
     s->sector_mask = (s->conf->logical_block_size / BDRV_SECTOR_SIZE) - 1;
 
     s->vq = virtio_add_queue(vdev, 128, virtio_blk_handle_output);
+    s->complete_request = virtio_blk_complete_request;
 #ifdef CONFIG_VIRTIO_BLK_DATA_PLANE
     virtio_blk_data_plane_create(vdev, blk, &s->dataplane, &err);
     if (err != NULL) {
diff --git a/hw/char/virtio-console.c b/hw/char/virtio-console.c
index 6c8be0fe26..54eb15f3af 100644
--- a/hw/char/virtio-console.c
+++ b/hw/char/virtio-console.c
@@ -14,6 +14,7 @@
 #include "qemu/error-report.h"
 #include "trace.h"
 #include "hw/virtio/virtio-serial.h"
+#include "qapi-event.h"
 
 #define TYPE_VIRTIO_CONSOLE_SERIAL_PORT "virtserialport"
 #define VIRTIO_CONSOLE(obj) \
@@ -81,11 +82,16 @@ static ssize_t flush_buf(VirtIOSerialPort *port,
 static void set_guest_connected(VirtIOSerialPort *port, int guest_connected)
 {
     VirtConsole *vcon = VIRTIO_CONSOLE(port);
+    DeviceState *dev = DEVICE(port);
 
-    if (!vcon->chr) {
-        return;
+    if (vcon->chr) {
+        qemu_chr_fe_set_open(vcon->chr, guest_connected);
+    }
+
+    if (dev->id) {
+        qapi_event_send_vserport_change(dev->id, guest_connected,
+                                        &error_abort);
     }
-    qemu_chr_fe_set_open(vcon->chr, guest_connected);
 }
 
 /* Readiness of the guest to accept data on a port */
diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c
index de433b2e38..52c2f8afa5 100644
--- a/hw/core/qdev-properties-system.c
+++ b/hw/core/qdev-properties-system.c
@@ -180,7 +180,6 @@ PropertyInfo qdev_prop_chr = {
 static int parse_netdev(DeviceState *dev, const char *str, void **ptr)
 {
     NICPeers *peers_ptr = (NICPeers *)ptr;
-    NICConf *conf = container_of(peers_ptr, NICConf, peers);
     NetClientState **ncs = peers_ptr->ncs;
     NetClientState *peers[MAX_QUEUE_NUM];
     int queues, i = 0;
@@ -219,7 +218,7 @@ static int parse_netdev(DeviceState *dev, const char *str, void **ptr)
         ncs[i]->queue_index = i;
     }
 
-    conf->queues = queues;
+    peers_ptr->queues = queues;
 
     return 0;
 
diff --git a/hw/intc/xics.c b/hw/intc/xics.c
index 76dd6f5708..0fd2a84c7b 100644
--- a/hw/intc/xics.c
+++ b/hw/intc/xics.c
@@ -437,7 +437,7 @@ static void ics_set_irq(void *opaque, int srcno, int val)
 {
     ICSState *ics = (ICSState *)opaque;
 
-    if (ics->islsi[srcno]) {
+    if (ics->irqs[srcno].flags & XICS_FLAGS_IRQ_LSI) {
         set_irq_lsi(ics, srcno, val);
     } else {
         set_irq_msi(ics, srcno, val);
@@ -474,7 +474,7 @@ static void ics_write_xive(ICSState *ics, int nr, int server,
 
     trace_xics_ics_write_xive(nr, srcno, server, priority);
 
-    if (ics->islsi[srcno]) {
+    if (ics->irqs[srcno].flags & XICS_FLAGS_IRQ_LSI) {
         write_xive_lsi(ics, srcno);
     } else {
         write_xive_msi(ics, srcno);
@@ -496,7 +496,7 @@ static void ics_resend(ICSState *ics)
 
     for (i = 0; i < ics->nr_irqs; i++) {
         /* FIXME: filter by server#? */
-        if (ics->islsi[i]) {
+        if (ics->irqs[i].flags & XICS_FLAGS_IRQ_LSI) {
             resend_lsi(ics, i);
         } else {
             resend_msi(ics, i);
@@ -511,7 +511,7 @@ static void ics_eoi(ICSState *ics, int nr)
 
     trace_xics_ics_eoi(nr);
 
-    if (ics->islsi[srcno]) {
+    if (ics->irqs[srcno].flags & XICS_FLAGS_IRQ_LSI) {
         irq->status &= ~XICS_STATUS_SENT;
     }
 }
@@ -520,11 +520,18 @@ static void ics_reset(DeviceState *dev)
 {
     ICSState *ics = ICS(dev);
     int i;
+    uint8_t flags[ics->nr_irqs];
+
+    for (i = 0; i < ics->nr_irqs; i++) {
+        flags[i] = ics->irqs[i].flags;
+    }
 
     memset(ics->irqs, 0, sizeof(ICSIRQState) * ics->nr_irqs);
+
     for (i = 0; i < ics->nr_irqs; i++) {
         ics->irqs[i].priority = 0xff;
         ics->irqs[i].saved_priority = 0xff;
+        ics->irqs[i].flags = flags[i];
     }
 }
 
@@ -563,13 +570,14 @@ static int ics_dispatch_post_load(void *opaque, int version_id)
 
 static const VMStateDescription vmstate_ics_irq = {
     .name = "ics/irq",
-    .version_id = 1,
+    .version_id = 2,
     .minimum_version_id = 1,
     .fields = (VMStateField[]) {
         VMSTATE_UINT32(server, ICSIRQState),
         VMSTATE_UINT8(priority, ICSIRQState),
         VMSTATE_UINT8(saved_priority, ICSIRQState),
         VMSTATE_UINT8(status, ICSIRQState),
+        VMSTATE_UINT8(flags, ICSIRQState),
         VMSTATE_END_OF_LIST()
     },
 };
@@ -606,7 +614,6 @@ static void ics_realize(DeviceState *dev, Error **errp)
         return;
     }
     ics->irqs = g_malloc0(ics->nr_irqs * sizeof(ICSIRQState));
-    ics->islsi = g_malloc0(ics->nr_irqs * sizeof(bool));
     ics->qirqs = qemu_allocate_irqs(ics_set_irq, ics, ics->nr_irqs);
 }
 
@@ -633,21 +640,166 @@ static const TypeInfo ics_info = {
 /*
  * Exported functions
  */
+static int xics_find_source(XICSState *icp, int irq)
+{
+    int sources = 1;
+    int src;
+
+    /* FIXME: implement multiple sources */
+    for (src = 0; src < sources; ++src) {
+        ICSState *ics = &icp->ics[src];
+        if (ics_valid_irq(ics, irq)) {
+            return src;
+        }
+    }
+
+    return -1;
+}
 
 qemu_irq xics_get_qirq(XICSState *icp, int irq)
 {
-    if (!ics_valid_irq(icp->ics, irq)) {
-        return NULL;
+    int src = xics_find_source(icp, irq);
+
+    if (src >= 0) {
+        ICSState *ics = &icp->ics[src];
+        return ics->qirqs[irq - ics->offset];
     }
 
-    return icp->ics->qirqs[irq - icp->ics->offset];
+    return NULL;
+}
+
+static void ics_set_irq_type(ICSState *ics, int srcno, bool lsi)
+{
+    assert(!(ics->irqs[srcno].flags & XICS_FLAGS_IRQ_MASK));
+
+    ics->irqs[srcno].flags |=
+        lsi ? XICS_FLAGS_IRQ_LSI : XICS_FLAGS_IRQ_MSI;
 }
 
 void xics_set_irq_type(XICSState *icp, int irq, bool lsi)
 {
-    assert(ics_valid_irq(icp->ics, irq));
+    int src = xics_find_source(icp, irq);
+    ICSState *ics;
 
-    icp->ics->islsi[irq - icp->ics->offset] = lsi;
+    assert(src >= 0);
+
+    ics = &icp->ics[src];
+    ics_set_irq_type(ics, irq - ics->offset, lsi);
+}
+
+#define ICS_IRQ_FREE(ics, srcno)   \
+    (!((ics)->irqs[(srcno)].flags & (XICS_FLAGS_IRQ_MASK)))
+
+static int ics_find_free_block(ICSState *ics, int num, int alignnum)
+{
+    int first, i;
+
+    for (first = 0; first < ics->nr_irqs; first += alignnum) {
+        if (num > (ics->nr_irqs - first)) {
+            return -1;
+        }
+        for (i = first; i < first + num; ++i) {
+            if (!ICS_IRQ_FREE(ics, i)) {
+                break;
+            }
+        }
+        if (i == (first + num)) {
+            return first;
+        }
+    }
+
+    return -1;
+}
+
+int xics_alloc(XICSState *icp, int src, int irq_hint, bool lsi)
+{
+    ICSState *ics = &icp->ics[src];
+    int irq;
+
+    if (irq_hint) {
+        assert(src == xics_find_source(icp, irq_hint));
+        if (!ICS_IRQ_FREE(ics, irq_hint - ics->offset)) {
+            trace_xics_alloc_failed_hint(src, irq_hint);
+            return -1;
+        }
+        irq = irq_hint;
+    } else {
+        irq = ics_find_free_block(ics, 1, 1);
+        if (irq < 0) {
+            trace_xics_alloc_failed_no_left(src);
+            return -1;
+        }
+        irq += ics->offset;
+    }
+
+    ics_set_irq_type(ics, irq - ics->offset, lsi);
+    trace_xics_alloc(src, irq);
+
+    return irq;
+}
+
+/*
+ * Allocate block of consequtive IRQs, returns a number of the first.
+ * If align==true, aligns the first IRQ number to num.
+ */
+int xics_alloc_block(XICSState *icp, int src, int num, bool lsi, bool align)
+{
+    int i, first = -1;
+    ICSState *ics = &icp->ics[src];
+
+    assert(src == 0);
+    /*
+     * MSIMesage::data is used for storing VIRQ so
+     * it has to be aligned to num to support multiple
+     * MSI vectors. MSI-X is not affected by this.
+     * The hint is used for the first IRQ, the rest should
+     * be allocated continuously.
+     */
+    if (align) {
+        assert((num == 1) || (num == 2) || (num == 4) ||
+               (num == 8) || (num == 16) || (num == 32));
+        first = ics_find_free_block(ics, num, num);
+    } else {
+        first = ics_find_free_block(ics, num, 1);
+    }
+
+    if (first >= 0) {
+        for (i = first; i < first + num; ++i) {
+            ics_set_irq_type(ics, i, lsi);
+        }
+    }
+    first += ics->offset;
+
+    trace_xics_alloc_block(src, first, num, lsi, align);
+
+    return first;
+}
+
+static void ics_free(ICSState *ics, int srcno, int num)
+{
+    int i;
+
+    for (i = srcno; i < srcno + num; ++i) {
+        if (ICS_IRQ_FREE(ics, i)) {
+            trace_xics_ics_free_warn(ics - ics->icp->ics, i + ics->offset);
+        }
+        memset(&ics->irqs[i], 0, sizeof(ICSIRQState));
+    }
+}
+
+void xics_free(XICSState *icp, int irq, int num)
+{
+    int src = xics_find_source(icp, irq);
+
+    if (src >= 0) {
+        ICSState *ics = &icp->ics[src];
+
+        /* FIXME: implement multiple sources */
+        assert(src == 0);
+
+        trace_xics_ics_free(ics - icp->ics, irq, num);
+        ics_free(ics, irq - ics->offset, num);
+    }
 }
 
 /*
@@ -866,10 +1018,10 @@ static void xics_realize(DeviceState *dev, Error **errp)
     }
 
     /* Registration of global state belongs into realize */
-    spapr_rtas_register("ibm,set-xive", rtas_set_xive);
-    spapr_rtas_register("ibm,get-xive", rtas_get_xive);
-    spapr_rtas_register("ibm,int-off", rtas_int_off);
-    spapr_rtas_register("ibm,int-on", rtas_int_on);
+    spapr_rtas_register(RTAS_IBM_SET_XIVE, "ibm,set-xive", rtas_set_xive);
+    spapr_rtas_register(RTAS_IBM_GET_XIVE, "ibm,get-xive", rtas_get_xive);
+    spapr_rtas_register(RTAS_IBM_INT_OFF, "ibm,int-off", rtas_int_off);
+    spapr_rtas_register(RTAS_IBM_INT_ON, "ibm,int-on", rtas_int_on);
 
     spapr_register_hypercall(H_CPPR, h_cppr);
     spapr_register_hypercall(H_IPI, h_ipi);
diff --git a/hw/intc/xics_kvm.c b/hw/intc/xics_kvm.c
index 09476ae34d..20b19e9d4f 100644
--- a/hw/intc/xics_kvm.c
+++ b/hw/intc/xics_kvm.c
@@ -38,10 +38,6 @@
 typedef struct KVMXICSState {
     XICSState parent_obj;
 
-    uint32_t set_xive_token;
-    uint32_t get_xive_token;
-    uint32_t int_off_token;
-    uint32_t int_on_token;
     int kernel_xics_fd;
 } KVMXICSState;
 
@@ -224,7 +220,7 @@ static int ics_set_kvm_state(ICSState *ics, int version_id)
             state |= KVM_XICS_MASKED;
         }
 
-        if (ics->islsi[i]) {
+        if (ics->irqs[i].flags & XICS_FLAGS_IRQ_LSI) {
             state |= KVM_XICS_LEVEL_SENSITIVE;
             if (irq->status & XICS_STATUS_ASSERTED) {
                 state |= KVM_XICS_PENDING;
@@ -253,7 +249,7 @@ static void ics_kvm_set_irq(void *opaque, int srcno, int val)
     int rc;
 
     args.irq = srcno + ics->offset;
-    if (!ics->islsi[srcno]) {
+    if (ics->irqs[srcno].flags & XICS_FLAGS_IRQ_MSI) {
         if (!val) {
             return;
         }
@@ -271,11 +267,18 @@ static void ics_kvm_reset(DeviceState *dev)
 {
     ICSState *ics = ICS(dev);
     int i;
+    uint8_t flags[ics->nr_irqs];
+
+    for (i = 0; i < ics->nr_irqs; i++) {
+        flags[i] = ics->irqs[i].flags;
+    }
 
     memset(ics->irqs, 0, sizeof(ICSIRQState) * ics->nr_irqs);
+
     for (i = 0; i < ics->nr_irqs; i++) {
         ics->irqs[i].priority = 0xff;
         ics->irqs[i].saved_priority = 0xff;
+        ics->irqs[i].flags = flags[i];
     }
 
     ics_set_kvm_state(ics, 1);
@@ -290,7 +293,6 @@ static void ics_kvm_realize(DeviceState *dev, Error **errp)
         return;
     }
     ics->irqs = g_malloc0(ics->nr_irqs * sizeof(ICSIRQState));
-    ics->islsi = g_malloc0(ics->nr_irqs * sizeof(bool));
     ics->qirqs = qemu_allocate_irqs(ics_kvm_set_irq, ics, ics->nr_irqs);
 }
 
@@ -392,32 +394,30 @@ static void xics_kvm_realize(DeviceState *dev, Error **errp)
         goto fail;
     }
 
-    icpkvm->set_xive_token = spapr_rtas_register("ibm,set-xive", rtas_dummy);
-    icpkvm->get_xive_token = spapr_rtas_register("ibm,get-xive", rtas_dummy);
-    icpkvm->int_off_token = spapr_rtas_register("ibm,int-off", rtas_dummy);
-    icpkvm->int_on_token = spapr_rtas_register("ibm,int-on", rtas_dummy);
+    spapr_rtas_register(RTAS_IBM_SET_XIVE, "ibm,set-xive", rtas_dummy);
+    spapr_rtas_register(RTAS_IBM_GET_XIVE, "ibm,get-xive", rtas_dummy);
+    spapr_rtas_register(RTAS_IBM_INT_OFF, "ibm,int-off", rtas_dummy);
+    spapr_rtas_register(RTAS_IBM_INT_ON, "ibm,int-on", rtas_dummy);
 
-    rc = kvmppc_define_rtas_kernel_token(icpkvm->set_xive_token,
-                                         "ibm,set-xive");
+    rc = kvmppc_define_rtas_kernel_token(RTAS_IBM_SET_XIVE, "ibm,set-xive");
     if (rc < 0) {
         error_setg(errp, "kvmppc_define_rtas_kernel_token: ibm,set-xive");
         goto fail;
     }
 
-    rc = kvmppc_define_rtas_kernel_token(icpkvm->get_xive_token,
-                                         "ibm,get-xive");
+    rc = kvmppc_define_rtas_kernel_token(RTAS_IBM_GET_XIVE, "ibm,get-xive");
     if (rc < 0) {
         error_setg(errp, "kvmppc_define_rtas_kernel_token: ibm,get-xive");
         goto fail;
     }
 
-    rc = kvmppc_define_rtas_kernel_token(icpkvm->int_on_token, "ibm,int-on");
+    rc = kvmppc_define_rtas_kernel_token(RTAS_IBM_INT_ON, "ibm,int-on");
     if (rc < 0) {
         error_setg(errp, "kvmppc_define_rtas_kernel_token: ibm,int-on");
         goto fail;
     }
 
-    rc = kvmppc_define_rtas_kernel_token(icpkvm->int_off_token, "ibm,int-off");
+    rc = kvmppc_define_rtas_kernel_token(RTAS_IBM_INT_OFF, "ibm,int-off");
     if (rc < 0) {
         error_setg(errp, "kvmppc_define_rtas_kernel_token: ibm,int-off");
         goto fail;
diff --git a/hw/misc/vfio.c b/hw/misc/vfio.c
index 7437c2e3c3..7b279c4f05 100644
--- a/hw/misc/vfio.c
+++ b/hw/misc/vfio.c
@@ -39,6 +39,7 @@
 #include "qemu/range.h"
 #include "sysemu/kvm.h"
 #include "sysemu/sysemu.h"
+#include "hw/misc/vfio.h"
 
 /* #define DEBUG_VFIO */
 #ifdef DEBUG_VFIO
@@ -3649,6 +3650,39 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as)
 
         container->iommu_data.type1.initialized = true;
 
+    } else if (ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_SPAPR_TCE_IOMMU)) {
+        ret = ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &fd);
+        if (ret) {
+            error_report("vfio: failed to set group container: %m");
+            ret = -errno;
+            goto free_container_exit;
+        }
+
+        ret = ioctl(fd, VFIO_SET_IOMMU, VFIO_SPAPR_TCE_IOMMU);
+        if (ret) {
+            error_report("vfio: failed to set iommu for container: %m");
+            ret = -errno;
+            goto free_container_exit;
+        }
+
+        /*
+         * The host kernel code implementing VFIO_IOMMU_DISABLE is called
+         * when container fd is closed so we do not call it explicitly
+         * in this file.
+         */
+        ret = ioctl(fd, VFIO_IOMMU_ENABLE);
+        if (ret) {
+            error_report("vfio: failed to enable container: %m");
+            ret = -errno;
+            goto free_container_exit;
+        }
+
+        container->iommu_data.type1.listener = vfio_memory_listener;
+        container->iommu_data.release = vfio_listener_release;
+
+        memory_listener_register(&container->iommu_data.type1.listener,
+                                 container->space->as);
+
     } else {
         error_report("vfio: No available IOMMU models");
         ret = -EINVAL;
@@ -4318,3 +4352,47 @@ static void register_vfio_pci_dev_type(void)
 }
 
 type_init(register_vfio_pci_dev_type)
+
+static int vfio_container_do_ioctl(AddressSpace *as, int32_t groupid,
+                                   int req, void *param)
+{
+    VFIOGroup *group;
+    VFIOContainer *container;
+    int ret = -1;
+
+    group = vfio_get_group(groupid, as);
+    if (!group) {
+        error_report("vfio: group %d not registered", groupid);
+        return ret;
+    }
+
+    container = group->container;
+    if (group->container) {
+        ret = ioctl(container->fd, req, param);
+        if (ret < 0) {
+            error_report("vfio: failed to ioctl container: ret=%d, %s",
+                         ret, strerror(errno));
+        }
+    }
+
+    vfio_put_group(group);
+
+    return ret;
+}
+
+int vfio_container_ioctl(AddressSpace *as, int32_t groupid,
+                         int req, void *param)
+{
+    /* We allow only certain ioctls to the container */
+    switch (req) {
+    case VFIO_CHECK_EXTENSION:
+    case VFIO_IOMMU_SPAPR_TCE_GET_INFO:
+        break;
+    default:
+        /* Return an error on unknown requests */
+        error_report("vfio: unsupported ioctl %X", req);
+        return -1;
+    }
+
+    return vfio_container_do_ioctl(as, groupid, req, param);
+}
diff --git a/hw/net/eepro100.c b/hw/net/eepro100.c
index aaa3ff2360..3263e3fe90 100644
--- a/hw/net/eepro100.c
+++ b/hw/net/eepro100.c
@@ -1217,7 +1217,6 @@ static void eepro100_write_mdi(EEPRO100State *s)
                 break;
             case 1:            /* Status Register */
                 missing("not writable");
-                data = s->mdimem[reg];
                 break;
             case 2:            /* PHY Identification Register (Word 1) */
             case 3:            /* PHY Identification Register (Word 2) */
@@ -1230,7 +1229,8 @@ static void eepro100_write_mdi(EEPRO100State *s)
             default:
                 missing("not implemented");
             }
-            s->mdimem[reg] = data;
+            s->mdimem[reg] &= eepro100_mdi_mask[reg];
+            s->mdimem[reg] |= data & ~eepro100_mdi_mask[reg];
         } else if (opcode == 2) {
             /* MDI read */
             switch (reg) {
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 00b5e07ddd..e51d753cee 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -1542,7 +1542,7 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp)
 
     virtio_init(vdev, "virtio-net", VIRTIO_ID_NET, n->config_size);
 
-    n->max_queues = MAX(n->nic_conf.queues, 1);
+    n->max_queues = MAX(n->nic_conf.peers.queues, 1);
     n->vqs = g_malloc0(sizeof(VirtIONetQueue) * n->max_queues);
     n->vqs[0].rx_vq = virtio_add_queue(vdev, 256, virtio_net_handle_rx);
     n->curr_queues = 1;
diff --git a/hw/nvram/spapr_nvram.c b/hw/nvram/spapr_nvram.c
index af49c4667d..6a72ef4814 100644
--- a/hw/nvram/spapr_nvram.c
+++ b/hw/nvram/spapr_nvram.c
@@ -153,8 +153,8 @@ static int spapr_nvram_init(VIOsPAPRDevice *dev)
         return -1;
     }
 
-    spapr_rtas_register("nvram-fetch", rtas_nvram_fetch);
-    spapr_rtas_register("nvram-store", rtas_nvram_store);
+    spapr_rtas_register(RTAS_NVRAM_FETCH, "nvram-fetch", rtas_nvram_fetch);
+    spapr_rtas_register(RTAS_NVRAM_STORE, "nvram-store", rtas_nvram_store);
 
     return 0;
 }
diff --git a/hw/pci-host/uninorth.c b/hw/pci-host/uninorth.c
index e72fe2a70b..21f805f314 100644
--- a/hw/pci-host/uninorth.c
+++ b/hw/pci-host/uninorth.c
@@ -230,7 +230,7 @@ PCIBus *pci_pmac_init(qemu_irq *pic,
     d = UNI_NORTH_PCI_HOST_BRIDGE(dev);
     memory_region_init(&d->pci_mmio, OBJECT(d), "pci-mmio", 0x100000000ULL);
     memory_region_init_alias(&d->pci_hole, OBJECT(d), "pci-hole", &d->pci_mmio,
-                             0x80000000ULL, 0x70000000ULL);
+                             0x80000000ULL, 0x10000000ULL);
     memory_region_add_subregion(address_space_mem, 0x80000000ULL,
                                 &d->pci_hole);
 
diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs
index ea747f0a20..edd44d03e7 100644
--- a/hw/ppc/Makefile.objs
+++ b/hw/ppc/Makefile.objs
@@ -4,6 +4,9 @@ obj-y += ppc.o ppc_booke.o
 obj-$(CONFIG_PSERIES) += spapr.o spapr_vio.o spapr_events.o
 obj-$(CONFIG_PSERIES) += spapr_hcall.o spapr_iommu.o spapr_rtas.o
 obj-$(CONFIG_PSERIES) += spapr_pci.o
+ifeq ($(CONFIG_PCI)$(CONFIG_PSERIES)$(CONFIG_LINUX), yyy)
+obj-y += spapr_pci_vfio.o
+endif
 # PowerPC 4xx boards
 obj-y += ppc405_boards.o ppc4xx_devs.o ppc405_uc.o ppc440_bamboo.o
 obj-y += ppc4xx_pci.o
diff --git a/hw/ppc/e500.c b/hw/ppc/e500.c
index a973c18d88..bb2e75fe1b 100644
--- a/hw/ppc/e500.c
+++ b/hw/ppc/e500.c
@@ -316,10 +316,15 @@ static int ppce500_load_device_tree(MachineState *machine,
      * device it finds in the dt as serial output device. And we generate
      * devices in reverse order to the dt.
      */
-    dt_serial_create(fdt, MPC8544_SERIAL1_REGS_OFFSET,
-                     soc, mpic, "serial1", 1, false);
-    dt_serial_create(fdt, MPC8544_SERIAL0_REGS_OFFSET,
-                     soc, mpic, "serial0", 0, true);
+    if (serial_hds[1]) {
+        dt_serial_create(fdt, MPC8544_SERIAL1_REGS_OFFSET,
+                         soc, mpic, "serial1", 1, false);
+    }
+
+    if (serial_hds[0]) {
+        dt_serial_create(fdt, MPC8544_SERIAL0_REGS_OFFSET,
+                         soc, mpic, "serial0", 0, true);
+    }
 
     snprintf(gutil, sizeof(gutil), "%s/global-utilities@%llx", soc,
              MPC8544_UTIL_OFFSET);
diff --git a/hw/ppc/mac_newworld.c b/hw/ppc/mac_newworld.c
index e493dc1b4b..89d3cadf19 100644
--- a/hw/ppc/mac_newworld.c
+++ b/hw/ppc/mac_newworld.c
@@ -373,18 +373,11 @@ static void ppc_core99_init(MachineState *machine)
         machine_arch = ARCH_MAC99;
     }
     /* init basic PC hardware */
-    pci_vga_init(pci_bus);
-
     escc_mem = escc_init(0, pic[0x25], pic[0x24],
                          serial_hds[0], serial_hds[1], ESCC_CLOCK, 4);
     memory_region_init_alias(escc_bar, NULL, "escc-bar",
                              escc_mem, 0, memory_region_size(escc_mem));
 
-    for(i = 0; i < nb_nics; i++)
-        pci_nic_init_nofail(&nd_table[i], pci_bus, "ne2k_pci", NULL);
-
-    ide_drive_get(hd, MAX_IDE_BUS);
-
     macio = pci_create(pci_bus, -1, TYPE_NEWWORLD_MACIO);
     dev = DEVICE(macio);
     qdev_connect_gpio_out(dev, 0, pic[0x19]); /* CUDA */
@@ -395,6 +388,8 @@ static void ppc_core99_init(MachineState *machine)
     macio_init(macio, pic_mem, escc_bar);
 
     /* We only emulate 2 out of 3 IDE controllers for now */
+    ide_drive_get(hd, MAX_IDE_BUS);
+
     macio_ide = MACIO_IDE(object_resolve_path_component(OBJECT(macio),
                                                         "ide[0]"));
     macio_ide_init_drives(macio_ide, hd);
@@ -420,8 +415,15 @@ static void ppc_core99_init(MachineState *machine)
         }
     }
 
-    if (graphic_depth != 15 && graphic_depth != 32 && graphic_depth != 8)
+    pci_vga_init(pci_bus);
+
+    if (graphic_depth != 15 && graphic_depth != 32 && graphic_depth != 8) {
         graphic_depth = 15;
+    }
+
+    for (i = 0; i < nb_nics; i++) {
+        pci_nic_init_nofail(&nd_table[i], pci_bus, "ne2k_pci", NULL);
+    }
 
     /* The NewWorld NVRAM is not located in the MacIO device */
     dev = qdev_create(NULL, TYPE_MACIO_NVRAM);
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 82f183f173..a8ba916970 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -85,16 +85,16 @@
 
 #define HTAB_SIZE(spapr)        (1ULL << ((spapr)->htab_shift))
 
+typedef struct sPAPRMachineState sPAPRMachineState;
 
-typedef struct SPAPRMachine SPAPRMachine;
 #define TYPE_SPAPR_MACHINE      "spapr-machine"
 #define SPAPR_MACHINE(obj) \
-    OBJECT_CHECK(SPAPRMachine, (obj), TYPE_SPAPR_MACHINE)
+    OBJECT_CHECK(sPAPRMachineState, (obj), TYPE_SPAPR_MACHINE)
 
 /**
- * SPAPRMachine:
+ * sPAPRMachineState:
  */
-struct SPAPRMachine {
+struct sPAPRMachineState {
     /*< private >*/
     MachineState parent_obj;
 
@@ -102,76 +102,8 @@ struct SPAPRMachine {
     char *kvm_type;
 };
 
-
 sPAPREnvironment *spapr;
 
-int spapr_allocate_irq(int hint, bool lsi)
-{
-    int irq;
-
-    if (hint) {
-        irq = hint;
-        if (hint >= spapr->next_irq) {
-            spapr->next_irq = hint + 1;
-        }
-        /* FIXME: we should probably check for collisions somehow */
-    } else {
-        irq = spapr->next_irq++;
-    }
-
-    /* Configure irq type */
-    if (!xics_get_qirq(spapr->icp, irq)) {
-        return 0;
-    }
-
-    xics_set_irq_type(spapr->icp, irq, lsi);
-
-    return irq;
-}
-
-/*
- * Allocate block of consequtive IRQs, returns a number of the first.
- * If msi==true, aligns the first IRQ number to num.
- */
-int spapr_allocate_irq_block(int num, bool lsi, bool msi)
-{
-    int first = -1;
-    int i, hint = 0;
-
-    /*
-     * MSIMesage::data is used for storing VIRQ so
-     * it has to be aligned to num to support multiple
-     * MSI vectors. MSI-X is not affected by this.
-     * The hint is used for the first IRQ, the rest should
-     * be allocated continuously.
-     */
-    if (msi) {
-        assert((num == 1) || (num == 2) || (num == 4) ||
-               (num == 8) || (num == 16) || (num == 32));
-        hint = (spapr->next_irq + num - 1) & ~(num - 1);
-    }
-
-    for (i = 0; i < num; ++i) {
-        int irq;
-
-        irq = spapr_allocate_irq(hint, lsi);
-        if (!irq) {
-            return -1;
-        }
-
-        if (0 == i) {
-            first = irq;
-            hint = 0;
-        }
-
-        /* If the above doesn't create a consecutive block then that's
-         * an internal bug */
-        assert(irq == (first + i));
-    }
-
-    return first;
-}
-
 static XICSState *try_create_xics(const char *type, int nr_servers,
                                   int nr_irqs)
 {
@@ -442,6 +374,9 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base,
     if (boot_device) {
         _FDT((fdt_property_string(fdt, "qemu,boot-device", boot_device)));
     }
+    if (boot_menu) {
+        _FDT((fdt_property_cell(fdt, "qemu,boot-menu", boot_menu)));
+    }
     _FDT((fdt_property_cell(fdt, "qemu,graphic-width", graphic_width)));
     _FDT((fdt_property_cell(fdt, "qemu,graphic-height", graphic_height)));
     _FDT((fdt_property_cell(fdt, "qemu,graphic-depth", graphic_depth)));
@@ -956,7 +891,7 @@ static const VMStateDescription vmstate_spapr = {
     .version_id = 2,
     .minimum_version_id = 1,
     .fields = (VMStateField[]) {
-        VMSTATE_UINT32(next_irq, sPAPREnvironment),
+        VMSTATE_UNUSED(4), /* used to be @next_irq */
 
         /* RTC offset */
         VMSTATE_UINT64(rtc_offset, sPAPREnvironment),
@@ -1360,7 +1295,6 @@ static void ppc_spapr_init(MachineState *machine)
     /* Set up Interrupt Controller before we create the VCPUs */
     spapr->icp = xics_system_init(smp_cpus * kvmppc_smt_threads() / smp_threads,
                                   XICS_IRQS);
-    spapr->next_irq = XICS_IRQ_BASE;
 
     /* init CPUs */
     if (cpu_model == NULL) {
@@ -1619,14 +1553,14 @@ static char *spapr_get_fw_dev_path(FWPathProvider *p, BusState *bus,
 
 static char *spapr_get_kvm_type(Object *obj, Error **errp)
 {
-    SPAPRMachine *sm = SPAPR_MACHINE(obj);
+    sPAPRMachineState *sm = SPAPR_MACHINE(obj);
 
     return g_strdup(sm->kvm_type);
 }
 
 static void spapr_set_kvm_type(Object *obj, const char *value, Error **errp)
 {
-    SPAPRMachine *sm = SPAPR_MACHINE(obj);
+    sPAPRMachineState *sm = SPAPR_MACHINE(obj);
 
     g_free(sm->kvm_type);
     sm->kvm_type = g_strdup(value);
@@ -1660,7 +1594,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
 static const TypeInfo spapr_machine_info = {
     .name          = TYPE_SPAPR_MACHINE,
     .parent        = TYPE_MACHINE,
-    .instance_size = sizeof(SPAPRMachine),
+    .instance_size = sizeof(sPAPRMachineState),
     .instance_init = spapr_machine_initfn,
     .class_init    = spapr_machine_class_init,
     .interfaces = (InterfaceInfo[]) {
@@ -1669,9 +1603,25 @@ static const TypeInfo spapr_machine_info = {
     },
 };
 
+static void spapr_machine_2_1_class_init(ObjectClass *oc, void *data)
+{
+    MachineClass *mc = MACHINE_CLASS(oc);
+
+    mc->name = "pseries-2.1";
+    mc->desc = "pSeries Logical Partition (PAPR compliant) v2.1";
+    mc->is_default = 0;
+}
+
+static const TypeInfo spapr_machine_2_1_info = {
+    .name          = TYPE_SPAPR_MACHINE "2.1",
+    .parent        = TYPE_SPAPR_MACHINE,
+    .class_init    = spapr_machine_2_1_class_init,
+};
+
 static void spapr_machine_register_types(void)
 {
     type_register_static(&spapr_machine_info);
+    type_register_static(&spapr_machine_2_1_info);
 }
 
 type_init(spapr_machine_register_types)
diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
index 16fa49e886..1b6157dec4 100644
--- a/hw/ppc/spapr_events.c
+++ b/hw/ppc/spapr_events.c
@@ -314,8 +314,9 @@ static void check_exception(PowerPCCPU *cpu, sPAPREnvironment *spapr,
 
 void spapr_events_init(sPAPREnvironment *spapr)
 {
-    spapr->epow_irq = spapr_allocate_msi(0);
+    spapr->epow_irq = xics_alloc(spapr->icp, 0, 0, false);
     spapr->epow_notifier.notify = spapr_powerdown_req;
     qemu_register_powerdown_notifier(&spapr->epow_notifier);
-    spapr_rtas_register("check-exception", check_exception);
+    spapr_rtas_register(RTAS_CHECK_EXCEPTION, "check-exception",
+                        check_exception);
 }
diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
index 9e49ec4a5c..698ae60953 100644
--- a/hw/ppc/spapr_iommu.c
+++ b/hw/ppc/spapr_iommu.c
@@ -118,7 +118,8 @@ static int spapr_tce_table_realize(DeviceState *dev)
         tcet->table = kvmppc_create_spapr_tce(tcet->liobn,
                                               tcet->nb_table <<
                                               tcet->page_shift,
-                                              &tcet->fd);
+                                              &tcet->fd,
+                                              tcet->vfio_accel);
     }
 
     if (!tcet->table) {
@@ -142,7 +143,8 @@ static int spapr_tce_table_realize(DeviceState *dev)
 sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn,
                                    uint64_t bus_offset,
                                    uint32_t page_shift,
-                                   uint32_t nb_table)
+                                   uint32_t nb_table,
+                                   bool vfio_accel)
 {
     sPAPRTCETable *tcet;
 
@@ -161,6 +163,7 @@ sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn,
     tcet->bus_offset = bus_offset;
     tcet->page_shift = page_shift;
     tcet->nb_table = nb_table;
+    tcet->vfio_accel = vfio_accel;
 
     object_property_add_child(OBJECT(owner), "tce-table", OBJECT(tcet), NULL);
 
diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index 988f8cb858..9ed39a93b7 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -220,36 +220,12 @@ static void rtas_write_pci_config(PowerPCCPU *cpu, sPAPREnvironment *spapr,
 }
 
 /*
- * Find an entry with config_addr or returns the empty one if not found AND
- * alloc_new is set.
- * At the moment the msi_table entries are never released so there is
- * no point to look till the end of the list if we need to find the free entry.
- */
-static int spapr_msicfg_find(sPAPRPHBState *phb, uint32_t config_addr,
-                             bool alloc_new)
-{
-    int i;
-
-    for (i = 0; i < SPAPR_MSIX_MAX_DEVS; ++i) {
-        if (!phb->msi_table[i].nvec) {
-            break;
-        }
-        if (phb->msi_table[i].config_addr == config_addr) {
-            return i;
-        }
-    }
-    if ((i < SPAPR_MSIX_MAX_DEVS) && alloc_new) {
-        trace_spapr_pci_msi("Allocating new MSI config", i, config_addr);
-        return i;
-    }
-
-    return -1;
-}
-
-/*
  * Set MSI/MSIX message data.
  * This is required for msi_notify()/msix_notify() which
  * will write at the addresses via spapr_msi_write().
+ *
+ * If hwaddr == 0, all entries will have .data == first_irq i.e.
+ * table will be reset.
  */
 static void spapr_msi_setmsg(PCIDevice *pdev, hwaddr addr, bool msix,
                              unsigned first_irq, unsigned req_num)
@@ -263,9 +239,12 @@ static void spapr_msi_setmsg(PCIDevice *pdev, hwaddr addr, bool msix,
         return;
     }
 
-    for (i = 0; i < req_num; ++i, ++msg.data) {
+    for (i = 0; i < req_num; ++i) {
         msix_set_message(pdev, i, msg);
         trace_spapr_pci_msi_setup(pdev->name, i, msg.address);
+        if (addr) {
+            ++msg.data;
+        }
     }
 }
 
@@ -280,9 +259,12 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPREnvironment *spapr,
     unsigned int req_num = rtas_ld(args, 4); /* 0 == remove all */
     unsigned int seq_num = rtas_ld(args, 5);
     unsigned int ret_intr_type;
-    int ndev, irq, max_irqs = 0;
+    unsigned int irq, max_irqs = 0, num = 0;
     sPAPRPHBState *phb = NULL;
     PCIDevice *pdev = NULL;
+    bool msix = false;
+    spapr_pci_msi *msi;
+    int *config_addr_key;
 
     switch (func) {
     case RTAS_CHANGE_MSI_FN:
@@ -310,13 +292,18 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPREnvironment *spapr,
 
     /* Releasing MSIs */
     if (!req_num) {
-        ndev = spapr_msicfg_find(phb, config_addr, false);
-        if (ndev < 0) {
-            trace_spapr_pci_msi("MSI has not been enabled", -1, config_addr);
+        msi = (spapr_pci_msi *) g_hash_table_lookup(phb->msi, &config_addr);
+        if (!msi) {
+            trace_spapr_pci_msi("Releasing wrong config", config_addr);
             rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
             return;
         }
-        trace_spapr_pci_msi("Released MSIs", ndev, config_addr);
+
+        xics_free(spapr->icp, msi->first_irq, msi->num);
+        spapr_msi_setmsg(pdev, 0, msix, 0, num);
+        g_hash_table_remove(phb->msi, &config_addr);
+
+        trace_spapr_pci_msi("Released MSIs", config_addr);
         rtas_st(rets, 0, RTAS_OUT_SUCCESS);
         rtas_st(rets, 1, 0);
         return;
@@ -324,15 +311,6 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPREnvironment *spapr,
 
     /* Enabling MSI */
 
-    /* Find a device number in the map to add or reuse the existing one */
-    ndev = spapr_msicfg_find(phb, config_addr, true);
-    if (ndev >= SPAPR_MSIX_MAX_DEVS || ndev < 0) {
-        error_report("No free entry for a new MSI device");
-        rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
-        return;
-    }
-    trace_spapr_pci_msi("Configuring MSI", ndev, config_addr);
-
     /* Check if the device supports as many IRQs as requested */
     if (ret_intr_type == RTAS_TYPE_MSI) {
         max_irqs = msi_nr_vectors_allocated(pdev);
@@ -340,48 +318,47 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPREnvironment *spapr,
         max_irqs = pdev->msix_entries_nr;
     }
     if (!max_irqs) {
-        error_report("Requested interrupt type %d is not enabled for device#%d",
-                     ret_intr_type, ndev);
+        error_report("Requested interrupt type %d is not enabled for device %x",
+                     ret_intr_type, config_addr);
         rtas_st(rets, 0, -1); /* Hardware error */
         return;
     }
     /* Correct the number if the guest asked for too many */
     if (req_num > max_irqs) {
+        trace_spapr_pci_msi_retry(config_addr, req_num, max_irqs);
         req_num = max_irqs;
+        irq = 0; /* to avoid misleading trace */
+        goto out;
     }
 
-    /* Check if there is an old config and MSI number has not changed */
-    if (phb->msi_table[ndev].nvec && (req_num != phb->msi_table[ndev].nvec)) {
-        /* Unexpected behaviour */
-        error_report("Cannot reuse MSI config for device#%d", ndev);
+    /* Allocate MSIs */
+    irq = xics_alloc_block(spapr->icp, 0, req_num, false,
+                           ret_intr_type == RTAS_TYPE_MSI);
+    if (!irq) {
+        error_report("Cannot allocate MSIs for device %x", config_addr);
         rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
         return;
     }
 
-    /* There is no cached config, allocate MSIs */
-    if (!phb->msi_table[ndev].nvec) {
-        irq = spapr_allocate_irq_block(req_num, false,
-                                       ret_intr_type == RTAS_TYPE_MSI);
-        if (irq < 0) {
-            error_report("Cannot allocate MSIs for device#%d", ndev);
-            rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
-            return;
-        }
-        phb->msi_table[ndev].irq = irq;
-        phb->msi_table[ndev].nvec = req_num;
-        phb->msi_table[ndev].config_addr = config_addr;
-    }
-
     /* Setup MSI/MSIX vectors in the device (via cfgspace or MSIX BAR) */
     spapr_msi_setmsg(pdev, spapr->msi_win_addr, ret_intr_type == RTAS_TYPE_MSIX,
-                     phb->msi_table[ndev].irq, req_num);
+                     irq, req_num);
 
+    /* Add MSI device to cache */
+    msi = g_new(spapr_pci_msi, 1);
+    msi->first_irq = irq;
+    msi->num = req_num;
+    config_addr_key = g_new(int, 1);
+    *config_addr_key = config_addr;
+    g_hash_table_insert(phb->msi, config_addr_key, msi);
+
+out:
     rtas_st(rets, 0, RTAS_OUT_SUCCESS);
     rtas_st(rets, 1, req_num);
     rtas_st(rets, 2, ++seq_num);
     rtas_st(rets, 3, ret_intr_type);
 
-    trace_spapr_pci_rtas_ibm_change_msi(func, req_num);
+    trace_spapr_pci_rtas_ibm_change_msi(config_addr, func, req_num, irq);
 }
 
 static void rtas_ibm_query_interrupt_source_number(PowerPCCPU *cpu,
@@ -395,25 +372,28 @@ static void rtas_ibm_query_interrupt_source_number(PowerPCCPU *cpu,
     uint32_t config_addr = rtas_ld(args, 0);
     uint64_t buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2);
     unsigned int intr_src_num = -1, ioa_intr_num = rtas_ld(args, 3);
-    int ndev;
     sPAPRPHBState *phb = NULL;
+    PCIDevice *pdev = NULL;
+    spapr_pci_msi *msi;
 
-    /* Fins sPAPRPHBState */
+    /* Find sPAPRPHBState */
     phb = find_phb(spapr, buid);
-    if (!phb) {
+    if (phb) {
+        pdev = find_dev(spapr, buid, config_addr);
+    }
+    if (!phb || !pdev) {
         rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
         return;
     }
 
     /* Find device descriptor and start IRQ */
-    ndev = spapr_msicfg_find(phb, config_addr, false);
-    if (ndev < 0) {
-        trace_spapr_pci_msi("MSI has not been enabled", -1, config_addr);
+    msi = (spapr_pci_msi *) g_hash_table_lookup(phb->msi, &config_addr);
+    if (!msi || !msi->first_irq || !msi->num || (ioa_intr_num >= msi->num)) {
+        trace_spapr_pci_msi("Failed to return vector", config_addr);
         rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
         return;
     }
-
-    intr_src_num = phb->msi_table[ndev].irq + ioa_intr_num;
+    intr_src_num = msi->first_irq + ioa_intr_num;
     trace_spapr_pci_rtas_ibm_query_interrupt_source_number(ioa_intr_num,
                                                            intr_src_num);
 
@@ -634,7 +614,7 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp)
     for (i = 0; i < PCI_NUM_PINS; i++) {
         uint32_t irq;
 
-        irq = spapr_allocate_lsi(0);
+        irq = xics_alloc_block(spapr->icp, 0, 1, true, false);
         if (!irq) {
             error_setg(errp, "spapr_allocate_lsi failed");
             return;
@@ -649,6 +629,8 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp)
     }
 
     info->finish_realize(sphb, errp);
+
+    sphb->msi = g_hash_table_new_full(g_int_hash, g_int_equal, g_free, g_free);
 }
 
 static void spapr_phb_finish_realize(sPAPRPHBState *sphb, Error **errp)
@@ -658,7 +640,7 @@ static void spapr_phb_finish_realize(sPAPRPHBState *sphb, Error **errp)
     tcet = spapr_tce_new_table(DEVICE(sphb), sphb->dma_liobn,
                                0,
                                SPAPR_TCE_PAGE_SHIFT,
-                               0x40000000 >> SPAPR_TCE_PAGE_SHIFT);
+                               0x40000000 >> SPAPR_TCE_PAGE_SHIFT, false);
     if (!tcet) {
         error_setg(errp, "Unable to create TCE table for %s",
                    sphb->dtbusname);
@@ -712,22 +694,69 @@ static const VMStateDescription vmstate_spapr_pci_lsi = {
 };
 
 static const VMStateDescription vmstate_spapr_pci_msi = {
-    .name = "spapr_pci/lsi",
+    .name = "spapr_pci/msi",
     .version_id = 1,
     .minimum_version_id = 1,
-    .fields = (VMStateField[]) {
-        VMSTATE_UINT32(config_addr, struct spapr_pci_msi),
-        VMSTATE_UINT32(irq, struct spapr_pci_msi),
-        VMSTATE_UINT32(nvec, struct spapr_pci_msi),
-
+    .fields = (VMStateField []) {
+        VMSTATE_UINT32(key, spapr_pci_msi_mig),
+        VMSTATE_UINT32(value.first_irq, spapr_pci_msi_mig),
+        VMSTATE_UINT32(value.num, spapr_pci_msi_mig),
         VMSTATE_END_OF_LIST()
     },
 };
 
+static void spapr_pci_pre_save(void *opaque)
+{
+    sPAPRPHBState *sphb = opaque;
+    GHashTableIter iter;
+    gpointer key, value;
+    int i;
+
+    if (sphb->msi_devs) {
+        g_free(sphb->msi_devs);
+        sphb->msi_devs = NULL;
+    }
+    sphb->msi_devs_num = g_hash_table_size(sphb->msi);
+    if (!sphb->msi_devs_num) {
+        return;
+    }
+    sphb->msi_devs = g_malloc(sphb->msi_devs_num * sizeof(spapr_pci_msi_mig));
+
+    g_hash_table_iter_init(&iter, sphb->msi);
+    for (i = 0; g_hash_table_iter_next(&iter, &key, &value); ++i) {
+        sphb->msi_devs[i].key = *(uint32_t *) key;
+        sphb->msi_devs[i].value = *(spapr_pci_msi *) value;
+    }
+}
+
+static int spapr_pci_post_load(void *opaque, int version_id)
+{
+    sPAPRPHBState *sphb = opaque;
+    gpointer key, value;
+    int i;
+
+    for (i = 0; i < sphb->msi_devs_num; ++i) {
+        key = g_memdup(&sphb->msi_devs[i].key,
+                       sizeof(sphb->msi_devs[i].key));
+        value = g_memdup(&sphb->msi_devs[i].value,
+                         sizeof(sphb->msi_devs[i].value));
+        g_hash_table_insert(sphb->msi, key, value);
+    }
+    if (sphb->msi_devs) {
+        g_free(sphb->msi_devs);
+        sphb->msi_devs = NULL;
+    }
+    sphb->msi_devs_num = 0;
+
+    return 0;
+}
+
 static const VMStateDescription vmstate_spapr_pci = {
     .name = "spapr_pci",
-    .version_id = 1,
-    .minimum_version_id = 1,
+    .version_id = 2,
+    .minimum_version_id = 2,
+    .pre_save = spapr_pci_pre_save,
+    .post_load = spapr_pci_post_load,
     .fields = (VMStateField[]) {
         VMSTATE_UINT64_EQUAL(buid, sPAPRPHBState),
         VMSTATE_UINT32_EQUAL(dma_liobn, sPAPRPHBState),
@@ -737,9 +766,9 @@ static const VMStateDescription vmstate_spapr_pci = {
         VMSTATE_UINT64_EQUAL(io_win_size, sPAPRPHBState),
         VMSTATE_STRUCT_ARRAY(lsi_table, sPAPRPHBState, PCI_NUM_PINS, 0,
                              vmstate_spapr_pci_lsi, struct spapr_pci_lsi),
-        VMSTATE_STRUCT_ARRAY(msi_table, sPAPRPHBState, SPAPR_MSIX_MAX_DEVS, 0,
-                             vmstate_spapr_pci_msi, struct spapr_pci_msi),
-
+        VMSTATE_INT32(msi_devs_num, sPAPRPHBState),
+        VMSTATE_STRUCT_VARRAY_ALLOC(msi_devs, sPAPRPHBState, msi_devs_num, 0,
+                                    vmstate_spapr_pci_msi, spapr_pci_msi_mig),
         VMSTATE_END_OF_LIST()
     },
 };
@@ -909,14 +938,20 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb,
 
 void spapr_pci_rtas_init(void)
 {
-    spapr_rtas_register("read-pci-config", rtas_read_pci_config);
-    spapr_rtas_register("write-pci-config", rtas_write_pci_config);
-    spapr_rtas_register("ibm,read-pci-config", rtas_ibm_read_pci_config);
-    spapr_rtas_register("ibm,write-pci-config", rtas_ibm_write_pci_config);
+    spapr_rtas_register(RTAS_READ_PCI_CONFIG, "read-pci-config",
+                        rtas_read_pci_config);
+    spapr_rtas_register(RTAS_WRITE_PCI_CONFIG, "write-pci-config",
+                        rtas_write_pci_config);
+    spapr_rtas_register(RTAS_IBM_READ_PCI_CONFIG, "ibm,read-pci-config",
+                        rtas_ibm_read_pci_config);
+    spapr_rtas_register(RTAS_IBM_WRITE_PCI_CONFIG, "ibm,write-pci-config",
+                        rtas_ibm_write_pci_config);
     if (msi_supported) {
-        spapr_rtas_register("ibm,query-interrupt-source-number",
+        spapr_rtas_register(RTAS_IBM_QUERY_INTERRUPT_SOURCE_NUMBER,
+                            "ibm,query-interrupt-source-number",
                             rtas_ibm_query_interrupt_source_number);
-        spapr_rtas_register("ibm,change-msi", rtas_ibm_change_msi);
+        spapr_rtas_register(RTAS_IBM_CHANGE_MSI, "ibm,change-msi",
+                            rtas_ibm_change_msi);
     }
 }
 
diff --git a/hw/ppc/spapr_pci_vfio.c b/hw/ppc/spapr_pci_vfio.c
new file mode 100644
index 0000000000..d3bddf2887
--- /dev/null
+++ b/hw/ppc/spapr_pci_vfio.c
@@ -0,0 +1,102 @@
+/*
+ * QEMU sPAPR PCI host for VFIO
+ *
+ * Copyright (c) 2011-2014 Alexey Kardashevskiy, IBM Corporation.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License,
+ *  or (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "hw/ppc/spapr.h"
+#include "hw/pci-host/spapr.h"
+#include "linux/vfio.h"
+#include "hw/misc/vfio.h"
+
+static Property spapr_phb_vfio_properties[] = {
+    DEFINE_PROP_INT32("iommu", sPAPRPHBVFIOState, iommugroupid, -1),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static void spapr_phb_vfio_finish_realize(sPAPRPHBState *sphb, Error **errp)
+{
+    sPAPRPHBVFIOState *svphb = SPAPR_PCI_VFIO_HOST_BRIDGE(sphb);
+    struct vfio_iommu_spapr_tce_info info = { .argsz = sizeof(info) };
+    int ret;
+    sPAPRTCETable *tcet;
+    uint32_t liobn = svphb->phb.dma_liobn;
+
+    if (svphb->iommugroupid == -1) {
+        error_setg(errp, "Wrong IOMMU group ID %d", svphb->iommugroupid);
+        return;
+    }
+
+    ret = vfio_container_ioctl(&svphb->phb.iommu_as, svphb->iommugroupid,
+                               VFIO_CHECK_EXTENSION,
+                               (void *) VFIO_SPAPR_TCE_IOMMU);
+    if (ret != 1) {
+        error_setg_errno(errp, -ret,
+                         "spapr-vfio: SPAPR extension is not supported");
+        return;
+    }
+
+    ret = vfio_container_ioctl(&svphb->phb.iommu_as, svphb->iommugroupid,
+                               VFIO_IOMMU_SPAPR_TCE_GET_INFO, &info);
+    if (ret) {
+        error_setg_errno(errp, -ret,
+                         "spapr-vfio: get info from container failed");
+        return;
+    }
+
+    tcet = spapr_tce_new_table(DEVICE(sphb), liobn, info.dma32_window_start,
+                               SPAPR_TCE_PAGE_SHIFT,
+                               info.dma32_window_size >> SPAPR_TCE_PAGE_SHIFT,
+                               true);
+    if (!tcet) {
+        error_setg(errp, "spapr-vfio: failed to create VFIO TCE table");
+        return;
+    }
+
+    /* Register default 32bit DMA window */
+    memory_region_add_subregion(&sphb->iommu_root, tcet->bus_offset,
+                                spapr_tce_get_iommu(tcet));
+}
+
+static void spapr_phb_vfio_reset(DeviceState *qdev)
+{
+    /* Do nothing */
+}
+
+static void spapr_phb_vfio_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    sPAPRPHBClass *spc = SPAPR_PCI_HOST_BRIDGE_CLASS(klass);
+
+    dc->props = spapr_phb_vfio_properties;
+    dc->reset = spapr_phb_vfio_reset;
+    spc->finish_realize = spapr_phb_vfio_finish_realize;
+}
+
+static const TypeInfo spapr_phb_vfio_info = {
+    .name          = TYPE_SPAPR_PCI_VFIO_HOST_BRIDGE,
+    .parent        = TYPE_SPAPR_PCI_HOST_BRIDGE,
+    .instance_size = sizeof(sPAPRPHBVFIOState),
+    .class_init    = spapr_phb_vfio_class_init,
+    .class_size    = sizeof(sPAPRPHBClass),
+};
+
+static void spapr_pci_vfio_register_types(void)
+{
+    type_register_static(&spapr_phb_vfio_info);
+}
+
+type_init(spapr_pci_vfio_register_types)
diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
index 8d08539baa..9ba1ba69f9 100644
--- a/hw/ppc/spapr_rtas.c
+++ b/hw/ppc/spapr_rtas.c
@@ -36,9 +36,6 @@
 
 #include <libfdt.h>
 
-#define TOKEN_BASE      0x2000
-#define TOKEN_MAX       0x100
-
 static void rtas_display_character(PowerPCCPU *cpu, sPAPREnvironment *spapr,
                                    uint32_t token, uint32_t nargs,
                                    target_ulong args,
@@ -225,8 +222,6 @@ static void rtas_stop_self(PowerPCCPU *cpu, sPAPREnvironment *spapr,
     env->msr = 0;
 }
 
-#define DIAGNOSTICS_RUN_MODE        42
-
 static void rtas_ibm_get_system_parameter(PowerPCCPU *cpu,
                                           sPAPREnvironment *spapr,
                                           uint32_t token, uint32_t nargs,
@@ -236,16 +231,28 @@ static void rtas_ibm_get_system_parameter(PowerPCCPU *cpu,
     target_ulong parameter = rtas_ld(args, 0);
     target_ulong buffer = rtas_ld(args, 1);
     target_ulong length = rtas_ld(args, 2);
-    target_ulong ret = RTAS_OUT_NOT_SUPPORTED;
+    target_ulong ret = RTAS_OUT_SUCCESS;
 
     switch (parameter) {
-    case DIAGNOSTICS_RUN_MODE:
-        if (length == 1) {
-            rtas_st(buffer, 0, 0);
-            ret = RTAS_OUT_SUCCESS;
-        }
+    case RTAS_SYSPARM_SPLPAR_CHARACTERISTICS: {
+        char *param_val = g_strdup_printf("MaxEntCap=%d,MaxPlatProcs=%d",
+                                          max_cpus, smp_cpus);
+        rtas_st_buffer(buffer, length, (uint8_t *)param_val, strlen(param_val));
+        g_free(param_val);
         break;
     }
+    case RTAS_SYSPARM_DIAGNOSTICS_RUN_MODE: {
+        uint8_t param_val = DIAGNOSTICS_RUN_MODE_DISABLED;
+
+        rtas_st_buffer(buffer, length, &param_val, sizeof(param_val));
+        break;
+    }
+    case RTAS_SYSPARM_UUID:
+        rtas_st_buffer(buffer, length, qemu_uuid, (qemu_uuid_set ? 16 : 0));
+        break;
+    default:
+        ret = RTAS_OUT_NOT_SUPPORTED;
+    }
 
     rtas_st(rets, 0, ret);
 }
@@ -260,7 +267,9 @@ static void rtas_ibm_set_system_parameter(PowerPCCPU *cpu,
     target_ulong ret = RTAS_OUT_NOT_SUPPORTED;
 
     switch (parameter) {
-    case DIAGNOSTICS_RUN_MODE:
+    case RTAS_SYSPARM_SPLPAR_CHARACTERISTICS:
+    case RTAS_SYSPARM_DIAGNOSTICS_RUN_MODE:
+    case RTAS_SYSPARM_UUID:
         ret = RTAS_OUT_NOT_AUTHORIZED;
         break;
     }
@@ -271,17 +280,14 @@ static void rtas_ibm_set_system_parameter(PowerPCCPU *cpu,
 static struct rtas_call {
     const char *name;
     spapr_rtas_fn fn;
-} rtas_table[TOKEN_MAX];
-
-static struct rtas_call *rtas_next = rtas_table;
+} rtas_table[RTAS_TOKEN_MAX - RTAS_TOKEN_BASE];
 
 target_ulong spapr_rtas_call(PowerPCCPU *cpu, sPAPREnvironment *spapr,
                              uint32_t token, uint32_t nargs, target_ulong args,
                              uint32_t nret, target_ulong rets)
 {
-    if ((token >= TOKEN_BASE)
-        && ((token - TOKEN_BASE) < TOKEN_MAX)) {
-        struct rtas_call *call = rtas_table + (token - TOKEN_BASE);
+    if ((token >= RTAS_TOKEN_BASE) && (token < RTAS_TOKEN_MAX)) {
+        struct rtas_call *call = rtas_table + (token - RTAS_TOKEN_BASE);
 
         if (call->fn) {
             call->fn(cpu, spapr, token, nargs, args, nret, rets);
@@ -303,23 +309,22 @@ target_ulong spapr_rtas_call(PowerPCCPU *cpu, sPAPREnvironment *spapr,
     return H_PARAMETER;
 }
 
-int spapr_rtas_register(const char *name, spapr_rtas_fn fn)
+void spapr_rtas_register(int token, const char *name, spapr_rtas_fn fn)
 {
-    int i;
-
-    for (i = 0; i < (rtas_next - rtas_table); i++) {
-        if (strcmp(name, rtas_table[i].name) == 0) {
-            fprintf(stderr, "RTAS call \"%s\" registered twice\n", name);
-            exit(1);
-        }
+    if (!((token >= RTAS_TOKEN_BASE) && (token < RTAS_TOKEN_MAX))) {
+        fprintf(stderr, "RTAS invalid token 0x%x\n", token);
+        exit(1);
     }
 
-    assert(rtas_next < (rtas_table + TOKEN_MAX));
-
-    rtas_next->name = name;
-    rtas_next->fn = fn;
+    token -= RTAS_TOKEN_BASE;
+    if (rtas_table[token].name) {
+        fprintf(stderr, "RTAS call \"%s\" is registered already as 0x%x\n",
+                rtas_table[token].name, token);
+        exit(1);
+    }
 
-    return (rtas_next++ - rtas_table) + TOKEN_BASE;
+    rtas_table[token].name = name;
+    rtas_table[token].fn = fn;
 }
 
 int spapr_rtas_device_tree_setup(void *fdt, hwaddr rtas_addr,
@@ -359,7 +364,7 @@ int spapr_rtas_device_tree_setup(void *fdt, hwaddr rtas_addr,
         return ret;
     }
 
-    for (i = 0; i < TOKEN_MAX; i++) {
+    for (i = 0; i < RTAS_TOKEN_MAX - RTAS_TOKEN_BASE; i++) {
         struct rtas_call *call = &rtas_table[i];
 
         if (!call->name) {
@@ -367,7 +372,7 @@ int spapr_rtas_device_tree_setup(void *fdt, hwaddr rtas_addr,
         }
 
         ret = qemu_fdt_setprop_cell(fdt, "/rtas", call->name,
-                                    i + TOKEN_BASE);
+                                    i + RTAS_TOKEN_BASE);
         if (ret < 0) {
             fprintf(stderr, "Couldn't add rtas token for %s: %s\n",
                     call->name, fdt_strerror(ret));
@@ -380,18 +385,24 @@ int spapr_rtas_device_tree_setup(void *fdt, hwaddr rtas_addr,
 
 static void core_rtas_register_types(void)
 {
-    spapr_rtas_register("display-character", rtas_display_character);
-    spapr_rtas_register("get-time-of-day", rtas_get_time_of_day);
-    spapr_rtas_register("set-time-of-day", rtas_set_time_of_day);
-    spapr_rtas_register("power-off", rtas_power_off);
-    spapr_rtas_register("system-reboot", rtas_system_reboot);
-    spapr_rtas_register("query-cpu-stopped-state",
+    spapr_rtas_register(RTAS_DISPLAY_CHARACTER, "display-character",
+                        rtas_display_character);
+    spapr_rtas_register(RTAS_GET_TIME_OF_DAY, "get-time-of-day",
+                        rtas_get_time_of_day);
+    spapr_rtas_register(RTAS_SET_TIME_OF_DAY, "set-time-of-day",
+                        rtas_set_time_of_day);
+    spapr_rtas_register(RTAS_POWER_OFF, "power-off", rtas_power_off);
+    spapr_rtas_register(RTAS_SYSTEM_REBOOT, "system-reboot",
+                        rtas_system_reboot);
+    spapr_rtas_register(RTAS_QUERY_CPU_STOPPED_STATE, "query-cpu-stopped-state",
                         rtas_query_cpu_stopped_state);
-    spapr_rtas_register("start-cpu", rtas_start_cpu);
-    spapr_rtas_register("stop-self", rtas_stop_self);
-    spapr_rtas_register("ibm,get-system-parameter",
+    spapr_rtas_register(RTAS_START_CPU, "start-cpu", rtas_start_cpu);
+    spapr_rtas_register(RTAS_STOP_SELF, "stop-self", rtas_stop_self);
+    spapr_rtas_register(RTAS_IBM_GET_SYSTEM_PARAMETER,
+                        "ibm,get-system-parameter",
                         rtas_ibm_get_system_parameter);
-    spapr_rtas_register("ibm,set-system-parameter",
+    spapr_rtas_register(RTAS_IBM_SET_SYSTEM_PARAMETER,
+                        "ibm,set-system-parameter",
                         rtas_ibm_set_system_parameter);
 }
 
diff --git a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c
index 04e16ae04d..dc9e46a7b1 100644
--- a/hw/ppc/spapr_vio.c
+++ b/hw/ppc/spapr_vio.c
@@ -449,7 +449,7 @@ static int spapr_vio_busdev_init(DeviceState *qdev)
         dev->qdev.id = id;
     }
 
-    dev->irq = spapr_allocate_msi(dev->irq);
+    dev->irq = xics_alloc(spapr->icp, 0, dev->irq, false);
     if (!dev->irq) {
         return -1;
     }
@@ -460,7 +460,7 @@ static int spapr_vio_busdev_init(DeviceState *qdev)
                                         0,
                                         SPAPR_TCE_PAGE_SHIFT,
                                         pc->rtce_window_size >>
-                                        SPAPR_TCE_PAGE_SHIFT);
+                                        SPAPR_TCE_PAGE_SHIFT, false);
         address_space_init(&dev->as, spapr_tce_get_iommu(dev->tcet), qdev->id);
     }
 
@@ -517,8 +517,9 @@ VIOsPAPRBus *spapr_vio_bus_init(void)
     spapr_register_hypercall(H_ENABLE_CRQ, h_enable_crq);
 
     /* RTAS calls */
-    spapr_rtas_register("ibm,set-tce-bypass", rtas_set_tce_bypass);
-    spapr_rtas_register("quiesce", rtas_quiesce);
+    spapr_rtas_register(RTAS_IBM_SET_TCE_BYPASS, "ibm,set-tce-bypass",
+                        rtas_set_tce_bypass);
+    spapr_rtas_register(RTAS_QUIESCE, "quiesce", rtas_quiesce);
 
     return bus;
 }
diff --git a/hw/watchdog/watchdog.c b/hw/watchdog/watchdog.c
index 4aebd34924..9f607d42bb 100644
--- a/hw/watchdog/watchdog.c
+++ b/hw/watchdog/watchdog.c
@@ -106,7 +106,7 @@ int select_watchdog_action(const char *p)
  */
 void watchdog_perform_action(void)
 {
-    switch(watchdog_action) {
+    switch (watchdog_action) {
     case WDT_RESET:             /* same as 'system_reset' in monitor */
         qapi_event_send_watchdog(WATCHDOG_EXPIRATION_ACTION_RESET, &error_abort);
         qemu_system_reset_request();
diff --git a/include/block/block.h b/include/block/block.h
index d0baf4fb83..7e92f549fb 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -175,6 +175,7 @@ typedef enum BlockOpType {
     BLOCK_OP_TYPE_MIRROR,
     BLOCK_OP_TYPE_RESIZE,
     BLOCK_OP_TYPE_STREAM,
+    BLOCK_OP_TYPE_REPLACE,
     BLOCK_OP_TYPE_MAX,
 } BlockOpType;
 
@@ -213,7 +214,7 @@ int bdrv_open_image(BlockDriverState **pbs, const char *filename,
                     bool allow_none, Error **errp);
 void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd);
 int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp);
-void bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp);
+int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp);
 int bdrv_open(BlockDriverState **pbs, const char *filename,
               const char *reference, QDict *options, int flags,
               BlockDriver *drv, Error **errp);
@@ -314,6 +315,9 @@ bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
                                       BlockDriverState *candidate);
 bool bdrv_is_first_non_filter(BlockDriverState *candidate);
 
+/* check if a named node can be replaced when doing drive-mirror */
+BlockDriverState *check_to_replace_node(const char *node_name, Error **errp);
+
 /* async block I/O */
 typedef void BlockDriverDirtyHandler(BlockDriverState *bs, int64_t sector,
                                      int sector_num);
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 715c761fad..53e77cf11e 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -100,6 +100,9 @@ struct BlockDriver {
      */
     bool bdrv_needs_filename;
 
+    /* Set if a driver can support backing files */
+    bool supports_backing;
+
     /* For handling image reopen for split or non-split files */
     int (*bdrv_reopen_prepare)(BDRVReopenState *reopen_state,
                                BlockReopenQueue *queue, Error **errp);
@@ -486,6 +489,8 @@ void commit_active_start(BlockDriverState *bs, BlockDriverState *base,
  * mirror_start:
  * @bs: Block device to operate on.
  * @target: Block device to write to.
+ * @replaces: Block graph node name to replace once the mirror is done. Can
+ *            only be used when full mirroring is selected.
  * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
  * @granularity: The chosen granularity for the dirty bitmap.
  * @buf_size: The amount of data that can be in flight at one time.
@@ -502,6 +507,7 @@ void commit_active_start(BlockDriverState *bs, BlockDriverState *base,
  * @bs will be switched to read from @target.
  */
 void mirror_start(BlockDriverState *bs, BlockDriverState *target,
+                  const char *replaces,
                   int64_t speed, int64_t granularity, int64_t buf_size,
                   MirrorSyncMode mode, BlockdevOnError on_source_error,
                   BlockdevOnError on_target_error,
diff --git a/include/block/blockjob.h b/include/block/blockjob.h
index e443987ea8..f3cf63f9f5 100644
--- a/include/block/blockjob.h
+++ b/include/block/blockjob.h
@@ -147,6 +147,14 @@ void *block_job_create(const BlockJobDriver *driver, BlockDriverState *bs,
 void block_job_sleep_ns(BlockJob *job, QEMUClockType type, int64_t ns);
 
 /**
+ * block_job_yield:
+ * @job: The job that calls the function.
+ *
+ * Yield the block job coroutine.
+ */
+void block_job_yield(BlockJob *job);
+
+/**
  * block_job_completed:
  * @job: The job being completed.
  * @ret: The status code.
@@ -217,7 +225,7 @@ void block_job_pause(BlockJob *job);
 void block_job_resume(BlockJob *job);
 
 /**
- * block_job_event_cancle:
+ * block_job_event_cancelled:
  * @job: The job whose information is requested.
  *
  * Send a BLOCK_JOB_CANCELLED event for the specified job.
diff --git a/include/block/qapi.h b/include/block/qapi.h
index e92c00daf6..03745464d6 100644
--- a/include/block/qapi.h
+++ b/include/block/qapi.h
@@ -39,7 +39,6 @@ void bdrv_query_image_info(BlockDriverState *bs,
 void bdrv_query_info(BlockDriverState *bs,
                      BlockInfo **p_info,
                      Error **errp);
-BlockStats *bdrv_query_stats(const BlockDriverState *bs);
 
 void bdrv_snapshot_dump(fprintf_function func_fprintf, void *f,
                         QEMUSnapshotInfo *sn);
diff --git a/include/hw/misc/vfio.h b/include/hw/misc/vfio.h
new file mode 100644
index 0000000000..0b26cd8e11
--- /dev/null
+++ b/include/hw/misc/vfio.h
@@ -0,0 +1,9 @@
+#ifndef VFIO_API_H
+#define VFIO_API_H
+
+#include "qemu/typedefs.h"
+
+extern int vfio_container_ioctl(AddressSpace *as, int32_t groupid,
+                                int req, void *param);
+
+#endif
diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h
index 0934518bbd..32f0aa7d10 100644
--- a/include/hw/pci-host/spapr.h
+++ b/include/hw/pci-host/spapr.h
@@ -27,13 +27,15 @@
 #include "hw/pci/pci_host.h"
 #include "hw/ppc/xics.h"
 
-#define SPAPR_MSIX_MAX_DEVS 32
-
 #define TYPE_SPAPR_PCI_HOST_BRIDGE "spapr-pci-host-bridge"
+#define TYPE_SPAPR_PCI_VFIO_HOST_BRIDGE "spapr-pci-vfio-host-bridge"
 
 #define SPAPR_PCI_HOST_BRIDGE(obj) \
     OBJECT_CHECK(sPAPRPHBState, (obj), TYPE_SPAPR_PCI_HOST_BRIDGE)
 
+#define SPAPR_PCI_VFIO_HOST_BRIDGE(obj) \
+    OBJECT_CHECK(sPAPRPHBVFIOState, (obj), TYPE_SPAPR_PCI_VFIO_HOST_BRIDGE)
+
 #define SPAPR_PCI_HOST_BRIDGE_CLASS(klass) \
      OBJECT_CLASS_CHECK(sPAPRPHBClass, (klass), TYPE_SPAPR_PCI_HOST_BRIDGE)
 #define SPAPR_PCI_HOST_BRIDGE_GET_CLASS(obj) \
@@ -41,6 +43,7 @@
 
 typedef struct sPAPRPHBClass sPAPRPHBClass;
 typedef struct sPAPRPHBState sPAPRPHBState;
+typedef struct sPAPRPHBVFIOState sPAPRPHBVFIOState;
 
 struct sPAPRPHBClass {
     PCIHostBridgeClass parent_class;
@@ -48,6 +51,16 @@ struct sPAPRPHBClass {
     void (*finish_realize)(sPAPRPHBState *sphb, Error **errp);
 };
 
+typedef struct spapr_pci_msi {
+    uint32_t first_irq;
+    uint32_t num;
+} spapr_pci_msi;
+
+typedef struct spapr_pci_msi_mig {
+    uint32_t key;
+    spapr_pci_msi value;
+} spapr_pci_msi_mig;
+
 struct sPAPRPHBState {
     PCIHostState parent_obj;
 
@@ -67,15 +80,20 @@ struct sPAPRPHBState {
         uint32_t irq;
     } lsi_table[PCI_NUM_PINS];
 
-    struct spapr_pci_msi {
-        uint32_t config_addr;
-        uint32_t irq;
-        uint32_t nvec;
-    } msi_table[SPAPR_MSIX_MAX_DEVS];
+    GHashTable *msi;
+    /* Temporary cache for migration purposes */
+    int32_t msi_devs_num;
+    spapr_pci_msi_mig *msi_devs;
 
     QLIST_ENTRY(sPAPRPHBState) list;
 };
 
+struct sPAPRPHBVFIOState {
+    sPAPRPHBState phb;
+
+    int32_t iommugroupid;
+};
+
 #define SPAPR_PCI_BASE_BUID          0x800000020000000ULL
 
 #define SPAPR_PCI_WINDOW_BASE        0x10000000000ULL
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 08c301f38d..bbba51a703 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -27,7 +27,6 @@ typedef struct sPAPREnvironment {
     long rtas_size;
     void *fdt_skel;
     target_ulong entry_point;
-    uint32_t next_irq;
     uint64_t rtc_offset;
     struct PPCTimebase tb;
     bool has_graphics;
@@ -339,16 +338,6 @@ target_ulong spapr_hypercall(PowerPCCPU *cpu, target_ulong opcode,
 int spapr_allocate_irq(int hint, bool lsi);
 int spapr_allocate_irq_block(int num, bool lsi, bool msi);
 
-static inline int spapr_allocate_msi(int hint)
-{
-    return spapr_allocate_irq(hint, false);
-}
-
-static inline int spapr_allocate_lsi(int hint)
-{
-    return spapr_allocate_irq(hint, true);
-}
-
 /* RTAS return codes */
 #define RTAS_OUT_SUCCESS            0
 #define RTAS_OUT_NO_ERRORS_FOUND    1
@@ -358,6 +347,58 @@ static inline int spapr_allocate_lsi(int hint)
 #define RTAS_OUT_NOT_SUPPORTED      -3
 #define RTAS_OUT_NOT_AUTHORIZED     -9002
 
+/* RTAS tokens */
+#define RTAS_TOKEN_BASE      0x2000
+
+#define RTAS_DISPLAY_CHARACTER                  (RTAS_TOKEN_BASE + 0x00)
+#define RTAS_GET_TIME_OF_DAY                    (RTAS_TOKEN_BASE + 0x01)
+#define RTAS_SET_TIME_OF_DAY                    (RTAS_TOKEN_BASE + 0x02)
+#define RTAS_POWER_OFF                          (RTAS_TOKEN_BASE + 0x03)
+#define RTAS_SYSTEM_REBOOT                      (RTAS_TOKEN_BASE + 0x04)
+#define RTAS_QUERY_CPU_STOPPED_STATE            (RTAS_TOKEN_BASE + 0x05)
+#define RTAS_START_CPU                          (RTAS_TOKEN_BASE + 0x06)
+#define RTAS_STOP_SELF                          (RTAS_TOKEN_BASE + 0x07)
+#define RTAS_IBM_GET_SYSTEM_PARAMETER           (RTAS_TOKEN_BASE + 0x08)
+#define RTAS_IBM_SET_SYSTEM_PARAMETER           (RTAS_TOKEN_BASE + 0x09)
+#define RTAS_IBM_SET_XIVE                       (RTAS_TOKEN_BASE + 0x0A)
+#define RTAS_IBM_GET_XIVE                       (RTAS_TOKEN_BASE + 0x0B)
+#define RTAS_IBM_INT_OFF                        (RTAS_TOKEN_BASE + 0x0C)
+#define RTAS_IBM_INT_ON                         (RTAS_TOKEN_BASE + 0x0D)
+#define RTAS_CHECK_EXCEPTION                    (RTAS_TOKEN_BASE + 0x0E)
+#define RTAS_EVENT_SCAN                         (RTAS_TOKEN_BASE + 0x0F)
+#define RTAS_IBM_SET_TCE_BYPASS                 (RTAS_TOKEN_BASE + 0x10)
+#define RTAS_QUIESCE                            (RTAS_TOKEN_BASE + 0x11)
+#define RTAS_NVRAM_FETCH                        (RTAS_TOKEN_BASE + 0x12)
+#define RTAS_NVRAM_STORE                        (RTAS_TOKEN_BASE + 0x13)
+#define RTAS_READ_PCI_CONFIG                    (RTAS_TOKEN_BASE + 0x14)
+#define RTAS_WRITE_PCI_CONFIG                   (RTAS_TOKEN_BASE + 0x15)
+#define RTAS_IBM_READ_PCI_CONFIG                (RTAS_TOKEN_BASE + 0x16)
+#define RTAS_IBM_WRITE_PCI_CONFIG               (RTAS_TOKEN_BASE + 0x17)
+#define RTAS_IBM_QUERY_INTERRUPT_SOURCE_NUMBER  (RTAS_TOKEN_BASE + 0x18)
+#define RTAS_IBM_CHANGE_MSI                     (RTAS_TOKEN_BASE + 0x19)
+#define RTAS_SET_INDICATOR                      (RTAS_TOKEN_BASE + 0x1A)
+#define RTAS_SET_POWER_LEVEL                    (RTAS_TOKEN_BASE + 0x1B)
+#define RTAS_GET_POWER_LEVEL                    (RTAS_TOKEN_BASE + 0x1C)
+#define RTAS_GET_SENSOR_STATE                   (RTAS_TOKEN_BASE + 0x1D)
+#define RTAS_IBM_CONFIGURE_CONNECTOR            (RTAS_TOKEN_BASE + 0x1E)
+#define RTAS_IBM_OS_TERM                        (RTAS_TOKEN_BASE + 0x1F)
+#define RTAS_IBM_EXTENDED_OS_TERM               (RTAS_TOKEN_BASE + 0x20)
+
+#define RTAS_TOKEN_MAX                          (RTAS_TOKEN_BASE + 0x21)
+
+/* RTAS ibm,get-system-parameter token values */
+#define RTAS_SYSPARM_SPLPAR_CHARACTERISTICS      20
+#define RTAS_SYSPARM_DIAGNOSTICS_RUN_MODE        42
+#define RTAS_SYSPARM_UUID                        48
+
+/* Possible values for the platform-processor-diagnostics-run-mode parameter
+ * of the RTAS ibm,get-system-parameter call.
+ */
+#define DIAGNOSTICS_RUN_MODE_DISABLED  0
+#define DIAGNOSTICS_RUN_MODE_STAGGERED 1
+#define DIAGNOSTICS_RUN_MODE_IMMEDIATE 2
+#define DIAGNOSTICS_RUN_MODE_PERIODIC  3
+
 static inline uint64_t ppc64_phys_to_real(uint64_t addr)
 {
     return addr & ~0xF000000000000000ULL;
@@ -373,11 +414,24 @@ static inline void rtas_st(target_ulong phys, int n, uint32_t val)
     stl_be_phys(&address_space_memory, ppc64_phys_to_real(phys + 4*n), val);
 }
 
+
+static inline void rtas_st_buffer(target_ulong phys, target_ulong phys_len,
+                                  uint8_t *buffer, uint16_t buffer_len)
+{
+    if (phys_len < 2) {
+        return;
+    }
+    stw_be_phys(&address_space_memory,
+                ppc64_phys_to_real(phys), buffer_len);
+    cpu_physical_memory_write(ppc64_phys_to_real(phys + 2),
+                              buffer, MIN(buffer_len, phys_len - 2));
+}
+
 typedef void (*spapr_rtas_fn)(PowerPCCPU *cpu, sPAPREnvironment *spapr,
                               uint32_t token,
                               uint32_t nargs, target_ulong args,
                               uint32_t nret, target_ulong rets);
-int spapr_rtas_register(const char *name, spapr_rtas_fn fn);
+void spapr_rtas_register(int token, const char *name, spapr_rtas_fn fn);
 target_ulong spapr_rtas_call(PowerPCCPU *cpu, sPAPREnvironment *spapr,
                              uint32_t token, uint32_t nargs, target_ulong args,
                              uint32_t nret, target_ulong rets);
@@ -407,6 +461,7 @@ struct sPAPRTCETable {
     uint32_t page_shift;
     uint64_t *table;
     bool bypass;
+    bool vfio_accel;
     int fd;
     MemoryRegion iommu;
     QLIST_ENTRY(sPAPRTCETable) list;
@@ -418,7 +473,8 @@ int spapr_h_cas_compose_response(target_ulong addr, target_ulong size);
 sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn,
                                    uint64_t bus_offset,
                                    uint32_t page_shift,
-                                   uint32_t nb_table);
+                                   uint32_t nb_table,
+                                   bool vfio_accel);
 MemoryRegion *spapr_tce_get_iommu(sPAPRTCETable *tcet);
 void spapr_tce_set_bypass(sPAPRTCETable *tcet, bool bypass);
 int spapr_dma_dt(void *fdt, int node_off, const char *propname,
diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h
index 85e4c8a3dd..a214dd7f28 100644
--- a/include/hw/ppc/xics.h
+++ b/include/hw/ppc/xics.h
@@ -136,7 +136,6 @@ struct ICSState {
     uint32_t nr_irqs;
     uint32_t offset;
     qemu_irq *qirqs;
-    bool *islsi;
     ICSIRQState *irqs;
     XICSState *icp;
 };
@@ -150,12 +149,20 @@ struct ICSIRQState {
 #define XICS_STATUS_REJECTED           0x4
 #define XICS_STATUS_MASKED_PENDING     0x8
     uint8_t status;
+/* (flags & XICS_FLAGS_IRQ_MASK) == 0 means the interrupt is not allocated */
+#define XICS_FLAGS_IRQ_LSI             0x1
+#define XICS_FLAGS_IRQ_MSI             0x2
+#define XICS_FLAGS_IRQ_MASK            0x3
+    uint8_t flags;
 };
 
 #define XICS_IRQS               1024
 
 qemu_irq xics_get_qirq(XICSState *icp, int irq);
 void xics_set_irq_type(XICSState *icp, int irq, bool lsi);
+int xics_alloc(XICSState *icp, int src, int irq_hint, bool lsi);
+int xics_alloc_block(XICSState *icp, int src, int num, bool lsi, bool align);
+void xics_free(XICSState *icp, int irq, int num);
 
 void xics_cpu_setup(XICSState *icp, PowerPCCPU *cpu);
 
diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h
index 4bc9b549ad..d0fb26f963 100644
--- a/include/hw/virtio/virtio-blk.h
+++ b/include/hw/virtio/virtio-blk.h
@@ -17,6 +17,7 @@
 #include "hw/virtio/virtio.h"
 #include "hw/block/block.h"
 #include "sysemu/iothread.h"
+#include "block/block.h"
 
 #define TYPE_VIRTIO_BLK "virtio-blk-device"
 #define VIRTIO_BLK(obj) \
@@ -116,6 +117,7 @@ struct VirtIOBlkConf
 
 struct VirtIOBlockDataPlane;
 
+struct VirtIOBlockReq;
 typedef struct VirtIOBlock {
     VirtIODevice parent_obj;
     BlockDriverState *bs;
@@ -127,12 +129,29 @@ typedef struct VirtIOBlock {
     unsigned short sector_mask;
     bool original_wce;
     VMChangeStateEntry *change;
+    /* Function to push to vq and notify guest */
+    void (*complete_request)(struct VirtIOBlockReq *req, unsigned char status);
 #ifdef CONFIG_VIRTIO_BLK_DATA_PLANE
     Notifier migration_state_notifier;
     struct VirtIOBlockDataPlane *dataplane;
 #endif
 } VirtIOBlock;
 
+typedef struct MultiReqBuffer {
+    BlockRequest        blkreq[32];
+    unsigned int        num_writes;
+} MultiReqBuffer;
+
+typedef struct VirtIOBlockReq {
+    VirtIOBlock *dev;
+    VirtQueueElement *elem;
+    struct virtio_blk_inhdr *in;
+    struct virtio_blk_outhdr out;
+    QEMUIOVector qiov;
+    struct VirtIOBlockReq *next;
+    BlockAcctCookie acct;
+} VirtIOBlockReq;
+
 #define DEFINE_VIRTIO_BLK_FEATURES(_state, _field) \
         DEFINE_VIRTIO_COMMON_FEATURES(_state, _field)
 
@@ -158,4 +177,8 @@ void virtio_blk_set_conf(DeviceState *dev, VirtIOBlkConf *blk);
 int virtio_blk_handle_scsi_req(VirtIOBlock *blk,
                                VirtQueueElement *elem);
 
+void virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb);
+
+void virtio_submit_multiwrite(BlockDriverState *bs, MultiReqBuffer *mrb);
+
 #endif
diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
index 71a8a95641..9a001bd28f 100644
--- a/include/migration/vmstate.h
+++ b/include/migration/vmstate.h
@@ -101,6 +101,7 @@ enum VMStateFlags {
     VMS_VARRAY_UINT8     = 0x400,  /* Array with size in uint8_t field*/
     VMS_VARRAY_UINT32    = 0x800,  /* Array with size in uint32_t field*/
     VMS_MUST_EXIST       = 0x1000, /* Field must exist in input */
+    VMS_ALLOC            = 0x2000, /* Alloc a buffer on the destination */
 };
 
 typedef struct {
@@ -429,6 +430,16 @@ extern const VMStateInfo vmstate_info_bitmap;
     .offset     = offsetof(_state, _field),                          \
 }
 
+#define VMSTATE_STRUCT_VARRAY_ALLOC(_field, _state, _field_num, _version, _vmsd, _type) {\
+    .name       = (stringify(_field)),                               \
+    .version_id = (_version),                                        \
+    .vmsd       = &(_vmsd),                                          \
+    .num_offset = vmstate_offset_value(_state, _field_num, int32_t), \
+    .size       = sizeof(_type),                                     \
+    .flags      = VMS_STRUCT|VMS_VARRAY_INT32|VMS_ALLOC|VMS_POINTER, \
+    .offset     = vmstate_offset_pointer(_state, _field, _type),     \
+}
+
 #define VMSTATE_STATIC_BUFFER(_field, _state, _version, _test, _start, _size) { \
     .name         = (stringify(_field)),                             \
     .version_id   = (_version),                                      \
diff --git a/include/net/net.h b/include/net/net.h
index 8b189da5ee..ed594f9bdb 100644
--- a/include/net/net.h
+++ b/include/net/net.h
@@ -24,13 +24,13 @@ struct MACAddr {
 
 typedef struct NICPeers {
     NetClientState *ncs[MAX_QUEUE_NUM];
+    int32_t queues;
 } NICPeers;
 
 typedef struct NICConf {
     MACAddr macaddr;
     NICPeers peers;
     int32_t bootindex;
-    int32_t queues;
 } NICConf;
 
 #define DEFINE_NIC_PROPERTIES(_state, _conf)                            \
diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index 1248eda272..60777fecf6 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -736,6 +736,14 @@ enum {
 
     QEMU_PPC_FEATURE_TRUE_LE = 0x00000002,
     QEMU_PPC_FEATURE_PPC_LE = 0x00000001,
+
+    /* Feature definitions in AT_HWCAP2.  */
+    QEMU_PPC_FEATURE2_ARCH_2_07 = 0x80000000, /* ISA 2.07 */
+    QEMU_PPC_FEATURE2_HAS_HTM = 0x40000000, /* Hardware Transactional Memory */
+    QEMU_PPC_FEATURE2_HAS_DSCR = 0x20000000, /* Data Stream Control Register */
+    QEMU_PPC_FEATURE2_HAS_EBB = 0x10000000, /* Event Base Branching */
+    QEMU_PPC_FEATURE2_HAS_ISEL = 0x08000000, /* Integer Select */
+    QEMU_PPC_FEATURE2_HAS_TAR = 0x04000000, /* Target Address Register */
 };
 
 #define ELF_HWCAP get_elf_hwcap()
@@ -749,6 +757,8 @@ static uint32_t get_elf_hwcap(void)
        Altivec/FP/SPE support.  Anything else is just a bonus.  */
 #define GET_FEATURE(flag, feature)                                      \
     do { if (cpu->env.insns_flags & flag) { features |= feature; } } while (0)
+#define GET_FEATURE2(flag, feature)                                      \
+    do { if (cpu->env.insns_flags2 & flag) { features |= feature; } } while (0)
     GET_FEATURE(PPC_64B, QEMU_PPC_FEATURE_64);
     GET_FEATURE(PPC_FLOAT, QEMU_PPC_FEATURE_HAS_FPU);
     GET_FEATURE(PPC_ALTIVEC, QEMU_PPC_FEATURE_HAS_ALTIVEC);
@@ -757,7 +767,36 @@ static uint32_t get_elf_hwcap(void)
     GET_FEATURE(PPC_SPE_DOUBLE, QEMU_PPC_FEATURE_HAS_EFP_DOUBLE);
     GET_FEATURE(PPC_BOOKE, QEMU_PPC_FEATURE_BOOKE);
     GET_FEATURE(PPC_405_MAC, QEMU_PPC_FEATURE_HAS_4xxMAC);
+    GET_FEATURE2(PPC2_DFP, QEMU_PPC_FEATURE_HAS_DFP);
+    GET_FEATURE2(PPC2_VSX, QEMU_PPC_FEATURE_HAS_VSX);
+    GET_FEATURE2((PPC2_PERM_ISA206 | PPC2_DIVE_ISA206 | PPC2_ATOMIC_ISA206 |
+                  PPC2_FP_CVT_ISA206 | PPC2_FP_TST_ISA206),
+                  QEMU_PPC_FEATURE_ARCH_2_06);
+#undef GET_FEATURE
+#undef GET_FEATURE2
+
+    return features;
+}
+
+#define ELF_HWCAP2 get_elf_hwcap2()
+
+static uint32_t get_elf_hwcap2(void)
+{
+    PowerPCCPU *cpu = POWERPC_CPU(thread_cpu);
+    uint32_t features = 0;
+
+#define GET_FEATURE(flag, feature)                                      \
+    do { if (cpu->env.insns_flags & flag) { features |= feature; } } while (0)
+#define GET_FEATURE2(flag, feature)                                      \
+    do { if (cpu->env.insns_flags2 & flag) { features |= feature; } } while (0)
+
+    GET_FEATURE(PPC_ISEL, QEMU_PPC_FEATURE2_HAS_ISEL);
+    GET_FEATURE2(PPC2_BCTAR_ISA207, QEMU_PPC_FEATURE2_HAS_TAR);
+    GET_FEATURE2((PPC2_BCTAR_ISA207 | PPC2_LSQ_ISA207 | PPC2_ALTIVEC_207 |
+                  PPC2_ISA207S), QEMU_PPC_FEATURE2_ARCH_2_07);
+
 #undef GET_FEATURE
+#undef GET_FEATURE2
 
     return features;
 }
@@ -774,8 +813,9 @@ static uint32_t get_elf_hwcap(void)
 #define DLINFO_ARCH_ITEMS       5
 #define ARCH_DLINFO                                     \
     do {                                                \
-        NEW_AUX_ENT(AT_DCACHEBSIZE, 0x20);              \
-        NEW_AUX_ENT(AT_ICACHEBSIZE, 0x20);              \
+        PowerPCCPU *cpu = POWERPC_CPU(thread_cpu);              \
+        NEW_AUX_ENT(AT_DCACHEBSIZE, cpu->env.dcache_line_size); \
+        NEW_AUX_ENT(AT_ICACHEBSIZE, cpu->env.icache_line_size); \
         NEW_AUX_ENT(AT_UCACHEBSIZE, 0);                 \
         /*                                              \
          * Now handle glibc compatibility.              \
diff --git a/monitor.c b/monitor.c
index a8ab600c88..5718d0b60a 100644
--- a/monitor.c
+++ b/monitor.c
@@ -570,6 +570,7 @@ monitor_qapi_event_throttle(QAPIEvent event, int64_t rate)
 
     trace_monitor_protocol_event_throttle(event, rate);
     evstate->event = event;
+    assert(rate * SCALE_MS <= INT64_MAX);
     evstate->rate = rate * SCALE_MS;
     evstate->last = 0;
     evstate->data = NULL;
@@ -585,9 +586,9 @@ static void monitor_qapi_event_init(void)
     monitor_qapi_event_throttle(QAPI_EVENT_RTC_CHANGE, 1000);
     monitor_qapi_event_throttle(QAPI_EVENT_WATCHDOG, 1000);
     monitor_qapi_event_throttle(QAPI_EVENT_BALLOON_CHANGE, 1000);
-    /* limit the rate of quorum events to avoid hammering the management */
     monitor_qapi_event_throttle(QAPI_EVENT_QUORUM_REPORT_BAD, 1000);
     monitor_qapi_event_throttle(QAPI_EVENT_QUORUM_FAILURE, 1000);
+    monitor_qapi_event_throttle(QAPI_EVENT_VSERPORT_CHANGE, 1000);
 
     qmp_event_set_func_emit(monitor_qapi_event_queue);
 }
diff --git a/net/Makefile.objs b/net/Makefile.objs
index 301f6b6b51..a06ba59dad 100644
--- a/net/Makefile.objs
+++ b/net/Makefile.objs
@@ -2,6 +2,7 @@ common-obj-y = net.o queue.o checksum.o util.o hub.o
 common-obj-y += socket.o
 common-obj-y += dump.o
 common-obj-y += eth.o
+common-obj-$(CONFIG_LINUX) += l2tpv3.o
 common-obj-$(CONFIG_POSIX) += tap.o vhost-user.o
 common-obj-$(CONFIG_LINUX) += tap-linux.o
 common-obj-$(CONFIG_WIN32) += tap-win32.o
diff --git a/net/clients.h b/net/clients.h
index 7f3d4ae9f3..2e8fedad8d 100644
--- a/net/clients.h
+++ b/net/clients.h
@@ -47,6 +47,8 @@ int net_init_tap(const NetClientOptions *opts, const char *name,
 int net_init_bridge(const NetClientOptions *opts, const char *name,
                     NetClientState *peer);
 
+int net_init_l2tpv3(const NetClientOptions *opts, const char *name,
+                    NetClientState *peer);
 #ifdef CONFIG_VDE
 int net_init_vde(const NetClientOptions *opts, const char *name,
                  NetClientState *peer);
diff --git a/net/l2tpv3.c b/net/l2tpv3.c
new file mode 100644
index 0000000000..528d95b641
--- /dev/null
+++ b/net/l2tpv3.c
@@ -0,0 +1,757 @@
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ * Copyright (c) 2012-2014 Cisco Systems
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <linux/ip.h>
+#include <netdb.h>
+#include "config-host.h"
+#include "net/net.h"
+#include "clients.h"
+#include "monitor/monitor.h"
+#include "qemu-common.h"
+#include "qemu/error-report.h"
+#include "qemu/option.h"
+#include "qemu/sockets.h"
+#include "qemu/iov.h"
+#include "qemu/main-loop.h"
+
+
+/* The buffer size needs to be investigated for optimum numbers and
+ * optimum means of paging in on different systems. This size is
+ * chosen to be sufficient to accommodate one packet with some headers
+ */
+
+#define BUFFER_ALIGN sysconf(_SC_PAGESIZE)
+#define BUFFER_SIZE 2048
+#define IOVSIZE 2
+#define MAX_L2TPV3_MSGCNT 64
+#define MAX_L2TPV3_IOVCNT (MAX_L2TPV3_MSGCNT * IOVSIZE)
+
+/* Header set to 0x30000 signifies a data packet */
+
+#define L2TPV3_DATA_PACKET 0x30000
+
+/* IANA-assigned IP protocol ID for L2TPv3 */
+
+#ifndef IPPROTO_L2TP
+#define IPPROTO_L2TP 0x73
+#endif
+
+typedef struct NetL2TPV3State {
+    NetClientState nc;
+    int fd;
+
+    /*
+     * these are used for xmit - that happens packet a time
+     * and for first sign of life packet (easier to parse that once)
+     */
+
+    uint8_t *header_buf;
+    struct iovec *vec;
+
+    /*
+     * these are used for receive - try to "eat" up to 32 packets at a time
+     */
+
+    struct mmsghdr *msgvec;
+
+    /*
+     * peer address
+     */
+
+    struct sockaddr_storage *dgram_dst;
+    uint32_t dst_size;
+
+    /*
+     * L2TPv3 parameters
+     */
+
+    uint64_t rx_cookie;
+    uint64_t tx_cookie;
+    uint32_t rx_session;
+    uint32_t tx_session;
+    uint32_t header_size;
+    uint32_t counter;
+
+    /*
+    * DOS avoidance in error handling
+    */
+
+    bool header_mismatch;
+
+    /*
+     * Ring buffer handling
+     */
+
+    int queue_head;
+    int queue_tail;
+    int queue_depth;
+
+    /*
+     * Precomputed offsets
+     */
+
+    uint32_t offset;
+    uint32_t cookie_offset;
+    uint32_t counter_offset;
+    uint32_t session_offset;
+
+    /* Poll Control */
+
+    bool read_poll;
+    bool write_poll;
+
+    /* Flags */
+
+    bool ipv6;
+    bool udp;
+    bool has_counter;
+    bool pin_counter;
+    bool cookie;
+    bool cookie_is_64;
+
+} NetL2TPV3State;
+
+static int l2tpv3_can_send(void *opaque);
+static void net_l2tpv3_send(void *opaque);
+static void l2tpv3_writable(void *opaque);
+
+static void l2tpv3_update_fd_handler(NetL2TPV3State *s)
+{
+    qemu_set_fd_handler2(s->fd,
+                         s->read_poll ? l2tpv3_can_send : NULL,
+                         s->read_poll ? net_l2tpv3_send     : NULL,
+                         s->write_poll ? l2tpv3_writable : NULL,
+                         s);
+}
+
+static void l2tpv3_read_poll(NetL2TPV3State *s, bool enable)
+{
+    if (s->read_poll != enable) {
+        s->read_poll = enable;
+        l2tpv3_update_fd_handler(s);
+    }
+}
+
+static void l2tpv3_write_poll(NetL2TPV3State *s, bool enable)
+{
+    if (s->write_poll != enable) {
+        s->write_poll = enable;
+        l2tpv3_update_fd_handler(s);
+    }
+}
+
+static void l2tpv3_writable(void *opaque)
+{
+    NetL2TPV3State *s = opaque;
+    l2tpv3_write_poll(s, false);
+    qemu_flush_queued_packets(&s->nc);
+}
+
+static int l2tpv3_can_send(void *opaque)
+{
+    NetL2TPV3State *s = opaque;
+
+    return qemu_can_send_packet(&s->nc);
+}
+
+static void l2tpv3_send_completed(NetClientState *nc, ssize_t len)
+{
+    NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc);
+    l2tpv3_read_poll(s, true);
+}
+
+static void l2tpv3_poll(NetClientState *nc, bool enable)
+{
+    NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc);
+    l2tpv3_write_poll(s, enable);
+    l2tpv3_read_poll(s, enable);
+}
+
+static void l2tpv3_form_header(NetL2TPV3State *s)
+{
+    uint32_t *counter;
+
+    if (s->udp) {
+        stl_be_p((uint32_t *) s->header_buf, L2TPV3_DATA_PACKET);
+    }
+    stl_be_p(
+            (uint32_t *) (s->header_buf + s->session_offset),
+            s->tx_session
+        );
+    if (s->cookie) {
+        if (s->cookie_is_64) {
+            stq_be_p(
+                (uint64_t *)(s->header_buf + s->cookie_offset),
+                s->tx_cookie
+            );
+        } else {
+            stl_be_p(
+                (uint32_t *) (s->header_buf + s->cookie_offset),
+                s->tx_cookie
+            );
+        }
+    }
+    if (s->has_counter) {
+        counter = (uint32_t *)(s->header_buf + s->counter_offset);
+        if (s->pin_counter) {
+            *counter = 0;
+        } else {
+            stl_be_p(counter, ++s->counter);
+        }
+    }
+}
+
+static ssize_t net_l2tpv3_receive_dgram_iov(NetClientState *nc,
+                    const struct iovec *iov,
+                    int iovcnt)
+{
+    NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc);
+
+    struct msghdr message;
+    int ret;
+
+    if (iovcnt > MAX_L2TPV3_IOVCNT - 1) {
+        error_report(
+            "iovec too long %d > %d, change l2tpv3.h",
+            iovcnt, MAX_L2TPV3_IOVCNT
+        );
+        return -1;
+    }
+    l2tpv3_form_header(s);
+    memcpy(s->vec + 1, iov, iovcnt * sizeof(struct iovec));
+    s->vec->iov_base = s->header_buf;
+    s->vec->iov_len = s->offset;
+    message.msg_name = s->dgram_dst;
+    message.msg_namelen = s->dst_size;
+    message.msg_iov = s->vec;
+    message.msg_iovlen = iovcnt + 1;
+    message.msg_control = NULL;
+    message.msg_controllen = 0;
+    message.msg_flags = 0;
+    do {
+        ret = sendmsg(s->fd, &message, 0);
+    } while ((ret == -1) && (errno == EINTR));
+    if (ret > 0) {
+        ret -= s->offset;
+    } else if (ret == 0) {
+        /* belt and braces - should not occur on DGRAM
+        * we should get an error and never a 0 send
+        */
+        ret = iov_size(iov, iovcnt);
+    } else {
+        /* signal upper layer that socket buffer is full */
+        ret = -errno;
+        if (ret == -EAGAIN || ret == -ENOBUFS) {
+            l2tpv3_write_poll(s, true);
+            ret = 0;
+        }
+    }
+    return ret;
+}
+
+static ssize_t net_l2tpv3_receive_dgram(NetClientState *nc,
+                    const uint8_t *buf,
+                    size_t size)
+{
+    NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc);
+
+    struct iovec *vec;
+    struct msghdr message;
+    ssize_t ret = 0;
+
+    l2tpv3_form_header(s);
+    vec = s->vec;
+    vec->iov_base = s->header_buf;
+    vec->iov_len = s->offset;
+    vec++;
+    vec->iov_base = (void *) buf;
+    vec->iov_len = size;
+    message.msg_name = s->dgram_dst;
+    message.msg_namelen = s->dst_size;
+    message.msg_iov = s->vec;
+    message.msg_iovlen = 2;
+    message.msg_control = NULL;
+    message.msg_controllen = 0;
+    message.msg_flags = 0;
+    do {
+        ret = sendmsg(s->fd, &message, 0);
+    } while ((ret == -1) && (errno == EINTR));
+    if (ret > 0) {
+        ret -= s->offset;
+    } else if (ret == 0) {
+        /* belt and braces - should not occur on DGRAM
+        * we should get an error and never a 0 send
+        */
+        ret = size;
+    } else {
+        ret = -errno;
+        if (ret == -EAGAIN || ret == -ENOBUFS) {
+            /* signal upper layer that socket buffer is full */
+            l2tpv3_write_poll(s, true);
+            ret = 0;
+        }
+    }
+    return ret;
+}
+
+static int l2tpv3_verify_header(NetL2TPV3State *s, uint8_t *buf)
+{
+
+    uint32_t *session;
+    uint64_t cookie;
+
+    if ((!s->udp) && (!s->ipv6)) {
+        buf += sizeof(struct iphdr) /* fix for ipv4 raw */;
+    }
+
+    /* we do not do a strict check for "data" packets as per
+    * the RFC spec because the pure IP spec does not have
+    * that anyway.
+    */
+
+    if (s->cookie) {
+        if (s->cookie_is_64) {
+            cookie = ldq_be_p(buf + s->cookie_offset);
+        } else {
+            cookie = ldl_be_p(buf + s->cookie_offset);
+        }
+        if (cookie != s->rx_cookie) {
+            if (!s->header_mismatch) {
+                error_report("unknown cookie id");
+            }
+            return -1;
+        }
+    }
+    session = (uint32_t *) (buf + s->session_offset);
+    if (ldl_be_p(session) != s->rx_session) {
+        if (!s->header_mismatch) {
+            error_report("session mismatch");
+        }
+        return -1;
+    }
+    return 0;
+}
+
+static void net_l2tpv3_process_queue(NetL2TPV3State *s)
+{
+    int size = 0;
+    struct iovec *vec;
+    bool bad_read;
+    int data_size;
+    struct mmsghdr *msgvec;
+
+    /* go into ring mode only if there is a "pending" tail */
+    if (s->queue_depth > 0) {
+        do {
+            msgvec = s->msgvec + s->queue_tail;
+            if (msgvec->msg_len > 0) {
+                data_size = msgvec->msg_len - s->header_size;
+                vec = msgvec->msg_hdr.msg_iov;
+                if ((data_size > 0) &&
+                    (l2tpv3_verify_header(s, vec->iov_base) == 0)) {
+                    vec++;
+                    /* Use the legacy delivery for now, we will
+                     * switch to using our own ring as a queueing mechanism
+                     * at a later date
+                     */
+                    size = qemu_send_packet_async(
+                            &s->nc,
+                            vec->iov_base,
+                            data_size,
+                            l2tpv3_send_completed
+                        );
+                    if (size == 0) {
+                        l2tpv3_read_poll(s, false);
+                    }
+                    bad_read = false;
+                } else {
+                    bad_read = true;
+                    if (!s->header_mismatch) {
+                        /* report error only once */
+                        error_report("l2tpv3 header verification failed");
+                        s->header_mismatch = true;
+                    }
+                }
+            } else {
+                bad_read = true;
+            }
+            s->queue_tail = (s->queue_tail + 1) % MAX_L2TPV3_MSGCNT;
+            s->queue_depth--;
+        } while (
+                (s->queue_depth > 0) &&
+                 qemu_can_send_packet(&s->nc) &&
+                ((size > 0) || bad_read)
+            );
+    }
+}
+
+static void net_l2tpv3_send(void *opaque)
+{
+    NetL2TPV3State *s = opaque;
+    int target_count, count;
+    struct mmsghdr *msgvec;
+
+    /* go into ring mode only if there is a "pending" tail */
+
+    if (s->queue_depth) {
+
+        /* The ring buffer we use has variable intake
+         * count of how much we can read varies - adjust accordingly
+         */
+
+        target_count = MAX_L2TPV3_MSGCNT - s->queue_depth;
+
+        /* Ensure we do not overrun the ring when we have
+         * a lot of enqueued packets
+         */
+
+        if (s->queue_head + target_count > MAX_L2TPV3_MSGCNT) {
+            target_count = MAX_L2TPV3_MSGCNT - s->queue_head;
+        }
+    } else {
+
+        /* we do not have any pending packets - we can use
+        * the whole message vector linearly instead of using
+        * it as a ring
+        */
+
+        s->queue_head = 0;
+        s->queue_tail = 0;
+        target_count = MAX_L2TPV3_MSGCNT;
+    }
+
+    msgvec = s->msgvec + s->queue_head;
+    if (target_count > 0) {
+        do {
+            count = recvmmsg(
+                s->fd,
+                msgvec,
+                target_count, MSG_DONTWAIT, NULL);
+        } while ((count == -1) && (errno == EINTR));
+        if (count < 0) {
+            /* Recv error - we still need to flush packets here,
+             * (re)set queue head to current position
+             */
+            count = 0;
+        }
+        s->queue_head = (s->queue_head + count) % MAX_L2TPV3_MSGCNT;
+        s->queue_depth += count;
+    }
+    net_l2tpv3_process_queue(s);
+}
+
+static void destroy_vector(struct mmsghdr *msgvec, int count, int iovcount)
+{
+    int i, j;
+    struct iovec *iov;
+    struct mmsghdr *cleanup = msgvec;
+    if (cleanup) {
+        for (i = 0; i < count; i++) {
+            if (cleanup->msg_hdr.msg_iov) {
+                iov = cleanup->msg_hdr.msg_iov;
+                for (j = 0; j < iovcount; j++) {
+                    g_free(iov->iov_base);
+                    iov++;
+                }
+                g_free(cleanup->msg_hdr.msg_iov);
+            }
+            cleanup++;
+        }
+        g_free(msgvec);
+    }
+}
+
+static struct mmsghdr *build_l2tpv3_vector(NetL2TPV3State *s, int count)
+{
+    int i;
+    struct iovec *iov;
+    struct mmsghdr *msgvec, *result;
+
+    msgvec = g_malloc(sizeof(struct mmsghdr) * count);
+    result = msgvec;
+    for (i = 0; i < count ; i++) {
+        msgvec->msg_hdr.msg_name = NULL;
+        msgvec->msg_hdr.msg_namelen = 0;
+        iov =  g_malloc(sizeof(struct iovec) * IOVSIZE);
+        msgvec->msg_hdr.msg_iov = iov;
+        iov->iov_base = g_malloc(s->header_size);
+        iov->iov_len = s->header_size;
+        iov++ ;
+        iov->iov_base = qemu_memalign(BUFFER_ALIGN, BUFFER_SIZE);
+        iov->iov_len = BUFFER_SIZE;
+        msgvec->msg_hdr.msg_iovlen = 2;
+        msgvec->msg_hdr.msg_control = NULL;
+        msgvec->msg_hdr.msg_controllen = 0;
+        msgvec->msg_hdr.msg_flags = 0;
+        msgvec++;
+    }
+    return result;
+}
+
+static void net_l2tpv3_cleanup(NetClientState *nc)
+{
+    NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc);
+    qemu_purge_queued_packets(nc);
+    l2tpv3_read_poll(s, false);
+    l2tpv3_write_poll(s, false);
+    if (s->fd > 0) {
+        close(s->fd);
+    }
+    destroy_vector(s->msgvec, MAX_L2TPV3_MSGCNT, IOVSIZE);
+    g_free(s->vec);
+    g_free(s->header_buf);
+    g_free(s->dgram_dst);
+}
+
+static NetClientInfo net_l2tpv3_info = {
+    .type = NET_CLIENT_OPTIONS_KIND_L2TPV3,
+    .size = sizeof(NetL2TPV3State),
+    .receive = net_l2tpv3_receive_dgram,
+    .receive_iov = net_l2tpv3_receive_dgram_iov,
+    .poll = l2tpv3_poll,
+    .cleanup = net_l2tpv3_cleanup,
+};
+
+int net_init_l2tpv3(const NetClientOptions *opts,
+                    const char *name,
+                    NetClientState *peer)
+{
+
+
+    const NetdevL2TPv3Options *l2tpv3;
+    NetL2TPV3State *s;
+    NetClientState *nc;
+    int fd = -1, gairet;
+    struct addrinfo hints;
+    struct addrinfo *result = NULL;
+    char *srcport, *dstport;
+
+    nc = qemu_new_net_client(&net_l2tpv3_info, peer, "l2tpv3", name);
+
+    s = DO_UPCAST(NetL2TPV3State, nc, nc);
+
+    s->queue_head = 0;
+    s->queue_tail = 0;
+    s->header_mismatch = false;
+
+    assert(opts->kind == NET_CLIENT_OPTIONS_KIND_L2TPV3);
+    l2tpv3 = opts->l2tpv3;
+
+    if (l2tpv3->has_ipv6 && l2tpv3->ipv6) {
+        s->ipv6 = l2tpv3->ipv6;
+    } else {
+        s->ipv6 = false;
+    }
+
+    if ((l2tpv3->has_offset) && (l2tpv3->offset > 256)) {
+        error_report("l2tpv3_open : offset must be less than 256 bytes");
+        goto outerr;
+    }
+
+    if (l2tpv3->has_rxcookie || l2tpv3->has_txcookie) {
+        if (l2tpv3->has_rxcookie && l2tpv3->has_txcookie) {
+            s->cookie = true;
+        } else {
+            goto outerr;
+        }
+    } else {
+        s->cookie = false;
+    }
+
+    if (l2tpv3->has_cookie64 || l2tpv3->cookie64) {
+        s->cookie_is_64  = true;
+    } else {
+        s->cookie_is_64  = false;
+    }
+
+    if (l2tpv3->has_udp && l2tpv3->udp) {
+        s->udp = true;
+        if (!(l2tpv3->has_srcport && l2tpv3->has_dstport)) {
+            error_report("l2tpv3_open : need both src and dst port for udp");
+            goto outerr;
+        } else {
+            srcport = l2tpv3->srcport;
+            dstport = l2tpv3->dstport;
+        }
+    } else {
+        s->udp = false;
+        srcport = NULL;
+        dstport = NULL;
+    }
+
+
+    s->offset = 4;
+    s->session_offset = 0;
+    s->cookie_offset = 4;
+    s->counter_offset = 4;
+
+    s->tx_session = l2tpv3->txsession;
+    if (l2tpv3->has_rxsession) {
+        s->rx_session = l2tpv3->rxsession;
+    } else {
+        s->rx_session = s->tx_session;
+    }
+
+    if (s->cookie) {
+        s->rx_cookie = l2tpv3->rxcookie;
+        s->tx_cookie = l2tpv3->txcookie;
+        if (s->cookie_is_64 == true) {
+            /* 64 bit cookie */
+            s->offset += 8;
+            s->counter_offset += 8;
+        } else {
+            /* 32 bit cookie */
+            s->offset += 4;
+            s->counter_offset += 4;
+        }
+    }
+
+    memset(&hints, 0, sizeof(hints));
+
+    if (s->ipv6) {
+        hints.ai_family = AF_INET6;
+    } else {
+        hints.ai_family = AF_INET;
+    }
+    if (s->udp) {
+        hints.ai_socktype = SOCK_DGRAM;
+        hints.ai_protocol = 0;
+        s->offset += 4;
+        s->counter_offset += 4;
+        s->session_offset += 4;
+        s->cookie_offset += 4;
+    } else {
+        hints.ai_socktype = SOCK_RAW;
+        hints.ai_protocol = IPPROTO_L2TP;
+    }
+
+    gairet = getaddrinfo(l2tpv3->src, srcport, &hints, &result);
+
+    if ((gairet != 0) || (result == NULL)) {
+        error_report(
+            "l2tpv3_open : could not resolve src, errno = %s",
+            gai_strerror(gairet)
+        );
+        goto outerr;
+    }
+    fd = socket(result->ai_family, result->ai_socktype, result->ai_protocol);
+    if (fd == -1) {
+        fd = -errno;
+        error_report("l2tpv3_open : socket creation failed, errno = %d", -fd);
+        freeaddrinfo(result);
+        goto outerr;
+    }
+    if (bind(fd, (struct sockaddr *) result->ai_addr, result->ai_addrlen)) {
+        error_report("l2tpv3_open :  could not bind socket err=%i", errno);
+        goto outerr;
+    }
+    if (result) {
+        freeaddrinfo(result);
+    }
+
+    memset(&hints, 0, sizeof(hints));
+
+    if (s->ipv6) {
+        hints.ai_family = AF_INET6;
+    } else {
+        hints.ai_family = AF_INET;
+    }
+    if (s->udp) {
+        hints.ai_socktype = SOCK_DGRAM;
+        hints.ai_protocol = 0;
+    } else {
+        hints.ai_socktype = SOCK_RAW;
+        hints.ai_protocol = IPPROTO_L2TP;
+    }
+
+    result = NULL;
+    gairet = getaddrinfo(l2tpv3->dst, dstport, &hints, &result);
+    if ((gairet != 0) || (result == NULL)) {
+        error_report(
+            "l2tpv3_open : could not resolve dst, error = %s",
+            gai_strerror(gairet)
+        );
+        goto outerr;
+    }
+
+    s->dgram_dst = g_malloc(sizeof(struct sockaddr_storage));
+    memset(s->dgram_dst, '\0' , sizeof(struct sockaddr_storage));
+    memcpy(s->dgram_dst, result->ai_addr, result->ai_addrlen);
+    s->dst_size = result->ai_addrlen;
+
+    if (result) {
+        freeaddrinfo(result);
+    }
+
+    if (l2tpv3->has_counter && l2tpv3->counter) {
+        s->has_counter = true;
+        s->offset += 4;
+    } else {
+        s->has_counter = false;
+    }
+
+    if (l2tpv3->has_pincounter && l2tpv3->pincounter) {
+        s->has_counter = true;  /* pin counter implies that there is counter */
+        s->pin_counter = true;
+    } else {
+        s->pin_counter = false;
+    }
+
+    if (l2tpv3->has_offset) {
+        /* extra offset */
+        s->offset += l2tpv3->offset;
+    }
+
+    if ((s->ipv6) || (s->udp)) {
+        s->header_size = s->offset;
+    } else {
+        s->header_size = s->offset + sizeof(struct iphdr);
+    }
+
+    s->msgvec = build_l2tpv3_vector(s, MAX_L2TPV3_MSGCNT);
+    s->vec = g_malloc(sizeof(struct iovec) * MAX_L2TPV3_IOVCNT);
+    s->header_buf = g_malloc(s->header_size);
+
+    qemu_set_nonblock(fd);
+
+    s->fd = fd;
+    s->counter = 0;
+
+    l2tpv3_read_poll(s, true);
+
+    snprintf(s->nc.info_str, sizeof(s->nc.info_str),
+             "l2tpv3: connected");
+    return 0;
+outerr:
+    qemu_del_net_client(nc);
+    if (fd > 0) {
+        close(fd);
+    }
+    if (result) {
+        freeaddrinfo(result);
+    }
+    return -1;
+}
+
diff --git a/net/net.c b/net/net.c
index 3dac29b844..0869c60bee 100644
--- a/net/net.c
+++ b/net/net.c
@@ -250,7 +250,7 @@ NICState *qemu_new_nic(NetClientInfo *info,
 {
     NetClientState **peers = conf->peers.ncs;
     NICState *nic;
-    int i, queues = MAX(1, conf->queues);
+    int i, queues = MAX(1, conf->peers.queues);
 
     assert(info->type == NET_CLIENT_OPTIONS_KIND_NIC);
     assert(info->size >= sizeof(NICState));
@@ -363,7 +363,7 @@ void qemu_del_net_client(NetClientState *nc)
 
 void qemu_del_nic(NICState *nic)
 {
-    int i, queues = MAX(nic->conf->queues, 1);
+    int i, queues = MAX(nic->conf->peers.queues, 1);
 
     /* If this is a peer NIC and peer has already been deleted, free it now. */
     if (nic->peer_deleted) {
@@ -806,6 +806,9 @@ static int (* const net_client_init_fun[NET_CLIENT_OPTIONS_KIND_MAX])(
 #ifdef CONFIG_VHOST_NET_USED
         [NET_CLIENT_OPTIONS_KIND_VHOST_USER] = net_init_vhost_user,
 #endif
+#ifdef CONFIG_LINUX
+        [NET_CLIENT_OPTIONS_KIND_L2TPV3]    = net_init_l2tpv3,
+#endif
 };
 
 
@@ -842,6 +845,9 @@ static int net_client_init1(const void *object, int is_netdev, Error **errp)
 #ifdef CONFIG_VHOST_NET_USED
         case NET_CLIENT_OPTIONS_KIND_VHOST_USER:
 #endif
+#ifdef CONFIG_LINUX
+        case NET_CLIENT_OPTIONS_KIND_L2TPV3:
+#endif
             break;
 
         default:
diff --git a/pc-bios/s390-ccw.img b/pc-bios/s390-ccw.img
index 1c7f7640fc..603e19e003 100644
--- a/pc-bios/s390-ccw.img
+++ b/pc-bios/s390-ccw.img
Binary files differdiff --git a/pc-bios/s390-ccw/bootmap.c b/pc-bios/s390-ccw/bootmap.c
index 5ee3fcb38f..fa54abb4d3 100644
--- a/pc-bios/s390-ccw/bootmap.c
+++ b/pc-bios/s390-ccw/bootmap.c
@@ -9,8 +9,12 @@
  */
 
 #include "s390-ccw.h"
+#include "bootmap.h"
+#include "virtio.h"
 
-// #define DEBUG_FALLBACK
+#ifdef DEBUG
+/* #define DEBUG_FALLBACK */
+#endif
 
 #ifdef DEBUG_FALLBACK
 #define dputs(txt) \
@@ -20,43 +24,8 @@
     do { } while (0)
 #endif
 
-struct scsi_blockptr {
-    uint64_t blockno;
-    uint16_t size;
-    uint16_t blockct;
-    uint8_t reserved[4];
-} __attribute__ ((packed));
-
-struct component_entry {
-    struct scsi_blockptr data;
-    uint8_t pad[7];
-    uint8_t component_type;
-    uint64_t load_address;
-} __attribute((packed));
-
-struct component_header {
-    uint8_t magic[4];
-    uint8_t type;
-    uint8_t reserved[27];
-} __attribute((packed));
-
-struct mbr {
-    uint8_t magic[4];
-    uint32_t version_id;
-    uint8_t reserved[8];
-    struct scsi_blockptr blockptr;
-} __attribute__ ((packed));
-
-#define ZIPL_MAGIC			"zIPL"
-
-#define ZIPL_COMP_HEADER_IPL		0x00
-#define ZIPL_COMP_HEADER_DUMP		0x01
-
-#define ZIPL_COMP_ENTRY_LOAD		0x02
-#define ZIPL_COMP_ENTRY_EXEC		0x01
-
 /* Scratch space */
-static uint8_t sec[SECTOR_SIZE] __attribute__((__aligned__(SECTOR_SIZE)));
+static uint8_t sec[MAX_SECTOR_SIZE*4] __attribute__((__aligned__(PAGE_SIZE)));
 
 typedef struct ResetInfo {
     uint32_t ipl_mask;
@@ -104,43 +73,243 @@ static void jump_to_IPL_code(uint64_t address)
     virtio_panic("\n! IPL returns !\n");
 }
 
-/* Check for ZIPL magic. Returns 0 if not matched. */
-static int zipl_magic(uint8_t *ptr)
+/***********************************************************************
+ * IPL an ECKD DASD (CDL or LDL/CMS format)
+ */
+
+static unsigned char _bprs[8*1024]; /* guessed "max" ECKD sector size */
+const int max_bprs_entries = sizeof(_bprs) / sizeof(ExtEckdBlockPtr);
+
+static inline void verify_boot_info(BootInfo *bip)
+{
+    IPL_assert(magic_match(bip->magic, ZIPL_MAGIC), "No zIPL magic");
+    IPL_assert(bip->version == BOOT_INFO_VERSION, "Wrong zIPL version");
+    IPL_assert(bip->bp_type == BOOT_INFO_BP_TYPE_IPL, "DASD is not for IPL");
+    IPL_assert(bip->dev_type == BOOT_INFO_DEV_TYPE_ECKD, "DASD is not ECKD");
+    IPL_assert(bip->flags == BOOT_INFO_FLAGS_ARCH, "Not for this arch");
+    IPL_assert(block_size_ok(bip->bp.ipl.bm_ptr.eckd.bptr.size),
+               "Bad block size in zIPL section of the 1st record.");
+}
+
+static bool eckd_valid_address(BootMapPointer *p)
 {
-    uint32_t *p = (void*)ptr;
-    uint32_t *z = (void*)ZIPL_MAGIC;
+    const uint64_t cylinder = p->eckd.cylinder
+                            + ((p->eckd.head & 0xfff0) << 12);
+    const uint64_t head = p->eckd.head & 0x000f;
+
+    if (head >= virtio_get_heads()
+        ||  p->eckd.sector > virtio_get_sectors()
+        ||  p->eckd.sector <= 0) {
+        return false;
+    }
 
-    if (*p != *z) {
-        debug_print_int("invalid magic", *p);
-        virtio_panic("invalid magic");
+    if (!virtio_guessed_disk_nature() && cylinder >= virtio_get_cylinders()) {
+        return false;
     }
 
-    return 1;
+    return true;
 }
 
-#define FREE_SPACE_FILLER '\xAA'
+static block_number_t eckd_block_num(BootMapPointer *p)
+{
+    const uint64_t sectors = virtio_get_sectors();
+    const uint64_t heads = virtio_get_heads();
+    const uint64_t cylinder = p->eckd.cylinder
+                            + ((p->eckd.head & 0xfff0) << 12);
+    const uint64_t head = p->eckd.head & 0x000f;
+    const block_number_t block = sectors * heads * cylinder
+                               + sectors * head
+                               + p->eckd.sector
+                               - 1; /* block nr starts with zero */
+    return block;
+}
 
-static inline bool unused_space(const void *p, unsigned int size)
+static block_number_t load_eckd_segments(block_number_t blk, uint64_t *address)
 {
-    int i;
-    const unsigned char *m = p;
+    block_number_t block_nr;
+    int j, rc;
+    BootMapPointer *bprs = (void *)_bprs;
+    bool more_data;
+
+    memset(_bprs, FREE_SPACE_FILLER, sizeof(_bprs));
+    read_block(blk, bprs, "BPRS read failed");
+
+    do {
+        more_data = false;
+        for (j = 0;; j++) {
+            block_nr = eckd_block_num((void *)&(bprs[j].xeckd));
+            if (is_null_block_number(block_nr)) { /* end of chunk */
+                break;
+            }
+
+            /* we need the updated blockno for the next indirect entry
+             * in the chain, but don't want to advance address
+             */
+            if (j == (max_bprs_entries - 1)) {
+                break;
+            }
+
+            IPL_assert(block_size_ok(bprs[j].xeckd.bptr.size),
+                       "bad chunk block size");
+            IPL_assert(eckd_valid_address(&bprs[j]), "bad chunk ECKD addr");
+
+            if ((bprs[j].xeckd.bptr.count == 0) && unused_space(&(bprs[j+1]),
+                sizeof(EckdBlockPtr))) {
+                /* This is a "continue" pointer.
+                 * This ptr should be the last one in the current
+                 * script section.
+                 * I.e. the next ptr must point to the unused memory area
+                 */
+                memset(_bprs, FREE_SPACE_FILLER, sizeof(_bprs));
+                read_block(block_nr, bprs, "BPRS continuation read failed");
+                more_data = true;
+                break;
+            }
 
-    for (i = 0; i < size; i++) {
-        if (m[i] != FREE_SPACE_FILLER) {
-            return false;
+            /* Load (count+1) blocks of code at (block_nr)
+             * to memory (address).
+             */
+            rc = virtio_read_many(block_nr, (void *)(*address),
+                                  bprs[j].xeckd.bptr.count+1);
+            IPL_assert(rc == 0, "code chunk read failed");
+
+            *address += (bprs[j].xeckd.bptr.count+1) * virtio_get_block_size();
         }
+    } while (more_data);
+    return block_nr;
+}
+
+static void run_eckd_boot_script(block_number_t mbr_block_nr)
+{
+    int i;
+    block_number_t block_nr;
+    uint64_t address;
+    ScsiMbr *scsi_mbr = (void *)sec;
+    BootMapScript *bms = (void *)sec;
+
+    memset(sec, FREE_SPACE_FILLER, sizeof(sec));
+    read_block(mbr_block_nr, sec, "Cannot read MBR");
+
+    block_nr = eckd_block_num((void *)&(scsi_mbr->blockptr));
+
+    memset(sec, FREE_SPACE_FILLER, sizeof(sec));
+    read_block(block_nr, sec, "Cannot read Boot Map Script");
+
+    for (i = 0; bms->entry[i].type == BOOT_SCRIPT_LOAD; i++) {
+        address = bms->entry[i].address.load_address;
+        block_nr = eckd_block_num(&(bms->entry[i].blkptr));
+
+        do {
+            block_nr = load_eckd_segments(block_nr, &address);
+        } while (block_nr != -1);
+    }
+
+    IPL_assert(bms->entry[i].type == BOOT_SCRIPT_EXEC,
+               "Unknown script entry type");
+    jump_to_IPL_code(bms->entry[i].address.load_address); /* no return */
+}
+
+static void ipl_eckd_cdl(void)
+{
+    XEckdMbr *mbr;
+    Ipl2 *ipl2 = (void *)sec;
+    IplVolumeLabel *vlbl = (void *)sec;
+    block_number_t block_nr;
+
+    /* we have just read the block #0 and recognized it as "IPL1" */
+    sclp_print("CDL\n");
+
+    memset(sec, FREE_SPACE_FILLER, sizeof(sec));
+    read_block(1, ipl2, "Cannot read IPL2 record at block 1");
+    IPL_assert(magic_match(ipl2, IPL2_MAGIC), "No IPL2 record");
+
+    mbr = &ipl2->u.x.mbr;
+    IPL_assert(magic_match(mbr, ZIPL_MAGIC), "No zIPL section in IPL2 record.");
+    IPL_assert(block_size_ok(mbr->blockptr.xeckd.bptr.size),
+               "Bad block size in zIPL section of IPL2 record.");
+    IPL_assert(mbr->dev_type == DEV_TYPE_ECKD,
+               "Non-ECKD device type in zIPL section of IPL2 record.");
+
+    /* save pointer to Boot Script */
+    block_nr = eckd_block_num((void *)&(mbr->blockptr));
+
+    memset(sec, FREE_SPACE_FILLER, sizeof(sec));
+    read_block(2, vlbl, "Cannot read Volume Label at block 2");
+    IPL_assert(magic_match(vlbl->key, VOL1_MAGIC),
+               "Invalid magic of volume label block");
+    IPL_assert(magic_match(vlbl->f.key, VOL1_MAGIC),
+               "Invalid magic of volser block");
+    print_volser(vlbl->f.volser);
+
+    run_eckd_boot_script(block_nr);
+    /* no return */
+}
+
+static void ipl_eckd_ldl(ECKD_IPL_mode_t mode)
+{
+    LDL_VTOC *vlbl = (void *)sec; /* already read, 3rd block */
+    char msg[4] = { '?', '.', '\n', '\0' };
+    block_number_t block_nr;
+    BootInfo *bip;
+
+    sclp_print((mode == ECKD_CMS) ? "CMS" : "LDL");
+    sclp_print(" version ");
+    switch (vlbl->LDL_version) {
+    case LDL1_VERSION:
+        msg[0] = '1';
+        break;
+    case LDL2_VERSION:
+        msg[0] = '2';
+        break;
+    default:
+        msg[0] = vlbl->LDL_version;
+        msg[0] &= 0x0f; /* convert EBCDIC   */
+        msg[0] |= 0x30; /* to ASCII (digit) */
+        msg[1] = '?';
+        break;
+    }
+    sclp_print(msg);
+    print_volser(vlbl->volser);
+
+    /* DO NOT read BootMap pointer (only one, xECKD) at block #2 */
+
+    memset(sec, FREE_SPACE_FILLER, sizeof(sec));
+    read_block(0, sec, "Cannot read block 0");
+    bip = (void *)(sec + 0x70); /* "boot info" is "eckd mbr" for LDL */
+    verify_boot_info(bip);
+
+    block_nr = eckd_block_num((void *)&(bip->bp.ipl.bm_ptr.eckd.bptr));
+    run_eckd_boot_script(block_nr);
+    /* no return */
+}
+
+static void ipl_eckd(ECKD_IPL_mode_t mode)
+{
+    switch (mode) {
+    case ECKD_CDL:
+        ipl_eckd_cdl(); /* no return */
+    case ECKD_CMS:
+    case ECKD_LDL:
+        ipl_eckd_ldl(mode); /* no return */
+    default:
+        virtio_panic("\n! Unknown ECKD IPL mode !\n");
     }
-    return true;
 }
 
-static int zipl_load_segment(struct component_entry *entry)
+/***********************************************************************
+ * IPL a SCSI disk
+ */
+
+static void zipl_load_segment(ComponentEntry *entry)
 {
-    const int max_entries = (SECTOR_SIZE / sizeof(struct scsi_blockptr));
-    struct scsi_blockptr *bprs = (void*)sec;
+    const int max_entries = (MAX_SECTOR_SIZE / sizeof(ScsiBlockPtr));
+    ScsiBlockPtr *bprs = (void *)sec;
     const int bprs_size = sizeof(sec);
-    uint64_t blockno;
-    long address;
+    block_number_t blockno;
+    uint64_t address;
     int i;
+    char err_msg[] = "zIPL failed to read BPRS at 0xZZZZZZZZZZZZZZZZ";
+    char *blk_no = &err_msg[30]; /* where to print blockno in (those ZZs) */
 
     blockno = entry->data.blockno;
     address = entry->load_address;
@@ -150,25 +319,25 @@ static int zipl_load_segment(struct component_entry *entry)
 
     do {
         memset(bprs, FREE_SPACE_FILLER, bprs_size);
-        if (virtio_read(blockno, (uint8_t *)bprs)) {
-            debug_print_int("failed reading bprs at", blockno);
-            goto fail;
-        }
+        fill_hex_val(blk_no, &blockno, sizeof(blockno));
+        read_block(blockno, bprs, err_msg);
 
         for (i = 0;; i++) {
-            u64 *cur_desc = (void*)&bprs[i];
+            uint64_t *cur_desc = (void *)&bprs[i];
 
             blockno = bprs[i].blockno;
-            if (!blockno)
+            if (!blockno) {
                 break;
+            }
 
             /* we need the updated blockno for the next indirect entry in the
                chain, but don't want to advance address */
-            if (i == (max_entries - 1))
+            if (i == (max_entries - 1)) {
                 break;
+            }
 
             if (bprs[i].blockct == 0 && unused_space(&bprs[i + 1],
-                sizeof(struct scsi_blockptr))) {
+                sizeof(ScsiBlockPtr))) {
                 /* This is a "continue" pointer.
                  * This ptr is the last one in the current script section.
                  * I.e. the next ptr must point to the unused memory area.
@@ -178,102 +347,66 @@ static int zipl_load_segment(struct component_entry *entry)
                 break;
             }
             address = virtio_load_direct(cur_desc[0], cur_desc[1], 0,
-                                         (void*)address);
-            if (address == -1)
-                goto fail;
+                                         (void *)address);
+            IPL_assert(address != -1, "zIPL load segment failed");
         }
     } while (blockno);
-
-    return 0;
-
-fail:
-    sclp_print("failed loading segment\n");
-    return -1;
 }
 
 /* Run a zipl program */
-static int zipl_run(struct scsi_blockptr *pte)
+static void zipl_run(ScsiBlockPtr *pte)
 {
-    struct component_header *header;
-    struct component_entry *entry;
-    uint8_t tmp_sec[SECTOR_SIZE];
+    ComponentHeader *header;
+    ComponentEntry *entry;
+    uint8_t tmp_sec[MAX_SECTOR_SIZE];
 
-    virtio_read(pte->blockno, tmp_sec);
-    header = (struct component_header *)tmp_sec;
+    read_block(pte->blockno, tmp_sec, "Cannot read header");
+    header = (ComponentHeader *)tmp_sec;
 
-    if (!zipl_magic(tmp_sec)) {
-        goto fail;
-    }
-
-    if (header->type != ZIPL_COMP_HEADER_IPL) {
-        goto fail;
-    }
+    IPL_assert(magic_match(tmp_sec, ZIPL_MAGIC), "No zIPL magic");
+    IPL_assert(header->type == ZIPL_COMP_HEADER_IPL, "Bad header type");
 
     dputs("start loading images\n");
 
     /* Load image(s) into RAM */
-    entry = (struct component_entry *)(&header[1]);
+    entry = (ComponentEntry *)(&header[1]);
     while (entry->component_type == ZIPL_COMP_ENTRY_LOAD) {
-        if (zipl_load_segment(entry) < 0) {
-            goto fail;
-        }
+        zipl_load_segment(entry);
 
         entry++;
 
-        if ((uint8_t*)(&entry[1]) > (tmp_sec + SECTOR_SIZE)) {
-            goto fail;
-        }
+        IPL_assert((uint8_t *)(&entry[1]) <= (tmp_sec + MAX_SECTOR_SIZE),
+                   "Wrong entry value");
     }
 
-    if (entry->component_type != ZIPL_COMP_ENTRY_EXEC) {
-        goto fail;
-    }
+    IPL_assert(entry->component_type == ZIPL_COMP_ENTRY_EXEC, "No EXEC entry");
 
     /* should not return */
     jump_to_IPL_code(entry->load_address);
-
-    return 0;
-
-fail:
-    sclp_print("failed running zipl\n");
-    return -1;
 }
 
-int zipl_load(void)
+static void ipl_scsi(void)
 {
-    struct mbr *mbr = (void*)sec;
+    ScsiMbr *mbr = (void *)sec;
     uint8_t *ns, *ns_end;
     int program_table_entries = 0;
-    int pte_len = sizeof(struct scsi_blockptr);
-    struct scsi_blockptr *prog_table_entry;
-    const char *error = "";
-
-    /* Grab the MBR */
-    virtio_read(0, (void*)mbr);
-
-    dputs("checking magic\n");
+    const int pte_len = sizeof(ScsiBlockPtr);
+    ScsiBlockPtr *prog_table_entry;
 
-    if (!zipl_magic(mbr->magic)) {
-        error = "zipl_magic 1";
-        goto fail;
-    }
+    /* The 0-th block (MBR) was already read into sec[] */
 
+    sclp_print("Using SCSI scheme.\n");
     debug_print_int("program table", mbr->blockptr.blockno);
 
     /* Parse the program table */
-    if (virtio_read(mbr->blockptr.blockno, sec)) {
-        error = "virtio_read";
-        goto fail;
-    }
+    read_block(mbr->blockptr.blockno, sec,
+               "Error reading Program Table");
 
-    if (!zipl_magic(sec)) {
-        error = "zipl_magic 2";
-        goto fail;
-    }
+    IPL_assert(magic_match(sec, ZIPL_MAGIC), "No zIPL magic");
 
-    ns_end = sec + SECTOR_SIZE;
+    ns_end = sec + virtio_get_block_size();
     for (ns = (sec + pte_len); (ns + pte_len) < ns_end; ns++) {
-        prog_table_entry = (struct scsi_blockptr *)ns;
+        prog_table_entry = (ScsiBlockPtr *)ns;
         if (!prog_table_entry->blockno) {
             break;
         }
@@ -283,19 +416,55 @@ int zipl_load(void)
 
     debug_print_int("program table entries", program_table_entries);
 
-    if (!program_table_entries) {
-        goto fail;
-    }
+    IPL_assert(program_table_entries != 0, "Empty Program Table");
 
     /* Run the default entry */
 
-    prog_table_entry = (struct scsi_blockptr *)(sec + pte_len);
+    prog_table_entry = (ScsiBlockPtr *)(sec + pte_len);
 
-    return zipl_run(prog_table_entry);
+    zipl_run(prog_table_entry); /* no return */
+}
 
-fail:
-    sclp_print("failed loading zipl: ");
-    sclp_print(error);
-    sclp_print("\n");
-    return -1;
+/***********************************************************************
+ * IPL starts here
+ */
+
+void zipl_load(void)
+{
+    ScsiMbr *mbr = (void *)sec;
+    LDL_VTOC *vlbl = (void *)sec;
+
+    /* Grab the MBR */
+    memset(sec, FREE_SPACE_FILLER, sizeof(sec));
+    read_block(0, mbr, "Cannot read block 0");
+
+    dputs("checking magic\n");
+
+    if (magic_match(mbr->magic, ZIPL_MAGIC)) {
+        ipl_scsi(); /* no return */
+    }
+
+    /* We have failed to follow the SCSI scheme, so */
+    sclp_print("Using ECKD scheme.\n");
+    if (virtio_guessed_disk_nature()) {
+        sclp_print("Using guessed DASD geometry.\n");
+        virtio_assume_eckd();
+    }
+
+    if (magic_match(mbr->magic, IPL1_MAGIC)) {
+        ipl_eckd(ECKD_CDL); /* no return */
+    }
+
+    /* LDL/CMS? */
+    memset(sec, FREE_SPACE_FILLER, sizeof(sec));
+    read_block(2, vlbl, "Cannot read block 2");
+
+    if (magic_match(vlbl->magic, CMS1_MAGIC)) {
+        ipl_eckd(ECKD_CMS); /* no return */
+    }
+    if (magic_match(vlbl->magic, LNX1_MAGIC)) {
+        ipl_eckd(ECKD_LDL); /* no return */
+    }
+
+    virtio_panic("\n* invalid MBR magic *\n");
 }
diff --git a/pc-bios/s390-ccw/bootmap.h b/pc-bios/s390-ccw/bootmap.h
new file mode 100644
index 0000000000..30ef22fe61
--- /dev/null
+++ b/pc-bios/s390-ccw/bootmap.h
@@ -0,0 +1,344 @@
+/*
+ * QEMU S390 bootmap interpreter -- declarations
+ *
+ * Copyright 2014 IBM Corp.
+ * Author(s): Eugene (jno) Dvurechenski <jno@linux.vnet.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at
+ * your option) any later version. See the COPYING file in the top-level
+ * directory.
+ */
+#ifndef _PC_BIOS_S390_CCW_BOOTMAP_H
+#define _PC_BIOS_S390_CCW_BOOTMAP_H
+
+#include "s390-ccw.h"
+#include "virtio.h"
+
+typedef uint64_t block_number_t;
+#define NULL_BLOCK_NR 0xffffffffffffffff
+
+#define FREE_SPACE_FILLER '\xAA'
+
+typedef struct ScsiBlockPtr {
+    uint64_t blockno;
+    uint16_t size;
+    uint16_t blockct;
+    uint8_t reserved[4];
+} __attribute__ ((packed)) ScsiBlockPtr;
+
+typedef struct FbaBlockPtr {
+    uint32_t blockno;
+    uint16_t size;
+    uint16_t blockct;
+} __attribute__ ((packed)) FbaBlockPtr;
+
+typedef struct EckdBlockPtr {
+    uint16_t cylinder; /* cylinder/head/sector is an address of the block */
+    uint16_t head;
+    uint8_t sector;
+    uint16_t size;
+    uint8_t count; /* (size_in_blocks-1);
+                    * it's 0 for TablePtr, ScriptPtr, and SectionPtr */
+} __attribute__ ((packed)) EckdBlockPtr;
+
+typedef struct ExtEckdBlockPtr {
+    EckdBlockPtr bptr;
+    uint8_t reserved[8];
+} __attribute__ ((packed)) ExtEckdBlockPtr;
+
+typedef union BootMapPointer {
+    ScsiBlockPtr scsi;
+    FbaBlockPtr fba;
+    EckdBlockPtr eckd;
+    ExtEckdBlockPtr xeckd;
+} __attribute__ ((packed)) BootMapPointer;
+
+typedef struct ComponentEntry {
+    ScsiBlockPtr data;
+    uint8_t pad[7];
+    uint8_t component_type;
+    uint64_t load_address;
+} __attribute((packed)) ComponentEntry;
+
+typedef struct ComponentHeader {
+    uint8_t magic[4];   /* == "zIPL"                    */
+    uint8_t type;       /* == ZIPL_COMP_HEADER_*        */
+    uint8_t reserved[27];
+} __attribute((packed)) ComponentHeader;
+
+typedef struct ScsiMbr {
+    uint8_t magic[4];
+    uint32_t version_id;
+    uint8_t reserved[8];
+    ScsiBlockPtr blockptr;
+} __attribute__ ((packed)) ScsiMbr;
+
+#define ZIPL_MAGIC              "zIPL"
+#define IPL1_MAGIC "\xc9\xd7\xd3\xf1" /* == "IPL1" in EBCDIC */
+#define IPL2_MAGIC "\xc9\xd7\xd3\xf2" /* == "IPL2" in EBCDIC */
+#define VOL1_MAGIC "\xe5\xd6\xd3\xf1" /* == "VOL1" in EBCDIC */
+#define LNX1_MAGIC "\xd3\xd5\xe7\xf1" /* == "LNX1" in EBCDIC */
+#define CMS1_MAGIC "\xc3\xd4\xe2\xf1" /* == "CMS1" in EBCDIC */
+
+#define LDL1_VERSION '\x40' /* == ' ' in EBCDIC */
+#define LDL2_VERSION '\xf2' /* == '2' in EBCDIC */
+
+#define ZIPL_COMP_HEADER_IPL    0x00
+#define ZIPL_COMP_HEADER_DUMP   0x01
+
+#define ZIPL_COMP_ENTRY_LOAD    0x02
+#define ZIPL_COMP_ENTRY_EXEC    0x01
+
+typedef struct XEckdMbr {
+    uint8_t magic[4];   /* == "xIPL"        */
+    uint8_t version;
+    uint8_t bp_type;
+    uint8_t dev_type;   /* == DEV_TYPE_*    */
+#define DEV_TYPE_ECKD 0x00
+#define DEV_TYPE_FBA  0x01
+    uint8_t flags;
+    BootMapPointer blockptr;
+    uint8_t reserved[8];
+} __attribute__ ((packed)) XEckdMbr; /* see also BootInfo */
+
+typedef struct BootMapScriptEntry {
+    BootMapPointer blkptr;
+    uint8_t pad[7];
+    uint8_t type;   /* == BOOT_SCRIPT_* */
+#define BOOT_SCRIPT_EXEC 0x01
+#define BOOT_SCRIPT_LOAD 0x02
+    union {
+        uint64_t load_address;
+        uint64_t load_psw;
+    } address;
+} __attribute__ ((packed)) BootMapScriptEntry;
+
+typedef struct BootMapScriptHeader {
+    uint32_t magic;
+    uint8_t type;
+#define BOOT_SCRIPT_HDR_IPL 0x00
+    uint8_t reserved[27];
+} __attribute__ ((packed)) BootMapScriptHeader;
+
+typedef struct BootMapScript {
+    BootMapScriptHeader header;
+    BootMapScriptEntry  entry[0];
+} __attribute__ ((packed)) BootMapScript;
+
+/*
+ * These aren't real VTOCs, but referred to this way in some docs.
+ * They are "volume labels" actually.
+ *
+ * Some structures looks similar to described above, but left
+ * separate as there is no indication that they are the same.
+ * So, the value definitions are left separate too.
+ */
+typedef struct LDL_VTOC {       /* @ rec.3 cyl.0 trk.0 for ECKD */
+    char magic[4];              /* "LNX1", EBCDIC               */
+    char volser[6];             /* volser, EBCDIC               */
+    uint8_t reserved[69];       /* reserved, 0x40               */
+    uint8_t LDL_version;        /* 0x40 or 0xF2                 */
+    uint64_t formatted_blocks;  /* if LDL_version >= 0xF2       */
+} __attribute__ ((packed)) LDL_VTOC;
+
+typedef struct format_date {
+    uint8_t YY;
+    uint8_t MM;
+    uint8_t DD;
+    uint8_t hh;
+    uint8_t mm;
+    uint8_t ss;
+} __attribute__ ((packed)) format_date_t;
+
+typedef struct CMS_VTOC {       /* @ rec.3 cyl.0 trk.0 for ECKD */
+                                /* @ blk.1 (zero based) for FBA */
+    char magic[4];              /* 'CMS1', EBCDIC               */
+    char volser[6];             /* volser, EBCDIC               */
+    uint16_t version;           /* = 0                          */
+    uint32_t block_size;        /* = 512, 1024, 2048, or 4096   */
+    uint32_t disk_origin;       /* = 4 or 5                     */
+    uint32_t blocks;            /* Number of usable cyls/blocks */
+    uint32_t formatted;         /* Max number of fmtd cyls/blks */
+    uint32_t CMS_blocks;        /* disk size in CMS blocks      */
+    uint32_t CMS_used;          /* Number of CMS blocks in use  */
+    uint32_t FST_size;          /* = 64, bytes                  */
+    uint32_t FST_per_CMS_blk;   /*                              */
+    format_date_t format_date;  /* YYMMDDhhmmss as 6 bytes      */
+    uint8_t reserved1[2];       /* = 0                          */
+    uint32_t offset;            /* disk offset when reserved    */
+    uint32_t next_hole;         /* block nr                     */
+    uint32_t HBLK_hole_offset;  /* >> HBLK data of next hole    */
+    uint32_t alloc_map_usr_off; /* >> user part of Alloc map    */
+    uint8_t reserved2[4];       /* = 0                          */
+    char shared_seg_name[8];    /*                              */
+} __attribute__ ((packed)) CMS_VTOC;
+
+/* from zipl/include/boot.h */
+typedef struct BootInfoBpIpl {
+    union {
+        ExtEckdBlockPtr eckd;
+        ScsiBlockPtr linr;
+    } bm_ptr;
+    uint8_t unused[16];
+} __attribute__ ((packed)) BootInfoBpIpl;
+
+typedef struct EckdDumpParam {
+    uint32_t        start_blk;
+    uint32_t        end_blk;
+    uint16_t        blocksize;
+    uint8_t         num_heads;
+    uint8_t         bpt;
+    char            reserved[4];
+} __attribute((packed, may_alias)) EckdDumpParam;
+
+typedef struct FbaDumpParam {
+    uint64_t        start_blk;
+    uint64_t        blockct;
+} __attribute((packed)) FbaDumpParam;
+
+typedef struct BootInfoBpDump {
+    union {
+        EckdDumpParam eckd;
+        FbaDumpParam fba;
+    } param;
+    uint8_t         unused[16];
+} __attribute__ ((packed)) BootInfoBpDump;
+
+typedef struct BootInfo {          /* @ 0x70, record #0    */
+    unsigned char magic[4]; /* = 'zIPL', ASCII      */
+    uint8_t version;        /* = 1                  */
+#define BOOT_INFO_VERSION               1
+    uint8_t bp_type;        /* = 0                  */
+#define BOOT_INFO_BP_TYPE_IPL           0x00
+#define BOOT_INFO_BP_TYPE_DUMP          0x01
+    uint8_t dev_type;       /* = 0                  */
+#define BOOT_INFO_DEV_TYPE_ECKD         0x00
+#define BOOT_INFO_DEV_TYPE_FBA          0x01
+    uint8_t flags;          /* = 1                  */
+#ifdef __s390x__
+#define BOOT_INFO_FLAGS_ARCH            0x01
+#else
+#define BOOT_INFO_FLAGS_ARCH            0x00
+#endif
+    union {
+        BootInfoBpDump dump;
+        BootInfoBpIpl ipl;
+    } bp;
+} __attribute__ ((packed)) BootInfo; /* see also XEckdMbr   */
+
+typedef struct Ipl1 {
+    unsigned char key[4]; /* == "IPL1" */
+    unsigned char data[24];
+} __attribute__((packed)) Ipl1;
+
+typedef struct Ipl2 {
+    unsigned char key[4]; /* == "IPL2" */
+    union {
+        unsigned char data[144];
+        struct {
+            unsigned char reserved1[92-4];
+            XEckdMbr mbr;
+            unsigned char reserved2[144-(92-4)-sizeof(XEckdMbr)];
+        } x;
+    } u;
+} __attribute__((packed)) Ipl2;
+
+typedef struct IplVolumeLabel {
+    unsigned char key[4]; /* == "VOL1" */
+    union {
+        unsigned char data[80];
+        struct {
+            unsigned char key[4]; /* == "VOL1" */
+            unsigned char volser[6];
+            unsigned char reserved[6];
+        } f;
+    };
+} __attribute__((packed)) IplVolumeLabel;
+
+typedef enum {
+    ECKD_NO_IPL,
+    ECKD_CDL,
+    ECKD_CMS,
+    ECKD_LDL,
+} ECKD_IPL_mode_t;
+
+/* utility code below */
+
+static inline void IPL_assert(bool term, const char *message)
+{
+    if (!term) {
+        sclp_print("\n! ");
+        sclp_print(message);
+        virtio_panic(" !\n"); /* no return */
+    }
+}
+
+static const unsigned char ebc2asc[256] =
+      /* 0123456789abcdef0123456789abcdef */
+        "................................" /* 1F */
+        "................................" /* 3F */
+        " ...........<(+|&.........!$*);." /* 5F first.chr.here.is.real.space */
+        "-/.........,%_>?.........`:#@'=\""/* 7F */
+        ".abcdefghi.......jklmnopqr......" /* 9F */
+        "..stuvwxyz......................" /* BF */
+        ".ABCDEFGHI.......JKLMNOPQR......" /* DF */
+        "..STUVWXYZ......0123456789......";/* FF */
+
+static inline void ebcdic_to_ascii(const char *src,
+                                   char *dst,
+                                   unsigned int size)
+{
+    unsigned int i;
+    for (i = 0; i < size; i++) {
+        unsigned c = src[i];
+        dst[i] = ebc2asc[c];
+    }
+}
+
+static inline void print_volser(const void *volser)
+{
+    char ascii[8];
+
+    ebcdic_to_ascii((char *)volser, ascii, 6);
+    ascii[6] = '\0';
+    sclp_print("VOLSER=[");
+    sclp_print(ascii);
+    sclp_print("]\n");
+}
+
+static inline bool unused_space(const void *p, size_t size)
+{
+    size_t i;
+    const unsigned char *m = p;
+
+    for (i = 0; i < size; i++) {
+        if (m[i] != FREE_SPACE_FILLER) {
+            return false;
+        }
+    }
+    return true;
+}
+
+static inline bool is_null_block_number(block_number_t x)
+{
+    return x == NULL_BLOCK_NR;
+}
+
+static inline void read_block(block_number_t blockno,
+                              void *buffer,
+                              const char *errmsg)
+{
+    IPL_assert(virtio_read(blockno, buffer) == 0, errmsg);
+}
+
+static inline bool block_size_ok(uint32_t block_size)
+{
+    return block_size == virtio_get_block_size();
+}
+
+static inline bool magic_match(const void *data, const void *magic)
+{
+    return *((uint32_t *)data) == *((uint32_t *)magic);
+}
+
+#endif /* _PC_BIOS_S390_CCW_BOOTMAP_H */
diff --git a/pc-bios/s390-ccw/main.c b/pc-bios/s390-ccw/main.c
index 5c33766533..dbfb40e685 100644
--- a/pc-bios/s390-ccw/main.c
+++ b/pc-bios/s390-ccw/main.c
@@ -9,6 +9,7 @@
  */
 
 #include "s390-ccw.h"
+#include "virtio.h"
 
 char stack[PAGE_SIZE * 8] __attribute__((__aligned__(PAGE_SIZE)));
 uint64_t boot_value;
@@ -64,6 +65,10 @@ static void virtio_setup(uint64_t dev_info)
     }
 
     virtio_setup_block(blk_schid);
+
+    if (!virtio_ipl_disk_is_valid()) {
+        virtio_panic("No valid hard disk detected.\n");
+    }
 }
 
 int main(void)
@@ -72,8 +77,8 @@ int main(void)
     debug_print_int("boot reg[7] ", boot_value);
     virtio_setup(boot_value);
 
-    if (zipl_load() < 0)
-        sclp_print("Failed to load OS from hard disk\n");
-    disabled_wait();
-    while (1) { }
+    zipl_load(); /* no return */
+
+    virtio_panic("Failed to load OS from hard disk\n");
+    return 0; /* make compiler happy */
 }
diff --git a/pc-bios/s390-ccw/s390-ccw.h b/pc-bios/s390-ccw/s390-ccw.h
index 5e871ac84c..959aed0d0b 100644
--- a/pc-bios/s390-ccw/s390-ccw.h
+++ b/pc-bios/s390-ccw/s390-ccw.h
@@ -34,10 +34,10 @@ typedef unsigned long long __u64;
 #define PAGE_SIZE 4096
 
 #ifndef EIO
-#define EIO	1
+#define EIO     1
 #endif
 #ifndef EBUSY
-#define EBUSY	2
+#define EBUSY   2
 #endif
 #ifndef NULL
 #define NULL    0
@@ -57,14 +57,14 @@ void sclp_setup(void);
 
 /* virtio.c */
 unsigned long virtio_load_direct(ulong rec_list1, ulong rec_list2,
-				 ulong subchan_id, void *load_addr);
+                                 ulong subchan_id, void *load_addr);
 bool virtio_is_blk(struct subchannel_id schid);
 void virtio_setup_block(struct subchannel_id schid);
 int virtio_read(ulong sector, void *load_addr);
 int enable_mss_facility(void);
 
 /* bootmap.c */
-int zipl_load(void);
+void zipl_load(void);
 
 static inline void *memset(void *s, int c, size_t n)
 {
@@ -86,15 +86,21 @@ static inline void fill_hex(char *out, unsigned char val)
     out[1] = hex[val & 0xf];
 }
 
-static inline void print_int(const char *desc, u64 addr)
+static inline void fill_hex_val(char *out, void *ptr, unsigned size)
 {
-    unsigned char *addr_c = (unsigned char*)&addr;
-    char out[] = ": 0xffffffffffffffff\n";
+    unsigned char *value = ptr;
     unsigned int i;
 
-    for (i = 0; i < sizeof(addr); i++) {
-        fill_hex(&out[4 + (i*2)], addr_c[i]);
+    for (i = 0; i < size; i++) {
+        fill_hex(&out[i*2], value[i]);
     }
+}
+
+static inline void print_int(const char *desc, u64 addr)
+{
+    char out[] = ": 0xffffffffffffffff\n";
+
+    fill_hex_val(&out[4], &addr, sizeof(addr));
 
     sclp_print(desc);
     sclp_print(out);
@@ -118,18 +124,18 @@ static inline void debug_print_addr(const char *desc, void *p)
  *           Hypercall functions               *
  ***********************************************/
 
-#define KVM_S390_VIRTIO_NOTIFY		0
-#define KVM_S390_VIRTIO_RESET		1
-#define KVM_S390_VIRTIO_SET_STATUS	2
+#define KVM_S390_VIRTIO_NOTIFY          0
+#define KVM_S390_VIRTIO_RESET           1
+#define KVM_S390_VIRTIO_SET_STATUS      2
 #define KVM_S390_VIRTIO_CCW_NOTIFY      3
 
 static inline void yield(void)
 {
-	asm volatile ("diag 0,0,0x44"
-                      : :
-		      : "memory", "cc");
+    asm volatile ("diag 0,0,0x44"
+                  : :
+                  : "memory", "cc");
 }
 
-#define SECTOR_SIZE 512
+#define MAX_SECTOR_SIZE 4096
 
 #endif /* S390_CCW_H */
diff --git a/pc-bios/s390-ccw/sclp-ascii.c b/pc-bios/s390-ccw/sclp-ascii.c
index 1c93937104..761fb44ff5 100644
--- a/pc-bios/s390-ccw/sclp-ascii.c
+++ b/pc-bios/s390-ccw/sclp-ascii.c
@@ -33,7 +33,7 @@ static int sclp_service_call(unsigned int command, void *sccb)
 
 static void sclp_set_write_mask(void)
 {
-    WriteEventMask *sccb = (void*)_sccb;
+    WriteEventMask *sccb = (void *)_sccb;
 
     sccb->h.length = sizeof(WriteEventMask);
     sccb->mask_length = sizeof(unsigned int);
@@ -68,7 +68,7 @@ static void _memcpy(char *dest, const char *src, int len)
 void sclp_print(const char *str)
 {
     int len = _strlen(str);
-    WriteEventData *sccb = (void*)_sccb;
+    WriteEventData *sccb = (void *)_sccb;
 
     sccb->h.length = sizeof(WriteEventData) + len;
     sccb->h.function_code = SCLP_FC_NORMAL_WRITE;
diff --git a/pc-bios/s390-ccw/virtio.c b/pc-bios/s390-ccw/virtio.c
index bbb3c4f36d..31b23b086c 100644
--- a/pc-bios/s390-ccw/virtio.c
+++ b/pc-bios/s390-ccw/virtio.c
@@ -18,22 +18,22 @@ static char chsc_page[PAGE_SIZE] __attribute__((__aligned__(PAGE_SIZE)));
 static long kvm_hypercall(unsigned long nr, unsigned long param1,
                           unsigned long param2)
 {
-	register ulong r_nr asm("1") = nr;
-	register ulong r_param1 asm("2") = param1;
-	register ulong r_param2 asm("3") = param2;
-	register long retval asm("2");
+    register ulong r_nr asm("1") = nr;
+    register ulong r_param1 asm("2") = param1;
+    register ulong r_param2 asm("3") = param2;
+    register long retval asm("2");
 
-	asm volatile ("diag 2,4,0x500"
-		      : "=d" (retval)
-		      : "d" (r_nr), "0" (r_param1), "r"(r_param2)
-		      : "memory", "cc");
+    asm volatile ("diag 2,4,0x500"
+                  : "=d" (retval)
+                  : "d" (r_nr), "0" (r_param1), "r"(r_param2)
+                  : "memory", "cc");
 
-	return retval;
+    return retval;
 }
 
 static void virtio_notify(struct subchannel_id schid)
 {
-    kvm_hypercall(KVM_S390_VIRTIO_CCW_NOTIFY, *(u32*)&schid, 0);
+    kvm_hypercall(KVM_S390_VIRTIO_CCW_NOTIFY, *(u32 *)&schid, 0);
 }
 
 /***********************************************
@@ -202,7 +202,7 @@ static int vring_wait_reply(struct vring *vr, int timeout)
  *               Virtio block                  *
  ***********************************************/
 
-static int virtio_read_many(ulong sector, void *load_addr, int sec_num)
+int virtio_read_many(ulong sector, void *load_addr, int sec_num)
 {
     struct virtio_blk_outhdr out_hdr;
     u8 status;
@@ -211,12 +211,12 @@ static int virtio_read_many(ulong sector, void *load_addr, int sec_num)
     /* Tell the host we want to read */
     out_hdr.type = VIRTIO_BLK_T_IN;
     out_hdr.ioprio = 99;
-    out_hdr.sector = sector;
+    out_hdr.sector = virtio_sector_adjust(sector);
 
     vring_send_buf(&block, &out_hdr, sizeof(out_hdr), VRING_DESC_F_NEXT);
 
     /* This is where we want to receive data */
-    vring_send_buf(&block, load_addr, SECTOR_SIZE * sec_num,
+    vring_send_buf(&block, load_addr, virtio_get_block_size() * sec_num,
                    VRING_DESC_F_WRITE | VRING_HIDDEN_IS_CHAIN |
                    VRING_DESC_F_NEXT);
 
@@ -236,7 +236,7 @@ static int virtio_read_many(ulong sector, void *load_addr, int sec_num)
 }
 
 unsigned long virtio_load_direct(ulong rec_list1, ulong rec_list2,
-				 ulong subchan_id, void *load_addr)
+                                 ulong subchan_id, void *load_addr)
 {
     u8 status;
     int sec = rec_list1;
@@ -244,16 +244,16 @@ unsigned long virtio_load_direct(ulong rec_list1, ulong rec_list2,
     int sec_len = rec_list2 >> 48;
     ulong addr = (ulong)load_addr;
 
-    if (sec_len != SECTOR_SIZE) {
+    if (sec_len != virtio_get_block_size()) {
         return -1;
     }
 
     sclp_print(".");
-    status = virtio_read_many(sec, (void*)addr, sec_num);
+    status = virtio_read_many(sec, (void *)addr, sec_num);
     if (status) {
         virtio_panic("I/O Error");
     }
-    addr += sec_num * SECTOR_SIZE;
+    addr += sec_num * virtio_get_block_size();
 
     return addr;
 }
@@ -263,18 +263,98 @@ int virtio_read(ulong sector, void *load_addr)
     return virtio_read_many(sector, load_addr, 1);
 }
 
+static VirtioBlkConfig blk_cfg = {};
+static bool guessed_disk_nature;
+
+bool virtio_guessed_disk_nature(void)
+{
+    return guessed_disk_nature;
+}
+
+void virtio_assume_scsi(void)
+{
+    guessed_disk_nature = true;
+    blk_cfg.blk_size = 512;
+}
+
+void virtio_assume_eckd(void)
+{
+    guessed_disk_nature = true;
+    blk_cfg.blk_size = 4096;
+
+    /* this must be here to calculate code segment position */
+    blk_cfg.geometry.heads = 15;
+    blk_cfg.geometry.sectors = 12;
+}
+
+bool virtio_disk_is_scsi(void)
+{
+    if (guessed_disk_nature) {
+        return (blk_cfg.blk_size  == 512);
+    }
+    return (blk_cfg.geometry.heads == 255)
+        && (blk_cfg.geometry.sectors == 63)
+        && (blk_cfg.blk_size  == 512);
+}
+
+bool virtio_disk_is_eckd(void)
+{
+    if (guessed_disk_nature) {
+        return (blk_cfg.blk_size  == 4096);
+    }
+    return (blk_cfg.geometry.heads == 15)
+        && (blk_cfg.geometry.sectors == 12)
+        && (blk_cfg.blk_size  == 4096);
+}
+
+bool virtio_ipl_disk_is_valid(void)
+{
+    return blk_cfg.blk_size && (virtio_disk_is_scsi() || virtio_disk_is_eckd());
+}
+
+int virtio_get_block_size(void)
+{
+    return blk_cfg.blk_size;
+}
+
+uint16_t virtio_get_cylinders(void)
+{
+    return blk_cfg.geometry.cylinders;
+}
+
+uint8_t virtio_get_heads(void)
+{
+    return blk_cfg.geometry.heads;
+}
+
+uint8_t virtio_get_sectors(void)
+{
+    return blk_cfg.geometry.sectors;
+}
+
 void virtio_setup_block(struct subchannel_id schid)
 {
     struct vq_info_block info;
     struct vq_config_block config = {};
 
+    blk_cfg.blk_size = 0; /* mark "illegal" - setup started... */
+
     virtio_reset(schid);
 
+    /*
+     * Skipping CCW_CMD_READ_FEAT. We're not doing anything fancy, and
+     * we'll just stop dead anyway if anything does not work like we
+     * expect it.
+     */
+
     config.index = 0;
     if (run_ccw(schid, CCW_CMD_READ_VQ_CONF, &config, sizeof(config))) {
+        virtio_panic("Could not get block device VQ configuration\n");
+    }
+    if (run_ccw(schid, CCW_CMD_READ_CONF, &blk_cfg, sizeof(blk_cfg))) {
         virtio_panic("Could not get block device configuration\n");
     }
-    vring_init(&block, config.num, (void*)(100 * 1024 * 1024),
+    vring_init(&block, config.num, (void *)(100 * 1024 * 1024),
                KVM_S390_VIRTIO_RING_ALIGN);
 
     info.queue = (100ULL * 1024ULL* 1024ULL);
@@ -286,6 +366,12 @@ void virtio_setup_block(struct subchannel_id schid)
     if (!run_ccw(schid, CCW_CMD_SET_VQ, &info, sizeof(info))) {
         virtio_set_status(schid, VIRTIO_CONFIG_S_DRIVER_OK);
     }
+
+    if (!virtio_ipl_disk_is_valid()) {
+        /* make sure all getters but blocksize return 0 for invalid IPL disk */
+        memset(&blk_cfg, 0, sizeof(blk_cfg));
+        virtio_assume_scsi();
+    }
 }
 
 bool virtio_is_blk(struct subchannel_id schid)
diff --git a/pc-bios/s390-ccw/virtio.h b/pc-bios/s390-ccw/virtio.h
index 772a63f152..f1fb1b08fa 100644
--- a/pc-bios/s390-ccw/virtio.h
+++ b/pc-bios/s390-ccw/virtio.h
@@ -66,7 +66,7 @@ struct virtio_dev {
     char *config;
 };
 
-#define KVM_S390_VIRTIO_RING_ALIGN	4096
+#define KVM_S390_VIRTIO_RING_ALIGN  4096
 
 #define VRING_USED_F_NO_NOTIFY  1
 
@@ -161,4 +161,52 @@ struct virtio_blk_outhdr {
         u64 sector;
 };
 
+typedef struct VirtioBlkConfig {
+    u64 capacity; /* in 512-byte sectors */
+    u32 size_max; /* max segment size (if VIRTIO_BLK_F_SIZE_MAX) */
+    u32 seg_max;  /* max number of segments (if VIRTIO_BLK_F_SEG_MAX) */
+
+    struct virtio_blk_geometry {
+        u16 cylinders;
+        u8 heads;
+        u8 sectors;
+    } geometry; /* (if VIRTIO_BLK_F_GEOMETRY) */
+
+    u32 blk_size; /* block size of device (if VIRTIO_BLK_F_BLK_SIZE) */
+
+    /* the next 4 entries are guarded by VIRTIO_BLK_F_TOPOLOGY  */
+    u8 physical_block_exp; /* exponent for physical block per logical block */
+    u8 alignment_offset;   /* alignment offset in logical blocks */
+    u16 min_io_size;       /* min I/O size without performance penalty
+                              in logical blocks */
+    u32 opt_io_size;       /* optimal sustained I/O size in logical blocks */
+
+    u8 wce; /* writeback mode (if VIRTIO_BLK_F_CONFIG_WCE) */
+} __attribute__((packed)) VirtioBlkConfig;
+
+bool virtio_guessed_disk_nature(void);
+void virtio_assume_scsi(void);
+void virtio_assume_eckd(void);
+
+extern bool virtio_disk_is_scsi(void);
+extern bool virtio_disk_is_eckd(void);
+extern bool virtio_ipl_disk_is_valid(void);
+extern int virtio_get_block_size(void);
+extern uint16_t virtio_get_cylinders(void);
+extern uint8_t virtio_get_heads(void);
+extern uint8_t virtio_get_sectors(void);
+extern int virtio_read_many(ulong sector, void *load_addr, int sec_num);
+
+#define VIRTIO_SECTOR_SIZE 512
+
+static inline ulong virtio_eckd_sector_adjust(ulong sector)
+{
+     return sector * (virtio_get_block_size() / VIRTIO_SECTOR_SIZE);
+}
+
+static inline ulong virtio_sector_adjust(ulong sector)
+{
+    return virtio_disk_is_eckd() ? virtio_eckd_sector_adjust(sector) : sector;
+}
+
 #endif /* VIRTIO_H */
diff --git a/qapi-schema.json b/qapi-schema.json
index e7727a1153..a83befc05b 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -8,6 +8,9 @@
 # QAPI block definitions
 { 'include': 'qapi/block.json' }
 
+# QAPI event definitions
+{ 'include': 'qapi/event.json' }
+
 ##
 # LostTickPolicy:
 #
@@ -211,12 +214,18 @@
 #
 # @filename: the filename of the character device
 #
+# @frontend-open: shows whether the frontend device attached to this backend
+#                 (eg. with the chardev=... option) is in open or closed state
+#                 (since 2.1)
+#
 # Notes: @filename is encoded using the QEMU command line character device
 #        encoding.  See the QEMU man page for details.
 #
 # Since: 0.14.0
 ##
-{ 'type': 'ChardevInfo', 'data': {'label': 'str', 'filename': 'str'} }
+{ 'type': 'ChardevInfo', 'data': {'label': 'str',
+                                  'filename': 'str',
+                                  'frontend-open': 'bool'} }
 
 ##
 # @query-chardev:
@@ -654,8 +663,9 @@
 #
 # @host: IP address
 #
-# @service: The service name of vnc port. This may depend on the host system's
-#           service database so symbolic names should not be relied on.
+# @service: The service name of the vnc port. This may depend on the host
+#           system's service database so symbolic names should not be relied
+#           on.
 #
 # @family: address family
 #
@@ -694,7 +704,7 @@
 ##
 { 'type': 'VncClientInfo',
   'base': 'VncBasicInfo',
-  'data': { '*x509_dname'   : 'str', '*sasl_username': 'str' } }
+  'data': { '*x509_dname': 'str', '*sasl_username': 'str' } }
 
 ##
 # @VncInfo:
@@ -2040,6 +2050,62 @@
     '*udp':       'str' } }
 
 ##
+# @NetdevL2TPv3Options
+#
+# Connect the VLAN to Ethernet over L2TPv3 Static tunnel
+#
+# @src: source address
+#
+# @dst: destination address
+#
+# @srcport: #optional source port - mandatory for udp, optional for ip
+#
+# @dstport: #optional destination port - mandatory for udp, optional for ip
+#
+# @ipv6: #optional - force the use of ipv6
+#
+# @udp: #optional - use the udp version of l2tpv3 encapsulation
+#
+# @cookie64: #optional - use 64 bit coookies
+#
+# @counter: #optional have sequence counter
+#
+# @pincounter: #optional pin sequence counter to zero -
+#              workaround for buggy implementations or
+#              networks with packet reorder
+#
+# @txcookie: #optional 32 or 64 bit transmit cookie
+#
+# @rxcookie: #optional 32 or 64 bit receive cookie
+#
+# @txsession: 32 bit transmit session
+#
+# @rxsession: #optional 32 bit receive session - if not specified
+#             set to the same value as transmit
+#
+# @offset: #optional additional offset - allows the insertion of
+#          additional application-specific data before the packet payload
+#
+# Since 2.1
+##
+{ 'type': 'NetdevL2TPv3Options',
+  'data': {
+    'src':          'str',
+    'dst':          'str',
+    '*srcport':     'str',
+    '*dstport':     'str',
+    '*ipv6':        'bool',
+    '*udp':         'bool',
+    '*cookie64':    'bool',
+    '*counter':     'bool',
+    '*pincounter':  'bool',
+    '*txcookie':    'uint64',
+    '*rxcookie':    'uint64',
+    'txsession':    'uint32',
+    '*rxsession':   'uint32',
+    '*offset':      'uint32' } }
+
+##
 # @NetdevVdeOptions
 #
 # Connect the VLAN to a vde switch running on the host.
@@ -2150,6 +2216,9 @@
 # A discriminated record of network device traits.
 #
 # Since 1.2
+#
+# 'l2tpv3' - since 2.1
+#
 ##
 { 'union': 'NetClientOptions',
   'data': {
@@ -2157,6 +2226,7 @@
     'nic':      'NetLegacyNicOptions',
     'user':     'NetdevUserOptions',
     'tap':      'NetdevTapOptions',
+    'l2tpv3':   'NetdevL2TPv3Options',
     'socket':   'NetdevSocketOptions',
     'vde':      'NetdevVdeOptions',
     'dump':     'NetdevDumpOptions',
@@ -3398,5 +3468,3 @@
 ##
 { 'enum': 'GuestPanicAction',
   'data': [ 'pause' ] }
-
-{ 'include': 'qapi-event.json' }
diff --git a/qapi/block-core.json b/qapi/block-core.json
index af6b436540..faf394cc76 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -765,6 +765,13 @@
 # @format: #optional the format of the new destination, default is to
 #          probe if @mode is 'existing', else the format of the source
 #
+# @node-name: #optional the new block driver state node name in the graph
+#             (Since 2.1)
+#
+# @replaces: #optional with sync=full graph node name to be replaced by the new
+#            image when a whole image copy is done. This can be used to repair
+#            broken Quorum files. (Since 2.1)
+#
 # @mode: #optional whether and how QEMU should create a new image, default is
 #        'absolute-paths'.
 #
@@ -797,6 +804,7 @@
 ##
 { 'command': 'drive-mirror',
   'data': { 'device': 'str', 'target': 'str', '*format': 'str',
+            '*node-name': 'str', '*replaces': 'str',
             'sync': 'MirrorSyncMode', '*mode': 'NewImageMode',
             '*speed': 'int', '*granularity': 'uint32',
             '*buf-size': 'int', '*on-source-error': 'BlockdevOnError',
@@ -1329,12 +1337,15 @@
 #
 # @vote-threshold: the vote limit under which a read will fail
 #
+# @rewrite-corrupted: #optional rewrite corrupted data when quorum is reached
+#                     (Since 2.1)
+#
 # Since: 2.0
 ##
 { 'type': 'BlockdevOptionsQuorum',
   'data': { '*blkverify': 'bool',
             'children': [ 'BlockdevRef' ],
-            'vote-threshold': 'int' } }
+            'vote-threshold': 'int', '*rewrite-corrupted': 'bool' } }
 
 ##
 # @BlockdevOptions
@@ -1436,7 +1447,8 @@
 # @device: device name
 #
 # @msg: informative message for human consumption, such as the kind of
-#       corruption being detected
+#       corruption being detected. It should not be parsed by machine as it is
+#       not guaranteed to be stable
 #
 # @offset: #optional, if the corruption resulted from an image access, this is
 #          the access offset into the image
@@ -1544,19 +1556,32 @@
 { 'event': 'BLOCK_JOB_ERROR',
   'data': { 'device'   : 'str',
             'operation': 'IoOperationType',
-            'action'   : 'BlockdevOnError' } }
+            'action'   : 'BlockErrorAction' } }
 
 ##
 # @BLOCK_JOB_READY
 #
 # Emitted when a block job is ready to complete
 #
+# @type: job type
+#
 # @device: device name
 #
+# @len: maximum progress value
+#
+# @offset: current progress value. On success this is equal to len.
+#          On failure this is less than len
+#
+# @speed: rate limit, bytes per second
+#
 # Note: The "ready to complete" status is always reset by a @BLOCK_JOB_ERROR
 # event
 #
 # Since: 1.3
 ##
 { 'event': 'BLOCK_JOB_READY',
-  'data': { 'device': 'str' } }
+  'data': { 'type'  : 'BlockJobType',
+            'device': 'str',
+            'len'   : 'int',
+            'offset': 'int',
+            'speed' : 'int' } }
diff --git a/qapi-event.json b/qapi/event.json
index e7a47f9927..ff97aeb377 100644
--- a/qapi-event.json
+++ b/qapi/event.json
@@ -1,8 +1,8 @@
 ##
 # @SHUTDOWN
 #
-# Emitted when the virtual machine has shutdown, possibly indicating that QEMU
-# is about about to exit.
+# Emitted when the virtual machine has shut down, indicating that qemu is
+# about to exit.
 #
 # Note: If the command-line option "-no-shutdown" has been specified, qemu will
 # not exit, and a STOP event will eventually follow the SHUTDOWN event
@@ -316,3 +316,17 @@
 { 'event': 'QUORUM_REPORT_BAD',
   'data': { '*error': 'str', 'node-name': 'str',
             'sector-num': 'int', 'sector-count': 'int' } }
+
+##
+# @VSERPORT_CHANGE
+#
+# Emitted when the guest opens or closes a virtio-serial port.
+#
+# @id: device identifier of the virtio-serial port
+#
+# @open: true if the guest has opened the virtio-serial port
+#
+# Since: 2.1
+##
+{ 'event': 'VSERPORT_CHANGE',
+  'data': { 'id': 'str', 'open': 'bool' } }
diff --git a/qemu-bridge-helper.c b/qemu-bridge-helper.c
index 6a0974eb48..36eb3bcfd6 100644
--- a/qemu-bridge-helper.c
+++ b/qemu-bridge-helper.c
@@ -229,7 +229,7 @@ int main(int argc, char **argv)
     unsigned long ifargs[4];
 #endif
     int ifindex;
-    int fd, ctlfd, unixfd = -1;
+    int fd = -1, ctlfd = -1, unixfd = -1;
     int use_vnet = 0;
     int mtu;
     const char *bridge = NULL;
@@ -436,7 +436,12 @@ int main(int argc, char **argv)
     /* profit! */
 
 cleanup:
-
+    if (fd >= 0) {
+        close(fd);
+    }
+    if (ctlfd >= 0) {
+        close(ctlfd);
+    }
     while ((acl_rule = QSIMPLEQ_FIRST(&acl_list)) != NULL) {
         QSIMPLEQ_REMOVE_HEAD(&acl_list, entry);
         g_free(acl_rule);
diff --git a/qemu-char.c b/qemu-char.c
index cbd6b9a025..51917de462 100644
--- a/qemu-char.c
+++ b/qemu-char.c
@@ -3705,6 +3705,7 @@ ChardevInfoList *qmp_query_chardev(Error **errp)
         info->value = g_malloc0(sizeof(*info->value));
         info->value->label = g_strdup(chr->label);
         info->value->filename = g_strdup(chr->filename);
+        info->value->frontend_open = chr->fe_open;
 
         info->next = chr_list;
         chr_list = info;
diff --git a/qemu-options.hx b/qemu-options.hx
index ff76ad4830..9e5468678b 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -1433,6 +1433,29 @@ DEF("net", HAS_ARG, QEMU_OPTION_net,
     "                (default=" DEFAULT_BRIDGE_INTERFACE ") using the program 'helper'\n"
     "                (default=" DEFAULT_BRIDGE_HELPER ")\n"
 #endif
+#ifdef __linux__
+    "-net l2tpv3[,vlan=n][,name=str],src=srcaddr,dst=dstaddr[,srcport=srcport][,dstport=dstport],txsession=txsession[,rxsession=rxsession][,ipv6=on/off][,udp=on/off][,cookie64=on/off][,counter][,pincounter][,txcookie=txcookie][,rxcookie=rxcookie][,offset=offset]\n"
+    "                connect the VLAN to an Ethernet over L2TPv3 pseudowire\n"
+    "                Linux kernel 3.3+ as well as most routers can talk\n"
+    "                L2TPv3. This transport allows to connect a VM to a VM,\n"
+    "                VM to a router and even VM to Host. It is a nearly-universal\n"
+    "                standard (RFC3391). Note - this implementation uses static\n"
+    "                pre-configured tunnels (same as the Linux kernel).\n"
+    "                use 'src=' to specify source address\n"
+    "                use 'dst=' to specify destination address\n"
+    "                use 'udp=on' to specify udp encapsulation\n"
+    "                use 'dstport=' to specify destination udp port\n"
+    "                use 'dstport=' to specify destination udp port\n"
+    "                use 'ipv6=on' to force v6\n"
+    "                L2TPv3 uses cookies to prevent misconfiguration as\n"
+    "                well as a weak security measure\n"
+    "                use 'rxcookie=0x012345678' to specify a rxcookie\n"
+    "                use 'txcookie=0x012345678' to specify a txcookie\n"
+    "                use 'cookie64=on' to set cookie size to 64 bit, otherwise 32\n"
+    "                use 'counter=off' to force a 'cut-down' L2TPv3 with no counter\n"
+    "                use 'pincounter=on' to work around broken counter handling in peer\n"
+    "                use 'offset=X' to add an extra offset between header and data\n"
+#endif
     "-net socket[,vlan=n][,name=str][,fd=h][,listen=[host]:port][,connect=host:port]\n"
     "                connect the vlan 'n' to another VLAN using a socket connection\n"
     "-net socket[,vlan=n][,name=str][,fd=h][,mcast=maddr:port[,localaddr=addr]]\n"
@@ -1778,6 +1801,65 @@ qemu-system-i386 linux.img \
                  -net socket,mcast=239.192.168.1:1102,localaddr=1.2.3.4
 @end example
 
+@item -netdev l2tpv3,id=@var{id},src=@var{srcaddr},dst=@var{dstaddr}[,srcport=@var{srcport}][,dstport=@var{dstport}],txsession=@var{txsession}[,rxsession=@var{rxsession}][,ipv6][,udp][,cookie64][,counter][,pincounter][,txcookie=@var{txcookie}][,rxcookie=@var{rxcookie}][,offset=@var{offset}]
+@item -net l2tpv3[,vlan=@var{n}][,name=@var{name}],src=@var{srcaddr},dst=@var{dstaddr}[,srcport=@var{srcport}][,dstport=@var{dstport}],txsession=@var{txsession}[,rxsession=@var{rxsession}][,ipv6][,udp][,cookie64][,counter][,pincounter][,txcookie=@var{txcookie}][,rxcookie=@var{rxcookie}][,offset=@var{offset}]
+Connect VLAN @var{n} to L2TPv3 pseudowire. L2TPv3 (RFC3391) is a popular
+protocol to transport Ethernet (and other Layer 2) data frames between
+two systems. It is present in routers, firewalls and the Linux kernel
+(from version 3.3 onwards).
+
+This transport allows a VM to communicate to another VM, router or firewall directly.
+
+@item src=@var{srcaddr}
+    source address (mandatory)
+@item dst=@var{dstaddr}
+    destination address (mandatory)
+@item udp
+    select udp encapsulation (default is ip).
+@item srcport=@var{srcport}
+    source udp port.
+@item dstport=@var{dstport}
+    destination udp port.
+@item ipv6
+    force v6, otherwise defaults to v4.
+@item rxcookie=@var{rxcookie}
+@item txcookie=@var{txcookie}
+    Cookies are a weak form of security in the l2tpv3 specification.
+Their function is mostly to prevent misconfiguration. By default they are 32
+bit.
+@item cookie64
+    Set cookie size to 64 bit instead of the default 32
+@item counter=off
+    Force a 'cut-down' L2TPv3 with no counter as in
+draft-mkonstan-l2tpext-keyed-ipv6-tunnel-00
+@item pincounter=on
+    Work around broken counter handling in peer. This may also help on
+networks which have packet reorder.
+@item offset=@var{offset}
+    Add an extra offset between header and data
+
+For example, to attach a VM running on host 4.3.2.1 via L2TPv3 to the bridge br-lan
+on the remote Linux host 1.2.3.4:
+@example
+# Setup tunnel on linux host using raw ip as encapsulation
+# on 1.2.3.4
+ip l2tp add tunnel remote 4.3.2.1 local 1.2.3.4 tunnel_id 1 peer_tunnel_id 1 \
+    encap udp udp_sport 16384 udp_dport 16384
+ip l2tp add session tunnel_id 1 name vmtunnel0 session_id \
+    0xFFFFFFFF peer_session_id 0xFFFFFFFF
+ifconfig vmtunnel0 mtu 1500
+ifconfig vmtunnel0 up
+brctl addif br-lan vmtunnel0
+
+
+# on 4.3.2.1
+# launch QEMU instance - if your network has reorder or is very lossy add ,pincounter
+
+qemu-system-i386 linux.img -net nic -net l2tpv3,src=4.2.3.1,dst=1.2.3.4,udp,srcport=16384,dstport=16384,rxsession=0xffffffff,txsession=0xffffffff,counter
+
+
+@end example
+
 @item -netdev vde,id=@var{id}[,sock=@var{socketpath}][,port=@var{n}][,group=@var{groupname}][,mode=@var{octalmode}]
 @item -net vde[,vlan=@var{n}][,name=@var{name}][,sock=@var{socketpath}] [,port=@var{n}][,group=@var{groupname}][,mode=@var{octalmode}]
 Connect VLAN @var{n} to PORT @var{n} of a vde switch running on host and
diff --git a/qmp-commands.hx b/qmp-commands.hx
index e4a1c80434..65218bc147 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -1293,6 +1293,7 @@ EQMP
     {
         .name       = "drive-mirror",
         .args_type  = "sync:s,device:B,target:s,speed:i?,mode:s?,format:s?,"
+                      "node-name:s?,replaces:s?,"
                       "on-source-error:s?,on-target-error:s?,"
                       "granularity:i?,buf-size:i?",
         .mhandler.cmd_new = qmp_marshal_input_drive_mirror,
@@ -1314,6 +1315,10 @@ Arguments:
 - "device": device name to operate on (json-string)
 - "target": name of new image file (json-string)
 - "format": format of new image (json-string, optional)
+- "node-name": the name of the new block driver state in the node graph
+               (json-string, optional)
+- "replaces": the block driver node name to replace when finished
+              (json-string, optional)
 - "mode": how an image file should be created into the target
   file/device (NewImageMode, optional, default 'absolute-paths')
 - "speed": maximum speed of the streaming job, in bytes per second
@@ -1921,19 +1926,28 @@ Each json-object contain the following:
 
 - "label": device's label (json-string)
 - "filename": device's file (json-string)
+- "frontend-open": open/closed state of the frontend device attached to this
+                   backend (json-bool)
 
 Example:
 
 -> { "execute": "query-chardev" }
 <- {
-      "return":[
+      "return": [
+         {
+            "label": "charchannel0",
+            "filename": "unix:/var/lib/libvirt/qemu/seabios.rhel6.agent,server",
+            "frontend-open": false
+         },
          {
-            "label":"monitor",
-            "filename":"stdio"
+            "label": "charmonitor",
+            "filename": "unix:/var/lib/libvirt/qemu/seabios.rhel6.monitor,server",
+            "frontend-open": true
          },
          {
-            "label":"serial0",
-            "filename":"vc"
+            "label": "charserial0",
+            "filename": "pty:/dev/pts/2",
+            "frontend-open": true
          }
       ]
    }
diff --git a/scripts/qapi-event.py b/scripts/qapi-event.py
index 3a1cd61914..601e3076ab 100644
--- a/scripts/qapi-event.py
+++ b/scripts/qapi-event.py
@@ -26,9 +26,8 @@ def _generate_event_api_name(event_name, params):
                 api_name += "bool has_%s,\n" % c_var(argname)
                 api_name += "".ljust(l)
 
-            if argentry == "str":
-                api_name += "const "
-            api_name += "%s %s,\n" % (c_type(argentry), c_var(argname))
+            api_name += "%s %s,\n" % (c_type(argentry, is_param=True),
+                                      c_var(argname))
             api_name += "".ljust(l)
 
     api_name += "Error **errp)"
diff --git a/scripts/qapi.py b/scripts/qapi.py
index 54b97cb48e..f2c6d1f840 100644
--- a/scripts/qapi.py
+++ b/scripts/qapi.py
@@ -255,7 +255,7 @@ def check_event(expr, expr_info):
             if structured:
                 raise QAPIExprError(expr_info,
                                     "Nested structure define in event is not "
-                                    "supported now, event '%s', argname '%s'"
+                                    "supported, event '%s', argname '%s'"
                                     % (expr['event'], argname))
 
 def check_union(expr, expr_info):
diff --git a/target-ppc/cpu-models.c b/target-ppc/cpu-models.c
index 97a81d8660..9a91af9dbf 100644
--- a/target-ppc/cpu-models.c
+++ b/target-ppc/cpu-models.c
@@ -1138,6 +1138,8 @@
                 "POWER7 v2.3")
     POWERPC_DEF("POWER7+_v2.1",  CPU_POWERPC_POWER7P_v21,            POWER7P,
                 "POWER7+ v2.1")
+    POWERPC_DEF("POWER8E_v1.0",  CPU_POWERPC_POWER8E_v10,            POWER8E,
+                "POWER8E v1.0")
     POWERPC_DEF("POWER8_v1.0",   CPU_POWERPC_POWER8_v10,             POWER8,
                 "POWER8 v1.0")
     POWERPC_DEF("970",           CPU_POWERPC_970,                    970,
@@ -1386,6 +1388,7 @@ PowerPCCPUAlias ppc_cpu_aliases[] = {
     { "POWER5gs", "POWER5+" },
     { "POWER7", "POWER7_v2.3" },
     { "POWER7+", "POWER7+_v2.1" },
+    { "POWER8E", "POWER8E_v1.0" },
     { "POWER8", "POWER8_v1.0" },
     { "970fx", "970fx_v3.1" },
     { "970mp", "970mp_v1.1" },
diff --git a/target-ppc/cpu-models.h b/target-ppc/cpu-models.h
index db75896012..c39d03a504 100644
--- a/target-ppc/cpu-models.h
+++ b/target-ppc/cpu-models.h
@@ -559,9 +559,12 @@ enum {
     CPU_POWERPC_POWER7P_BASE       = 0x004A0000,
     CPU_POWERPC_POWER7P_MASK       = 0xFFFF0000,
     CPU_POWERPC_POWER7P_v21        = 0x004A0201,
-    CPU_POWERPC_POWER8_BASE        = 0x004B0000,
+    CPU_POWERPC_POWER8E_BASE       = 0x004B0000,
+    CPU_POWERPC_POWER8E_MASK       = 0xFFFF0000,
+    CPU_POWERPC_POWER8E_v10        = 0x004B0100,
+    CPU_POWERPC_POWER8_BASE        = 0x004D0000,
     CPU_POWERPC_POWER8_MASK        = 0xFFFF0000,
-    CPU_POWERPC_POWER8_v10         = 0x004B0100,
+    CPU_POWERPC_POWER8_v10         = 0x004D0100,
     CPU_POWERPC_970                = 0x00390202,
     CPU_POWERPC_970FX_v10          = 0x00391100,
     CPU_POWERPC_970FX_v20          = 0x003C0200,
diff --git a/target-ppc/cpu-qom.h b/target-ppc/cpu-qom.h
index 13c7031265..f1f0a52998 100644
--- a/target-ppc/cpu-qom.h
+++ b/target-ppc/cpu-qom.h
@@ -119,7 +119,9 @@ void ppc_cpu_dump_statistics(CPUState *cpu, FILE *f,
                              fprintf_function cpu_fprintf, int flags);
 hwaddr ppc_cpu_get_phys_page_debug(CPUState *cpu, vaddr addr);
 int ppc_cpu_gdb_read_register(CPUState *cpu, uint8_t *buf, int reg);
+int ppc_cpu_gdb_read_register_apple(CPUState *cpu, uint8_t *buf, int reg);
 int ppc_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg);
+int ppc_cpu_gdb_write_register_apple(CPUState *cpu, uint8_t *buf, int reg);
 int ppc64_cpu_write_elf64_qemunote(WriteCoreDumpFunction f,
                                    CPUState *cpu, void *opaque);
 int ppc64_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs,
diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
index 74407ee209..08ae527cb6 100644
--- a/target-ppc/cpu.h
+++ b/target-ppc/cpu.h
@@ -2012,7 +2012,7 @@ enum {
                         PPC2_DIVE_ISA206 | PPC2_ATOMIC_ISA206 | \
                         PPC2_FP_CVT_ISA206 | PPC2_FP_TST_ISA206 | \
                         PPC2_BCTAR_ISA207 | PPC2_LSQ_ISA207 | \
-                        PPC2_ALTIVEC_207 | PPC2_ISA207S)
+                        PPC2_ALTIVEC_207 | PPC2_ISA207S | PPC2_DFP)
 };
 
 /*****************************************************************************/
diff --git a/target-ppc/gdbstub.c b/target-ppc/gdbstub.c
index 381a3c7e31..694d303e19 100644
--- a/target-ppc/gdbstub.c
+++ b/target-ppc/gdbstub.c
@@ -21,6 +21,31 @@
 #include "qemu-common.h"
 #include "exec/gdbstub.h"
 
+static int ppc_gdb_register_len_apple(int n)
+{
+    switch (n) {
+    case 0 ... 31:
+        /* gprs */
+        return 8;
+    case 32 ... 63:
+        /* fprs */
+        return 8;
+    case 64 ... 95:
+        return 16;
+    case 64+32: /* nip */
+    case 65+32: /* msr */
+    case 67+32: /* lr */
+    case 68+32: /* ctr */
+    case 69+32: /* xer */
+    case 70+32: /* fpscr */
+        return 8;
+    case 66+32: /* cr */
+        return 4;
+    default:
+        return 0;
+    }
+}
+
 static int ppc_gdb_register_len(int n)
 {
     switch (n) {
@@ -132,6 +157,65 @@ int ppc_cpu_gdb_read_register(CPUState *cs, uint8_t *mem_buf, int n)
     return r;
 }
 
+int ppc_cpu_gdb_read_register_apple(CPUState *cs, uint8_t *mem_buf, int n)
+{
+    PowerPCCPU *cpu = POWERPC_CPU(cs);
+    CPUPPCState *env = &cpu->env;
+    int r = ppc_gdb_register_len_apple(n);
+
+    if (!r) {
+        return r;
+    }
+
+    if (n < 32) {
+        /* gprs */
+        gdb_get_reg64(mem_buf, env->gpr[n]);
+    } else if (n < 64) {
+        /* fprs */
+        stfq_p(mem_buf, env->fpr[n-32]);
+    } else if (n < 96) {
+        /* Altivec */
+        stq_p(mem_buf, n - 64);
+        stq_p(mem_buf + 8, 0);
+    } else {
+        switch (n) {
+        case 64 + 32:
+            gdb_get_reg64(mem_buf, env->nip);
+            break;
+        case 65 + 32:
+            gdb_get_reg64(mem_buf, env->msr);
+            break;
+        case 66 + 32:
+            {
+                uint32_t cr = 0;
+                int i;
+                for (i = 0; i < 8; i++) {
+                    cr |= env->crf[i] << (32 - ((i + 1) * 4));
+                }
+                gdb_get_reg32(mem_buf, cr);
+                break;
+            }
+        case 67 + 32:
+            gdb_get_reg64(mem_buf, env->lr);
+            break;
+        case 68 + 32:
+            gdb_get_reg64(mem_buf, env->ctr);
+            break;
+        case 69 + 32:
+            gdb_get_reg64(mem_buf, env->xer);
+            break;
+        case 70 + 32:
+            gdb_get_reg64(mem_buf, env->fpscr);
+            break;
+        }
+    }
+    if (msr_le) {
+        /* If cpu is in LE mode, convert memory contents to LE. */
+        ppc_gdb_swap_register(mem_buf, n, r);
+    }
+    return r;
+}
+
 int ppc_cpu_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n)
 {
     PowerPCCPU *cpu = POWERPC_CPU(cs);
@@ -185,3 +269,56 @@ int ppc_cpu_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n)
     }
     return r;
 }
+int ppc_cpu_gdb_write_register_apple(CPUState *cs, uint8_t *mem_buf, int n)
+{
+    PowerPCCPU *cpu = POWERPC_CPU(cs);
+    CPUPPCState *env = &cpu->env;
+    int r = ppc_gdb_register_len_apple(n);
+
+    if (!r) {
+        return r;
+    }
+    if (msr_le) {
+        /* If cpu is in LE mode, convert memory contents to LE. */
+        ppc_gdb_swap_register(mem_buf, n, r);
+    }
+    if (n < 32) {
+        /* gprs */
+        env->gpr[n] = ldq_p(mem_buf);
+    } else if (n < 64) {
+        /* fprs */
+        env->fpr[n-32] = ldfq_p(mem_buf);
+    } else {
+        switch (n) {
+        case 64 + 32:
+            env->nip = ldq_p(mem_buf);
+            break;
+        case 65 + 32:
+            ppc_store_msr(env, ldq_p(mem_buf));
+            break;
+        case 66 + 32:
+            {
+                uint32_t cr = ldl_p(mem_buf);
+                int i;
+                for (i = 0; i < 8; i++) {
+                    env->crf[i] = (cr >> (32 - ((i + 1) * 4))) & 0xF;
+                }
+                break;
+            }
+        case 67 + 32:
+            env->lr = ldq_p(mem_buf);
+            break;
+        case 68 + 32:
+            env->ctr = ldq_p(mem_buf);
+            break;
+        case 69 + 32:
+            env->xer = ldq_p(mem_buf);
+            break;
+        case 70 + 32:
+            /* fpscr */
+            store_fpscr(env, ldq_p(mem_buf), 0xffffffff);
+            break;
+        }
+    }
+    return r;
+}
diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
index 561f8ccf2f..2d87108d8b 100644
--- a/target-ppc/kvm.c
+++ b/target-ppc/kvm.c
@@ -63,6 +63,7 @@ static int cap_ppc_smt;
 static int cap_ppc_rma;
 static int cap_spapr_tce;
 static int cap_spapr_multitce;
+static int cap_spapr_vfio;
 static int cap_hior;
 static int cap_one_reg;
 static int cap_epr;
@@ -101,6 +102,7 @@ int kvm_arch_init(KVMState *s)
     cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
     cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
     cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
+    cap_spapr_vfio = false;
     cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
     cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
     cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
@@ -1660,7 +1662,8 @@ bool kvmppc_spapr_use_multitce(void)
     return cap_spapr_multitce;
 }
 
-void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd)
+void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
+                              bool vfio_accel)
 {
     struct kvm_create_spapr_tce args = {
         .liobn = liobn,
@@ -1674,7 +1677,7 @@ void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd)
      * destroying the table, which the upper layers -will- do
      */
     *pfd = -1;
-    if (!cap_spapr_tce) {
+    if (!cap_spapr_tce || (vfio_accel && !cap_spapr_vfio)) {
         return NULL;
     }
 
diff --git a/target-ppc/kvm_ppc.h b/target-ppc/kvm_ppc.h
index 412cc7f3c1..1118122d89 100644
--- a/target-ppc/kvm_ppc.h
+++ b/target-ppc/kvm_ppc.h
@@ -33,7 +33,8 @@ int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu);
 #ifndef CONFIG_USER_ONLY
 off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem);
 bool kvmppc_spapr_use_multitce(void);
-void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd);
+void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
+                              bool vfio_accel);
 int kvmppc_remove_spapr_tce(void *table, int pfd, uint32_t window_size);
 int kvmppc_reset_htab(int shift_hint);
 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift);
@@ -144,7 +145,8 @@ static inline bool kvmppc_spapr_use_multitce(void)
 }
 
 static inline void *kvmppc_create_spapr_tce(uint32_t liobn,
-                                            uint32_t window_size, int *fd)
+                                            uint32_t window_size, int *fd,
+                                            bool vfio_accel)
 {
     return NULL;
 }
diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 48017219a4..b23933f7bd 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -426,7 +426,6 @@ static inline uint32_t SPR(uint32_t opcode)
     return ((sprn >> 5) & 0x1F) | ((sprn & 0x1F) << 5);
 }
 /***                              Get constants                            ***/
-EXTRACT_HELPER(IMM, 12, 8);
 /* 16 bits signed immediate value */
 EXTRACT_SHELPER(SIMM, 0, 16);
 /* 16 bits unsigned immediate value */
@@ -459,8 +458,6 @@ EXTRACT_HELPER(FPFLM, 17, 8);
 EXTRACT_HELPER(FPW, 16, 1);
 
 /***                            Jump target decoding                       ***/
-/* Displacement */
-EXTRACT_SHELPER(d, 0, 16);
 /* Immediate address */
 static inline target_ulong LI(uint32_t opcode)
 {
@@ -2665,11 +2662,6 @@ static inline void gen_qemu_ld8u(DisasContext *ctx, TCGv arg1, TCGv arg2)
     tcg_gen_qemu_ld8u(arg1, arg2, ctx->mem_idx);
 }
 
-static inline void gen_qemu_ld8s(DisasContext *ctx, TCGv arg1, TCGv arg2)
-{
-    tcg_gen_qemu_ld8s(arg1, arg2, ctx->mem_idx);
-}
-
 static inline void gen_qemu_ld16u(DisasContext *ctx, TCGv arg1, TCGv arg2)
 {
     TCGMemOp op = MO_UW | ctx->default_tcg_memop_mask;
@@ -4123,8 +4115,9 @@ static void gen_mcrxr(DisasContext *ctx)
     tcg_gen_trunc_tl_i32(t0, cpu_so);
     tcg_gen_trunc_tl_i32(t1, cpu_ov);
     tcg_gen_trunc_tl_i32(dst, cpu_ca);
-    tcg_gen_shri_i32(t0, t0, 2);
-    tcg_gen_shri_i32(t1, t1, 1);
+    tcg_gen_shli_i32(t0, t0, 3);
+    tcg_gen_shli_i32(t1, t1, 2);
+    tcg_gen_shli_i32(dst, dst, 1);
     tcg_gen_or_i32(dst, dst, t0);
     tcg_gen_or_i32(dst, dst, t1);
     tcg_temp_free_i32(t0);
diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
index 85581c9537..a3bb336e16 100644
--- a/target-ppc/translate_init.c
+++ b/target-ppc/translate_init.c
@@ -34,6 +34,7 @@
 //#define PPC_DUMP_CPU
 //#define PPC_DEBUG_SPR
 //#define PPC_DUMP_SPR_ACCESSES
+/* #define USE_APPLE_GDB */
 
 /* For user-mode emulation, we don't emulate any IRQ controller */
 #if defined(CONFIG_USER_ONLY)
@@ -8188,16 +8189,16 @@ static void init_proc_POWER8(CPUPPCState *env)
     init_proc_book3s_64(env, BOOK3S_CPU_POWER8);
 }
 
-POWERPC_FAMILY(POWER8)(ObjectClass *oc, void *data)
+POWERPC_FAMILY(POWER8E)(ObjectClass *oc, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(oc);
     PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
 
     dc->fw_name = "PowerPC,POWER8";
-    dc->desc = "POWER8";
+    dc->desc = "POWER8E";
     dc->props = powerpc_servercpu_properties;
-    pcc->pvr = CPU_POWERPC_POWER8_BASE;
-    pcc->pvr_mask = CPU_POWERPC_POWER8_MASK;
+    pcc->pvr = CPU_POWERPC_POWER8E_BASE;
+    pcc->pvr_mask = CPU_POWERPC_POWER8E_MASK;
     pcc->pcr_mask = PCR_COMPAT_2_05 | PCR_COMPAT_2_06;
     pcc->init_proc = init_proc_POWER8;
     pcc->check_pow = check_pow_nocheck;
@@ -8251,6 +8252,18 @@ POWERPC_FAMILY(POWER8)(ObjectClass *oc, void *data)
     pcc->l1_icache_size = 0x8000;
     pcc->interrupts_big_endian = ppc_cpu_interrupts_big_endian_lpcr;
 }
+
+POWERPC_FAMILY(POWER8)(ObjectClass *oc, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(oc);
+    PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
+
+    ppc_POWER8E_cpu_family_class_init(oc, data);
+
+    dc->desc = "POWER8";
+    pcc->pvr = CPU_POWERPC_POWER8_BASE;
+    pcc->pvr_mask = CPU_POWERPC_POWER8_MASK;
+}
 #endif /* defined (TARGET_PPC64) */
 
 
@@ -9667,6 +9680,13 @@ static void ppc_cpu_class_init(ObjectClass *oc, void *data)
 #endif
 
     cc->gdb_num_core_regs = 71;
+
+#ifdef USE_APPLE_GDB
+    cc->gdb_read_register = ppc_cpu_gdb_read_register_apple;
+    cc->gdb_write_register = ppc_cpu_gdb_write_register_apple;
+    cc->gdb_num_core_regs = 71 + 32;
+#endif
+
 #if defined(TARGET_PPC64)
     cc->gdb_core_xml_file = "power64-core.xml";
 #else
diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c
index dd84e76127..203027eb3e 100644
--- a/tcg/ppc/tcg-target.c
+++ b/tcg/ppc/tcg-target.c
@@ -1719,6 +1719,9 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64)
 # define LINK_AREA_SIZE                (6 * SZR)
 # define LR_OFFSET                     (1 * SZR)
 # define TCG_TARGET_CALL_STACK_OFFSET  (LINK_AREA_SIZE + 8 * SZR)
+#elif defined(TCG_TARGET_CALL_DARWIN)
+# define LINK_AREA_SIZE                (6 * SZR)
+# define LR_OFFSET                     (2 * SZR)
 #elif TCG_TARGET_REG_BITS == 64
 # if defined(_CALL_ELF) && _CALL_ELF == 2
 #  define LINK_AREA_SIZE               (4 * SZR)
@@ -1728,9 +1731,6 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64)
 # if defined(_CALL_SYSV)
 #  define LINK_AREA_SIZE               (2 * SZR)
 #  define LR_OFFSET                    (1 * SZR)
-# elif defined(TCG_TARGET_CALL_DARWIN)
-#  define LINK_AREA_SIZE               24
-#  define LR_OFFSET                    8
 # endif
 #endif
 #ifndef LR_OFFSET
diff --git a/tests/qapi-schema/event-nest-struct.err b/tests/qapi-schema/event-nest-struct.err
index e4a0faac9c..91bde1c967 100644
--- a/tests/qapi-schema/event-nest-struct.err
+++ b/tests/qapi-schema/event-nest-struct.err
@@ -1 +1 @@
-tests/qapi-schema/event-nest-struct.json:1: Nested structure define in event is not supported now, event 'EVENT_A', argname 'a'
+tests/qapi-schema/event-nest-struct.json:1: Nested structure define in event is not supported, event 'EVENT_A', argname 'a'
diff --git a/tests/qemu-iotests/031 b/tests/qemu-iotests/031
index 1d920ea87a..2a77ba8cbb 100755
--- a/tests/qemu-iotests/031
+++ b/tests/qemu-iotests/031
@@ -56,22 +56,22 @@ for IMGOPTS in "compat=0.10" "compat=1.1"; do
     echo === Create image with unknown header extension ===
     echo
     _make_test_img 64M
-    ./qcow2.py "$TEST_IMG" add-header-ext 0x12345678 "This is a test header extension"
-    ./qcow2.py "$TEST_IMG" dump-header
+    $PYTHON qcow2.py "$TEST_IMG" add-header-ext 0x12345678 "This is a test header extension"
+    $PYTHON qcow2.py "$TEST_IMG" dump-header
     _check_test_img
 
     echo
     echo === Rewrite header with no backing file ===
     echo
     $QEMU_IMG rebase -u -b "" "$TEST_IMG"
-    ./qcow2.py "$TEST_IMG" dump-header
+    $PYTHON qcow2.py "$TEST_IMG" dump-header
     _check_test_img
 
     echo
     echo === Add a backing file and format ===
     echo
     $QEMU_IMG rebase -u -b "/some/backing/file/path" -F host_device "$TEST_IMG"
-    ./qcow2.py "$TEST_IMG" dump-header
+    $PYTHON qcow2.py "$TEST_IMG" dump-header
 done
 
 # success, all done
diff --git a/tests/qemu-iotests/036 b/tests/qemu-iotests/036
index 03b6aa9de7..a77365347d 100755
--- a/tests/qemu-iotests/036
+++ b/tests/qemu-iotests/036
@@ -53,15 +53,15 @@ IMGOPTS="compat=1.1"
 echo === Create image with unknown autoclear feature bit ===
 echo
 _make_test_img 64M
-./qcow2.py "$TEST_IMG" set-feature-bit autoclear 63
-./qcow2.py "$TEST_IMG" dump-header
+$PYTHON qcow2.py "$TEST_IMG" set-feature-bit autoclear 63
+$PYTHON qcow2.py "$TEST_IMG" dump-header
 
 echo
 echo === Repair image ===
 echo
 _check_test_img -r all
 
-./qcow2.py "$TEST_IMG" dump-header
+$PYTHON qcow2.py "$TEST_IMG" dump-header
 
 # success, all done
 echo "*** done"
diff --git a/tests/qemu-iotests/039 b/tests/qemu-iotests/039
index 27fe4bdacc..84c9167660 100755
--- a/tests/qemu-iotests/039
+++ b/tests/qemu-iotests/039
@@ -63,7 +63,7 @@ _make_test_img $size
 $QEMU_IO -c "write -P 0x5a 0 512" "$TEST_IMG" | _filter_qemu_io
 
 # The dirty bit must not be set
-./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
+$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
 _check_test_img
 
 echo
@@ -75,7 +75,7 @@ _make_test_img $size
 _no_dump_exec $QEMU_IO -c "write -P 0x5a 0 512" -c "abort" "$TEST_IMG" 2>&1 | _filter_qemu_io
 
 # The dirty bit must be set
-./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
+$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
 _check_test_img
 
 echo
@@ -84,7 +84,7 @@ echo "== Read-only access must still work =="
 $QEMU_IO -r -c "read -P 0x5a 0 512" "$TEST_IMG" | _filter_qemu_io
 
 # The dirty bit must be set
-./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
+$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
 
 echo
 echo "== Repairing the image file must succeed =="
@@ -92,7 +92,7 @@ echo "== Repairing the image file must succeed =="
 _check_test_img -r all
 
 # The dirty bit must not be set
-./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
+$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
 
 echo
 echo "== Data should still be accessible after repair =="
@@ -108,12 +108,12 @@ _make_test_img $size
 _no_dump_exec $QEMU_IO -c "write -P 0x5a 0 512" -c "abort" "$TEST_IMG" 2>&1 | _filter_qemu_io
 
 # The dirty bit must be set
-./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
+$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
 
 $QEMU_IO -c "write 0 512" "$TEST_IMG" | _filter_qemu_io
 
 # The dirty bit must not be set
-./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
+$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
 
 echo
 echo "== Creating an image file with lazy_refcounts=off =="
@@ -124,7 +124,7 @@ _make_test_img $size
 _no_dump_exec $QEMU_IO -c "write -P 0x5a 0 512" -c "abort" "$TEST_IMG" 2>&1 | _filter_qemu_io
 
 # The dirty bit must not be set since lazy_refcounts=off
-./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
+$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
 _check_test_img
 
 echo
@@ -140,8 +140,8 @@ $QEMU_IO -c "write 0 512" "$TEST_IMG" | _filter_qemu_io
 $QEMU_IMG commit "$TEST_IMG"
 
 # The dirty bit must not be set
-./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
-./qcow2.py "$TEST_IMG".base dump-header | grep incompatible_features
+$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
+$PYTHON qcow2.py "$TEST_IMG".base dump-header | grep incompatible_features
 
 _check_test_img
 TEST_IMG="$TEST_IMG".base _check_test_img
diff --git a/tests/qemu-iotests/040 b/tests/qemu-iotests/040
index 734b6a6bb4..d1668109dd 100755
--- a/tests/qemu-iotests/040
+++ b/tests/qemu-iotests/040
@@ -35,12 +35,13 @@ test_img = os.path.join(iotests.test_dir, 'test.img')
 class ImageCommitTestCase(iotests.QMPTestCase):
     '''Abstract base class for image commit test cases'''
 
-    def run_commit_test(self, top, base):
+    def run_commit_test(self, top, base, need_ready=False):
         self.assert_no_active_block_jobs()
         result = self.vm.qmp('block-commit', device='drive0', top=top, base=base)
         self.assert_qmp(result, 'return', {})
 
         completed = False
+        ready = False
         while not completed:
             for event in self.vm.get_qmp_events(wait=True):
                 if event['event'] == 'BLOCK_JOB_COMPLETED':
@@ -48,8 +49,11 @@ class ImageCommitTestCase(iotests.QMPTestCase):
                     self.assert_qmp(event, 'data/device', 'drive0')
                     self.assert_qmp(event, 'data/offset', self.image_len)
                     self.assert_qmp(event, 'data/len', self.image_len)
+                    if need_ready:
+                        self.assertTrue(ready, "Expecting BLOCK_JOB_COMPLETED event")
                     completed = True
                 elif event['event'] == 'BLOCK_JOB_READY':
+                    ready = True
                     self.assert_qmp(event, 'data/type', 'commit')
                     self.assert_qmp(event, 'data/device', 'drive0')
                     self.assert_qmp(event, 'data/len', self.image_len)
@@ -63,7 +67,7 @@ class TestSingleDrive(ImageCommitTestCase):
     test_len = 1 * 1024 * 256
 
     def setUp(self):
-        iotests.create_image(backing_img, TestSingleDrive.image_len)
+        iotests.create_image(backing_img, self.image_len)
         qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % backing_img, mid_img)
         qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % mid_img, test_img)
         qemu_io('-c', 'write -P 0xab 0 524288', backing_img)
@@ -105,7 +109,7 @@ class TestSingleDrive(ImageCommitTestCase):
         self.assert_qmp(result, 'error/desc', 'Base \'badfile\' not found')
 
     def test_top_is_active(self):
-        self.run_commit_test(test_img, backing_img)
+        self.run_commit_test(test_img, backing_img, need_ready=True)
         self.assertEqual(-1, qemu_io('-c', 'read -P 0xab 0 524288', backing_img).find("verification failed"))
         self.assertEqual(-1, qemu_io('-c', 'read -P 0xef 524288 524288', backing_img).find("verification failed"))
 
@@ -238,6 +242,8 @@ class TestSetSpeed(ImageCommitTestCase):
 
         self.cancel_and_wait(resume=True)
 
+class TestActiveZeroLengthImage(TestSingleDrive):
+    image_len = 0
 
 if __name__ == '__main__':
     iotests.main(supported_fmts=['qcow2', 'qed'])
diff --git a/tests/qemu-iotests/040.out b/tests/qemu-iotests/040.out
index b6f257674e..42314e9c00 100644
--- a/tests/qemu-iotests/040.out
+++ b/tests/qemu-iotests/040.out
@@ -1,5 +1,5 @@
-................
+........................
 ----------------------------------------------------------------------
-Ran 16 tests
+Ran 24 tests
 
 OK
diff --git a/tests/qemu-iotests/041 b/tests/qemu-iotests/041
index ec470b2007..0815e19274 100755
--- a/tests/qemu-iotests/041
+++ b/tests/qemu-iotests/041
@@ -28,6 +28,12 @@ target_backing_img = os.path.join(iotests.test_dir, 'target-backing.img')
 test_img = os.path.join(iotests.test_dir, 'test.img')
 target_img = os.path.join(iotests.test_dir, 'target.img')
 
+quorum_img1 = os.path.join(iotests.test_dir, 'quorum1.img')
+quorum_img2 = os.path.join(iotests.test_dir, 'quorum2.img')
+quorum_img3 = os.path.join(iotests.test_dir, 'quorum3.img')
+quorum_repair_img = os.path.join(iotests.test_dir, 'quorum_repair.img')
+quorum_snapshot_file = os.path.join(iotests.test_dir, 'quorum_snapshot.img')
+
 class ImageMirroringTestCase(iotests.QMPTestCase):
     '''Abstract base class for image mirroring test cases'''
 
@@ -42,8 +48,8 @@ class ImageMirroringTestCase(iotests.QMPTestCase):
                     ready = True
 
     def wait_ready_and_cancel(self, drive='drive0'):
-        self.wait_ready(drive)
-        event = self.cancel_and_wait()
+        self.wait_ready(drive=drive)
+        event = self.cancel_and_wait(drive=drive)
         self.assertEquals(event['event'], 'BLOCK_JOB_COMPLETED')
         self.assert_qmp(event, 'data/type', 'mirror')
         self.assert_qmp(event, 'data/offset', self.image_len)
@@ -52,19 +58,19 @@ class ImageMirroringTestCase(iotests.QMPTestCase):
     def complete_and_wait(self, drive='drive0', wait_ready=True):
         '''Complete a block job and wait for it to finish'''
         if wait_ready:
-            self.wait_ready()
+            self.wait_ready(drive=drive)
 
         result = self.vm.qmp('block-job-complete', device=drive)
         self.assert_qmp(result, 'return', {})
 
-        event = self.wait_until_completed()
+        event = self.wait_until_completed(drive=drive)
         self.assert_qmp(event, 'data/type', 'mirror')
 
 class TestSingleDrive(ImageMirroringTestCase):
     image_len = 1 * 1024 * 1024 # MB
 
     def setUp(self):
-        iotests.create_image(backing_img, TestSingleDrive.image_len)
+        iotests.create_image(backing_img, self.image_len)
         qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % backing_img, test_img)
         self.vm = iotests.VM().add_drive(test_img)
         self.vm.launch()
@@ -163,7 +169,7 @@ class TestSingleDrive(ImageMirroringTestCase):
         self.assert_no_active_block_jobs()
 
         qemu_img('create', '-f', iotests.imgfmt, '-o', 'cluster_size=%d,size=%d'
-                        % (TestSingleDrive.image_len, TestSingleDrive.image_len), target_img)
+                        % (self.image_len, self.image_len), target_img)
         result = self.vm.qmp('drive-mirror', device='drive0', sync='full',
                              buf_size=65536, mode='existing', target=target_img)
         self.assert_qmp(result, 'return', {})
@@ -179,7 +185,7 @@ class TestSingleDrive(ImageMirroringTestCase):
         self.assert_no_active_block_jobs()
 
         qemu_img('create', '-f', iotests.imgfmt, '-o', 'cluster_size=%d,backing_file=%s'
-                        % (TestSingleDrive.image_len, backing_img), target_img)
+                        % (self.image_len, backing_img), target_img)
         result = self.vm.qmp('drive-mirror', device='drive0', sync='full',
                              mode='existing', target=target_img)
         self.assert_qmp(result, 'return', {})
@@ -206,6 +212,11 @@ class TestSingleDrive(ImageMirroringTestCase):
                              target=target_img)
         self.assert_qmp(result, 'error/class', 'DeviceNotFound')
 
+class TestSingleDriveZeroLength(TestSingleDrive):
+    image_len = 0
+    test_small_buffer2 = None
+    test_large_cluster = None
+
 class TestMirrorNoBacking(ImageMirroringTestCase):
     image_len = 2 * 1024 * 1024 # MB
 
@@ -718,5 +729,187 @@ class TestUnbackedSource(ImageMirroringTestCase):
         self.complete_and_wait()
         self.assert_no_active_block_jobs()
 
+class TestRepairQuorum(ImageMirroringTestCase):
+    """ This class test quorum file repair using drive-mirror.
+        It's mostly a fork of TestSingleDrive """
+    image_len = 1 * 1024 * 1024 # MB
+    IMAGES = [ quorum_img1, quorum_img2, quorum_img3 ]
+
+    def setUp(self):
+        self.vm = iotests.VM()
+
+        # Add each individual quorum images
+        for i in self.IMAGES:
+            qemu_img('create', '-f', iotests.imgfmt, i,
+                     str(TestSingleDrive.image_len))
+            # Assign a node name to each quorum image in order to manipulate
+            # them
+            opts = "node-name=img%i" % self.IMAGES.index(i)
+            self.vm = self.vm.add_drive(i, opts)
+
+        self.vm.launch()
+
+        #assemble the quorum block device from the individual files
+        args = { "options" : { "driver": "quorum", "id": "quorum0",
+                 "vote-threshold": 2, "children": [ "img0", "img1", "img2" ] } }
+        result = self.vm.qmp("blockdev-add", **args)
+        self.assert_qmp(result, 'return', {})
+
+
+    def tearDown(self):
+        self.vm.shutdown()
+        for i in self.IMAGES + [ quorum_repair_img ]:
+            # Do a try/except because the test may have deleted some images
+            try:
+                os.remove(i)
+            except OSError:
+                pass
+
+    def test_complete(self):
+        self.assert_no_active_block_jobs()
+
+        result = self.vm.qmp('drive-mirror', device='quorum0', sync='full',
+                             node_name="repair0",
+                             replaces="img1",
+                             target=quorum_repair_img, format=iotests.imgfmt)
+        self.assert_qmp(result, 'return', {})
+
+        self.complete_and_wait(drive="quorum0")
+        result = self.vm.qmp('query-named-block-nodes')
+        self.assert_qmp(result, 'return[0]/file', quorum_repair_img)
+        # TODO: a better test requiring some QEMU infrastructure will be added
+        #       to check that this file is really driven by quorum
+        self.vm.shutdown()
+        self.assertTrue(iotests.compare_images(quorum_img2, quorum_repair_img),
+                        'target image does not match source after mirroring')
+
+    def test_cancel(self):
+        self.assert_no_active_block_jobs()
+
+        result = self.vm.qmp('drive-mirror', device='quorum0', sync='full',
+                             node_name="repair0",
+                             replaces="img1",
+                             target=quorum_repair_img, format=iotests.imgfmt)
+        self.assert_qmp(result, 'return', {})
+
+        self.cancel_and_wait(drive="quorum0", force=True)
+        # here we check that the last registered quorum file has not been
+        # swapped out and unref
+        result = self.vm.qmp('query-named-block-nodes')
+        self.assert_qmp(result, 'return[0]/file', quorum_img3)
+        self.vm.shutdown()
+
+    def test_cancel_after_ready(self):
+        self.assert_no_active_block_jobs()
+
+        result = self.vm.qmp('drive-mirror', device='quorum0', sync='full',
+                             node_name="repair0",
+                             replaces="img1",
+                             target=quorum_repair_img, format=iotests.imgfmt)
+        self.assert_qmp(result, 'return', {})
+
+        self.wait_ready_and_cancel(drive="quorum0")
+        result = self.vm.qmp('query-named-block-nodes')
+        # here we check that the last registered quorum file has not been
+        # swapped out and unref
+        self.assert_qmp(result, 'return[0]/file', quorum_img3)
+        self.vm.shutdown()
+        self.assertTrue(iotests.compare_images(quorum_img2, quorum_repair_img),
+                        'target image does not match source after mirroring')
+
+    def test_pause(self):
+        self.assert_no_active_block_jobs()
+
+        result = self.vm.qmp('drive-mirror', device='quorum0', sync='full',
+                             node_name="repair0",
+                             replaces="img1",
+                             target=quorum_repair_img, format=iotests.imgfmt)
+        self.assert_qmp(result, 'return', {})
+
+        result = self.vm.qmp('block-job-pause', device='quorum0')
+        self.assert_qmp(result, 'return', {})
+
+        time.sleep(1)
+        result = self.vm.qmp('query-block-jobs')
+        offset = self.dictpath(result, 'return[0]/offset')
+
+        time.sleep(1)
+        result = self.vm.qmp('query-block-jobs')
+        self.assert_qmp(result, 'return[0]/offset', offset)
+
+        result = self.vm.qmp('block-job-resume', device='quorum0')
+        self.assert_qmp(result, 'return', {})
+
+        self.complete_and_wait(drive="quorum0")
+        self.vm.shutdown()
+        self.assertTrue(iotests.compare_images(quorum_img2, quorum_repair_img),
+                        'target image does not match source after mirroring')
+
+    def test_medium_not_found(self):
+        result = self.vm.qmp('drive-mirror', device='ide1-cd0', sync='full',
+                             node_name='repair0',
+                             replaces='img1',
+                             target=quorum_repair_img, format=iotests.imgfmt)
+        self.assert_qmp(result, 'error/class', 'GenericError')
+
+    def test_image_not_found(self):
+        result = self.vm.qmp('drive-mirror', device='quorum0', sync='full',
+                             node_name='repair0',
+                             replaces='img1',
+                             mode='existing',
+                             target=quorum_repair_img, format=iotests.imgfmt)
+        self.assert_qmp(result, 'error/class', 'GenericError')
+
+    def test_device_not_found(self):
+        result = self.vm.qmp('drive-mirror', device='nonexistent', sync='full',
+                             node_name='repair0',
+                             replaces='img1',
+                             target=quorum_repair_img, format=iotests.imgfmt)
+        self.assert_qmp(result, 'error/class', 'DeviceNotFound')
+
+    def test_wrong_sync_mode(self):
+        result = self.vm.qmp('drive-mirror', device='quorum0',
+                             node_name='repair0',
+                             replaces='img1',
+                             target=quorum_repair_img, format=iotests.imgfmt)
+        self.assert_qmp(result, 'error/class', 'GenericError')
+
+    def test_no_node_name(self):
+        result = self.vm.qmp('drive-mirror', device='quorum0', sync='full',
+                             replaces='img1',
+                             target=quorum_repair_img, format=iotests.imgfmt)
+        self.assert_qmp(result, 'error/class', 'GenericError')
+
+    def test_unexistant_replaces(self):
+        result = self.vm.qmp('drive-mirror', device='quorum0', sync='full',
+                             node_name='repair0',
+                             replaces='img77',
+                             target=quorum_repair_img, format=iotests.imgfmt)
+        self.assert_qmp(result, 'error/class', 'GenericError')
+
+    def test_after_a_quorum_snapshot(self):
+        result = self.vm.qmp('blockdev-snapshot-sync', node_name='img1',
+                             snapshot_file=quorum_snapshot_file,
+                             snapshot_node_name="snap1");
+
+        result = self.vm.qmp('drive-mirror', device='quorum0', sync='full',
+                             node_name='repair0',
+                             replaces="img1",
+                             target=quorum_repair_img, format=iotests.imgfmt)
+        self.assert_qmp(result, 'error/class', 'GenericError')
+
+        result = self.vm.qmp('drive-mirror', device='quorum0', sync='full',
+                             node_name='repair0',
+                             replaces="snap1",
+                             target=quorum_repair_img, format=iotests.imgfmt)
+        self.assert_qmp(result, 'return', {})
+
+        self.complete_and_wait(drive="quorum0")
+        result = self.vm.qmp('query-named-block-nodes')
+        self.assert_qmp(result, 'return[0]/file', quorum_repair_img)
+        # TODO: a better test requiring some QEMU infrastructure will be added
+        #       to check that this file is really driven by quorum
+        self.vm.shutdown()
+
 if __name__ == '__main__':
     iotests.main(supported_fmts=['qcow2', 'qed'])
diff --git a/tests/qemu-iotests/041.out b/tests/qemu-iotests/041.out
index 6d9bee1a4b..42147c0b58 100644
--- a/tests/qemu-iotests/041.out
+++ b/tests/qemu-iotests/041.out
@@ -1,5 +1,5 @@
-...........................
+..............................................
 ----------------------------------------------------------------------
-Ran 27 tests
+Ran 46 tests
 
 OK
diff --git a/tests/qemu-iotests/051 b/tests/qemu-iotests/051
index c4af131a66..a41334e022 100755
--- a/tests/qemu-iotests/051
+++ b/tests/qemu-iotests/051
@@ -92,6 +92,7 @@ echo
 
 run_qemu -drive file="$TEST_IMG",format=foo
 run_qemu -drive file="$TEST_IMG",driver=foo
+run_qemu -drive file="$TEST_IMG",driver=raw,format=qcow2
 
 echo
 echo === Overriding backing file ===
@@ -99,6 +100,11 @@ echo
 
 echo "info block" | run_qemu -drive file="$TEST_IMG",driver=qcow2,backing.file.filename="$TEST_IMG.orig" -nodefaults
 
+# Drivers that don't support backing files
+run_qemu -drive file="$TEST_IMG",driver=raw,backing.file.filename="$TEST_IMG.orig"
+run_qemu -drive file="$TEST_IMG",file.backing.driver=file,file.backing.filename="$TEST_IMG.orig"
+run_qemu -drive file="$TEST_IMG",file.backing.driver=qcow2,file.backing.file.filename="$TEST_IMG.orig"
+
 echo
 echo === Enable and disable lazy refcounting on the command line, plus some invalid values ===
 echo
diff --git a/tests/qemu-iotests/051.out b/tests/qemu-iotests/051.out
index 31e329e893..d7b0f503af 100644
--- a/tests/qemu-iotests/051.out
+++ b/tests/qemu-iotests/051.out
@@ -38,7 +38,10 @@ Testing: -drive file=TEST_DIR/t.qcow2,format=foo
 QEMU_PROG: -drive file=TEST_DIR/t.qcow2,format=foo: 'foo' invalid format
 
 Testing: -drive file=TEST_DIR/t.qcow2,driver=foo
-QEMU_PROG: -drive file=TEST_DIR/t.qcow2,driver=foo: could not open disk image TEST_DIR/t.qcow2: Invalid driver: 'foo'
+QEMU_PROG: -drive file=TEST_DIR/t.qcow2,driver=foo: could not open disk image TEST_DIR/t.qcow2: Unknown driver 'foo'
+
+Testing: -drive file=TEST_DIR/t.qcow2,driver=raw,format=qcow2
+QEMU_PROG: -drive file=TEST_DIR/t.qcow2,driver=raw,format=qcow2: could not open disk image TEST_DIR/t.qcow2: Driver specified twice
 
 
 === Overriding backing file ===
@@ -50,6 +53,15 @@ ide0-hd0: TEST_DIR/t.qcow2 (qcow2)
     Backing file:     TEST_DIR/t.qcow2.orig (chain depth: 1)
 (qemu) qququiquit
 
+Testing: -drive file=TEST_DIR/t.qcow2,driver=raw,backing.file.filename=TEST_DIR/t.qcow2.orig
+QEMU_PROG: -drive file=TEST_DIR/t.qcow2,driver=raw,backing.file.filename=TEST_DIR/t.qcow2.orig: could not open disk image TEST_DIR/t.qcow2: Driver doesn't support backing files
+
+Testing: -drive file=TEST_DIR/t.qcow2,file.backing.driver=file,file.backing.filename=TEST_DIR/t.qcow2.orig
+QEMU_PROG: -drive file=TEST_DIR/t.qcow2,file.backing.driver=file,file.backing.filename=TEST_DIR/t.qcow2.orig: could not open disk image TEST_DIR/t.qcow2: Driver doesn't support backing files
+
+Testing: -drive file=TEST_DIR/t.qcow2,file.backing.driver=qcow2,file.backing.file.filename=TEST_DIR/t.qcow2.orig
+QEMU_PROG: -drive file=TEST_DIR/t.qcow2,file.backing.driver=qcow2,file.backing.file.filename=TEST_DIR/t.qcow2.orig: could not open disk image TEST_DIR/t.qcow2: Driver doesn't support backing files
+
 
 === Enable and disable lazy refcounting on the command line, plus some invalid values ===
 
diff --git a/tests/qemu-iotests/054 b/tests/qemu-iotests/054
index c8b7082b4e..bd94153d66 100755
--- a/tests/qemu-iotests/054
+++ b/tests/qemu-iotests/054
@@ -49,7 +49,7 @@ _make_test_img $((1024*1024))T
 echo
 echo "creating too large image (1 EB) using qcow2.py"
 _make_test_img 4G
-./qcow2.py "$TEST_IMG" set-header size $((1024 ** 6))
+$PYTHON qcow2.py "$TEST_IMG" set-header size $((1024 ** 6))
 _check_test_img
 
 # success, all done
diff --git a/tests/qemu-iotests/060 b/tests/qemu-iotests/060
index f0116aab1d..3cffc12fec 100755
--- a/tests/qemu-iotests/060
+++ b/tests/qemu-iotests/060
@@ -68,13 +68,13 @@ poke_file "$TEST_IMG" "$l1_offset" "\x80\x00\x00\x00\x00\x03\x00\x00"
 _check_test_img
 
 # The corrupt bit should not be set anyway
-./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
+$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
 
 # Try to write something, thereby forcing the corrupt bit to be set
 $QEMU_IO -c "$OPEN_RW" -c "write -P 0x2a 0 512" | _filter_qemu_io
 
 # The corrupt bit must now be set
-./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
+$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
 
 # Try to open the image R/W (which should fail)
 $QEMU_IO -c "$OPEN_RW" -c "read 0 512" 2>&1 | _filter_qemu_io \
@@ -99,19 +99,19 @@ poke_file "$TEST_IMG" "$(($rb_offset+8))" "\x00\x01"
 # Redirect new data cluster onto refcount block
 poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x02\x00\x00"
 _check_test_img
-./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
+$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
 $QEMU_IO -c "$OPEN_RW" -c "write -P 0x2a 0 512" | _filter_qemu_io
-./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
+$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
 
 # Try to fix it
 _check_test_img -r all
 
 # The corrupt bit should be cleared
-./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
+$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
 
 # Look if it's really really fixed
 $QEMU_IO -c "$OPEN_RW" -c "write -P 0x2a 0 512" | _filter_qemu_io
-./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
+$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
 
 echo
 echo "=== Testing cluster data reference into inactive L2 table ==="
@@ -124,13 +124,13 @@ $QEMU_IO -c "$OPEN_RW" -c "write -P 2 0 512" | _filter_qemu_io
 poke_file "$TEST_IMG" "$l2_offset_after_snapshot" \
                       "\x80\x00\x00\x00\x00\x04\x00\x00"
 _check_test_img
-./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
+$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
 $QEMU_IO -c "$OPEN_RW" -c "write -P 3 0 512" | _filter_qemu_io
-./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
+$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
 _check_test_img -r all
-./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
+$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
 $QEMU_IO -c "$OPEN_RW" -c "write -P 4 0 512" | _filter_qemu_io
-./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
+$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
 
 # Check data
 $QEMU_IO -c "$OPEN_RO" -c "read -P 4 0 512" | _filter_qemu_io
diff --git a/tests/qemu-iotests/061 b/tests/qemu-iotests/061
index d3a6b388b5..ab98def6d4 100755
--- a/tests/qemu-iotests/061
+++ b/tests/qemu-iotests/061
@@ -48,9 +48,9 @@ echo "=== Testing version downgrade with zero expansion ==="
 echo
 IMGOPTS="compat=1.1,lazy_refcounts=on" _make_test_img 64M
 $QEMU_IO -c "write -z 0 128k" "$TEST_IMG" | _filter_qemu_io
-./qcow2.py "$TEST_IMG" dump-header
+$PYTHON qcow2.py "$TEST_IMG" dump-header
 $QEMU_IMG amend -o "compat=0.10" "$TEST_IMG"
-./qcow2.py "$TEST_IMG" dump-header
+$PYTHON qcow2.py "$TEST_IMG" dump-header
 $QEMU_IO -c "read -P 0 0 128k" "$TEST_IMG" | _filter_qemu_io
 _check_test_img
 
@@ -59,9 +59,9 @@ echo "=== Testing dirty version downgrade ==="
 echo
 IMGOPTS="compat=1.1,lazy_refcounts=on" _make_test_img 64M
 $QEMU_IO -c "write -P 0x2a 0 128k" -c flush -c abort "$TEST_IMG" | _filter_qemu_io
-./qcow2.py "$TEST_IMG" dump-header
+$PYTHON qcow2.py "$TEST_IMG" dump-header
 $QEMU_IMG amend -o "compat=0.10" "$TEST_IMG"
-./qcow2.py "$TEST_IMG" dump-header
+$PYTHON qcow2.py "$TEST_IMG" dump-header
 $QEMU_IO -c "read -P 0x2a 0 128k" "$TEST_IMG" | _filter_qemu_io
 _check_test_img
 
@@ -69,11 +69,11 @@ echo
 echo "=== Testing version downgrade with unknown compat/autoclear flags ==="
 echo
 IMGOPTS="compat=1.1" _make_test_img 64M
-./qcow2.py "$TEST_IMG" set-feature-bit compatible 42
-./qcow2.py "$TEST_IMG" set-feature-bit autoclear 42
-./qcow2.py "$TEST_IMG" dump-header
+$PYTHON qcow2.py "$TEST_IMG" set-feature-bit compatible 42
+$PYTHON qcow2.py "$TEST_IMG" set-feature-bit autoclear 42
+$PYTHON qcow2.py "$TEST_IMG" dump-header
 $QEMU_IMG amend -o "compat=0.10" "$TEST_IMG"
-./qcow2.py "$TEST_IMG" dump-header
+$PYTHON qcow2.py "$TEST_IMG" dump-header
 _check_test_img
 
 echo
@@ -81,9 +81,9 @@ echo "=== Testing version upgrade and resize ==="
 echo
 IMGOPTS="compat=0.10" _make_test_img 64M
 $QEMU_IO -c "write -P 0x2a 42M 64k" "$TEST_IMG" | _filter_qemu_io
-./qcow2.py "$TEST_IMG" dump-header
+$PYTHON qcow2.py "$TEST_IMG" dump-header
 $QEMU_IMG amend -o "compat=1.1,lazy_refcounts=on,size=128M" "$TEST_IMG"
-./qcow2.py "$TEST_IMG" dump-header
+$PYTHON qcow2.py "$TEST_IMG" dump-header
 $QEMU_IO -c "read -P 0x2a 42M 64k" "$TEST_IMG" | _filter_qemu_io
 _check_test_img
 
@@ -92,9 +92,9 @@ echo "=== Testing dirty lazy_refcounts=off ==="
 echo
 IMGOPTS="compat=1.1,lazy_refcounts=on" _make_test_img 64M
 $QEMU_IO -c "write -P 0x2a 0 128k" -c flush -c abort "$TEST_IMG" | _filter_qemu_io
-./qcow2.py "$TEST_IMG" dump-header
+$PYTHON qcow2.py "$TEST_IMG" dump-header
 $QEMU_IMG amend -o "lazy_refcounts=off" "$TEST_IMG"
-./qcow2.py "$TEST_IMG" dump-header
+$PYTHON qcow2.py "$TEST_IMG" dump-header
 $QEMU_IO -c "read -P 0x2a 0 128k" "$TEST_IMG" | _filter_qemu_io
 _check_test_img
 
diff --git a/tests/qemu-iotests/065 b/tests/qemu-iotests/065
index ab5445f62d..e89b61d70b 100755
--- a/tests/qemu-iotests/065
+++ b/tests/qemu-iotests/065
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python
 #
 # Test for additional information emitted by qemu-img info on qcow2
 # images
diff --git a/tests/qemu-iotests/081 b/tests/qemu-iotests/081
index b512d00cc8..7ae4be2053 100755
--- a/tests/qemu-iotests/081
+++ b/tests/qemu-iotests/081
@@ -134,15 +134,28 @@ run_qemu -drive "file=$TEST_DIR/2.raw,format=$IMGFMT,if=none,id=drive2" <<EOF
 EOF
 
 echo
+echo "== using quorum rewrite corrupted mode =="
+
+quorum="$quorum,file.rewrite-corrupted=on"
+
+$QEMU_IO -c "open -o $quorum" -c "read -P 0x32 0 $size" | _filter_qemu_io
+
+echo
+echo "== checking that quorum has corrected the corrupted file =="
+
+$QEMU_IO -c "read -P 0x32 0 $size" "$TEST_DIR/2.raw" | _filter_qemu_io
+
+echo
 echo "== breaking quorum =="
 
 $QEMU_IO -c "write -P 0x41 0 $size" "$TEST_DIR/1.raw" | _filter_qemu_io
+$QEMU_IO -c "write -P 0x42 0 $size" "$TEST_DIR/2.raw" | _filter_qemu_io
+
 echo
 echo "== checking that quorum is broken =="
 
 $QEMU_IO -c "open -o $quorum" -c "read -P 0x32 0 $size" | _filter_qemu_io
 
-
 # success, all done
 echo "*** done"
 rm -f $seq.full
diff --git a/tests/qemu-iotests/081.out b/tests/qemu-iotests/081.out
index 2241cec148..073515ea50 100644
--- a/tests/qemu-iotests/081.out
+++ b/tests/qemu-iotests/081.out
@@ -40,9 +40,19 @@ read 10485760/10485760 bytes at offset 0
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "DEVICE_TRAY_MOVED", "data": {"device": "floppy0", "tray-open": true}}
 
 
+== using quorum rewrite corrupted mode ==
+read 10485760/10485760 bytes at offset 0
+10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+== checking that quorum has corrected the corrupted file ==
+read 10485760/10485760 bytes at offset 0
+10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
 == breaking quorum ==
 wrote 10485760/10485760 bytes at offset 0
 10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 10485760/10485760 bytes at offset 0
+10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 
 == checking that quorum is broken ==
 qemu-io: can't open: Could not read image for determining its format: Input/output error
diff --git a/tests/qemu-iotests/083 b/tests/qemu-iotests/083
index f764534782..991a9d91db 100755
--- a/tests/qemu-iotests/083
+++ b/tests/qemu-iotests/083
@@ -44,7 +44,7 @@ choose_tcp_port() {
 
 wait_for_tcp_port() {
 	while ! (netstat --tcp --listening --numeric | \
-		 grep "$1.*0.0.0.0:\*.*LISTEN") 2>&1 >/dev/null; do
+		 grep "$1.*0\\.0\\.0\\.0:\\*.*LISTEN") 2>&1 >/dev/null; do
 		sleep 0.1
 	done
 }
@@ -55,8 +55,8 @@ filter_nbd() {
 	# callbacks sometimes, making them unreliable.
 	#
 	# Filter out the TCP port number since this changes between runs.
-	sed -e 's#^nbd.c:.*##g' \
-	    -e 's#nbd:127.0.0.1:[^:]*:#nbd:127.0.0.1:PORT:#g'
+	sed -e 's#^.*nbd\.c:.*##g' \
+	    -e 's#nbd:127\.0\.0\.1:[^:]*:#nbd:127\.0\.0\.1:PORT:#g'
 }
 
 check_disconnect() {
@@ -81,8 +81,8 @@ EOF
 		nbd_url="nbd:127.0.0.1:$port:exportname=foo"
 	fi
 
-	./nbd-fault-injector.py $extra_args "127.0.0.1:$port" "$TEST_DIR/nbd-fault-injector.conf" 2>&1 >/dev/null &
-	wait_for_tcp_port "127.0.0.1:$port"
+	$PYTHON nbd-fault-injector.py $extra_args "127.0.0.1:$port" "$TEST_DIR/nbd-fault-injector.conf" 2>&1 >/dev/null &
+	wait_for_tcp_port "127\\.0\\.0\\.1:$port"
 	$QEMU_IO -c "read 0 512" "$nbd_url" 2>&1 | _filter_qemu_io | filter_nbd
 
 	echo
diff --git a/tests/qemu-iotests/095 b/tests/qemu-iotests/095
new file mode 100755
index 0000000000..acc7dbf182
--- /dev/null
+++ b/tests/qemu-iotests/095
@@ -0,0 +1,86 @@
+#!/bin/bash
+#
+# Test for commit of larger active layer
+#
+# This tests live snapshots of images on a running QEMU instance, using
+# QMP commands.  Both single disk snapshots, and transactional group
+# snapshots are performed.
+#
+# Copyright (C) 2014 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+# creator
+owner=jcody@redhat.com
+
+seq=`basename $0`
+echo "QA output created by $seq"
+
+here=`pwd`
+status=1	# failure is the default!
+
+_cleanup()
+{
+    _cleanup_qemu
+    rm  -f "${TEST_IMG}.base" "${TEST_IMG}.snp1"
+	_cleanup_test_img
+}
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+# get standard environment, filters and checks
+. ./common.rc
+. ./common.filter
+. ./common.qemu
+
+_supported_fmt qcow2
+_supported_proto file
+_supported_os Linux
+
+size_smaller=5M
+size_larger=100M
+
+_make_test_img $size_smaller
+mv "${TEST_IMG}" "${TEST_IMG}.base"
+
+_make_test_img -b "${TEST_IMG}.base" $size_larger
+mv "${TEST_IMG}" "${TEST_IMG}.snp1"
+
+_make_test_img -b "${TEST_IMG}.snp1" $size_larger
+
+echo
+echo "=== Base image info before commit and resize ==="
+$QEMU_IMG info "${TEST_IMG}.base" | _filter_testdir
+
+echo
+echo === Running QEMU Live Commit Test ===
+echo
+
+qemu_comm_method="qmp"
+_launch_qemu -drive file="${TEST_IMG}",if=virtio,id=test
+h=$QEMU_HANDLE
+
+_send_qemu_cmd $h "{ 'execute': 'qmp_capabilities' }" "return"
+
+_send_qemu_cmd $h "{ 'execute': 'block-commit',
+                                 'arguments': { 'device': 'test',
+                                 'top': '"${TEST_IMG}.snp1"' } }" "BLOCK_JOB_COMPLETED"
+
+echo
+echo "=== Base image info after commit and resize ==="
+$QEMU_IMG info "${TEST_IMG}.base" | _filter_testdir
+
+# success, all done
+echo "*** done"
+rm -f $seq.full
+status=0
diff --git a/tests/qemu-iotests/095.out b/tests/qemu-iotests/095.out
new file mode 100644
index 0000000000..5864ddac2b
--- /dev/null
+++ b/tests/qemu-iotests/095.out
@@ -0,0 +1,31 @@
+QA output created by 095
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=5242880 
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=104857600 backing_file='TEST_DIR/t.IMGFMT.base' 
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=104857600 backing_file='TEST_DIR/t.IMGFMT.snp1' 
+
+=== Base image info before commit and resize ===
+image: TEST_DIR/t.qcow2.base
+file format: qcow2
+virtual size: 5.0M (5242880 bytes)
+disk size: 196K
+cluster_size: 65536
+Format specific information:
+    compat: 1.1
+    lazy refcounts: false
+
+=== Running QEMU Live Commit Test ===
+
+{"return": {}}
+{"return": {}}
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "test", "len": 104857600, "offset": 104857600, "speed": 0, "type": "commit"}}
+
+=== Base image info after commit and resize ===
+image: TEST_DIR/t.qcow2.base
+file format: qcow2
+virtual size: 100M (104857600 bytes)
+disk size: 196K
+cluster_size: 65536
+Format specific information:
+    compat: 1.1
+    lazy refcounts: false
+*** done
diff --git a/tests/qemu-iotests/check b/tests/qemu-iotests/check
index e2ed5a95f8..8ca40116d7 100755
--- a/tests/qemu-iotests/check
+++ b/tests/qemu-iotests/check
@@ -34,22 +34,95 @@ timestamp=${TIMESTAMP:=false}
 # generic initialization
 iam=check
 
+_init_error()
+{
+    echo "$iam: $1" >&2
+    exit 1
+}
+
+if [ -L "$0" ]
+then
+    # called from the build tree
+    source_iotests=$(dirname "$(readlink "$0")")
+    if [ -z "$source_iotests" ]
+    then
+        _init_error "failed to obtain source tree name from check symlink"
+    fi
+    source_iotests=$(cd "$source_iotests"; pwd) || _init_error "failed to enter source tree"
+    build_iotests=$PWD
+else
+    # called from the source tree
+    source_iotests=$PWD
+    # this may be an in-tree build (note that in the following code we may not
+    # assume that it truly is and have to test whether the build results
+    # actually exist)
+    build_iotests=$PWD
+fi
+
+build_root="$build_iotests/../.."
+
+if [ -x "$build_iotests/socket_scm_helper" ]
+then
+    export SOCKET_SCM_HELPER="$build_iotests/socket_scm_helper"
+fi
+
+# if ./qemu exists, it should be prioritized and will be chosen by common.config
+if [[ -z "$QEMU_PROG" && ! -x './qemu' ]]
+then
+    arch=$(uname -m 2> /dev/null)
+
+    if [[ -n $arch && -x "$build_root/$arch-softmmu/qemu-system-$arch" ]]
+    then
+        export QEMU_PROG="$build_root/$arch-softmmu/qemu-system-$arch"
+    else
+        pushd "$build_root" > /dev/null
+        for binary in *-softmmu/qemu-system-*
+        do
+            if [ -x "$binary" ]
+            then
+                export QEMU_PROG="$build_root/$binary"
+                break
+            fi
+        done
+        popd > /dev/null
+    fi
+fi
+
+if [[ -z $QEMU_IMG_PROG && -x "$build_root/qemu-img" && ! -x './qemu-img' ]]
+then
+    export QEMU_IMG_PROG="$build_root/qemu-img"
+fi
+
+if [[ -z $QEMU_IO_PROG && -x "$build_root/qemu-io" && ! -x './qemu-io' ]]
+then
+    export QEMU_IO_PROG="$build_root/qemu-io"
+fi
+
+if [[ -z $QEMU_NBD_PROG && -x "$build_root/qemu-nbd" && ! -x './qemu-nbd' ]]
+then
+    export QEMU_NBD_PROG="$build_root/qemu-nbd"
+fi
+
+# we need common.env
+if ! . "$build_iotests/common.env"
+then
+    _init_error "failed to source common.env (make sure the qemu-iotests are run from tests/qemu-iotests in the build tree)"
+fi
+
 # we need common.config
-if ! . ./common.config
+if ! . "$source_iotests/common.config"
 then
-    echo "$iam: failed to source common.config"
-    exit 1
+    _init_error "failed to source common.config"
 fi
 
 # we need common.rc
-if ! . ./common.rc
+if ! . "$source_iotests/common.rc"
 then
-    echo "check: failed to source common.rc"
-    exit 1
+    _init_error "failed to source common.rc"
 fi
 
 # we need common
-. ./common
+. "$source_iotests/common"
 
 #if [ `id -u` -ne 0 ]
 #then
@@ -194,7 +267,7 @@ do
         echo " - expunged"
         rm -f $seq.out.bad
         echo "/^$seq\$/d" >>$tmp.expunged
-    elif [ ! -f $seq ]
+    elif [ ! -f "$source_iotests/$seq" ]
     then
         echo " - no such test?"
         echo "/^$seq\$/d" >>$tmp.expunged
@@ -215,9 +288,16 @@ do
 
         start=`_wallclock`
         $timestamp && echo -n "        ["`date "+%T"`"]"
-        [ ! -x $seq ] && chmod u+x $seq # ensure we can run it
+
+        if [ "$(head -n 1 "$source_iotests/$seq")" == "#!/usr/bin/env python" ]; then
+            run_command="$PYTHON $seq"
+        else
+            run_command="./$seq"
+        fi
+        export OUTPUT_DIR=$PWD
+        (cd "$source_iotests";
         MALLOC_PERTURB_=${MALLOC_PERTURB_:-$(($RANDOM % 255 + 1))} \
-                ./$seq >$tmp.out 2>&1
+                $run_command >$tmp.out 2>&1)
         sts=$?
         $timestamp && _timestamp
         stop=`_wallclock`
@@ -242,17 +322,17 @@ do
                 err=true
             fi
 
-            reference=$seq.out
+            reference="$source_iotests/$seq.out"
             if [ "$CACHEMODE" = "none" ]; then
-                [ -f $seq.out.nocache ] && reference=$seq.out.nocache
+                [ -f "$source_iotests/$seq.out.nocache" ] && reference="$source_iotests/$seq.out.nocache"
             fi
 
-            if [ ! -f $reference ]
+            if [ ! -f "$reference" ]
             then
                 echo " - no qualified output"
                 err=true
             else
-                if diff -w $reference $tmp.out >/dev/null 2>&1
+                if diff -w "$reference" $tmp.out >/dev/null 2>&1
                 then
                     echo ""
                     if $err
@@ -264,7 +344,7 @@ do
                 else
                     echo " - output mismatch (see $seq.out.bad)"
                     mv $tmp.out $seq.out.bad
-                    $diff -w $reference $seq.out.bad
+                    $diff -w "$reference" $seq.out.bad
                     err=true
                 fi
             fi
diff --git a/tests/qemu-iotests/common b/tests/qemu-iotests/common
index 0aaf84d015..e4083f4212 100644
--- a/tests/qemu-iotests/common
+++ b/tests/qemu-iotests/common
@@ -25,8 +25,7 @@ _setenvironment()
     export MSGVERB
 }
 
-here=`pwd`
-rm -f $here/$iam.out
+rm -f "$OUTPUT_DIR/$iam.out"
 _setenvironment
 
 check=${check-true}
@@ -59,7 +58,7 @@ do
     if $group
     then
         # arg after -g
-        group_list=`sed -n <group -e 's/$/ /' -e "/^[0-9][0-9][0-9].* $r /"'{
+        group_list=`sed -n <"$source_iotests/group" -e 's/$/ /' -e "/^[0-9][0-9][0-9].* $r /"'{
 s/ .*//p
 }'`
         if [ -z "$group_list" ]
@@ -84,7 +83,7 @@ s/ .*//p
     then
         # arg after -x
         [ ! -s $tmp.list ] && ls [0-9][0-9][0-9] [0-9][0-9][0-9][0-9] >$tmp.list 2>/dev/null
-        group_list=`sed -n <group -e 's/$/ /' -e "/^[0-9][0-9][0-9].* $r /"'{
+        group_list=`sed -n <"$source_iotests/group" -e 's/$/ /' -e "/^[0-9][0-9][0-9].* $r /"'{
 s/ .*//p
 }'`
         if [ -z "$group_list" ]
@@ -366,7 +365,7 @@ testlist options
 BEGIN        { for (t='$start'; t<='$end'; t++) printf "%03d\n",t }' \
         | while read id
         do
-            if grep -s "^$id " group >/dev/null
+            if grep -s "^$id " "$source_iotests/group" >/dev/null
             then
                 # in group file ... OK
                 echo $id >>$tmp.list
@@ -402,7 +401,7 @@ else
         touch $tmp.list
     else
         # no test numbers, do everything from group file
-        sed -n -e '/^[0-9][0-9][0-9]*/s/[         ].*//p' <group >$tmp.list
+        sed -n -e '/^[0-9][0-9][0-9]*/s/[         ].*//p' <"$source_iotests/group" >$tmp.list
     fi
 fi
 
diff --git a/tests/qemu-iotests/common.config b/tests/qemu-iotests/common.config
index d90a8bca8b..bd6790be63 100644
--- a/tests/qemu-iotests/common.config
+++ b/tests/qemu-iotests/common.config
@@ -126,7 +126,7 @@ fi
 export TEST_DIR
 
 if [ -z "$SAMPLE_IMG_DIR" ]; then
-        SAMPLE_IMG_DIR=`pwd`/sample_images
+        SAMPLE_IMG_DIR="$source_iotests/sample_images"
 fi
 
 if [ ! -d "$SAMPLE_IMG_DIR" ]; then
diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc
index 195c5646aa..e0ea7e3a7d 100644
--- a/tests/qemu-iotests/common.rc
+++ b/tests/qemu-iotests/common.rc
@@ -318,9 +318,9 @@ _do()
         status=1; exit
     fi
 
-    (eval "echo '---' \"$_cmd\"") >>$here/$seq.full
+    (eval "echo '---' \"$_cmd\"") >>"$OUTPUT_DIR/$seq.full"
     (eval "$_cmd") >$tmp._out 2>&1; ret=$?
-    cat $tmp._out >>$here/$seq.full
+    cat $tmp._out >>"$OUTPUT_DIR/$seq.full"
     if [ $# -eq 2 ]; then
         if [ $ret -eq 0 ]; then
             echo "done"
@@ -344,7 +344,7 @@ _do()
 #
 _notrun()
 {
-    echo "$*" >$seq.notrun
+    echo "$*" >"$OUTPUT_DIR/$seq.notrun"
     echo "$seq not run: $*"
     status=0
     exit
@@ -354,7 +354,7 @@ _notrun()
 #
 _fail()
 {
-    echo "$*" | tee -a $here/$seq.full
+    echo "$*" | tee -a "$OUTPUT_DIR/$seq.full"
     echo "(see $seq.full for details)"
     status=1
     exit 1
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
index 0f074403ae..e3dc4e8754 100644
--- a/tests/qemu-iotests/group
+++ b/tests/qemu-iotests/group
@@ -99,3 +99,4 @@
 090 rw auto quick
 091 rw auto
 092 rw auto quick
+095 rw auto
diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
index f6c437c0c3..39a4cfcf4d 100644
--- a/tests/qemu-iotests/iotests.py
+++ b/tests/qemu-iotests/iotests.py
@@ -37,6 +37,7 @@ qemu_args = os.environ.get('QEMU', 'qemu').strip().split(' ')
 imgfmt = os.environ.get('IMGFMT', 'raw')
 imgproto = os.environ.get('IMGPROTO', 'file')
 test_dir = os.environ.get('TEST_DIR', '/var/tmp')
+output_dir = os.environ.get('OUTPUT_DIR', '.')
 cachemode = os.environ.get('CACHEMODE')
 
 socket_scm_helper = os.environ.get('SOCKET_SCM_HELPER', 'socket_scm_helper')
@@ -278,7 +279,7 @@ def notrun(reason):
     # Each test in qemu-iotests has a number ("seq")
     seq = os.path.basename(sys.argv[0])
 
-    open('%s.notrun' % seq, 'wb').write(reason + '\n')
+    open('%s/%s.notrun' % (output_dir, seq), 'wb').write(reason + '\n')
     print '%s not run: %s' % (seq, reason)
     sys.exit(0)
 
diff --git a/trace-events b/trace-events
index ba01ad52cf..d071b97dd7 100644
--- a/trace-events
+++ b/trace-events
@@ -1169,12 +1169,13 @@ qxl_render_guest_primary_resized(int32_t width, int32_t height, int32_t stride,
 qxl_render_update_area_done(void *cookie) "%p"
 
 # hw/ppc/spapr_pci.c
-spapr_pci_msi(const char *msg, uint32_t n, uint32_t ca) "%s (device#%d, cfg=%x)"
+spapr_pci_msi(const char *msg, uint32_t ca) "%s (cfg=%x)"
 spapr_pci_msi_setup(const char *name, unsigned vector, uint64_t addr) "dev\"%s\" vector %u, addr=%"PRIx64
-spapr_pci_rtas_ibm_change_msi(unsigned func, unsigned req) "func %u, requested %u"
+spapr_pci_rtas_ibm_change_msi(unsigned cfg, unsigned func, unsigned req, unsigned first) "cfgaddr %x func %u, requested %u, first irq %u"
 spapr_pci_rtas_ibm_query_interrupt_source_number(unsigned ioa, unsigned intr) "queries for #%u, IRQ%u"
 spapr_pci_msi_write(uint64_t addr, uint64_t data, uint32_t dt_irq) "@%"PRIx64"<=%"PRIx64" IRQ %u"
 spapr_pci_lsi_set(const char *busname, int pin, uint32_t irq) "%s PIN%d IRQ %u"
+spapr_pci_msi_retry(unsigned config_addr, unsigned req_num, unsigned max_irqs) "Guest device at %x asked %u, have only %u"
 
 # hw/intc/xics.c
 xics_icp_check_ipi(int server, uint8_t mfrr) "CPU %d can take IPI mfrr=%#x"
@@ -1188,6 +1189,12 @@ xics_set_irq_lsi(int srcno, int nr) "set_irq_lsi: srcno %d [irq %#x]"
 xics_ics_write_xive(int nr, int srcno, int server, uint8_t priority) "ics_write_xive: irq %#x [src %d] server %#x prio %#x"
 xics_ics_reject(int nr, int srcno) "reject irq %#x [src %d]"
 xics_ics_eoi(int nr) "ics_eoi: irq %#x"
+xics_alloc(int src, int irq) "source#%d, irq %d"
+xics_alloc_failed_hint(int src, int irq) "source#%d, irq %d is already in use"
+xics_alloc_failed_no_left(int src) "source#%d, no irq left"
+xics_alloc_block(int src, int first, int num, bool lsi, int align) "source#%d, first irq %d, %d irqs, lsi=%d, alignnum %d"
+xics_ics_free(int src, int irq, int num) "Source#%d, first irq %d, %d irqs"
+xics_ics_free_warn(int src, int irq) "Source#%d, irq %d is already free"
 
 # hw/ppc/spapr.c
 spapr_cas_failed(unsigned long n) "DT diff buffer is too small: %ld bytes"
diff --git a/util/qemu-option.c b/util/qemu-option.c
index 43de3add29..6dc27ce04f 100644
--- a/util/qemu-option.c
+++ b/util/qemu-option.c
@@ -1111,6 +1111,7 @@ QemuOptsList *qemu_opts_append(QemuOptsList *dst,
     size_t num_opts, num_dst_opts;
     QemuOptDesc *desc;
     bool need_init = false;
+    bool need_head_update;
 
     if (!list) {
         return dst;
@@ -1121,6 +1122,12 @@ QemuOptsList *qemu_opts_append(QemuOptsList *dst,
      */
     if (!dst) {
         need_init = true;
+        need_head_update = true;
+    } else {
+        /* Moreover, even if dst is not NULL, the realloc may move it to a
+         * different address in which case we may get a stale tail pointer
+         * in dst->head. */
+        need_head_update = QTAILQ_EMPTY(&dst->head);
     }
 
     num_opts = count_opts_list(dst);
@@ -1131,9 +1138,11 @@ QemuOptsList *qemu_opts_append(QemuOptsList *dst,
     if (need_init) {
         dst->name = NULL;
         dst->implied_opt_name = NULL;
-        QTAILQ_INIT(&dst->head);
         dst->merge_lists = false;
     }
+    if (need_head_update) {
+        QTAILQ_INIT(&dst->head);
+    }
     dst->desc[num_dst_opts].name = NULL;
 
     /* append list->desc to dst->desc */
diff --git a/vmstate.c b/vmstate.c
index c9965205df..ef2f87bdad 100644
--- a/vmstate.c
+++ b/vmstate.c
@@ -43,11 +43,18 @@ static int vmstate_size(void *opaque, VMStateField *field)
     return size;
 }
 
-static void *vmstate_base_addr(void *opaque, VMStateField *field)
+static void *vmstate_base_addr(void *opaque, VMStateField *field, bool alloc)
 {
     void *base_addr = opaque + field->offset;
 
     if (field->flags & VMS_POINTER) {
+        if (alloc && (field->flags & VMS_ALLOC)) {
+            int n_elems = vmstate_n_elems(opaque, field);
+            if (n_elems) {
+                gsize size = n_elems * field->size;
+                *((void **)base_addr + field->start) = g_malloc(size);
+            }
+        }
         base_addr = *(void **)base_addr + field->start;
     }
 
@@ -81,7 +88,7 @@ int vmstate_load_state(QEMUFile *f, const VMStateDescription *vmsd,
              field->field_exists(opaque, version_id)) ||
             (!field->field_exists &&
              field->version_id <= version_id)) {
-            void *base_addr = vmstate_base_addr(opaque, field);
+            void *base_addr = vmstate_base_addr(opaque, field, true);
             int i, n_elems = vmstate_n_elems(opaque, field);
             int size = vmstate_size(opaque, field);
 
@@ -135,7 +142,7 @@ void vmstate_save_state(QEMUFile *f, const VMStateDescription *vmsd,
     while (field->name) {
         if (!field->field_exists ||
             field->field_exists(opaque, vmsd->version_id)) {
-            void *base_addr = vmstate_base_addr(opaque, field);
+            void *base_addr = vmstate_base_addr(opaque, field, false);
             int i, n_elems = vmstate_n_elems(opaque, field);
             int size = vmstate_size(opaque, field);