Diffstat (limited to 'hw')
45 files changed, 1273 insertions, 498 deletions
diff --git a/hw/9pfs/9p-local.c b/hw/9pfs/9p-local.c index 1e78b7c9e9..6e478f4765 100644 --- a/hw/9pfs/9p-local.c +++ b/hw/9pfs/9p-local.c @@ -633,7 +633,7 @@ static int local_mknod(FsContext *fs_ctx, V9fsPath *dir_path, if (fs_ctx->export_flags & V9FS_SM_MAPPED || fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) { - err = mknodat(dirfd, name, SM_LOCAL_MODE_BITS | S_IFREG, 0); + err = mknodat(dirfd, name, fs_ctx->fmode | S_IFREG, 0); if (err == -1) { goto out; } @@ -685,7 +685,7 @@ static int local_mkdir(FsContext *fs_ctx, V9fsPath *dir_path, if (fs_ctx->export_flags & V9FS_SM_MAPPED || fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) { - err = mkdirat(dirfd, name, SM_LOCAL_DIR_MODE_BITS); + err = mkdirat(dirfd, name, fs_ctx->dmode); if (err == -1) { goto out; } @@ -786,7 +786,7 @@ static int local_open2(FsContext *fs_ctx, V9fsPath *dir_path, const char *name, /* Determine the security model */ if (fs_ctx->export_flags & V9FS_SM_MAPPED || fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) { - fd = openat_file(dirfd, name, flags, SM_LOCAL_MODE_BITS); + fd = openat_file(dirfd, name, flags, fs_ctx->fmode); if (fd == -1) { goto out; } @@ -849,7 +849,7 @@ static int local_symlink(FsContext *fs_ctx, const char *oldpath, ssize_t oldpath_size, write_size; fd = openat_file(dirfd, name, O_CREAT | O_EXCL | O_RDWR, - SM_LOCAL_MODE_BITS); + fs_ctx->fmode); if (fd == -1) { goto out; } @@ -1100,7 +1100,7 @@ static int local_remove(FsContext *ctx, const char *path) goto out; } - if (fstatat(dirfd, path, &stbuf, AT_SYMLINK_NOFOLLOW) < 0) { + if (fstatat(dirfd, name, &stbuf, AT_SYMLINK_NOFOLLOW) < 0) { goto err_out; } @@ -1467,6 +1467,23 @@ static int local_parse_opts(QemuOpts *opts, struct FsDriverEntry *fse) return -1; } + if (fse->export_flags & V9FS_SM_MAPPED || + fse->export_flags & V9FS_SM_MAPPED_FILE) { + fse->fmode = + qemu_opt_get_number(opts, "fmode", SM_LOCAL_MODE_BITS) & 0777; + fse->dmode = + qemu_opt_get_number(opts, "dmode", SM_LOCAL_DIR_MODE_BITS) & 0777; + } else { + if (qemu_opt_find(opts, "fmode")) { + error_report("fmode is only valid for mapped 9p modes"); + return -1; + } + if (qemu_opt_find(opts, "dmode")) { + error_report("dmode is only valid for mapped 9p modes"); + return -1; + } + } + fse->path = g_strdup(path); return 0; diff --git a/hw/9pfs/9p-synth.c b/hw/9pfs/9p-synth.c index 4b6d4e6a3f..df0a8de08a 100644 --- a/hw/9pfs/9p-synth.c +++ b/hw/9pfs/9p-synth.c @@ -494,8 +494,7 @@ static int synth_name_to_path(FsContext *ctx, V9fsPath *dir_path, } out: /* Copy the node pointer to fid */ - target->data = g_malloc(sizeof(void *)); - memcpy(target->data, &node, sizeof(void *)); + target->data = g_memdup(&node, sizeof(void *)); target->size = sizeof(void *); return 0; } diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c index 96d2683348..6c92bad5b3 100644 --- a/hw/9pfs/9p.c +++ b/hw/9pfs/9p.c @@ -624,15 +624,11 @@ void pdu_free(V9fsPDU *pdu) QLIST_INSERT_HEAD(&s->free_list, pdu, next); } -/* - * We don't do error checking for pdu_marshal/unmarshal here - * because we always expect to have enough space to encode - * error details - */ static void coroutine_fn pdu_complete(V9fsPDU *pdu, ssize_t len) { int8_t id = pdu->id + 1; /* Response */ V9fsState *s = pdu->s; + int ret; if (len < 0) { int err = -len; @@ -644,11 +640,19 @@ static void coroutine_fn pdu_complete(V9fsPDU *pdu, ssize_t len) str.data = strerror(err); str.size = strlen(str.data); - len += pdu_marshal(pdu, len, "s", &str); + ret = pdu_marshal(pdu, len, "s", &str); + if (ret < 0) { + goto out_notify; + } + len += ret; id = P9_RERROR; } - len += 
pdu_marshal(pdu, len, "d", err); + ret = pdu_marshal(pdu, len, "d", err); + if (ret < 0) { + goto out_notify; + } + len += ret; if (s->proto_version == V9FS_PROTO_2000L) { id = P9_RLERROR; @@ -657,12 +661,15 @@ static void coroutine_fn pdu_complete(V9fsPDU *pdu, ssize_t len) } /* fill out the header */ - pdu_marshal(pdu, 0, "dbw", (int32_t)len, id, pdu->tag); + if (pdu_marshal(pdu, 0, "dbw", (int32_t)len, id, pdu->tag) < 0) { + goto out_notify; + } /* keep these in sync */ pdu->size = len; pdu->id = id; +out_notify: pdu->s->transport->push_and_notify(pdu); /* Now wakeup anybody waiting in flush for this request */ @@ -1664,7 +1671,7 @@ static void v9fs_init_qiov_from_pdu(QEMUIOVector *qiov, V9fsPDU *pdu, unsigned int niov; if (is_write) { - pdu->s->transport->init_out_iov_from_pdu(pdu, &iov, &niov); + pdu->s->transport->init_out_iov_from_pdu(pdu, &iov, &niov, size + skip); } else { pdu->s->transport->init_in_iov_from_pdu(pdu, &iov, &niov, size + skip); } @@ -3533,6 +3540,9 @@ int v9fs_device_realize_common(V9fsState *s, Error **errp) s->ops = fse->ops; + s->ctx.fmode = fse->fmode; + s->ctx.dmode = fse->dmode; + s->fid_list = NULL; qemu_co_rwlock_init(&s->rename_lock); diff --git a/hw/9pfs/9p.h b/hw/9pfs/9p.h index c886ba78d2..d1cfeaf10e 100644 --- a/hw/9pfs/9p.h +++ b/hw/9pfs/9p.h @@ -124,6 +124,11 @@ typedef struct { uint8_t id; uint16_t tag_le; } QEMU_PACKED P9MsgHeader; +/* According to the specification, 9p messages start with a 7-byte header. + * Since most of the code uses this header size in literal form, we must be + * sure this is indeed the case. + */ +QEMU_BUILD_BUG_ON(sizeof(P9MsgHeader) != 7); struct V9fsPDU { @@ -358,7 +363,7 @@ struct V9fsTransport { void (*init_in_iov_from_pdu)(V9fsPDU *pdu, struct iovec **piov, unsigned int *pniov, size_t size); void (*init_out_iov_from_pdu)(V9fsPDU *pdu, struct iovec **piov, - unsigned int *pniov); + unsigned int *pniov, size_t size); void (*push_and_notify)(V9fsPDU *pdu); }; diff --git a/hw/9pfs/virtio-9p-device.c b/hw/9pfs/virtio-9p-device.c index 245abd8aae..62650b0a6b 100644 --- a/hw/9pfs/virtio-9p-device.c +++ b/hw/9pfs/virtio-9p-device.c @@ -53,23 +53,22 @@ static void handle_9p_output(VirtIODevice *vdev, VirtQueue *vq) goto out_free_pdu; } - if (elem->in_num == 0) { + if (iov_size(elem->in_sg, elem->in_num) < 7) { virtio_error(vdev, "The guest sent a VirtFS request without space for " "the reply"); goto out_free_req; } - QEMU_BUILD_BUG_ON(sizeof(out) != 7); - v->elems[pdu->idx] = elem; - len = iov_to_buf(elem->out_sg, elem->out_num, 0, - &out, sizeof(out)); - if (len != sizeof(out)) { + len = iov_to_buf(elem->out_sg, elem->out_num, 0, &out, 7); + if (len != 7) { virtio_error(vdev, "The guest sent a malformed VirtFS request: " "header size is %zd, should be 7", len); goto out_free_req; } + v->elems[pdu->idx] = elem; + pdu_submit(pdu, &out); } @@ -147,8 +146,16 @@ static ssize_t virtio_pdu_vmarshal(V9fsPDU *pdu, size_t offset, V9fsState *s = pdu->s; V9fsVirtioState *v = container_of(s, V9fsVirtioState, state); VirtQueueElement *elem = v->elems[pdu->idx]; + ssize_t ret; + + ret = v9fs_iov_vmarshal(elem->in_sg, elem->in_num, offset, 1, fmt, ap); + if (ret < 0) { + VirtIODevice *vdev = VIRTIO_DEVICE(v); - return v9fs_iov_vmarshal(elem->in_sg, elem->in_num, offset, 1, fmt, ap); + virtio_error(vdev, "Failed to encode VirtFS reply type %d", + pdu->id + 1); + } + return ret; } static ssize_t virtio_pdu_vunmarshal(V9fsPDU *pdu, size_t offset, @@ -157,28 +164,52 @@ static ssize_t virtio_pdu_vunmarshal(V9fsPDU *pdu, size_t offset, V9fsState *s 
= pdu->s; V9fsVirtioState *v = container_of(s, V9fsVirtioState, state); VirtQueueElement *elem = v->elems[pdu->idx]; + ssize_t ret; + + ret = v9fs_iov_vunmarshal(elem->out_sg, elem->out_num, offset, 1, fmt, ap); + if (ret < 0) { + VirtIODevice *vdev = VIRTIO_DEVICE(v); - return v9fs_iov_vunmarshal(elem->out_sg, elem->out_num, offset, 1, fmt, ap); + virtio_error(vdev, "Failed to decode VirtFS request type %d", pdu->id); + } + return ret; } -/* The size parameter is used by other transports. Do not drop it. */ static void virtio_init_in_iov_from_pdu(V9fsPDU *pdu, struct iovec **piov, unsigned int *pniov, size_t size) { V9fsState *s = pdu->s; V9fsVirtioState *v = container_of(s, V9fsVirtioState, state); VirtQueueElement *elem = v->elems[pdu->idx]; + size_t buf_size = iov_size(elem->in_sg, elem->in_num); + + if (buf_size < size) { + VirtIODevice *vdev = VIRTIO_DEVICE(v); + + virtio_error(vdev, + "VirtFS reply type %d needs %zu bytes, buffer has %zu", + pdu->id + 1, size, buf_size); + } *piov = elem->in_sg; *pniov = elem->in_num; } static void virtio_init_out_iov_from_pdu(V9fsPDU *pdu, struct iovec **piov, - unsigned int *pniov) + unsigned int *pniov, size_t size) { V9fsState *s = pdu->s; V9fsVirtioState *v = container_of(s, V9fsVirtioState, state); VirtQueueElement *elem = v->elems[pdu->idx]; + size_t buf_size = iov_size(elem->out_sg, elem->out_num); + + if (buf_size < size) { + VirtIODevice *vdev = VIRTIO_DEVICE(v); + + virtio_error(vdev, + "VirtFS request type %d needs %zu bytes, buffer has %zu", + pdu->id, size, buf_size); + } *piov = elem->out_sg; *pniov = elem->out_num; diff --git a/hw/9pfs/xen-9p-backend.c b/hw/9pfs/xen-9p-backend.c index 922cc967be..ee87f08926 100644 --- a/hw/9pfs/xen-9p-backend.c +++ b/hw/9pfs/xen-9p-backend.c @@ -54,6 +54,8 @@ typedef struct Xen9pfsDev { Xen9pfsRing *rings; } Xen9pfsDev; +static void xen_9pfs_disconnect(struct XenDevice *xendev); + static void xen_9pfs_in_sg(Xen9pfsRing *ring, struct iovec *in_sg, int *num, @@ -125,10 +127,19 @@ static ssize_t xen_9pfs_pdu_vmarshal(V9fsPDU *pdu, Xen9pfsDev *xen_9pfs = container_of(pdu->s, Xen9pfsDev, state); struct iovec in_sg[2]; int num; + ssize_t ret; xen_9pfs_in_sg(&xen_9pfs->rings[pdu->tag % xen_9pfs->num_rings], in_sg, &num, pdu->idx, ROUND_UP(offset + 128, 512)); - return v9fs_iov_vmarshal(in_sg, num, offset, 0, fmt, ap); + + ret = v9fs_iov_vmarshal(in_sg, num, offset, 0, fmt, ap); + if (ret < 0) { + xen_pv_printf(&xen_9pfs->xendev, 0, + "Failed to encode VirtFS request type %d\n", pdu->id + 1); + xen_be_set_state(&xen_9pfs->xendev, XenbusStateClosing); + xen_9pfs_disconnect(&xen_9pfs->xendev); + } + return ret; } static ssize_t xen_9pfs_pdu_vunmarshal(V9fsPDU *pdu, @@ -139,15 +150,25 @@ static ssize_t xen_9pfs_pdu_vunmarshal(V9fsPDU *pdu, Xen9pfsDev *xen_9pfs = container_of(pdu->s, Xen9pfsDev, state); struct iovec out_sg[2]; int num; + ssize_t ret; xen_9pfs_out_sg(&xen_9pfs->rings[pdu->tag % xen_9pfs->num_rings], out_sg, &num, pdu->idx); - return v9fs_iov_vunmarshal(out_sg, num, offset, 0, fmt, ap); + + ret = v9fs_iov_vunmarshal(out_sg, num, offset, 0, fmt, ap); + if (ret < 0) { + xen_pv_printf(&xen_9pfs->xendev, 0, + "Failed to decode VirtFS request type %d\n", pdu->id); + xen_be_set_state(&xen_9pfs->xendev, XenbusStateClosing); + xen_9pfs_disconnect(&xen_9pfs->xendev); + } + return ret; } static void xen_9pfs_init_out_iov_from_pdu(V9fsPDU *pdu, struct iovec **piov, - unsigned int *pniov) + unsigned int *pniov, + size_t size) { Xen9pfsDev *xen_9pfs = container_of(pdu->s, Xen9pfsDev, state); Xen9pfsRing *ring 
= &xen_9pfs->rings[pdu->tag % xen_9pfs->num_rings]; @@ -169,11 +190,22 @@ static void xen_9pfs_init_in_iov_from_pdu(V9fsPDU *pdu, Xen9pfsDev *xen_9pfs = container_of(pdu->s, Xen9pfsDev, state); Xen9pfsRing *ring = &xen_9pfs->rings[pdu->tag % xen_9pfs->num_rings]; int num; + size_t buf_size; g_free(ring->sg); ring->sg = g_malloc0(sizeof(*ring->sg) * 2); xen_9pfs_in_sg(ring, ring->sg, &num, pdu->idx, size); + + buf_size = iov_size(ring->sg, num); + if (buf_size < size) { + xen_pv_printf(&xen_9pfs->xendev, 0, "Xen 9pfs request type %d" + "needs %zu bytes, buffer has %zu\n", pdu->id, size, + buf_size); + xen_be_set_state(&xen_9pfs->xendev, XenbusStateClosing); + xen_9pfs_disconnect(&xen_9pfs->xendev); + } + *piov = ring->sg; *pniov = num; } @@ -217,7 +249,7 @@ static int xen_9pfs_init(struct XenDevice *xendev) static int xen_9pfs_receive(Xen9pfsRing *ring) { P9MsgHeader h; - RING_IDX cons, prod, masked_prod, masked_cons; + RING_IDX cons, prod, masked_prod, masked_cons, queued; V9fsPDU *pdu; if (ring->inprogress) { @@ -228,8 +260,8 @@ static int xen_9pfs_receive(Xen9pfsRing *ring) prod = ring->intf->out_prod; xen_rmb(); - if (xen_9pfs_queued(prod, cons, XEN_FLEX_RING_SIZE(ring->ring_order)) < - sizeof(h)) { + queued = xen_9pfs_queued(prod, cons, XEN_FLEX_RING_SIZE(ring->ring_order)); + if (queued < sizeof(h)) { return 0; } ring->inprogress = true; @@ -240,6 +272,9 @@ static int xen_9pfs_receive(Xen9pfsRing *ring) xen_9pfs_read_packet((uint8_t *) &h, ring->ring.out, sizeof(h), masked_prod, &masked_cons, XEN_FLEX_RING_SIZE(ring->ring_order)); + if (queued < le32_to_cpu(h.size_le)) { + return 0; + } /* cannot fail, because we only handle one request per ring at a time */ pdu = pdu_alloc(&ring->priv->state); @@ -268,15 +303,30 @@ static void xen_9pfs_evtchn_event(void *opaque) qemu_bh_schedule(ring->bh); } -static int xen_9pfs_free(struct XenDevice *xendev) +static void xen_9pfs_disconnect(struct XenDevice *xendev) { + Xen9pfsDev *xen_9pdev = container_of(xendev, Xen9pfsDev, xendev); int i; + + for (i = 0; i < xen_9pdev->num_rings; i++) { + if (xen_9pdev->rings[i].evtchndev != NULL) { + qemu_set_fd_handler(xenevtchn_fd(xen_9pdev->rings[i].evtchndev), + NULL, NULL, NULL); + xenevtchn_unbind(xen_9pdev->rings[i].evtchndev, + xen_9pdev->rings[i].local_port); + xen_9pdev->rings[i].evtchndev = NULL; + } + } +} + +static int xen_9pfs_free(struct XenDevice *xendev) +{ Xen9pfsDev *xen_9pdev = container_of(xendev, Xen9pfsDev, xendev); + int i; - g_free(xen_9pdev->id); - g_free(xen_9pdev->tag); - g_free(xen_9pdev->path); - g_free(xen_9pdev->security_model); + if (xen_9pdev->rings[0].evtchndev != NULL) { + xen_9pfs_disconnect(xendev); + } for (i = 0; i < xen_9pdev->num_rings; i++) { if (xen_9pdev->rings[i].data != NULL) { @@ -289,16 +339,15 @@ static int xen_9pfs_free(struct XenDevice *xendev) xen_9pdev->rings[i].intf, 1); } - if (xen_9pdev->rings[i].evtchndev > 0) { - qemu_set_fd_handler(xenevtchn_fd(xen_9pdev->rings[i].evtchndev), - NULL, NULL, NULL); - xenevtchn_unbind(xen_9pdev->rings[i].evtchndev, - xen_9pdev->rings[i].local_port); - } if (xen_9pdev->rings[i].bh != NULL) { qemu_bh_delete(xen_9pdev->rings[i].bh); } } + + g_free(xen_9pdev->id); + g_free(xen_9pdev->tag); + g_free(xen_9pdev->path); + g_free(xen_9pdev->security_model); g_free(xen_9pdev->rings); return 0; } @@ -422,11 +471,6 @@ static void xen_9pfs_alloc(struct XenDevice *xendev) xenstore_write_be_int(xendev, "max-ring-page-order", MAX_RING_ORDER); } -static void xen_9pfs_disconnect(struct XenDevice *xendev) -{ - /* Dynamic hotplug of PV 
filesystems at runtime is not supported. */ -} - struct XenDevOps xen_9pfs_ops = { .size = sizeof(Xen9pfsDev), .flags = DEVOPS_FLAG_NEED_GNTDEV, diff --git a/hw/block/fdc.c b/hw/block/fdc.c index 28f6b6ee35..401129073b 100644 --- a/hw/block/fdc.c +++ b/hw/block/fdc.c @@ -1217,7 +1217,7 @@ static const VMStateDescription vmstate_fdc = { VMSTATE_UINT8(config, FDCtrl), VMSTATE_UINT8(lock, FDCtrl), VMSTATE_UINT8(pwrd, FDCtrl), - VMSTATE_UINT8_EQUAL(num_floppies, FDCtrl), + VMSTATE_UINT8_EQUAL(num_floppies, FDCtrl, NULL), VMSTATE_STRUCT_ARRAY(drives, FDCtrl, MAX_FD, 1, vmstate_fdrive, FDrive), VMSTATE_END_OF_LIST() diff --git a/hw/block/nvme.c b/hw/block/nvme.c index 381dc7c5fb..6071dc12d8 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -21,7 +21,7 @@ * cmb_size_mb=<cmb_size_mb[optional]> * * Note cmb_size_mb denotes size of CMB in MB. CMB is assumed to be at - * offset 0 in BAR2 and supports SQS only for now. + * offset 0 in BAR2 and supports only WDS, RDS and SQS for now. */ #include "qemu/osdep.h" @@ -93,8 +93,8 @@ static void nvme_isr_notify(NvmeCtrl *n, NvmeCQueue *cq) } } -static uint16_t nvme_map_prp(QEMUSGList *qsg, uint64_t prp1, uint64_t prp2, - uint32_t len, NvmeCtrl *n) +static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1, + uint64_t prp2, uint32_t len, NvmeCtrl *n) { hwaddr trans_len = n->page_size - (prp1 % n->page_size); trans_len = MIN(len, trans_len); @@ -102,10 +102,15 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, uint64_t prp1, uint64_t prp2, if (!prp1) { return NVME_INVALID_FIELD | NVME_DNR; + } else if (n->cmbsz && prp1 >= n->ctrl_mem.addr && + prp1 < n->ctrl_mem.addr + int128_get64(n->ctrl_mem.size)) { + qsg->nsg = 0; + qemu_iovec_init(iov, num_prps); + qemu_iovec_add(iov, (void *)&n->cmbuf[prp1 - n->ctrl_mem.addr], trans_len); + } else { + pci_dma_sglist_init(qsg, &n->parent_obj, num_prps); + qemu_sglist_add(qsg, prp1, trans_len); } - - pci_dma_sglist_init(qsg, &n->parent_obj, num_prps); - qemu_sglist_add(qsg, prp1, trans_len); len -= trans_len; if (len) { if (!prp2) { @@ -118,7 +123,7 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, uint64_t prp1, uint64_t prp2, nents = (len + n->page_size - 1) >> n->page_bits; prp_trans = MIN(n->max_prp_ents, nents) * sizeof(uint64_t); - pci_dma_read(&n->parent_obj, prp2, (void *)prp_list, prp_trans); + nvme_addr_read(n, prp2, (void *)prp_list, prp_trans); while (len != 0) { uint64_t prp_ent = le64_to_cpu(prp_list[i]); @@ -130,7 +135,7 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, uint64_t prp1, uint64_t prp2, i = 0; nents = (len + n->page_size - 1) >> n->page_bits; prp_trans = MIN(n->max_prp_ents, nents) * sizeof(uint64_t); - pci_dma_read(&n->parent_obj, prp_ent, (void *)prp_list, + nvme_addr_read(n, prp_ent, (void *)prp_list, prp_trans); prp_ent = le64_to_cpu(prp_list[i]); } @@ -140,7 +145,11 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, uint64_t prp1, uint64_t prp2, } trans_len = MIN(len, n->page_size); - qemu_sglist_add(qsg, prp_ent, trans_len); + if (qsg->nsg){ + qemu_sglist_add(qsg, prp_ent, trans_len); + } else { + qemu_iovec_add(iov, (void *)&n->cmbuf[prp_ent - n->ctrl_mem.addr], trans_len); + } len -= trans_len; i++; } @@ -148,7 +157,11 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, uint64_t prp1, uint64_t prp2, if (prp2 & (n->page_size - 1)) { goto unmap; } - qemu_sglist_add(qsg, prp2, len); + if (qsg->nsg) { + qemu_sglist_add(qsg, prp2, len); + } else { + qemu_iovec_add(iov, (void *)&n->cmbuf[prp2 - n->ctrl_mem.addr], trans_len); + } } } return NVME_SUCCESS; @@ -162,16 +175,24 @@ 
static uint16_t nvme_dma_read_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len, uint64_t prp1, uint64_t prp2) { QEMUSGList qsg; + QEMUIOVector iov; + uint16_t status = NVME_SUCCESS; - if (nvme_map_prp(&qsg, prp1, prp2, len, n)) { + if (nvme_map_prp(&qsg, &iov, prp1, prp2, len, n)) { return NVME_INVALID_FIELD | NVME_DNR; } - if (dma_buf_read(ptr, len, &qsg)) { + if (qsg.nsg > 0) { + if (dma_buf_read(ptr, len, &qsg)) { + status = NVME_INVALID_FIELD | NVME_DNR; + } qemu_sglist_destroy(&qsg); - return NVME_INVALID_FIELD | NVME_DNR; + } else { + if (qemu_iovec_to_buf(&iov, 0, ptr, len) != len) { + status = NVME_INVALID_FIELD | NVME_DNR; + } + qemu_iovec_destroy(&iov); } - qemu_sglist_destroy(&qsg); - return NVME_SUCCESS; + return status; } static void nvme_post_cqes(void *opaque) @@ -285,20 +306,27 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd, return NVME_LBA_RANGE | NVME_DNR; } - if (nvme_map_prp(&req->qsg, prp1, prp2, data_size, n)) { + if (nvme_map_prp(&req->qsg, &req->iov, prp1, prp2, data_size, n)) { block_acct_invalid(blk_get_stats(n->conf.blk), acct); return NVME_INVALID_FIELD | NVME_DNR; } - assert((nlb << data_shift) == req->qsg.size); - - req->has_sg = true; dma_acct_start(n->conf.blk, &req->acct, &req->qsg, acct); - req->aiocb = is_write ? - dma_blk_write(n->conf.blk, &req->qsg, data_offset, BDRV_SECTOR_SIZE, - nvme_rw_cb, req) : - dma_blk_read(n->conf.blk, &req->qsg, data_offset, BDRV_SECTOR_SIZE, - nvme_rw_cb, req); + if (req->qsg.nsg > 0) { + req->has_sg = true; + req->aiocb = is_write ? + dma_blk_write(n->conf.blk, &req->qsg, data_offset, BDRV_SECTOR_SIZE, + nvme_rw_cb, req) : + dma_blk_read(n->conf.blk, &req->qsg, data_offset, BDRV_SECTOR_SIZE, + nvme_rw_cb, req); + } else { + req->has_sg = false; + req->aiocb = is_write ? + blk_aio_pwritev(n->conf.blk, data_offset, &req->iov, 0, nvme_rw_cb, + req) : + blk_aio_preadv(n->conf.blk, data_offset, &req->iov, 0, nvme_rw_cb, + req); + } return NVME_NO_COMPLETE; } @@ -987,11 +1015,14 @@ static int nvme_init(PCIDevice *pci_dev) NVME_CMBSZ_SET_SQS(n->bar.cmbsz, 1); NVME_CMBSZ_SET_CQS(n->bar.cmbsz, 0); NVME_CMBSZ_SET_LISTS(n->bar.cmbsz, 0); - NVME_CMBSZ_SET_RDS(n->bar.cmbsz, 0); - NVME_CMBSZ_SET_WDS(n->bar.cmbsz, 0); + NVME_CMBSZ_SET_RDS(n->bar.cmbsz, 1); + NVME_CMBSZ_SET_WDS(n->bar.cmbsz, 1); NVME_CMBSZ_SET_SZU(n->bar.cmbsz, 2); /* MBs */ NVME_CMBSZ_SET_SZ(n->bar.cmbsz, n->cmb_size_mb); + n->cmbloc = n->bar.cmbloc; + n->cmbsz = n->bar.cmbsz; + n->cmbuf = g_malloc0(NVME_CMBSZ_GETSIZE(n->bar.cmbsz)); memory_region_init_io(&n->ctrl_mem, OBJECT(n), &nvme_cmb_ops, n, "nvme-cmb", NVME_CMBSZ_GETSIZE(n->bar.cmbsz)); diff --git a/hw/block/nvme.h b/hw/block/nvme.h index b4961d2547..6aab338ff5 100644 --- a/hw/block/nvme.h +++ b/hw/block/nvme.h @@ -712,6 +712,7 @@ typedef struct NvmeRequest { NvmeCqe cqe; BlockAcctCookie acct; QEMUSGList qsg; + QEMUIOVector iov; QTAILQ_ENTRY(NvmeRequest)entry; } NvmeRequest; diff --git a/hw/block/trace-events b/hw/block/trace-events index 65e83dc258..c332c01ea8 100644 --- a/hw/block/trace-events +++ b/hw/block/trace-events @@ -1,11 +1,11 @@ # See docs/tracing.txt for syntax documentation. 
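The nvme changes above route PRPs that fall inside the Controller Memory Buffer through a QEMUIOVector of host pointers instead of a DMA scatter-gather list, keyed off the qsg->nsg count. A minimal standalone restatement of the address test, with the helper name and parameters chosen here for illustration only:

#include <stdbool.h>
#include <stdint.h>

/* A PRP is served from the CMB when it falls inside
 * [cmb_addr, cmb_addr + cmb_size); otherwise it is a guest DMA address
 * and is added to the scatter-gather list, mirroring the test used by
 * the patched nvme_map_prp(). */
static bool prp_in_cmb(uint64_t prp, uint64_t cmb_addr, uint64_t cmb_size)
{
    return cmb_size != 0 && prp >= cmb_addr && prp < cmb_addr + cmb_size;
}

int main(void)
{
    /* CMB at 0x1000 spanning 0x100 bytes: 0x10ff is inside, 0x1100 is not. */
    return (prp_in_cmb(0x10ff, 0x1000, 0x100) &&
            !prp_in_cmb(0x1100, 0x1000, 0x100)) ? 0 : 1;
}
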
# hw/block/virtio-blk.c -virtio_blk_req_complete(void *req, int status) "req %p status %d" -virtio_blk_rw_complete(void *req, int ret) "req %p ret %d" -virtio_blk_handle_write(void *req, uint64_t sector, size_t nsectors) "req %p sector %"PRIu64" nsectors %zu" -virtio_blk_handle_read(void *req, uint64_t sector, size_t nsectors) "req %p sector %"PRIu64" nsectors %zu" -virtio_blk_submit_multireq(void *mrb, int start, int num_reqs, uint64_t offset, size_t size, bool is_write) "mrb %p start %d num_reqs %d offset %"PRIu64" size %zu is_write %d" +virtio_blk_req_complete(void *vdev, void *req, int status) "vdev %p req %p status %d" +virtio_blk_rw_complete(void *vdev, void *req, int ret) "vdev %p req %p ret %d" +virtio_blk_handle_write(void *vdev, void *req, uint64_t sector, size_t nsectors) "vdev %p req %p sector %"PRIu64" nsectors %zu" +virtio_blk_handle_read(void *vdev, void *req, uint64_t sector, size_t nsectors) "vdev %p req %p sector %"PRIu64" nsectors %zu" +virtio_blk_submit_multireq(void *vdev, void *mrb, int start, int num_reqs, uint64_t offset, size_t size, bool is_write) "vdev %p mrb %p start %d num_reqs %d offset %"PRIu64" size %zu is_write %d" # hw/block/hd-geometry.c hd_geometry_lchs_guess(void *blk, int cyls, int heads, int secs) "blk %p LCHS %d %d %d" diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index 604d37dfc8..c0bd247b37 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -50,7 +50,7 @@ static void virtio_blk_req_complete(VirtIOBlockReq *req, unsigned char status) VirtIOBlock *s = req->dev; VirtIODevice *vdev = VIRTIO_DEVICE(s); - trace_virtio_blk_req_complete(req, status); + trace_virtio_blk_req_complete(vdev, req, status); stb_p(&req->in->status, status); virtqueue_push(req->vq, &req->elem, req->in_len); @@ -88,12 +88,13 @@ static void virtio_blk_rw_complete(void *opaque, int ret) { VirtIOBlockReq *next = opaque; VirtIOBlock *s = next->dev; + VirtIODevice *vdev = VIRTIO_DEVICE(s); aio_context_acquire(blk_get_aio_context(s->conf.conf.blk)); while (next) { VirtIOBlockReq *req = next; next = req->mr_next; - trace_virtio_blk_rw_complete(req, ret); + trace_virtio_blk_rw_complete(vdev, req, ret); if (req->qiov.nalloc != -1) { /* If nalloc is != 1 req->qiov is a local copy of the original @@ -355,7 +356,8 @@ static inline void submit_requests(BlockBackend *blk, MultiReqBuffer *mrb, mrb->reqs[i - 1]->mr_next = mrb->reqs[i]; } - trace_virtio_blk_submit_multireq(mrb, start, num_reqs, + trace_virtio_blk_submit_multireq(VIRTIO_DEVICE(mrb->reqs[start]->dev), + mrb, start, num_reqs, sector_num << BDRV_SECTOR_BITS, qiov->size, is_write); block_acct_merge_done(blk_get_stats(blk), @@ -526,11 +528,11 @@ static int virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb) if (is_write) { qemu_iovec_init_external(&req->qiov, iov, out_num); - trace_virtio_blk_handle_write(req, req->sector_num, + trace_virtio_blk_handle_write(vdev, req, req->sector_num, req->qiov.size / BDRV_SECTOR_SIZE); } else { qemu_iovec_init_external(&req->qiov, in_iov, in_num); - trace_virtio_blk_handle_read(req, req->sector_num, + trace_virtio_blk_handle_read(vdev, req, req->sector_num, req->qiov.size / BDRV_SECTOR_SIZE); } diff --git a/hw/block/xen_disk.c b/hw/block/xen_disk.c index 3a22805fbc..d42ed7070d 100644 --- a/hw/block/xen_disk.c +++ b/hw/block/xen_disk.c @@ -36,8 +36,6 @@ static int batch_maps = 0; -static int max_requests = 32; - /* ------------------------------------------------------------- */ #define BLOCK_SIZE 512 @@ -84,6 +82,8 @@ struct ioreq { BlockAcctCookie acct; }; 
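The QEMU_BUILD_BUG_ON added to 9p.h earlier in this diff pins the packed wire-header layout at compile time, since the transports read exactly 7 bytes. The same check restated as a self-contained C11 sketch (QEMU's QEMU_PACKED expands to the GCC/Clang attribute used here):

#include <stdint.h>

typedef struct __attribute__((packed)) {
    uint32_t size_le;   /* message size, little-endian */
    uint8_t  id;        /* request/response type */
    uint16_t tag_le;    /* transaction tag, little-endian */
} P9MsgHeader;

/* 4 + 1 + 2 = 7 bytes; _Static_assert plays the role of QEMU_BUILD_BUG_ON. */
_Static_assert(sizeof(P9MsgHeader) == 7, "9p header must be 7 bytes");

int main(void) { return 0; }
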
+#define MAX_RING_PAGE_ORDER 4 + struct XenBlkDev { struct XenDevice xendev; /* must be first */ char *params; @@ -94,7 +94,8 @@ struct XenBlkDev { bool directiosafe; const char *fileproto; const char *filename; - int ring_ref; + unsigned int ring_ref[1 << MAX_RING_PAGE_ORDER]; + unsigned int nr_ring_ref; void *sring; int64_t file_blk; int64_t file_size; @@ -110,6 +111,7 @@ struct XenBlkDev { int requests_total; int requests_inflight; int requests_finished; + unsigned int max_requests; /* Persistent grants extension */ gboolean feature_discard; @@ -199,7 +201,7 @@ static struct ioreq *ioreq_start(struct XenBlkDev *blkdev) struct ioreq *ioreq = NULL; if (QLIST_EMPTY(&blkdev->freelist)) { - if (blkdev->requests_total >= max_requests) { + if (blkdev->requests_total >= blkdev->max_requests) { goto out; } /* allocate new struct */ @@ -769,31 +771,30 @@ static int blk_send_response_one(struct ioreq *ioreq) struct XenBlkDev *blkdev = ioreq->blkdev; int send_notify = 0; int have_requests = 0; - blkif_response_t resp; - void *dst; - - resp.id = ioreq->req.id; - resp.operation = ioreq->req.operation; - resp.status = ioreq->status; + blkif_response_t *resp; /* Place on the response ring for the relevant domain. */ switch (blkdev->protocol) { case BLKIF_PROTOCOL_NATIVE: - dst = RING_GET_RESPONSE(&blkdev->rings.native, blkdev->rings.native.rsp_prod_pvt); + resp = (blkif_response_t *) RING_GET_RESPONSE(&blkdev->rings.native, + blkdev->rings.native.rsp_prod_pvt); break; case BLKIF_PROTOCOL_X86_32: - dst = RING_GET_RESPONSE(&blkdev->rings.x86_32_part, - blkdev->rings.x86_32_part.rsp_prod_pvt); + resp = (blkif_response_t *) RING_GET_RESPONSE(&blkdev->rings.x86_32_part, + blkdev->rings.x86_32_part.rsp_prod_pvt); break; case BLKIF_PROTOCOL_X86_64: - dst = RING_GET_RESPONSE(&blkdev->rings.x86_64_part, - blkdev->rings.x86_64_part.rsp_prod_pvt); + resp = (blkif_response_t *) RING_GET_RESPONSE(&blkdev->rings.x86_64_part, + blkdev->rings.x86_64_part.rsp_prod_pvt); break; default: - dst = NULL; return 0; } - memcpy(dst, &resp, sizeof(resp)); + + resp->id = ioreq->req.id; + resp->operation = ioreq->req.operation; + resp->status = ioreq->status; + blkdev->rings.common.rsp_prod_pvt++; RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&blkdev->rings.common, send_notify); @@ -905,7 +906,7 @@ static void blk_handle_requests(struct XenBlkDev *blkdev) ioreq_runio_qemu_aio(ioreq); } - if (blkdev->more_work && blkdev->requests_inflight < max_requests) { + if (blkdev->more_work && blkdev->requests_inflight < blkdev->max_requests) { qemu_bh_schedule(blkdev->bh); } } @@ -918,15 +919,6 @@ static void blk_bh(void *opaque) blk_handle_requests(blkdev); } -/* - * We need to account for the grant allocations requiring contiguous - * chunks; the worst case number would be - * max_req * max_seg + (max_req - 1) * (max_seg - 1) + 1, - * but in order to keep things simple just use - * 2 * max_req * max_seg. 
- */ -#define MAX_GRANTS(max_req, max_seg) (2 * (max_req) * (max_seg)) - static void blk_alloc(struct XenDevice *xendev) { struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev); @@ -938,11 +930,6 @@ static void blk_alloc(struct XenDevice *xendev) if (xen_mode != XEN_EMULATE) { batch_maps = 1; } - if (xengnttab_set_max_grants(xendev->gnttabdev, - MAX_GRANTS(max_requests, BLKIF_MAX_SEGMENTS_PER_REQUEST)) < 0) { - xen_pv_printf(xendev, 0, "xengnttab_set_max_grants failed: %s\n", - strerror(errno)); - } } static void blk_parse_discard(struct XenBlkDev *blkdev) @@ -1023,13 +1010,23 @@ static int blk_init(struct XenDevice *xendev) blkdev->file_blk = BLOCK_SIZE; + blkdev->feature_grant_copy = + (xengnttab_grant_copy(blkdev->xendev.gnttabdev, 0, NULL) == 0); + + xen_pv_printf(&blkdev->xendev, 3, "grant copy operation %s\n", + blkdev->feature_grant_copy ? "enabled" : "disabled"); + /* fill info * blk_connect supplies sector-size and sectors */ xenstore_write_be_int(&blkdev->xendev, "feature-flush-cache", 1); - xenstore_write_be_int(&blkdev->xendev, "feature-persistent", 1); + xenstore_write_be_int(&blkdev->xendev, "feature-persistent", + !blkdev->feature_grant_copy); xenstore_write_be_int(&blkdev->xendev, "info", info); + xenstore_write_be_int(&blkdev->xendev, "max-ring-page-order", + MAX_RING_PAGE_ORDER); + blk_parse_discard(blkdev); g_free(directiosafe); @@ -1051,12 +1048,25 @@ out_error: return -1; } +/* + * We need to account for the grant allocations requiring contiguous + * chunks; the worst case number would be + * max_req * max_seg + (max_req - 1) * (max_seg - 1) + 1, + * but in order to keep things simple just use + * 2 * max_req * max_seg. + */ +#define MAX_GRANTS(max_req, max_seg) (2 * (max_req) * (max_seg)) + static int blk_connect(struct XenDevice *xendev) { struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev); int pers, index, qflags; bool readonly = true; bool writethrough = true; + int order, ring_ref; + unsigned int ring_size, max_grants; + unsigned int i; + uint32_t *domids; /* read-only ? 
*/ if (blkdev->directiosafe) { @@ -1131,9 +1141,42 @@ static int blk_connect(struct XenDevice *xendev) xenstore_write_be_int64(&blkdev->xendev, "sectors", blkdev->file_size / blkdev->file_blk); - if (xenstore_read_fe_int(&blkdev->xendev, "ring-ref", &blkdev->ring_ref) == -1) { + if (xenstore_read_fe_int(&blkdev->xendev, "ring-page-order", + &order) == -1) { + blkdev->nr_ring_ref = 1; + + if (xenstore_read_fe_int(&blkdev->xendev, "ring-ref", + &ring_ref) == -1) { + return -1; + } + blkdev->ring_ref[0] = ring_ref; + + } else if (order >= 0 && order <= MAX_RING_PAGE_ORDER) { + blkdev->nr_ring_ref = 1 << order; + + for (i = 0; i < blkdev->nr_ring_ref; i++) { + char *key; + + key = g_strdup_printf("ring-ref%u", i); + if (!key) { + return -1; + } + + if (xenstore_read_fe_int(&blkdev->xendev, key, + &ring_ref) == -1) { + g_free(key); + return -1; + } + blkdev->ring_ref[i] = ring_ref; + + g_free(key); + } + } else { + xen_pv_printf(xendev, 0, "invalid ring-page-order: %d\n", + order); return -1; } + if (xenstore_read_fe_int(&blkdev->xendev, "event-channel", &blkdev->xendev.remote_port) == -1) { return -1; @@ -1156,41 +1199,85 @@ static int blk_connect(struct XenDevice *xendev) blkdev->protocol = BLKIF_PROTOCOL_NATIVE; } - blkdev->sring = xengnttab_map_grant_ref(blkdev->xendev.gnttabdev, - blkdev->xendev.dom, - blkdev->ring_ref, - PROT_READ | PROT_WRITE); + ring_size = XC_PAGE_SIZE * blkdev->nr_ring_ref; + switch (blkdev->protocol) { + case BLKIF_PROTOCOL_NATIVE: + { + blkdev->max_requests = __CONST_RING_SIZE(blkif, ring_size); + break; + } + case BLKIF_PROTOCOL_X86_32: + { + blkdev->max_requests = __CONST_RING_SIZE(blkif_x86_32, ring_size); + break; + } + case BLKIF_PROTOCOL_X86_64: + { + blkdev->max_requests = __CONST_RING_SIZE(blkif_x86_64, ring_size); + break; + } + default: + return -1; + } + + /* Calculate the maximum number of grants needed by ioreqs */ + max_grants = MAX_GRANTS(blkdev->max_requests, + BLKIF_MAX_SEGMENTS_PER_REQUEST); + /* Add on the number needed for the ring pages */ + max_grants += blkdev->nr_ring_ref; + + if (xengnttab_set_max_grants(blkdev->xendev.gnttabdev, max_grants)) { + xen_pv_printf(xendev, 0, "xengnttab_set_max_grants failed: %s\n", + strerror(errno)); + return -1; + } + + domids = g_malloc0_n(blkdev->nr_ring_ref, sizeof(uint32_t)); + for (i = 0; i < blkdev->nr_ring_ref; i++) { + domids[i] = blkdev->xendev.dom; + } + + blkdev->sring = xengnttab_map_grant_refs(blkdev->xendev.gnttabdev, + blkdev->nr_ring_ref, + domids, + blkdev->ring_ref, + PROT_READ | PROT_WRITE); + + g_free(domids); + if (!blkdev->sring) { return -1; } + blkdev->cnt_map++; switch (blkdev->protocol) { case BLKIF_PROTOCOL_NATIVE: { blkif_sring_t *sring_native = blkdev->sring; - BACK_RING_INIT(&blkdev->rings.native, sring_native, XC_PAGE_SIZE); + BACK_RING_INIT(&blkdev->rings.native, sring_native, ring_size); break; } case BLKIF_PROTOCOL_X86_32: { blkif_x86_32_sring_t *sring_x86_32 = blkdev->sring; - BACK_RING_INIT(&blkdev->rings.x86_32_part, sring_x86_32, XC_PAGE_SIZE); + BACK_RING_INIT(&blkdev->rings.x86_32_part, sring_x86_32, ring_size); break; } case BLKIF_PROTOCOL_X86_64: { blkif_x86_64_sring_t *sring_x86_64 = blkdev->sring; - BACK_RING_INIT(&blkdev->rings.x86_64_part, sring_x86_64, XC_PAGE_SIZE); + BACK_RING_INIT(&blkdev->rings.x86_64_part, sring_x86_64, ring_size); break; } } if (blkdev->feature_persistent) { /* Init persistent grants */ - blkdev->max_grants = max_requests * BLKIF_MAX_SEGMENTS_PER_REQUEST; + blkdev->max_grants = blkdev->max_requests * + BLKIF_MAX_SEGMENTS_PER_REQUEST; 
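The grant accounting in blk_connect() above combines the per-request worst case with one grant per shared-ring page. A standalone sketch of the arithmetic; the max_requests value actually depends on __CONST_RING_SIZE for the negotiated protocol, so the figure below is assumed purely for illustration:

#include <stdio.h>

#define XC_PAGE_SIZE 4096
#define BLKIF_MAX_SEGMENTS_PER_REQUEST 11
/* Simplified worst case from the comment moved above blk_connect(). */
#define MAX_GRANTS(max_req, max_seg) (2 * (max_req) * (max_seg))

int main(void)
{
    unsigned int order = 4;                  /* MAX_RING_PAGE_ORDER */
    unsigned int nr_ring_ref = 1u << order;  /* 16 shared ring pages */
    unsigned int ring_size = XC_PAGE_SIZE * nr_ring_ref;
    unsigned int max_requests = 64;          /* assumed ring capacity */

    printf("ring: %u bytes, grants: %u\n", ring_size,
           MAX_GRANTS(max_requests, BLKIF_MAX_SEGMENTS_PER_REQUEST)
           + nr_ring_ref);
    return 0;
}
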
blkdev->persistent_gnts = g_tree_new_full((GCompareDataFunc)int_cmp, NULL, NULL, batch_maps ? @@ -1202,15 +1289,9 @@ static int blk_connect(struct XenDevice *xendev) xen_be_bind_evtchn(&blkdev->xendev); - blkdev->feature_grant_copy = - (xengnttab_grant_copy(blkdev->xendev.gnttabdev, 0, NULL) == 0); - - xen_pv_printf(&blkdev->xendev, 3, "grant copy operation %s\n", - blkdev->feature_grant_copy ? "enabled" : "disabled"); - - xen_pv_printf(&blkdev->xendev, 1, "ok: proto %s, ring-ref %d, " + xen_pv_printf(&blkdev->xendev, 1, "ok: proto %s, nr-ring-ref %u, " "remote port %d, local port %d\n", - blkdev->xendev.protocol, blkdev->ring_ref, + blkdev->xendev.protocol, blkdev->nr_ring_ref, blkdev->xendev.remote_port, blkdev->xendev.local_port); return 0; } @@ -1227,7 +1308,8 @@ static void blk_disconnect(struct XenDevice *xendev) xen_pv_unbind_evtchn(&blkdev->xendev); if (blkdev->sring) { - xengnttab_unmap(blkdev->xendev.gnttabdev, blkdev->sring, 1); + xengnttab_unmap(blkdev->xendev.gnttabdev, blkdev->sring, + blkdev->nr_ring_ref); blkdev->cnt_map--; blkdev->sring = NULL; } diff --git a/hw/core/machine.c b/hw/core/machine.c index 2e7e9778cd..ecb55528e8 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -770,19 +770,6 @@ static void machine_class_finalize(ObjectClass *klass, void *data) g_free(mc->name); } -static void register_compat_prop(const char *driver, - const char *property, - const char *value) -{ - GlobalProperty *p = g_new0(GlobalProperty, 1); - /* Machine compat_props must never cause errors: */ - p->errp = &error_abort; - p->driver = driver; - p->property = property; - p->value = value; - qdev_prop_register_global(p); -} - static void machine_register_compat_for_subclass(ObjectClass *oc, void *opaque) { GlobalProperty *p = opaque; diff --git a/hw/core/qdev-properties.c b/hw/core/qdev-properties.c index 68cd65345c..f11d57831b 100644 --- a/hw/core/qdev-properties.c +++ b/hw/core/qdev-properties.c @@ -1084,6 +1084,27 @@ void qdev_prop_register_global(GlobalProperty *prop) global_props = g_list_append(global_props, prop); } +void register_compat_prop(const char *driver, + const char *property, + const char *value) +{ + GlobalProperty *p = g_new0(GlobalProperty, 1); + + /* Any compat_props must never cause error */ + p->errp = &error_abort; + p->driver = driver; + p->property = property; + p->value = value; + qdev_prop_register_global(p); +} + +void register_compat_props_array(GlobalProperty *prop) +{ + for (; prop && prop->driver; prop++) { + register_compat_prop(prop->driver, prop->property, prop->value); + } +} + void qdev_prop_register_global_list(GlobalProperty *props) { int i; diff --git a/hw/display/qxl.c b/hw/display/qxl.c index ad09bb98f9..3c1688e7cb 100644 --- a/hw/display/qxl.c +++ b/hw/display/qxl.c @@ -2373,12 +2373,12 @@ static VMStateDescription qxl_vmstate = { VMSTATE_UINT32(last_release_offset, PCIQXLDevice), VMSTATE_UINT32(mode, PCIQXLDevice), VMSTATE_UINT32(ssd.unique, PCIQXLDevice), - VMSTATE_INT32_EQUAL(num_memslots, PCIQXLDevice), + VMSTATE_INT32_EQUAL(num_memslots, PCIQXLDevice, NULL), VMSTATE_STRUCT_ARRAY(guest_slots, PCIQXLDevice, NUM_MEMSLOTS, 0, qxl_memslot, struct guest_slots), VMSTATE_STRUCT(guest_primary.surface, PCIQXLDevice, 0, qxl_surface, QXLSurfaceCreate), - VMSTATE_INT32_EQUAL(ssd.num_surfaces, PCIQXLDevice), + VMSTATE_INT32_EQUAL(ssd.num_surfaces, PCIQXLDevice, NULL), VMSTATE_VARRAY_INT32(guest_surfaces.cmds, PCIQXLDevice, ssd.num_surfaces, 0, vmstate_info_uint64, uint64_t), diff --git a/hw/display/vga.c b/hw/display/vga.c index 
dcc95f88e2..80508b83f4 100644 --- a/hw/display/vga.c +++ b/hw/display/vga.c @@ -2099,7 +2099,7 @@ const VMStateDescription vmstate_vga_common = { VMSTATE_BUFFER(palette, VGACommonState), VMSTATE_INT32(bank_offset, VGACommonState), - VMSTATE_UINT8_EQUAL(is_vbe_vmstate, VGACommonState), + VMSTATE_UINT8_EQUAL(is_vbe_vmstate, VGACommonState, NULL), VMSTATE_UINT16(vbe_index, VGACommonState), VMSTATE_UINT16_ARRAY(vbe_regs, VGACommonState, VBE_DISPI_INDEX_NB), VMSTATE_UINT32(vbe_start_addr, VGACommonState), diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c index 58dc0b2737..0506d2c1b0 100644 --- a/hw/display/virtio-gpu.c +++ b/hw/display/virtio-gpu.c @@ -962,7 +962,7 @@ static const VMStateDescription vmstate_virtio_gpu_scanouts = { .version_id = 1, .fields = (VMStateField[]) { VMSTATE_INT32(enable, struct VirtIOGPU), - VMSTATE_UINT32_EQUAL(conf.max_outputs, struct VirtIOGPU), + VMSTATE_UINT32_EQUAL(conf.max_outputs, struct VirtIOGPU, NULL), VMSTATE_STRUCT_VARRAY_UINT32(scanout, struct VirtIOGPU, conf.max_outputs, 1, vmstate_virtio_gpu_scanout, diff --git a/hw/display/vmware_vga.c b/hw/display/vmware_vga.c index ec5f27d67e..c989cef1cd 100644 --- a/hw/display/vmware_vga.c +++ b/hw/display/vmware_vga.c @@ -1192,7 +1192,7 @@ static const VMStateDescription vmstate_vmware_vga_internal = { .minimum_version_id = 0, .post_load = vmsvga_post_load, .fields = (VMStateField[]) { - VMSTATE_INT32_EQUAL(new_depth, struct vmsvga_state_s), + VMSTATE_INT32_EQUAL(new_depth, struct vmsvga_state_s, NULL), VMSTATE_INT32(enable, struct vmsvga_state_s), VMSTATE_INT32(config, struct vmsvga_state_s), VMSTATE_INT32(cursor.id, struct vmsvga_state_s), diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index 46a2bc41ab..22dbef64c6 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -314,12 +314,9 @@ static void pc_init1(MachineState *machine, static void pc_compat_2_3(MachineState *machine) { PCMachineState *pcms = PC_MACHINE(machine); - savevm_skip_section_footers(); if (kvm_enabled()) { pcms->smm = ON_OFF_AUTO_OFF; } - global_state_set_optional(); - savevm_skip_configuration(); } static void pc_compat_2_2(MachineState *machine) diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c index f60826d6e0..874d3fe280 100644 --- a/hw/ide/ahci.c +++ b/hw/ide/ahci.c @@ -1669,7 +1669,7 @@ const VMStateDescription vmstate_ahci = { VMSTATE_UINT32(control_regs.impl, AHCIState), VMSTATE_UINT32(control_regs.version, AHCIState), VMSTATE_UINT32(idp_index, AHCIState), - VMSTATE_INT32_EQUAL(ports, AHCIState), + VMSTATE_INT32_EQUAL(ports, AHCIState, NULL), VMSTATE_END_OF_LIST() }, }; diff --git a/hw/input/hid.c b/hw/input/hid.c index 93887ecc43..0d049ff61c 100644 --- a/hw/input/hid.c +++ b/hw/input/hid.c @@ -487,6 +487,7 @@ void hid_reset(HIDState *hs) memset(hs->kbd.keycodes, 0, sizeof(hs->kbd.keycodes)); memset(hs->kbd.key, 0, sizeof(hs->kbd.key)); hs->kbd.keys = 0; + hs->kbd.modifiers = 0; break; case HID_MOUSE: case HID_TABLET: diff --git a/hw/input/ps2.c b/hw/input/ps2.c index 1d3a440bbd..3ba05efd06 100644 --- a/hw/input/ps2.c +++ b/hw/input/ps2.c @@ -85,12 +85,12 @@ typedef struct { int rptr, wptr, count; } PS2Queue; -typedef struct { +struct PS2State { PS2Queue queue; int32_t write_cmd; void (*update_irq)(void *, int); void *update_arg; -} PS2State; +}; typedef struct { PS2State common; @@ -551,9 +551,17 @@ static uint8_t translate_table[256] = { 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, }; -void ps2_queue(void *opaque, int b) +static void ps2_reset_queue(PS2State *s) +{ + PS2Queue *q = &s->queue; + + q->rptr = 0; + q->wptr 
= 0; + q->count = 0; +} + +void ps2_queue(PS2State *s, int b) { - PS2State *s = (PS2State *)opaque; PS2Queue *q = &s->queue; if (q->count >= PS2_QUEUE_SIZE - 1) @@ -692,13 +700,12 @@ static void ps2_keyboard_event(DeviceState *dev, QemuConsole *src, } } -uint32_t ps2_read_data(void *opaque) +uint32_t ps2_read_data(PS2State *s) { - PS2State *s = (PS2State *)opaque; PS2Queue *q; int val, index; - trace_ps2_read_data(opaque); + trace_ps2_read_data(s); q = &s->queue; if (q->count == 0) { /* NOTE: if no data left, we return the last keyboard one @@ -733,6 +740,7 @@ static void ps2_reset_keyboard(PS2KbdState *s) trace_ps2_reset_keyboard(s); s->scan_enabled = 1; s->scancode_set = 2; + ps2_reset_queue(&s->common); ps2_set_ledstate(s, 0); } @@ -1081,12 +1089,8 @@ void ps2_write_mouse(void *opaque, int val) static void ps2_common_reset(PS2State *s) { - PS2Queue *q; s->write_cmd = -1; - q = &s->queue; - q->rptr = 0; - q->wptr = 0; - q->count = 0; + ps2_reset_queue(s); s->update_irq(s->update_arg, 0); } diff --git a/hw/input/vmmouse.c b/hw/input/vmmouse.c index 4747da9a8d..b6d22086f4 100644 --- a/hw/input/vmmouse.c +++ b/hw/input/vmmouse.c @@ -243,7 +243,7 @@ static const VMStateDescription vmstate_vmmouse = { .minimum_version_id = 0, .post_load = vmmouse_post_load, .fields = (VMStateField[]) { - VMSTATE_INT32_EQUAL(queue_size, VMMouseState), + VMSTATE_INT32_EQUAL(queue_size, VMMouseState, NULL), VMSTATE_UINT32_ARRAY(queue, VMMouseState, VMMOUSE_QUEUE_SIZE), VMSTATE_UINT16(nb_queue, VMMouseState), VMSTATE_UINT16(status, VMMouseState), diff --git a/hw/intc/openpic.c b/hw/intc/openpic.c index f966d0604a..9dd285b923 100644 --- a/hw/intc/openpic.c +++ b/hw/intc/openpic.c @@ -45,6 +45,7 @@ #include "qemu/bitops.h" #include "qapi/qmp/qerror.h" #include "qemu/log.h" +#include "qemu/timer.h" //#define DEBUG_OPENPIC @@ -54,8 +55,10 @@ static const int debug_openpic = 1; static const int debug_openpic = 0; #endif +static int get_current_cpu(void); #define DPRINTF(fmt, ...) do { \ if (debug_openpic) { \ + printf("Core%d: ", get_current_cpu()); \ printf(fmt , ## __VA_ARGS__); \ } \ } while (0) @@ -246,9 +249,31 @@ typedef struct IRQSource { #define IDR_EP 0x80000000 /* external pin */ #define IDR_CI 0x40000000 /* critical interrupt */ +/* Convert between openpic clock ticks and nanosecs. In the hardware the clock + frequency is driven by board inputs to the PIC which the PIC would then + divide by 4 or 8. For now hard code to 25MZ. +*/ +#define OPENPIC_TIMER_FREQ_MHZ 25 +#define OPENPIC_TIMER_NS_PER_TICK (1000 / OPENPIC_TIMER_FREQ_MHZ) +static inline uint64_t ns_to_ticks(uint64_t ns) +{ + return ns / OPENPIC_TIMER_NS_PER_TICK; +} +static inline uint64_t ticks_to_ns(uint64_t ticks) +{ + return ticks * OPENPIC_TIMER_NS_PER_TICK; +} + typedef struct OpenPICTimer { uint32_t tccr; /* Global timer current count register */ uint32_t tbcr; /* Global timer base count register */ + int n_IRQ; + bool qemu_timer_active; /* Is the qemu_timer is running? */ + struct QEMUTimer *qemu_timer; + struct OpenPICState *opp; /* Device timer is part of. */ + /* The QEMU_CLOCK_VIRTUAL time (in ns) corresponding to the last + current_count written or read, only defined if qemu_timer_active. 
*/ + uint64_t origin_time; } OpenPICTimer; typedef struct OpenPICMSI { @@ -795,6 +820,65 @@ static uint64_t openpic_gbl_read(void *opaque, hwaddr addr, unsigned len) return retval; } +static void openpic_tmr_set_tmr(OpenPICTimer *tmr, uint32_t val, bool enabled); + +static void qemu_timer_cb(void *opaque) +{ + OpenPICTimer *tmr = opaque; + OpenPICState *opp = tmr->opp; + uint32_t n_IRQ = tmr->n_IRQ; + uint32_t val = tmr->tbcr & ~TBCR_CI; + uint32_t tog = ((tmr->tccr & TCCR_TOG) ^ TCCR_TOG); /* invert toggle. */ + + DPRINTF("%s n_IRQ=%d\n", __func__, n_IRQ); + /* Reload current count from base count and setup timer. */ + tmr->tccr = val | tog; + openpic_tmr_set_tmr(tmr, val, /*enabled=*/true); + /* Raise the interrupt. */ + opp->src[n_IRQ].destmask = read_IRQreg_idr(opp, n_IRQ); + openpic_set_irq(opp, n_IRQ, 1); + openpic_set_irq(opp, n_IRQ, 0); +} + +/* If enabled is true, arranges for an interrupt to be raised val clocks into + the future, if enabled is false cancels the timer. */ +static void openpic_tmr_set_tmr(OpenPICTimer *tmr, uint32_t val, bool enabled) +{ + uint64_t ns = ticks_to_ns(val & ~TCCR_TOG); + /* A count of zero causes a timer to be set to expire immediately. This + effectively stops the simulation since the timer is constantly expiring + which prevents guest code execution, so we don't honor that + configuration. On real hardware, this situation would generate an + interrupt on every clock cycle if the interrupt was unmasked. */ + if ((ns == 0) || !enabled) { + tmr->qemu_timer_active = false; + tmr->tccr = tmr->tccr & TCCR_TOG; + timer_del(tmr->qemu_timer); /* set timer to never expire. */ + } else { + tmr->qemu_timer_active = true; + uint64_t now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + tmr->origin_time = now; + timer_mod(tmr->qemu_timer, now + ns); /* set timer expiration. */ + } +} + +/* Returns the currrent tccr value, i.e., timer value (in clocks) with + appropriate TOG. */ +static uint64_t openpic_tmr_get_timer(OpenPICTimer *tmr) +{ + uint64_t retval; + if (!tmr->qemu_timer_active) { + retval = tmr->tccr; + } else { + uint64_t now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + uint64_t used = now - tmr->origin_time; /* nsecs */ + uint32_t used_ticks = (uint32_t)ns_to_ticks(used); + uint32_t count = (tmr->tccr & ~TCCR_TOG) - used_ticks; + retval = (uint32_t)((tmr->tccr & TCCR_TOG) | (count & ~TCCR_TOG)); + } + return retval; +} + static void openpic_tmr_write(void *opaque, hwaddr addr, uint64_t val, unsigned len) { @@ -819,10 +903,15 @@ static void openpic_tmr_write(void *opaque, hwaddr addr, uint64_t val, case 0x00: /* TCCR */ break; case 0x10: /* TBCR */ - if ((opp->timers[idx].tccr & TCCR_TOG) != 0 && - (val & TBCR_CI) == 0 && - (opp->timers[idx].tbcr & TBCR_CI) != 0) { - opp->timers[idx].tccr &= ~TCCR_TOG; + /* Did the enable status change? */ + if ((opp->timers[idx].tbcr & TBCR_CI) != (val & TBCR_CI)) { + /* Did "Count Inhibit" transition from 1 to 0? 
*/ + if ((val & TBCR_CI) == 0) { + opp->timers[idx].tccr = val & ~TCCR_TOG; + } + openpic_tmr_set_tmr(&opp->timers[idx], + (val & ~TBCR_CI), + /*enabled=*/((val & TBCR_CI) == 0)); } opp->timers[idx].tbcr = val; break; @@ -854,7 +943,7 @@ static uint64_t openpic_tmr_read(void *opaque, hwaddr addr, unsigned len) idx = (addr >> 6) & 0x3; switch (addr & 0x30) { case 0x00: /* TCCR */ - retval = opp->timers[idx].tccr; + retval = openpic_tmr_get_timer(&opp->timers[idx]); break; case 0x10: /* TBCR */ retval = opp->timers[idx].tbcr; @@ -1136,7 +1225,10 @@ static uint32_t openpic_iack(OpenPICState *opp, IRQDest *dst, int cpu) IRQ_resetbit(&dst->raised, irq); } - if ((irq >= opp->irq_ipi0) && (irq < (opp->irq_ipi0 + OPENPIC_MAX_IPI))) { + /* Timers and IPIs support multicast. */ + if (((irq >= opp->irq_ipi0) && (irq < (opp->irq_ipi0 + OPENPIC_MAX_IPI))) || + ((irq >= opp->irq_tim0) && (irq < (opp->irq_tim0 + OPENPIC_MAX_TMR)))) { + DPRINTF("irq is IPI or TMR\n"); src->destmask &= ~(1 << cpu); if (src->destmask && !src->level) { /* trigger on CPUs that didn't know about it yet */ @@ -1341,6 +1433,10 @@ static void openpic_reset(DeviceState *d) for (i = 0; i < OPENPIC_MAX_TMR; i++) { opp->timers[i].tccr = 0; opp->timers[i].tbcr = TBCR_CI; + if (opp->timers[i].qemu_timer_active) { + timer_del(opp->timers[i].qemu_timer); /* Inhibit timer */ + opp->timers[i].qemu_timer_active = false; + } } /* Go out of RESET state */ opp->gcr = 0; @@ -1391,6 +1487,15 @@ static void fsl_common_init(OpenPICState *opp) opp->src[i].type = IRQ_TYPE_FSLSPECIAL; opp->src[i].level = false; } + + for (i = 0; i < OPENPIC_MAX_TMR; i++) { + opp->timers[i].n_IRQ = opp->irq_tim0 + i; + opp->timers[i].qemu_timer_active = false; + opp->timers[i].qemu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, + &qemu_timer_cb, + &opp->timers[i]); + opp->timers[i].opp = opp; + } } static void map_list(OpenPICState *opp, const MemReg *list, int *count) @@ -1499,7 +1604,7 @@ static const VMStateDescription vmstate_openpic = { VMSTATE_UINT32(max_irq, OpenPICState), VMSTATE_STRUCT_VARRAY_UINT32(src, OpenPICState, max_irq, 0, vmstate_openpic_irqsource, IRQSource), - VMSTATE_UINT32_EQUAL(nb_cpus, OpenPICState), + VMSTATE_UINT32_EQUAL(nb_cpus, OpenPICState, NULL), VMSTATE_STRUCT_VARRAY_UINT32(dst, OpenPICState, nb_cpus, 0, vmstate_openpic_irqdest, IRQDest), VMSTATE_STRUCT_ARRAY(timers, OpenPICState, OPENPIC_MAX_TMR, 0, diff --git a/hw/intc/xics.c b/hw/intc/xics.c index 7ccfb53c55..a84ba51ad8 100644 --- a/hw/intc/xics.c +++ b/hw/intc/xics.c @@ -344,10 +344,14 @@ static void icp_realize(DeviceState *dev, Error **errp) } qemu_register_reset(icp_reset, dev); + vmstate_register(NULL, icp->cs->cpu_index, &vmstate_icp_server, icp); } static void icp_unrealize(DeviceState *dev, Error **errp) { + ICPState *icp = ICP(dev); + + vmstate_unregister(NULL, &vmstate_icp_server, icp); qemu_unregister_reset(icp_reset, dev); } @@ -355,7 +359,6 @@ static void icp_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); - dc->vmsd = &vmstate_icp_server; dc->realize = icp_realize; dc->unrealize = icp_unrealize; } @@ -574,7 +577,7 @@ static const VMStateDescription vmstate_ics_simple = { .post_load = ics_simple_dispatch_post_load, .fields = (VMStateField[]) { /* Sanity check */ - VMSTATE_UINT32_EQUAL(nr_irqs, ICSState), + VMSTATE_UINT32_EQUAL(nr_irqs, ICSState, NULL), VMSTATE_STRUCT_VARRAY_POINTER_UINT32(irqs, ICSState, nr_irqs, vmstate_ics_simple_irq, diff --git a/hw/misc/Makefile.objs b/hw/misc/Makefile.objs index 20198466f0..08a79c3e3e 100644 --- 
a/hw/misc/Makefile.objs +++ b/hw/misc/Makefile.objs @@ -57,3 +57,4 @@ obj-$(CONFIG_EDU) += edu.o obj-$(CONFIG_HYPERV_TESTDEV) += hyperv_testdev.o obj-$(CONFIG_AUX) += auxbus.o obj-$(CONFIG_ASPEED_SOC) += aspeed_scu.o aspeed_sdmc.o +obj-y += mmio_interface.o diff --git a/hw/misc/applesmc.c b/hw/misc/applesmc.c index 77fab5b9d2..7896812304 100644 --- a/hw/misc/applesmc.c +++ b/hw/misc/applesmc.c @@ -39,21 +39,43 @@ /* #define DEBUG_SMC */ #define APPLESMC_DEFAULT_IOBASE 0x300 -/* data port used by Apple SMC */ -#define APPLESMC_DATA_PORT 0x0 -/* command/status port used by Apple SMC */ -#define APPLESMC_CMD_PORT 0x4 -#define APPLESMC_NR_PORTS 32 -#define APPLESMC_READ_CMD 0x10 -#define APPLESMC_WRITE_CMD 0x11 -#define APPLESMC_GET_KEY_BY_INDEX_CMD 0x12 -#define APPLESMC_GET_KEY_TYPE_CMD 0x13 +enum { + APPLESMC_DATA_PORT = 0x00, + APPLESMC_CMD_PORT = 0x04, + APPLESMC_ERR_PORT = 0x1e, + APPLESMC_NUM_PORTS = 0x20, +}; + +enum { + APPLESMC_READ_CMD = 0x10, + APPLESMC_WRITE_CMD = 0x11, + APPLESMC_GET_KEY_BY_INDEX_CMD = 0x12, + APPLESMC_GET_KEY_TYPE_CMD = 0x13, +}; + +enum { + APPLESMC_ST_CMD_DONE = 0x00, + APPLESMC_ST_DATA_READY = 0x01, + APPLESMC_ST_BUSY = 0x02, + APPLESMC_ST_ACK = 0x04, + APPLESMC_ST_NEW_CMD = 0x08, +}; + +enum { + APPLESMC_ST_1E_CMD_INTRUPTED = 0x80, + APPLESMC_ST_1E_STILL_BAD_CMD = 0x81, + APPLESMC_ST_1E_BAD_CMD = 0x82, + APPLESMC_ST_1E_NOEXIST = 0x84, + APPLESMC_ST_1E_WRITEONLY = 0x85, + APPLESMC_ST_1E_READONLY = 0x86, + APPLESMC_ST_1E_BAD_INDEX = 0xb8, +}; #ifdef DEBUG_SMC #define smc_debug(...) fprintf(stderr, "AppleSMC: " __VA_ARGS__) #else -#define smc_debug(...) do { } while(0) +#define smc_debug(...) do { } while (0) #endif static char default_osk[64] = "This is a dummy key. Enter the real key " @@ -74,15 +96,17 @@ struct AppleSMCState { MemoryRegion io_data; MemoryRegion io_cmd; + MemoryRegion io_err; uint32_t iobase; uint8_t cmd; uint8_t status; - uint8_t key[4]; + uint8_t status_1e; + uint8_t last_ret; + char key[4]; uint8_t read_pos; uint8_t data_len; uint8_t data_pos; uint8_t data[255]; - uint8_t charactic[4]; char *osk; QLIST_HEAD(, AppleSMCData) data_def; }; @@ -91,89 +115,138 @@ static void applesmc_io_cmd_write(void *opaque, hwaddr addr, uint64_t val, unsigned size) { AppleSMCState *s = opaque; - - smc_debug("CMD Write B: %#x = %#x\n", addr, val); - switch(val) { - case APPLESMC_READ_CMD: - s->status = 0x0c; - break; + uint8_t status = s->status & 0x0f; + + smc_debug("CMD received: 0x%02x\n", (uint8_t)val); + switch (val) { + case APPLESMC_READ_CMD: + /* did last command run through OK? 
*/ + if (status == APPLESMC_ST_CMD_DONE || status == APPLESMC_ST_NEW_CMD) { + s->cmd = val; + s->status = APPLESMC_ST_NEW_CMD | APPLESMC_ST_ACK; + } else { + smc_debug("ERROR: previous command interrupted!\n"); + s->status = APPLESMC_ST_NEW_CMD; + s->status_1e = APPLESMC_ST_1E_CMD_INTRUPTED; + } + break; + default: + smc_debug("UNEXPECTED CMD 0x%02x\n", (uint8_t)val); + s->status = APPLESMC_ST_NEW_CMD; + s->status_1e = APPLESMC_ST_1E_BAD_CMD; } - s->cmd = val; s->read_pos = 0; s->data_pos = 0; } -static void applesmc_fill_data(AppleSMCState *s) +static struct AppleSMCData *applesmc_find_key(AppleSMCState *s) { struct AppleSMCData *d; QLIST_FOREACH(d, &s->data_def, node) { if (!memcmp(d->key, s->key, 4)) { - smc_debug("Key matched (%s Len=%d Data=%s)\n", d->key, - d->len, d->data); - memcpy(s->data, d->data, d->len); - return; + return d; } } + return NULL; } static void applesmc_io_data_write(void *opaque, hwaddr addr, uint64_t val, unsigned size) { AppleSMCState *s = opaque; + struct AppleSMCData *d; - smc_debug("DATA Write B: %#x = %#x\n", addr, val); - switch(s->cmd) { - case APPLESMC_READ_CMD: - if(s->read_pos < 4) { - s->key[s->read_pos] = val; - s->status = 0x04; - } else if(s->read_pos == 4) { - s->data_len = val; - s->status = 0x05; + smc_debug("DATA received: 0x%02x\n", (uint8_t)val); + switch (s->cmd) { + case APPLESMC_READ_CMD: + if ((s->status & 0x0f) == APPLESMC_ST_CMD_DONE) { + break; + } + if (s->read_pos < 4) { + s->key[s->read_pos] = val; + s->status = APPLESMC_ST_ACK; + } else if (s->read_pos == 4) { + d = applesmc_find_key(s); + if (d != NULL) { + memcpy(s->data, d->data, d->len); + s->data_len = d->len; s->data_pos = 0; - smc_debug("Key = %c%c%c%c Len = %d\n", s->key[0], - s->key[1], s->key[2], s->key[3], val); - applesmc_fill_data(s); + s->status = APPLESMC_ST_ACK | APPLESMC_ST_DATA_READY; + s->status_1e = APPLESMC_ST_CMD_DONE; /* clear on valid key */ + } else { + smc_debug("READ_CMD: key '%c%c%c%c' not found!\n", + s->key[0], s->key[1], s->key[2], s->key[3]); + s->status = APPLESMC_ST_CMD_DONE; + s->status_1e = APPLESMC_ST_1E_NOEXIST; } - s->read_pos++; - break; + } + s->read_pos++; + break; + default: + s->status = APPLESMC_ST_CMD_DONE; + s->status_1e = APPLESMC_ST_1E_STILL_BAD_CMD; } } -static uint64_t applesmc_io_data_read(void *opaque, hwaddr addr1, - unsigned size) +static void applesmc_io_err_write(void *opaque, hwaddr addr, uint64_t val, + unsigned size) +{ + smc_debug("ERR_CODE received: 0x%02x, ignoring!\n", (uint8_t)val); + /* NOTE: writing to the error port not supported! 
*/ +} + +static uint64_t applesmc_io_data_read(void *opaque, hwaddr addr, unsigned size) { AppleSMCState *s = opaque; - uint8_t retval = 0; - - switch(s->cmd) { - case APPLESMC_READ_CMD: - if(s->data_pos < s->data_len) { - retval = s->data[s->data_pos]; - smc_debug("READ_DATA[%d] = %#hhx\n", s->data_pos, - retval); - s->data_pos++; - if(s->data_pos == s->data_len) { - s->status = 0x00; - smc_debug("EOF\n"); - } else - s->status = 0x05; + + switch (s->cmd) { + case APPLESMC_READ_CMD: + if (!(s->status & APPLESMC_ST_DATA_READY)) { + break; + } + if (s->data_pos < s->data_len) { + s->last_ret = s->data[s->data_pos]; + smc_debug("READ '%c%c%c%c'[%d] = %02x\n", + s->key[0], s->key[1], s->key[2], s->key[3], + s->data_pos, s->last_ret); + s->data_pos++; + if (s->data_pos == s->data_len) { + s->status = APPLESMC_ST_CMD_DONE; + smc_debug("READ '%c%c%c%c' Len=%d complete!\n", + s->key[0], s->key[1], s->key[2], s->key[3], + s->data_len); + } else { + s->status = APPLESMC_ST_ACK | APPLESMC_ST_DATA_READY; } + } + break; + default: + s->status = APPLESMC_ST_CMD_DONE; + s->status_1e = APPLESMC_ST_1E_STILL_BAD_CMD; } - smc_debug("DATA Read b: %#x = %#x\n", addr1, retval); + smc_debug("DATA sent: 0x%02x\n", s->last_ret); - return retval; + return s->last_ret; } -static uint64_t applesmc_io_cmd_read(void *opaque, hwaddr addr1, unsigned size) +static uint64_t applesmc_io_cmd_read(void *opaque, hwaddr addr, unsigned size) { AppleSMCState *s = opaque; - smc_debug("CMD Read B: %#x\n", addr1); + smc_debug("CMD sent: 0x%02x\n", s->status); return s->status; } +static uint64_t applesmc_io_err_read(void *opaque, hwaddr addr, unsigned size) +{ + AppleSMCState *s = opaque; + + /* NOTE: read does not clear the 1e status */ + smc_debug("ERR_CODE sent: 0x%02x\n", s->status_1e); + return s->status_1e; +} + static void applesmc_add_key(AppleSMCState *s, const char *key, int len, const char *data) { @@ -196,6 +269,9 @@ static void qdev_applesmc_isa_reset(DeviceState *dev) QLIST_FOREACH_SAFE(d, &s->data_def, node, next) { QLIST_REMOVE(d, node); } + s->status = 0x00; + s->status_1e = 0x00; + s->last_ret = 0x00; applesmc_add_key(s, "REV ", 6, "\x01\x13\x0f\x00\x00\x03"); applesmc_add_key(s, "OSK0", 32, s->osk); @@ -225,20 +301,35 @@ static const MemoryRegionOps applesmc_cmd_io_ops = { }, }; +static const MemoryRegionOps applesmc_err_io_ops = { + .write = applesmc_io_err_write, + .read = applesmc_io_err_read, + .endianness = DEVICE_NATIVE_ENDIAN, + .impl = { + .min_access_size = 1, + .max_access_size = 1, + }, +}; + static void applesmc_isa_realize(DeviceState *dev, Error **errp) { AppleSMCState *s = APPLE_SMC(dev); memory_region_init_io(&s->io_data, OBJECT(s), &applesmc_data_io_ops, s, - "applesmc-data", 4); + "applesmc-data", 1); isa_register_ioport(&s->parent_obj, &s->io_data, s->iobase + APPLESMC_DATA_PORT); memory_region_init_io(&s->io_cmd, OBJECT(s), &applesmc_cmd_io_ops, s, - "applesmc-cmd", 4); + "applesmc-cmd", 1); isa_register_ioport(&s->parent_obj, &s->io_cmd, s->iobase + APPLESMC_CMD_PORT); + memory_region_init_io(&s->io_err, OBJECT(s), &applesmc_err_io_ops, s, + "applesmc-err", 1); + isa_register_ioport(&s->parent_obj, &s->io_err, + s->iobase + APPLESMC_ERR_PORT); + if (!s->osk || (strlen(s->osk) != 64)) { fprintf(stderr, "WARNING: Using AppleSMC with invalid key\n"); s->osk = default_osk; diff --git a/hw/misc/max111x.c b/hw/misc/max111x.c index 2a277bdb86..6dbdc03677 100644 --- a/hw/misc/max111x.c +++ b/hw/misc/max111x.c @@ -116,7 +116,7 @@ static const VMStateDescription vmstate_max111x = { VMSTATE_UINT8(tb1, 
MAX111xState), VMSTATE_UINT8(rb2, MAX111xState), VMSTATE_UINT8(rb3, MAX111xState), - VMSTATE_INT32_EQUAL(inputs, MAX111xState), + VMSTATE_INT32_EQUAL(inputs, MAX111xState, NULL), VMSTATE_INT32(com, MAX111xState), VMSTATE_ARRAY_INT32_UNSAFE(input, MAX111xState, inputs, vmstate_info_uint8, uint8_t), diff --git a/hw/misc/mmio_interface.c b/hw/misc/mmio_interface.c new file mode 100644 index 0000000000..6f004d2bab --- /dev/null +++ b/hw/misc/mmio_interface.c @@ -0,0 +1,128 @@ +/* + * mmio_interface.c + * + * Copyright (C) 2017 : GreenSocs + * http://www.greensocs.com/ , email: info@greensocs.com + * + * Developed by : + * Frederic Konrad <fred.konrad@greensocs.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option)any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses/>. + * + */ + +#include "qemu/osdep.h" +#include "qemu/log.h" +#include "trace.h" +#include "hw/qdev-properties.h" +#include "hw/misc/mmio_interface.h" +#include "qapi/error.h" + +#ifndef DEBUG_MMIO_INTERFACE +#define DEBUG_MMIO_INTERFACE 0 +#endif + +static uint64_t mmio_interface_counter; + +#define DPRINTF(fmt, ...) do { \ + if (DEBUG_MMIO_INTERFACE) { \ + qemu_log("mmio_interface: 0x%" PRIX64 ": " fmt, s->id, ## __VA_ARGS__);\ + } \ +} while (0); + +static void mmio_interface_init(Object *obj) +{ + MMIOInterface *s = MMIO_INTERFACE(obj); + + if (DEBUG_MMIO_INTERFACE) { + s->id = mmio_interface_counter++; + } + + DPRINTF("interface created\n"); + s->host_ptr = 0; + s->subregion = 0; +} + +static void mmio_interface_realize(DeviceState *dev, Error **errp) +{ + MMIOInterface *s = MMIO_INTERFACE(dev); + + DPRINTF("realize from 0x%" PRIX64 " to 0x%" PRIX64 " map host pointer" + " %p\n", s->start, s->end, s->host_ptr); + + if (!s->host_ptr) { + error_setg(errp, "host_ptr property must be set"); + } + + if (!s->subregion) { + error_setg(errp, "subregion property must be set"); + } + + memory_region_init_ram_ptr(&s->ram_mem, OBJECT(s), "ram", + s->end - s->start + 1, s->host_ptr); + memory_region_set_readonly(&s->ram_mem, s->ro); + memory_region_add_subregion(s->subregion, s->start, &s->ram_mem); +} + +static void mmio_interface_unrealize(DeviceState *dev, Error **errp) +{ + MMIOInterface *s = MMIO_INTERFACE(dev); + + DPRINTF("unrealize from 0x%" PRIX64 " to 0x%" PRIX64 " map host pointer" + " %p\n", s->start, s->end, s->host_ptr); + memory_region_del_subregion(s->subregion, &s->ram_mem); +} + +static void mmio_interface_finalize(Object *obj) +{ + MMIOInterface *s = MMIO_INTERFACE(obj); + + DPRINTF("finalize from 0x%" PRIX64 " to 0x%" PRIX64 " map host pointer" + " %p\n", s->start, s->end, s->host_ptr); + object_unparent(OBJECT(&s->ram_mem)); +} + +static Property mmio_interface_properties[] = { + DEFINE_PROP_UINT64("start", MMIOInterface, start, 0), + DEFINE_PROP_UINT64("end", MMIOInterface, end, 0), + DEFINE_PROP_PTR("host_ptr", MMIOInterface, host_ptr), + DEFINE_PROP_BOOL("ro", MMIOInterface, ro, false), + DEFINE_PROP_MEMORY_REGION("subregion", MMIOInterface, subregion), + 
DEFINE_PROP_END_OF_LIST(), +}; + +static void mmio_interface_class_init(ObjectClass *oc, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(oc); + + dc->realize = mmio_interface_realize; + dc->unrealize = mmio_interface_unrealize; + dc->props = mmio_interface_properties; +} + +static const TypeInfo mmio_interface_info = { + .name = TYPE_MMIO_INTERFACE, + .parent = TYPE_DEVICE, + .instance_size = sizeof(MMIOInterface), + .instance_init = mmio_interface_init, + .instance_finalize = mmio_interface_finalize, + .class_init = mmio_interface_class_init, +}; + +static void mmio_interface_register_types(void) +{ + type_register_static(&mmio_interface_info); +} + +type_init(mmio_interface_register_types) diff --git a/hw/nvram/eeprom93xx.c b/hw/nvram/eeprom93xx.c index 848692abc0..2fd0e3c29f 100644 --- a/hw/nvram/eeprom93xx.c +++ b/hw/nvram/eeprom93xx.c @@ -143,7 +143,7 @@ static const VMStateDescription vmstate_eeprom = { VMSTATE_UINT8(addrbits, eeprom_t), VMSTATE_UINT16_HACK_TEST(size, eeprom_t, is_old_eeprom_version), VMSTATE_UNUSED_TEST(is_old_eeprom_version, 1), - VMSTATE_UINT16_EQUAL_V(size, eeprom_t, EEPROM_VERSION), + VMSTATE_UINT16_EQUAL_V(size, eeprom_t, EEPROM_VERSION, NULL), VMSTATE_UINT16(data, eeprom_t), VMSTATE_VARRAY_UINT16_UNSAFE(contents, eeprom_t, size, 0, vmstate_info_uint16, uint16_t), diff --git a/hw/pci/pci.c b/hw/pci/pci.c index 98ccc27533..b7fee4bdf2 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -74,7 +74,7 @@ static const VMStateDescription vmstate_pcibus = { .version_id = 1, .minimum_version_id = 1, .fields = (VMStateField[]) { - VMSTATE_INT32_EQUAL(nirq, PCIBus), + VMSTATE_INT32_EQUAL(nirq, PCIBus, NULL), VMSTATE_VARRAY_INT32(irq_count, PCIBus, nirq, 0, vmstate_info_int32, int32_t), diff --git a/hw/pci/pcie_aer.c b/hw/pci/pcie_aer.c index 828052b0c0..97200742b4 100644 --- a/hw/pci/pcie_aer.c +++ b/hw/pci/pcie_aer.c @@ -813,7 +813,7 @@ const VMStateDescription vmstate_pcie_aer_log = { .minimum_version_id = 1, .fields = (VMStateField[]) { VMSTATE_UINT16(log_num, PCIEAERLog), - VMSTATE_UINT16_EQUAL(log_max, PCIEAERLog), + VMSTATE_UINT16_EQUAL(log_max, PCIEAERLog, NULL), VMSTATE_VALIDATE("log_num <= log_max", pcie_aer_state_log_num_valid), VMSTATE_STRUCT_VARRAY_POINTER_UINT16(log, PCIEAERLog, log_num, vmstate_pcie_aer_err, PCIEAERErr), diff --git a/hw/ppc/prep.c b/hw/ppc/prep.c index d16646c95d..36d3dcd89a 100644 --- a/hw/ppc/prep.c +++ b/hw/ppc/prep.c @@ -36,7 +36,6 @@ #include "hw/pci/pci_host.h" #include "hw/ppc/ppc.h" #include "hw/boards.h" -#include "hw/audio/soundhw.h" #include "qemu/error-report.h" #include "qemu/log.h" #include "hw/ide.h" @@ -782,9 +781,6 @@ static void ibm_40p_init(MachineState *machine) qbus_walk_children(BUS(isa_bus), prep_set_cmos_checksum, NULL, NULL, NULL, &cmos_checksum); - /* initialize audio subsystem */ - soundhw_init(); - /* add some more devices */ if (defaults_enabled()) { isa_create_simple(isa_bus, "i8042"); diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index ede5167bc0..0ee9fac50b 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -127,9 +127,49 @@ error: return NULL; } +static bool pre_2_10_vmstate_dummy_icp_needed(void *opaque) +{ + /* Dummy entries correspond to unused ICPState objects in older QEMUs, + * and newer QEMUs don't even have them. In both cases, we don't want + * to send anything on the wire. 
+ */ + return false; +} + +static const VMStateDescription pre_2_10_vmstate_dummy_icp = { + .name = "icp/server", + .version_id = 1, + .minimum_version_id = 1, + .needed = pre_2_10_vmstate_dummy_icp_needed, + .fields = (VMStateField[]) { + VMSTATE_UNUSED(4), /* uint32_t xirr */ + VMSTATE_UNUSED(1), /* uint8_t pending_priority */ + VMSTATE_UNUSED(1), /* uint8_t mfrr */ + VMSTATE_END_OF_LIST() + }, +}; + +static void pre_2_10_vmstate_register_dummy_icp(int i) +{ + vmstate_register(NULL, i, &pre_2_10_vmstate_dummy_icp, + (void *)(uintptr_t) i); +} + +static void pre_2_10_vmstate_unregister_dummy_icp(int i) +{ + vmstate_unregister(NULL, &pre_2_10_vmstate_dummy_icp, + (void *)(uintptr_t) i); +} + +static inline int xics_max_server_number(void) +{ + return DIV_ROUND_UP(max_cpus * kvmppc_smt_threads(), smp_threads); +} + static void xics_system_init(MachineState *machine, int nr_irqs, Error **errp) { sPAPRMachineState *spapr = SPAPR_MACHINE(machine); + sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(machine); if (kvm_enabled()) { if (machine_kernel_irqchip_allowed(machine) && @@ -151,6 +191,17 @@ static void xics_system_init(MachineState *machine, int nr_irqs, Error **errp) return; } } + + if (smc->pre_2_10_has_unused_icps) { + int i; + + for (i = 0; i < xics_max_server_number(); i++) { + /* Dummy entries get deregistered when real ICPState objects + * are registered during CPU core hotplug. + */ + pre_2_10_vmstate_register_dummy_icp(i); + } + } } static int spapr_fixup_cpu_smt_dt(void *fdt, int offset, PowerPCCPU *cpu, @@ -979,7 +1030,6 @@ static void *spapr_build_fdt(sPAPRMachineState *spapr, void *fdt; sPAPRPHBState *phb; char *buf; - int smt = kvmppc_smt_threads(); fdt = g_malloc0(FDT_MAX_SIZE); _FDT((fdt_create_empty_tree(fdt, FDT_MAX_SIZE))); @@ -1019,7 +1069,7 @@ static void *spapr_build_fdt(sPAPRMachineState *spapr, _FDT(fdt_setprop_cell(fdt, 0, "#size-cells", 2)); /* /interrupt controller */ - spapr_dt_xics(DIV_ROUND_UP(max_cpus * smt, smp_threads), fdt, PHANDLE_XICP); + spapr_dt_xics(xics_max_server_number(), fdt, PHANDLE_XICP); ret = spapr_populate_memory(spapr, fdt); if (ret < 0) { @@ -1326,7 +1376,6 @@ static void ppc_spapr_reset(void) * Set the GR bit in PATB so that we know there is no HPT. 
*/ spapr->patb_entry = PATBE1_GR; } else { - spapr->patb_entry = 0; spapr_setup_hpt_and_vrma(spapr); } @@ -1346,6 +1395,8 @@ static void ppc_spapr_reset(void) if (!spapr->cas_reboot) { spapr_ovec_cleanup(spapr->ov5_cas); spapr->ov5_cas = spapr_ovec_new(); + + ppc_set_compat_all(spapr->max_compat_pvr, &error_fatal); } fdt = spapr_build_fdt(spapr, rtas_addr, spapr->rtas_size); @@ -1443,6 +1494,18 @@ static int spapr_post_load(void *opaque, int version_id) err = spapr_rtc_import_offset(&spapr->rtc, spapr->rtc_offset); } + if (spapr->patb_entry) { + PowerPCCPU *cpu = POWERPC_CPU(first_cpu); + bool radix = !!(spapr->patb_entry & PATBE1_GR); + bool gtse = !!(cpu->env.spr[SPR_LPCR] & LPCR_GTSE); + + err = kvmppc_configure_v3_mmu(cpu, radix, gtse, spapr->patb_entry); + if (err) { + error_report("Process table config unsupported by the host"); + return -EINVAL; + } + } + return err; } @@ -1558,13 +1621,19 @@ static int htab_save_setup(QEMUFile *f, void *opaque) sPAPRMachineState *spapr = opaque; /* "Iteration" header */ - qemu_put_be32(f, spapr->htab_shift); + if (!spapr->htab_shift) { + qemu_put_be32(f, -1); + } else { + qemu_put_be32(f, spapr->htab_shift); + } if (spapr->htab) { spapr->htab_save_index = 0; spapr->htab_first_pass = true; } else { - assert(kvm_enabled()); + if (spapr->htab_shift) { + assert(kvm_enabled()); + } } @@ -1710,7 +1779,12 @@ static int htab_save_iterate(QEMUFile *f, void *opaque) int rc = 0; /* Iteration header */ - qemu_put_be32(f, 0); + if (!spapr->htab_shift) { + qemu_put_be32(f, -1); + return 0; + } else { + qemu_put_be32(f, 0); + } if (!spapr->htab) { assert(kvm_enabled()); @@ -1744,7 +1818,12 @@ static int htab_save_complete(QEMUFile *f, void *opaque) int fd; /* Iteration header */ - qemu_put_be32(f, 0); + if (!spapr->htab_shift) { + qemu_put_be32(f, -1); + return 0; + } else { + qemu_put_be32(f, 0); + } if (!spapr->htab) { int rc; @@ -1788,6 +1867,11 @@ static int htab_load(QEMUFile *f, void *opaque, int version_id) section_hdr = qemu_get_be32(f); + if (section_hdr == -1) { + spapr_free_hpt(spapr); + return 0; + } + if (section_hdr) { Error *local_err = NULL; @@ -2131,7 +2215,7 @@ static void ppc_spapr_init(MachineState *machine) machine->cpu_model = kvm_enabled() ? 
"host" : smc->tcg_default_cpu; } - ppc_cpu_parse_features(machine->cpu_model); + spapr_cpu_parse_features(spapr); spapr_init_cpus(spapr); @@ -2503,6 +2587,10 @@ static void spapr_machine_initfn(Object *obj) " place of standard EPOW events when possible" " (required for memory hot-unplug support)", NULL); + + ppc_compat_add_property(obj, "max-cpu-compat", &spapr->max_compat_pvr, + "Maximum permitted CPU compatibility mode", + &error_fatal); } static void spapr_machine_finalizefn(Object *obj) @@ -2548,12 +2636,6 @@ static void spapr_add_lmbs(DeviceState *dev, uint64_t addr_start, uint64_t size, spapr_drc_attach(drc, dev, fdt, fdt_offset, !dev->hotplugged, errp); addr += SPAPR_MEMORY_BLOCK_SIZE; - if (!dev->hotplugged) { - sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); - /* guests expect coldplugged LMBs to be pre-allocated */ - drck->set_allocation_state(drc, SPAPR_DR_ALLOCATION_STATE_USABLE); - drck->set_isolation_state(drc, SPAPR_DR_ISOLATION_STATE_UNISOLATED); - } } /* send hotplug notification to the * guest only in case of hotplugged memory @@ -2806,9 +2888,24 @@ static void spapr_core_unplug(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { MachineState *ms = MACHINE(qdev_get_machine()); + sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(ms); CPUCore *cc = CPU_CORE(dev); CPUArchId *core_slot = spapr_find_cpu_slot(ms, cc->core_id, NULL); + if (smc->pre_2_10_has_unused_icps) { + sPAPRCPUCore *sc = SPAPR_CPU_CORE(OBJECT(dev)); + sPAPRCPUCoreClass *scc = SPAPR_CPU_CORE_GET_CLASS(OBJECT(cc)); + const char *typename = object_class_get_name(scc->cpu_class); + size_t size = object_type_get_instance_size(typename); + int i; + + for (i = 0; i < cc->nr_threads; i++) { + CPUState *cs = CPU(sc->threads + i * size); + + pre_2_10_vmstate_register_dummy_icp(cs->cpu_index); + } + } + assert(core_slot); core_slot->cpu = NULL; object_unparent(OBJECT(dev)); @@ -2860,6 +2957,7 @@ static void spapr_core_plug(HotplugHandler *hotplug_dev, DeviceState *dev, { sPAPRMachineState *spapr = SPAPR_MACHINE(OBJECT(hotplug_dev)); MachineClass *mc = MACHINE_GET_CLASS(spapr); + sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(mc); sPAPRCPUCore *core = SPAPR_CPU_CORE(OBJECT(dev)); CPUCore *cc = CPU_CORE(dev); CPUState *cs = CPU(core->threads); @@ -2905,17 +3003,23 @@ static void spapr_core_plug(HotplugHandler *hotplug_dev, DeviceState *dev, * of hotplugged CPUs. */ spapr_hotplug_req_add_by_index(drc); - } else { - /* - * Set the right DRC states for cold plugged CPU. 
- */ - if (drc) { - sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); - drck->set_allocation_state(drc, SPAPR_DR_ALLOCATION_STATE_USABLE); - drck->set_isolation_state(drc, SPAPR_DR_ISOLATION_STATE_UNISOLATED); - } } core_slot->cpu = OBJECT(dev); + + if (smc->pre_2_10_has_unused_icps) { + sPAPRCPUCoreClass *scc = SPAPR_CPU_CORE_GET_CLASS(OBJECT(cc)); + const char *typename = object_class_get_name(scc->cpu_class); + size_t size = object_type_get_instance_size(typename); + int i; + + for (i = 0; i < cc->nr_threads; i++) { + sPAPRCPUCore *sc = SPAPR_CPU_CORE(dev); + void *obj = sc->threads + i * size; + + cs = CPU(obj); + pre_2_10_vmstate_unregister_dummy_icp(cs->cpu_index); + } + } } static void spapr_core_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, @@ -3356,7 +3460,12 @@ DEFINE_SPAPR_MACHINE(2_10, "2.10", true); * pseries-2.9 */ #define SPAPR_COMPAT_2_9 \ - HW_COMPAT_2_9 + HW_COMPAT_2_9 \ + { \ + .driver = TYPE_POWERPC_CPU, \ + .property = "pre-2.10-migration", \ + .value = "on", \ + }, \ static void spapr_machine_2_9_instance_options(MachineState *machine) { @@ -3365,9 +3474,12 @@ static void spapr_machine_2_9_instance_options(MachineState *machine) static void spapr_machine_2_9_class_options(MachineClass *mc) { + sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(mc); + spapr_machine_2_10_class_options(mc); SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_2_9); mc->numa_auto_assign_ram = numa_legacy_auto_assign_ram; + smc->pre_2_10_has_unused_icps = true; } DEFINE_SPAPR_MACHINE(2_9, "2.9", false); @@ -3580,9 +3692,6 @@ DEFINE_SPAPR_MACHINE(2_4, "2.4", false); static void spapr_machine_2_3_instance_options(MachineState *machine) { spapr_machine_2_4_instance_options(machine); - savevm_skip_section_footers(); - global_state_set_optional(); - savevm_skip_configuration(); } static void spapr_machine_2_3_class_options(MachineClass *mc) diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c index 9fb896b407..ea278ce2a7 100644 --- a/hw/ppc/spapr_cpu_core.c +++ b/hw/ppc/spapr_cpu_core.c @@ -20,6 +20,57 @@ #include "sysemu/numa.h" #include "qemu/error-report.h" +void spapr_cpu_parse_features(sPAPRMachineState *spapr) +{ + /* + * Backwards compatibility hack: + * + * CPUs had a "compat=" property which didn't make sense for + * anything except pseries. It was replaced by "max-cpu-compat" + * machine option. This supports old command lines like + * -cpu POWER8,compat=power7 + * By stripping the compat option and applying it to the machine + * before passing it on to the cpu level parser. 
+ */ + gchar **inpieces; + int i, j; + gchar *compat_str = NULL; + + inpieces = g_strsplit(MACHINE(spapr)->cpu_model, ",", 0); + + /* inpieces[0] is the actual model string */ + i = 1; + j = 1; + while (inpieces[i]) { + if (g_str_has_prefix(inpieces[i], "compat=")) { + /* in case of multiple compat= options */ + g_free(compat_str); + compat_str = inpieces[i]; + } else { + j++; + } + + i++; + /* Excise compat options from list */ + inpieces[j] = inpieces[i]; + } + + if (compat_str) { + char *val = compat_str + strlen("compat="); + gchar *newprops = g_strjoinv(",", inpieces); + + object_property_set_str(OBJECT(spapr), val, "max-cpu-compat", + &error_fatal); + + ppc_cpu_parse_features(newprops); + g_free(newprops); + } else { + ppc_cpu_parse_features(MACHINE(spapr)->cpu_model); + } + + g_strfreev(inpieces); +} + static void spapr_cpu_reset(void *opaque) { sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); @@ -67,16 +118,6 @@ static void spapr_cpu_init(sPAPRMachineState *spapr, PowerPCCPU *cpu, /* Enable PAPR mode in TCG or KVM */ cpu_ppc_set_papr(cpu, PPC_VIRTUAL_HYPERVISOR(spapr)); - if (cpu->max_compat) { - Error *local_err = NULL; - - ppc_set_compat(cpu, cpu->max_compat, &local_err); - if (local_err) { - error_propagate(errp, local_err); - return; - } - } - qemu_register_reset(spapr_cpu_reset, cpu); spapr_cpu_reset(cpu); } @@ -137,7 +178,7 @@ static void spapr_cpu_core_realize_child(Object *child, Error **errp) sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); CPUState *cs = CPU(child); PowerPCCPU *cpu = POWERPC_CPU(cs); - Object *obj = NULL; + Object *obj; object_property_set_bool(child, true, "realized", &local_err); if (local_err) { @@ -157,13 +198,14 @@ static void spapr_cpu_core_realize_child(Object *child, Error **errp) object_property_add_const_link(obj, ICP_PROP_CPU, child, &error_abort); object_property_set_bool(obj, true, "realized", &local_err); if (local_err) { - goto error; + goto free_icp; } return; -error: +free_icp: object_unparent(obj); +error: error_propagate(errp, local_err); } diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c index 5cb75bbf34..bd40b84cfc 100644 --- a/hw/ppc/spapr_drc.c +++ b/hw/ppc/spapr_drc.c @@ -46,30 +46,64 @@ uint32_t spapr_drc_index(sPAPRDRConnector *drc) | (drc->id & DRC_INDEX_ID_MASK); } -static uint32_t set_isolation_state(sPAPRDRConnector *drc, - sPAPRDRIsolationState state) +static uint32_t drc_isolate_physical(sPAPRDRConnector *drc) { - trace_spapr_drc_set_isolation_state(spapr_drc_index(drc), state); - /* if the guest is configuring a device attached to this DRC, we * should reset the configuration state at this point since it may * no longer be reliable (guest released device and needs to start * over, or unplug occurred so the FDT is no longer valid) */ - if (state == SPAPR_DR_ISOLATION_STATE_ISOLATED) { - g_free(drc->ccs); - drc->ccs = NULL; - } + g_free(drc->ccs); + drc->ccs = NULL; - if (state == SPAPR_DR_ISOLATION_STATE_UNISOLATED) { - /* cannot unisolate a non-existent resource, and, or resources - * which are in an 'UNUSABLE' allocation state. (PAPR 2.7, 13.5.3.5) - */ - if (!drc->dev || - drc->allocation_state == SPAPR_DR_ALLOCATION_STATE_UNUSABLE) { - return RTAS_OUT_NO_SUCH_INDICATOR; + drc->isolation_state = SPAPR_DR_ISOLATION_STATE_ISOLATED; + + /* if we're awaiting release, but still in an unconfigured state, + * it's likely the guest is still in the process of configuring + * the device and is transitioning the devices to an ISOLATED + * state as a part of that process. 
so we only complete the + * removal when this transition happens for a device in a + * configured state, as suggested by the state diagram from PAPR+ + * 2.7, 13.4 + */ + if (drc->awaiting_release) { + uint32_t drc_index = spapr_drc_index(drc); + if (drc->configured) { + trace_spapr_drc_set_isolation_state_finalizing(drc_index); + spapr_drc_detach(drc, DEVICE(drc->dev), NULL); + } else { + trace_spapr_drc_set_isolation_state_deferring(drc_index); } } + drc->configured = false; + + return RTAS_OUT_SUCCESS; +} + +static uint32_t drc_unisolate_physical(sPAPRDRConnector *drc) +{ + /* cannot unisolate a non-existent resource, and, or resources + * which are in an 'UNUSABLE' allocation state. (PAPR 2.7, + * 13.5.3.5) + */ + if (!drc->dev) { + return RTAS_OUT_NO_SUCH_INDICATOR; + } + + drc->isolation_state = SPAPR_DR_ISOLATION_STATE_UNISOLATED; + + return RTAS_OUT_SUCCESS; +} + +static uint32_t drc_isolate_logical(sPAPRDRConnector *drc) +{ + /* if the guest is configuring a device attached to this DRC, we + * should reset the configuration state at this point since it may + * no longer be reliable (guest released device and needs to start + * over, or unplug occurred so the FDT is no longer valid) + */ + g_free(drc->ccs); + drc->ccs = NULL; /* * Fail any requests to ISOLATE the LMB DRC if this LMB doesn't @@ -81,66 +115,87 @@ static uint32_t set_isolation_state(sPAPRDRConnector *drc, * If the LMB being removed doesn't belong to a DIMM device that is * actually being unplugged, fail the isolation request here. */ - if (spapr_drc_type(drc) == SPAPR_DR_CONNECTOR_TYPE_LMB) { - if ((state == SPAPR_DR_ISOLATION_STATE_ISOLATED) && - !drc->awaiting_release) { - return RTAS_OUT_HW_ERROR; - } + if (spapr_drc_type(drc) == SPAPR_DR_CONNECTOR_TYPE_LMB + && !drc->awaiting_release) { + return RTAS_OUT_HW_ERROR; } - drc->isolation_state = state; + drc->isolation_state = SPAPR_DR_ISOLATION_STATE_ISOLATED; - if (drc->isolation_state == SPAPR_DR_ISOLATION_STATE_ISOLATED) { - /* if we're awaiting release, but still in an unconfigured state, - * it's likely the guest is still in the process of configuring - * the device and is transitioning the devices to an ISOLATED - * state as a part of that process. so we only complete the - * removal when this transition happens for a device in a - * configured state, as suggested by the state diagram from - * PAPR+ 2.7, 13.4 - */ - if (drc->awaiting_release) { - uint32_t drc_index = spapr_drc_index(drc); - if (drc->configured) { - trace_spapr_drc_set_isolation_state_finalizing(drc_index); - spapr_drc_detach(drc, DEVICE(drc->dev), NULL); - } else { - trace_spapr_drc_set_isolation_state_deferring(drc_index); - } + /* if we're awaiting release, but still in an unconfigured state, + * it's likely the guest is still in the process of configuring + * the device and is transitioning the devices to an ISOLATED + * state as a part of that process. 
so we only complete the + * removal when this transition happens for a device in a + * configured state, as suggested by the state diagram from PAPR+ + * 2.7, 13.4 + */ + if (drc->awaiting_release) { + uint32_t drc_index = spapr_drc_index(drc); + if (drc->configured) { + trace_spapr_drc_set_isolation_state_finalizing(drc_index); + spapr_drc_detach(drc, DEVICE(drc->dev), NULL); + } else { + trace_spapr_drc_set_isolation_state_deferring(drc_index); } - drc->configured = false; } + drc->configured = false; return RTAS_OUT_SUCCESS; } -static uint32_t set_allocation_state(sPAPRDRConnector *drc, - sPAPRDRAllocationState state) +static uint32_t drc_unisolate_logical(sPAPRDRConnector *drc) { - trace_spapr_drc_set_allocation_state(spapr_drc_index(drc), state); + /* cannot unisolate a non-existent resource, and, or resources + * which are in an 'UNUSABLE' allocation state. (PAPR 2.7, + * 13.5.3.5) + */ + if (!drc->dev || + drc->allocation_state == SPAPR_DR_ALLOCATION_STATE_UNUSABLE) { + return RTAS_OUT_NO_SUCH_INDICATOR; + } + + drc->isolation_state = SPAPR_DR_ISOLATION_STATE_UNISOLATED; + + return RTAS_OUT_SUCCESS; +} - if (state == SPAPR_DR_ALLOCATION_STATE_USABLE) { - /* if there's no resource/device associated with the DRC, there's - * no way for us to put it in an allocation state consistent with - * being 'USABLE'. PAPR 2.7, 13.5.3.4 documents that this should - * result in an RTAS return code of -3 / "no such indicator" +static uint32_t drc_set_usable(sPAPRDRConnector *drc) +{ + /* if there's no resource/device associated with the DRC, there's + * no way for us to put it in an allocation state consistent with + * being 'USABLE'. PAPR 2.7, 13.5.3.4 documents that this should + * result in an RTAS return code of -3 / "no such indicator" + */ + if (!drc->dev) { + return RTAS_OUT_NO_SUCH_INDICATOR; + } + if (drc->awaiting_release && drc->awaiting_allocation) { + /* kernel is acknowledging a previous hotplug event + * while we are already removing it. + * it's safe to ignore awaiting_allocation here since we know the + * situation is predicated on the guest either already having done + * so (boot-time hotplug), or never being able to acquire in the + * first place (hotplug followed by immediate unplug). */ - if (!drc->dev) { - return RTAS_OUT_NO_SUCH_INDICATOR; - } + return RTAS_OUT_NO_SUCH_INDICATOR; } - if (spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PCI) { - drc->allocation_state = state; - if (drc->awaiting_release && - drc->allocation_state == SPAPR_DR_ALLOCATION_STATE_UNUSABLE) { - uint32_t drc_index = spapr_drc_index(drc); - trace_spapr_drc_set_allocation_state_finalizing(drc_index); - spapr_drc_detach(drc, DEVICE(drc->dev), NULL); - } else if (drc->allocation_state == SPAPR_DR_ALLOCATION_STATE_USABLE) { - drc->awaiting_allocation = false; - } + drc->allocation_state = SPAPR_DR_ALLOCATION_STATE_USABLE; + drc->awaiting_allocation = false; + + return RTAS_OUT_SUCCESS; +} + +static uint32_t drc_set_unusable(sPAPRDRConnector *drc) +{ + drc->allocation_state = SPAPR_DR_ALLOCATION_STATE_UNUSABLE; + if (drc->awaiting_release) { + uint32_t drc_index = spapr_drc_index(drc); + trace_spapr_drc_set_allocation_state_finalizing(drc_index); + spapr_drc_detach(drc, DEVICE(drc->dev), NULL); } + return RTAS_OUT_SUCCESS; } @@ -172,12 +227,6 @@ static const char *spapr_drc_name(sPAPRDRConnector *drc) return g_strdup_printf("%s%d", drck->drc_name_prefix, drc->id); } -/* has the guest been notified of device attachment? 
*/ -static void set_signalled(sPAPRDRConnector *drc) -{ - drc->signalled = true; -} - /* * dr-entity-sense sensor value * returned via get-sensor-state RTAS calls @@ -304,33 +353,12 @@ void spapr_drc_attach(sPAPRDRConnector *drc, DeviceState *d, void *fdt, } g_assert(fdt || coldplug); - /* NOTE: setting initial isolation state to UNISOLATED means we can't - * detach unless guest has a userspace/kernel that moves this state - * back to ISOLATED in response to an unplug event, or this is done - * manually by the admin prior. if we force things while the guest - * may be accessing the device, we can easily crash the guest, so we - * we defer completion of removal in such cases to the reset() hook. - */ - if (spapr_drc_type(drc) == SPAPR_DR_CONNECTOR_TYPE_PCI) { - drc->isolation_state = SPAPR_DR_ISOLATION_STATE_UNISOLATED; - } drc->dr_indicator = SPAPR_DR_INDICATOR_ACTIVE; drc->dev = d; drc->fdt = fdt; drc->fdt_start_offset = fdt_start_offset; drc->configured = coldplug; - /* 'logical' DR resources such as memory/cpus are in some cases treated - * as a pool of resources from which the guest is free to choose from - * based on only a count. for resources that can be assigned in this - * fashion, we must assume the resource is signalled immediately - * since a single hotplug request might make an arbitrary number of - * such attached resources available to the guest, as opposed to - * 'physical' DR resources such as PCI where each device/resource is - * signalled individually. - */ - drc->signalled = (spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PCI) - ? true : coldplug; if (spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PCI) { drc->awaiting_allocation = true; @@ -342,49 +370,8 @@ void spapr_drc_attach(sPAPRDRConnector *drc, DeviceState *d, void *fdt, NULL, 0, NULL); } -void spapr_drc_detach(sPAPRDRConnector *drc, DeviceState *d, Error **errp) +static void spapr_drc_release(sPAPRDRConnector *drc) { - trace_spapr_drc_detach(spapr_drc_index(drc)); - - /* if we've signalled device presence to the guest, or if the guest - * has gone ahead and configured the device (via manually-executed - * device add via drmgr in guest, namely), we need to wait - * for the guest to quiesce the device before completing detach. - * Otherwise, we can assume the guest hasn't seen it and complete the - * detach immediately. Note that there is a small race window - * just before, or during, configuration, which is this context - * refers mainly to fetching the device tree via RTAS. - * During this window the device access will be arbitrated by - * associated DRC, which will simply fail the RTAS calls as invalid. - * This is recoverable within guest and current implementations of - * drmgr should be able to cope. 
- */ - if (!drc->signalled && !drc->configured) { - /* if the guest hasn't seen the device we can't rely on it to - * set it back to an isolated state via RTAS, so do it here manually - */ - drc->isolation_state = SPAPR_DR_ISOLATION_STATE_ISOLATED; - } - - if (drc->isolation_state != SPAPR_DR_ISOLATION_STATE_ISOLATED) { - trace_spapr_drc_awaiting_isolated(spapr_drc_index(drc)); - drc->awaiting_release = true; - return; - } - - if (spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PCI && - drc->allocation_state != SPAPR_DR_ALLOCATION_STATE_UNUSABLE) { - trace_spapr_drc_awaiting_unusable(spapr_drc_index(drc)); - drc->awaiting_release = true; - return; - } - - if (drc->awaiting_allocation) { - drc->awaiting_release = true; - trace_spapr_drc_awaiting_allocation(spapr_drc_index(drc)); - return; - } - drc->dr_indicator = SPAPR_DR_INDICATOR_INACTIVE; /* Calling release callbacks based on spapr_drc_type(drc). */ @@ -412,6 +399,32 @@ void spapr_drc_detach(sPAPRDRConnector *drc, DeviceState *d, Error **errp) drc->dev = NULL; } +void spapr_drc_detach(sPAPRDRConnector *drc, DeviceState *d, Error **errp) +{ + trace_spapr_drc_detach(spapr_drc_index(drc)); + + if (drc->isolation_state != SPAPR_DR_ISOLATION_STATE_ISOLATED) { + trace_spapr_drc_awaiting_isolated(spapr_drc_index(drc)); + drc->awaiting_release = true; + return; + } + + if (spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PCI && + drc->allocation_state != SPAPR_DR_ALLOCATION_STATE_UNUSABLE) { + trace_spapr_drc_awaiting_unusable(spapr_drc_index(drc)); + drc->awaiting_release = true; + return; + } + + if (drc->awaiting_allocation) { + drc->awaiting_release = true; + trace_spapr_drc_awaiting_allocation(spapr_drc_index(drc)); + return; + } + + spapr_drc_release(drc); +} + static bool release_pending(sPAPRDRConnector *drc) { return drc->awaiting_release; @@ -420,7 +433,6 @@ static bool release_pending(sPAPRDRConnector *drc) static void reset(DeviceState *d) { sPAPRDRConnector *drc = SPAPR_DR_CONNECTOR(d); - sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); trace_spapr_drc_reset(spapr_drc_index(drc)); @@ -428,32 +440,26 @@ static void reset(DeviceState *d) drc->ccs = NULL; /* immediately upon reset we can safely assume DRCs whose devices - * are pending removal can be safely removed, and that they will - * subsequently be left in an ISOLATED state. move the DRC to this - * state in these cases (which will in turn complete any pending - * device removals) + * are pending removal can be safely removed. */ if (drc->awaiting_release) { - drck->set_isolation_state(drc, SPAPR_DR_ISOLATION_STATE_ISOLATED); - /* generally this should also finalize the removal, but if the device - * hasn't yet been configured we normally defer removal under the - * assumption that this transition is taking place as part of device - * configuration. 
so check if we're still waiting after this, and - * force removal if we are - */ - if (drc->awaiting_release) { - spapr_drc_detach(drc, DEVICE(drc->dev), NULL); - } - - /* non-PCI devices may be awaiting a transition to UNUSABLE */ - if (spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PCI && - drc->awaiting_release) { - drck->set_allocation_state(drc, SPAPR_DR_ALLOCATION_STATE_UNUSABLE); - } + spapr_drc_release(drc); } - if (drck->dr_entity_sense(drc) == SPAPR_DR_ENTITY_SENSE_PRESENT) { - drck->set_signalled(drc); + drc->awaiting_allocation = false; + + if (drc->dev) { + /* A device present at reset is coldplugged */ + drc->isolation_state = SPAPR_DR_ISOLATION_STATE_UNISOLATED; + if (spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PCI) { + drc->allocation_state = SPAPR_DR_ALLOCATION_STATE_USABLE; + } + } else { + /* Otherwise device is absent, but might be hotplugged */ + drc->isolation_state = SPAPR_DR_ISOLATION_STATE_ISOLATED; + if (spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PCI) { + drc->allocation_state = SPAPR_DR_ALLOCATION_STATE_UNUSABLE; + } } } @@ -479,7 +485,7 @@ static bool spapr_drc_needed(void *opaque) case SPAPR_DR_CONNECTOR_TYPE_LMB: rc = !((drc->isolation_state == SPAPR_DR_ISOLATION_STATE_UNISOLATED) && (drc->allocation_state == SPAPR_DR_ALLOCATION_STATE_USABLE) && - drc->configured && drc->signalled && !drc->awaiting_release); + drc->configured && !drc->awaiting_release); break; case SPAPR_DR_CONNECTOR_TYPE_PHB: case SPAPR_DR_CONNECTOR_TYPE_VIO: @@ -501,7 +507,6 @@ static const VMStateDescription vmstate_spapr_drc = { VMSTATE_BOOL(configured, sPAPRDRConnector), VMSTATE_BOOL(awaiting_release, sPAPRDRConnector), VMSTATE_BOOL(awaiting_allocation, sPAPRDRConnector), - VMSTATE_BOOL(signalled, sPAPRDRConnector), VMSTATE_END_OF_LIST() } }; @@ -596,10 +601,7 @@ static void spapr_dr_connector_class_init(ObjectClass *k, void *data) dk->reset = reset; dk->realize = realize; dk->unrealize = unrealize; - drck->set_isolation_state = set_isolation_state; - drck->set_allocation_state = set_allocation_state; drck->release_pending = release_pending; - drck->set_signalled = set_signalled; /* * Reason: it crashes FIXME find and document the real reason */ @@ -611,6 +613,8 @@ static void spapr_drc_physical_class_init(ObjectClass *k, void *data) sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_CLASS(k); drck->dr_entity_sense = physical_entity_sense; + drck->isolate = drc_isolate_physical; + drck->unisolate = drc_unisolate_physical; } static void spapr_drc_logical_class_init(ObjectClass *k, void *data) @@ -618,6 +622,8 @@ static void spapr_drc_logical_class_init(ObjectClass *k, void *data) sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_CLASS(k); drck->dr_entity_sense = logical_entity_sense; + drck->isolate = drc_isolate_logical; + drck->unisolate = drc_unisolate_logical; } static void spapr_drc_cpu_class_init(ObjectClass *k, void *data) @@ -858,24 +864,45 @@ static uint32_t rtas_set_isolation_state(uint32_t idx, uint32_t state) sPAPRDRConnectorClass *drck; if (!drc) { - return RTAS_OUT_PARAM_ERROR; + return RTAS_OUT_NO_SUCH_INDICATOR; } + trace_spapr_drc_set_isolation_state(spapr_drc_index(drc), state); + drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); - return drck->set_isolation_state(drc, state); + + switch (state) { + case SPAPR_DR_ISOLATION_STATE_ISOLATED: + return drck->isolate(drc); + + case SPAPR_DR_ISOLATION_STATE_UNISOLATED: + return drck->unisolate(drc); + + default: + return RTAS_OUT_PARAM_ERROR; + } } static uint32_t rtas_set_allocation_state(uint32_t idx, uint32_t state) { 
sPAPRDRConnector *drc = spapr_drc_by_index(idx); - sPAPRDRConnectorClass *drck; - if (!drc) { - return RTAS_OUT_PARAM_ERROR; + if (!drc || !object_dynamic_cast(OBJECT(drc), TYPE_SPAPR_DRC_LOGICAL)) { + return RTAS_OUT_NO_SUCH_INDICATOR; } - drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); - return drck->set_allocation_state(drc, state); + trace_spapr_drc_set_allocation_state(spapr_drc_index(drc), state); + + switch (state) { + case SPAPR_DR_ALLOCATION_STATE_USABLE: + return drc_set_usable(drc); + + case SPAPR_DR_ALLOCATION_STATE_UNUSABLE: + return drc_set_unusable(drc); + + default: + return RTAS_OUT_PARAM_ERROR; + } } static uint32_t rtas_set_dr_indicator(uint32_t idx, uint32_t state) diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c index 171aedc7e0..587a3dacb2 100644 --- a/hw/ppc/spapr_events.c +++ b/hw/ppc/spapr_events.c @@ -475,13 +475,6 @@ static void spapr_powerdown_req(Notifier *n, void *opaque) RTAS_LOG_TYPE_EPOW))); } -static void spapr_hotplug_set_signalled(uint32_t drc_index) -{ - sPAPRDRConnector *drc = spapr_drc_by_index(drc_index); - sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); - drck->set_signalled(drc); -} - static void spapr_hotplug_req_event(uint8_t hp_id, uint8_t hp_action, sPAPRDRConnectorType drc_type, union drc_identifier *drc_id) @@ -528,9 +521,6 @@ static void spapr_hotplug_req_event(uint8_t hp_id, uint8_t hp_action, switch (drc_type) { case SPAPR_DR_CONNECTOR_TYPE_PCI: hp->hotplug_type = RTAS_LOG_V6_HP_TYPE_PCI; - if (hp->hotplug_action == RTAS_LOG_V6_HP_ACTION_ADD) { - spapr_hotplug_set_signalled(drc_id->index); - } break; case SPAPR_DR_CONNECTOR_TYPE_LMB: hp->hotplug_type = RTAS_LOG_V6_HP_TYPE_MEMORY; diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c index aa1ffea9e5..8624ce8d5b 100644 --- a/hw/ppc/spapr_hcall.c +++ b/hw/ppc/spapr_hcall.c @@ -1045,11 +1045,11 @@ static target_ulong h_signal_sys_reset(PowerPCCPU *cpu, } } -static uint32_t cas_check_pvr(PowerPCCPU *cpu, target_ulong *addr, - Error **errp) +static uint32_t cas_check_pvr(sPAPRMachineState *spapr, PowerPCCPU *cpu, + target_ulong *addr, Error **errp) { bool explicit_match = false; /* Matched the CPU's real PVR */ - uint32_t max_compat = cpu->max_compat; + uint32_t max_compat = spapr->max_compat_pvr; uint32_t best_compat = 0; int i; @@ -1105,7 +1105,7 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu, bool guest_radix; Error *local_err = NULL; - cas_pvr = cas_check_pvr(cpu, &addr, &local_err); + cas_pvr = cas_check_pvr(spapr, cpu, &addr, &local_err); if (local_err) { error_report_err(local_err); return H_HARDWARE; diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c index 0341bc069d..8656a54a3e 100644 --- a/hw/ppc/spapr_iommu.c +++ b/hw/ppc/spapr_iommu.c @@ -231,7 +231,7 @@ static const VMStateDescription vmstate_spapr_tce_table = { .post_load = spapr_tce_table_post_load, .fields = (VMStateField []) { /* Sanity check */ - VMSTATE_UINT32_EQUAL(liobn, sPAPRTCETable), + VMSTATE_UINT32_EQUAL(liobn, sPAPRTCETable, NULL), /* IOMMU state */ VMSTATE_UINT32(mig_nb_table, sPAPRTCETable), diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c index 0b447f2eed..3b37dcdc09 100644 --- a/hw/ppc/spapr_pci.c +++ b/hw/ppc/spapr_pci.c @@ -1848,7 +1848,7 @@ static const VMStateDescription vmstate_spapr_pci_lsi = { .version_id = 1, .minimum_version_id = 1, .fields = (VMStateField[]) { - VMSTATE_UINT32_EQUAL(irq, struct spapr_pci_lsi), + VMSTATE_UINT32_EQUAL(irq, struct spapr_pci_lsi, NULL), VMSTATE_END_OF_LIST() }, @@ -1936,7 +1936,7 @@ static const VMStateDescription 
vmstate_spapr_pci = { .pre_save = spapr_pci_pre_save, .post_load = spapr_pci_post_load, .fields = (VMStateField[]) { - VMSTATE_UINT64_EQUAL(buid, sPAPRPHBState), + VMSTATE_UINT64_EQUAL(buid, sPAPRPHBState, NULL), VMSTATE_UINT32_TEST(mig_liobn, sPAPRPHBState, pre_2_8_migration), VMSTATE_UINT64_TEST(mig_mem_win_addr, sPAPRPHBState, pre_2_8_migration), VMSTATE_UINT64_TEST(mig_mem_win_size, sPAPRPHBState, pre_2_8_migration), diff --git a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c index a0ee4fd265..ea3bc8bd9e 100644 --- a/hw/ppc/spapr_vio.c +++ b/hw/ppc/spapr_vio.c @@ -557,8 +557,8 @@ const VMStateDescription vmstate_spapr_vio = { .minimum_version_id = 1, .fields = (VMStateField[]) { /* Sanity check */ - VMSTATE_UINT32_EQUAL(reg, VIOsPAPRDevice), - VMSTATE_UINT32_EQUAL(irq, VIOsPAPRDevice), + VMSTATE_UINT32_EQUAL(reg, VIOsPAPRDevice, NULL), + VMSTATE_UINT32_EQUAL(irq, VIOsPAPRDevice, NULL), /* General VIO device state */ VMSTATE_UINT64(signal_state, VIOsPAPRDevice), diff --git a/hw/ssi/xilinx_spips.c b/hw/ssi/xilinx_spips.c index da8adfa443..e833028393 100644 --- a/hw/ssi/xilinx_spips.c +++ b/hw/ssi/xilinx_spips.c @@ -496,6 +496,18 @@ static const MemoryRegionOps spips_ops = { .endianness = DEVICE_LITTLE_ENDIAN, }; +static void xilinx_qspips_invalidate_mmio_ptr(XilinxQSPIPS *q) +{ + XilinxSPIPS *s = &q->parent_obj; + + if (q->lqspi_cached_addr != ~0ULL) { + /* Invalidate the current mapped mmio */ + memory_region_invalidate_mmio_ptr(&s->mmlqspi, q->lqspi_cached_addr, + LQSPI_CACHE_SIZE); + q->lqspi_cached_addr = ~0ULL; + } +} + static void xilinx_qspips_write(void *opaque, hwaddr addr, uint64_t value, unsigned size) { @@ -505,7 +517,7 @@ static void xilinx_qspips_write(void *opaque, hwaddr addr, addr >>= 2; if (addr == R_LQSPI_CFG) { - q->lqspi_cached_addr = ~0ULL; + xilinx_qspips_invalidate_mmio_ptr(q); } } @@ -517,27 +529,20 @@ static const MemoryRegionOps qspips_ops = { #define LQSPI_CACHE_SIZE 1024 -static uint64_t -lqspi_read(void *opaque, hwaddr addr, unsigned int size) +static void lqspi_load_cache(void *opaque, hwaddr addr) { - int i; XilinxQSPIPS *q = opaque; XilinxSPIPS *s = opaque; - uint32_t ret; - - if (addr >= q->lqspi_cached_addr && - addr <= q->lqspi_cached_addr + LQSPI_CACHE_SIZE - 4) { - uint8_t *retp = &q->lqspi_buf[addr - q->lqspi_cached_addr]; - ret = cpu_to_le32(*(uint32_t *)retp); - DB_PRINT_L(1, "addr: %08x, data: %08x\n", (unsigned)addr, - (unsigned)ret); - return ret; - } else { - int flash_addr = (addr / num_effective_busses(s)); - int slave = flash_addr >> LQSPI_ADDRESS_BITS; - int cache_entry = 0; - uint32_t u_page_save = s->regs[R_LQSPI_STS] & ~LQSPI_CFG_U_PAGE; - + int i; + int flash_addr = ((addr & ~(LQSPI_CACHE_SIZE - 1)) + / num_effective_busses(s)); + int slave = flash_addr >> LQSPI_ADDRESS_BITS; + int cache_entry = 0; + uint32_t u_page_save = s->regs[R_LQSPI_STS] & ~LQSPI_CFG_U_PAGE; + + if (addr < q->lqspi_cached_addr || + addr > q->lqspi_cached_addr + LQSPI_CACHE_SIZE - 4) { + xilinx_qspips_invalidate_mmio_ptr(q); s->regs[R_LQSPI_STS] &= ~LQSPI_CFG_U_PAGE; s->regs[R_LQSPI_STS] |= slave ? 
LQSPI_CFG_U_PAGE : 0; @@ -589,12 +594,43 @@ lqspi_read(void *opaque, hwaddr addr, unsigned int size) xilinx_spips_update_cs_lines(s); q->lqspi_cached_addr = flash_addr * num_effective_busses(s); + } +} + +static void *lqspi_request_mmio_ptr(void *opaque, hwaddr addr, unsigned *size, + unsigned *offset) +{ + XilinxQSPIPS *q = opaque; + hwaddr offset_within_the_region = addr & ~(LQSPI_CACHE_SIZE - 1); + + lqspi_load_cache(opaque, offset_within_the_region); + *size = LQSPI_CACHE_SIZE; + *offset = offset_within_the_region; + return q->lqspi_buf; +} + +static uint64_t +lqspi_read(void *opaque, hwaddr addr, unsigned int size) +{ + XilinxQSPIPS *q = opaque; + uint32_t ret; + + if (addr >= q->lqspi_cached_addr && + addr <= q->lqspi_cached_addr + LQSPI_CACHE_SIZE - 4) { + uint8_t *retp = &q->lqspi_buf[addr - q->lqspi_cached_addr]; + ret = cpu_to_le32(*(uint32_t *)retp); + DB_PRINT_L(1, "addr: %08x, data: %08x\n", (unsigned)addr, + (unsigned)ret); + return ret; + } else { + lqspi_load_cache(opaque, addr); return lqspi_read(opaque, addr, size); } } static const MemoryRegionOps lqspi_ops = { .read = lqspi_read, + .request_ptr = lqspi_request_mmio_ptr, .endianness = DEVICE_NATIVE_ENDIAN, .valid = { .min_access_size = 1, diff --git a/hw/usb/hcd-uhci.c b/hw/usb/hcd-uhci.c index ca72a80f27..e3562a4c60 100644 --- a/hw/usb/hcd-uhci.c +++ b/hw/usb/hcd-uhci.c @@ -415,7 +415,7 @@ static const VMStateDescription vmstate_uhci = { .post_load = uhci_post_load, .fields = (VMStateField[]) { VMSTATE_PCI_DEVICE(dev, UHCIState), - VMSTATE_UINT8_EQUAL(num_ports_vmstate, UHCIState), + VMSTATE_UINT8_EQUAL(num_ports_vmstate, UHCIState, NULL), VMSTATE_STRUCT_ARRAY(ports, UHCIState, NB_PORTS, 1, vmstate_uhci_port, UHCIPort), VMSTATE_UINT16(cmd, UHCIState), diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index 20d6a08616..301920ec1b 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -1740,7 +1740,7 @@ static void virtio_pci_realize(PCIDevice *pci_dev, Error **errp) bool pcie_port = pci_bus_is_express(pci_dev->bus) && !pci_bus_is_root(pci_dev->bus); - if (!kvm_has_many_ioeventfds()) { + if (kvm_enabled() && !kvm_has_many_ioeventfds()) { proxy->flags &= ~VIRTIO_PCI_FLAG_USE_IOEVENTFD; } diff --git a/hw/xen/xen-common.c b/hw/xen/xen-common.c index d3fa705a82..632a938dcc 100644 --- a/hw/xen/xen-common.c +++ b/hw/xen/xen-common.c @@ -138,20 +138,35 @@ static int xen_init(MachineState *ms) return -1; } qemu_add_vm_change_state_handler(xen_change_state_handler, NULL); - - global_state_set_optional(); - savevm_skip_configuration(); - savevm_skip_section_footers(); - return 0; } +static GlobalProperty xen_compat_props[] = { + { + .driver = "migration", + .property = "store-global-state", + .value = "off", + }, + { + .driver = "migration", + .property = "send-configuration", + .value = "off", + }, + { + .driver = "migration", + .property = "send-section-footer", + .value = "off", + }, + { /* end of list */ }, +}; + static void xen_accel_class_init(ObjectClass *oc, void *data) { AccelClass *ac = ACCEL_CLASS(oc); ac->name = "Xen"; ac->init_machine = xen_init; ac->allowed = &xen_allowed; + ac->global_props = xen_compat_props; } #define TYPE_XEN_ACCEL ACCEL_CLASS_NAME("xen")
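
The applesmc hunks above replace the magic 0x04/0x05 status values with named APPLESMC_ST_* flags, reject unknown command codes, and expose an error code through a third I/O port. Seen from the guest, a key read is then a fixed handshake against those ports. The sketch below (Linux/x86 only, needs root for ioperm) walks that handshake; the 0x300 base, the 0x00/0x04/0x1e port offsets and the 0x10 read-command value are assumptions for illustration, and polling of the status port between steps is omitted for brevity:

    #include <stdint.h>
    #include <stdio.h>
    #include <sys/io.h>                  /* inb/outb/ioperm, x86 Linux only */

    #define SMC_BASE  0x300              /* assumed ISA I/O base */
    #define SMC_DATA  (SMC_BASE + 0x00)  /* data port */
    #define SMC_CMD   (SMC_BASE + 0x04)  /* command/status port */
    #define SMC_ERR   (SMC_BASE + 0x1e)  /* the new error-code port */
    #define SMC_READ  0x10               /* assumed APPLESMC_READ_CMD value */

    /* Read `len` bytes of a 4-char key; returns the final error-port code. */
    static int smc_read_key(const char *key, uint8_t *buf, uint8_t len)
    {
        int i;

        outb(SMC_READ, SMC_CMD);      /* device answers NEW_CMD | ACK */
        for (i = 0; i < 4; i++) {
            outb(key[i], SMC_DATA);   /* device stays in ACK per key byte */
        }
        outb(len, SMC_DATA);          /* triggers the lookup: ACK | DATA_READY
                                       * on a hit, ST_1E_NOEXIST on a miss */
        for (i = 0; i < len; i++) {
            buf[i] = inb(SMC_DATA);   /* CMD_DONE after the last byte */
        }
        return inb(SMC_ERR);          /* reading does not clear the code */
    }

    int main(void)
    {
        uint8_t rev[6];

        if (ioperm(SMC_BASE, 0x20, 1) < 0) {   /* needs CAP_SYS_RAWIO */
            perror("ioperm");
            return 1;
        }
        printf("status_1e=0x%02x\n", smc_read_key("REV ", rev, sizeof(rev)));
        return 0;
    }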
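
A recurring one-line change across max111x, eeprom93xx, pci, pcie_aer, spapr_iommu, spapr_pci, spapr_vio and hcd-uhci is the extra trailing argument to the VMSTATE_*_EQUAL macros. The macros now take an optional hint that can be surfaced when the equality check fails at load time, and every caller in this series passes NULL to keep the old behaviour. A minimal sketch of a field list using the extended macro, with FooState and vmstate_foo as illustrative names:

    #include "qemu/osdep.h"
    #include "migration/vmstate.h"

    typedef struct FooState {     /* hypothetical device state */
        uint32_t nirq;            /* must match on both ends of migration */
        uint32_t level;
    } FooState;

    static const VMStateDescription vmstate_foo = {
        .name = "foo",
        .version_id = 1,
        .minimum_version_id = 1,
        .fields = (VMStateField[]) {
            /* Sanity check: the incoming nirq must equal the local value.
             * The trailing argument is the hint added by this series;
             * NULL (no extra hint) matches every converted caller above. */
            VMSTATE_UINT32_EQUAL(nirq, FooState, NULL),
            VMSTATE_UINT32(level, FooState),
            VMSTATE_END_OF_LIST()
        },
    };

EQUAL fields never transfer usable state; they exist purely so that a mismatched source and destination configuration fails the migration cleanly instead of loading garbage, which is why the hunks above tag several of them with a "Sanity check" comment.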
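
The xilinx_spips hunks split the linear-QSPI read path into a cache loader plus a request_ptr callback, so the memory core can map the 1 KiB cache straight into the guest instead of trapping every access, and writes to R_LQSPI_CFG invalidate the mapping via memory_region_invalidate_mmio_ptr(). The contract is compact enough to sketch in isolation; MyDevState, MY_CACHE_SIZE and the mydev_* helpers below are illustrative names, not part of the patch:

    #include "qemu/osdep.h"
    #include "exec/memory.h"

    #define MY_CACHE_SIZE 1024

    typedef struct MyDevState {
        MemoryRegion iomem;
        uint8_t cache_buf[MY_CACHE_SIZE];
    } MyDevState;

    static uint8_t backing[64 * 1024];   /* illustrative device contents */

    static void mydev_fill_cache(MyDevState *s, hwaddr base)
    {
        memcpy(s->cache_buf, &backing[base], MY_CACHE_SIZE);
    }

    /* Trapping slow path, still used by guests that never map the pointer;
     * returns a single byte for brevity. */
    static uint64_t mydev_read(void *opaque, hwaddr addr, unsigned size)
    {
        MyDevState *s = opaque;

        mydev_fill_cache(s, addr & ~(MY_CACHE_SIZE - 1));
        return s->cache_buf[addr & (MY_CACHE_SIZE - 1)];
    }

    /* Return a host pointer backing an aligned window of the region; the
     * device must invalidate it whenever the cached bytes change. */
    static void *mydev_request_mmio_ptr(void *opaque, hwaddr addr,
                                        unsigned *size, unsigned *offset)
    {
        MyDevState *s = opaque;
        hwaddr base = addr & ~(MY_CACHE_SIZE - 1);  /* align to the window */

        mydev_fill_cache(s, base);   /* cache must be valid before mapping */
        *size = MY_CACHE_SIZE;       /* length of the window in bytes */
        *offset = base;              /* window start within the region */
        return s->cache_buf;         /* host pointer the core may map */
    }

    static const MemoryRegionOps mydev_ops = {
        .read = mydev_read,
        .request_ptr = mydev_request_mmio_ptr,
        .endianness = DEVICE_NATIVE_ENDIAN,
    };

    static void mydev_invalidate_window(MyDevState *s, hwaddr base)
    {
        /* Drop any live guest mapping once the cache goes stale, as the
         * R_LQSPI_CFG write handler above does. */
        memory_region_invalidate_mmio_ptr(&s->iomem, base, MY_CACHE_SIZE);
    }

The presumable motivation is execute-in-place from the LQSPI window: with a mapped host pointer the guest can fetch instructions from the cached flash contents without an MMIO trap per access, falling back to the trapping read path only when no mapping is in place.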