summary refs log tree commit diff stats
path: root/hw
diff options
context:
space:
mode:
Diffstat (limited to 'hw')
-rw-r--r--hw/9pfs/9p-local.c1031
-rw-r--r--hw/9pfs/9p-local.h20
-rw-r--r--hw/9pfs/9p-posix-acl.c44
-rw-r--r--hw/9pfs/9p-util.c69
-rw-r--r--hw/9pfs/9p-util.h54
-rw-r--r--hw/9pfs/9p-xattr-user.c24
-rw-r--r--hw/9pfs/9p-xattr.c166
-rw-r--r--hw/9pfs/9p-xattr.h87
-rw-r--r--hw/9pfs/9p.c19
-rw-r--r--hw/9pfs/Makefile.objs2
-rw-r--r--hw/9pfs/cofile.c2
-rw-r--r--hw/acpi/Makefile.objs1
-rw-r--r--hw/acpi/aml-build.c2
-rw-r--r--hw/acpi/bios-linker-loader.c66
-rw-r--r--hw/acpi/cpu.c2
-rw-r--r--hw/acpi/pcihp.c11
-rw-r--r--hw/acpi/piix4.c2
-rw-r--r--hw/acpi/tco.c1
-rw-r--r--hw/acpi/vmgenid.c258
-rw-r--r--hw/arm/armv7m.c379
-rw-r--r--hw/arm/bcm2835_peripherals.c58
-rw-r--r--hw/arm/exynos4210.c18
-rw-r--r--hw/arm/netduino2.c7
-rw-r--r--hw/arm/stm32f205_soc.c28
-rw-r--r--hw/arm/virt.c32
-rw-r--r--hw/block/block.c24
-rw-r--r--hw/block/fdc.c28
-rw-r--r--hw/block/m25p80.c8
-rw-r--r--hw/block/nand.c7
-rw-r--r--hw/block/nvme.c8
-rw-r--r--hw/block/onenand.c7
-rw-r--r--hw/block/pflash_cfi01.c18
-rw-r--r--hw/block/pflash_cfi02.c19
-rw-r--r--hw/block/virtio-blk.c8
-rw-r--r--hw/core/Makefile.objs1
-rw-r--r--hw/core/bus.c2
-rw-r--r--hw/core/irq.c1
-rw-r--r--hw/core/loader-fit.c325
-rw-r--r--hw/core/loader.c24
-rw-r--r--hw/core/machine.c31
-rw-r--r--hw/core/or-irq.c3
-rw-r--r--hw/core/ptimer.c8
-rw-r--r--hw/core/qdev-properties-system.c9
-rw-r--r--hw/core/qdev.c21
-rw-r--r--hw/core/register.c30
-rw-r--r--hw/display/cirrus_vga.c8
-rw-r--r--hw/display/milkymist-tmu2.c2
-rw-r--r--hw/display/virtio-gpu-3d.c13
-rw-r--r--hw/gpio/Makefile.objs1
-rw-r--r--hw/gpio/bcm2835_gpio.c353
-rw-r--r--hw/i386/acpi-build.c28
-rw-r--r--hw/i386/kvmvapic.c4
-rw-r--r--hw/i386/pc.c126
-rw-r--r--hw/ide/core.c2
-rw-r--r--hw/ide/qdev.c9
-rw-r--r--hw/intc/Makefile.objs2
-rw-r--r--hw/intc/arm_gic.c31
-rw-r--r--hw/intc/arm_gic_common.c23
-rw-r--r--hw/intc/arm_gicv3_common.c38
-rw-r--r--hw/intc/arm_gicv3_cpuif.c11
-rw-r--r--hw/intc/arm_gicv3_kvm.c629
-rw-r--r--hw/intc/armv7m_nvic.c965
-rw-r--r--hw/intc/gic_internal.h7
-rw-r--r--hw/intc/gicv3_internal.h3
-rw-r--r--hw/intc/mips_gic.c56
-rw-r--r--hw/intc/s390_flic.c28
-rw-r--r--hw/intc/s390_flic_kvm.c6
-rw-r--r--hw/intc/trace-events15
-rw-r--r--hw/intc/xics.c461
-rw-r--r--hw/intc/xics_kvm.c184
-rw-r--r--hw/intc/xics_spapr.c128
-rw-r--r--hw/mips/Makefile.objs1
-rw-r--r--hw/mips/boston.c577
-rw-r--r--hw/misc/Makefile.objs3
-rw-r--r--hw/misc/bcm2835_rng.c149
-rw-r--r--hw/misc/exynos4210_clk.c164
-rw-r--r--hw/misc/imx6_src.c58
-rw-r--r--hw/misc/ivshmem.c9
-rw-r--r--hw/misc/mips_cmgcr.c17
-rw-r--r--hw/net/cadence_gem.c2
-rw-r--r--hw/net/spapr_llan.c18
-rw-r--r--hw/nvram/spapr_nvram.c8
-rw-r--r--hw/pci-host/Makefile.objs1
-rw-r--r--hw/pci-host/prep.c11
-rw-r--r--hw/pci-host/xilinx-pcie.c328
-rw-r--r--hw/pci/pci.c28
-rw-r--r--hw/pci/pcie_aer.c2
-rw-r--r--hw/ppc/mac_newworld.c15
-rw-r--r--hw/ppc/pnv.c6
-rw-r--r--hw/ppc/ppc.c16
-rw-r--r--hw/ppc/ppc405_uc.c6
-rw-r--r--hw/ppc/ppc4xx_pci.c13
-rw-r--r--hw/ppc/spapr.c454
-rw-r--r--hw/ppc/spapr_cpu_core.c161
-rw-r--r--hw/ppc/spapr_events.c10
-rw-r--r--hw/ppc/spapr_hcall.c89
-rw-r--r--hw/ppc/spapr_ovec.c19
-rw-r--r--hw/ppc/spapr_pci.c312
-rw-r--r--hw/ppc/spapr_vio.c2
-rw-r--r--hw/ppc/trace-events12
-rw-r--r--hw/s390x/css.c15
-rw-r--r--hw/s390x/ipl.c90
-rw-r--r--hw/s390x/ipl.h5
-rw-r--r--hw/s390x/s390-virtio-ccw.c12
-rw-r--r--hw/s390x/s390-virtio.c12
-rw-r--r--hw/s390x/s390-virtio.h1
-rw-r--r--hw/s390x/virtio-ccw.c109
-rw-r--r--hw/s390x/virtio-ccw.h13
-rw-r--r--hw/scsi/scsi-disk.c12
-rw-r--r--hw/sd/Makefile.objs1
-rw-r--r--hw/sd/bcm2835_sdhost.c429
-rw-r--r--hw/sd/core.c27
-rw-r--r--hw/sd/sd.c8
-rw-r--r--hw/sd/sdhci.c25
-rw-r--r--hw/sparc64/niagara.c33
-rw-r--r--hw/timer/Makefile.objs1
-rw-r--r--hw/timer/armv7m_systick.c240
-rw-r--r--hw/timer/imx_gpt.c33
-rw-r--r--hw/timer/mips_gictimer.c5
-rw-r--r--hw/timer/trace-events6
-rw-r--r--hw/usb/bus.c28
-rw-r--r--hw/usb/dev-audio.c4
-rw-r--r--hw/usb/dev-bluetooth.c4
-rw-r--r--hw/usb/dev-hid.c4
-rw-r--r--hw/usb/dev-hub.c4
-rw-r--r--hw/usb/dev-network.c4
-rw-r--r--hw/usb/dev-smartcard-reader.c4
-rw-r--r--hw/usb/dev-storage.c22
-rw-r--r--hw/usb/dev-uas.c6
-rw-r--r--hw/usb/dev-wacom.c4
-rw-r--r--hw/usb/hcd-ohci.c2
-rw-r--r--hw/usb/hcd-xhci.c29
-rw-r--r--hw/usb/host-libusb.c4
-rw-r--r--hw/usb/redirect.c4
-rw-r--r--hw/vfio/pci-quirks.c65
-rw-r--r--hw/vfio/pci.c37
-rw-r--r--hw/virtio/virtio-pci.c2
-rw-r--r--hw/virtio/virtio.c32
138 files changed, 7632 insertions, 2293 deletions
diff --git a/hw/9pfs/9p-local.c b/hw/9pfs/9p-local.c
index 7de07e1ba6..f22a3c3654 100644
--- a/hw/9pfs/9p-local.c
+++ b/hw/9pfs/9p-local.c
@@ -13,7 +13,9 @@
 
 #include "qemu/osdep.h"
 #include "9p.h"
+#include "9p-local.h"
 #include "9p-xattr.h"
+#include "9p-util.h"
 #include "fsdev/qemu-fsdev.h"   /* local_ops */
 #include <arpa/inet.h>
 #include <pwd.h>
@@ -43,40 +45,62 @@
 #define BTRFS_SUPER_MAGIC 0x9123683E
 #endif
 
-#define VIRTFS_META_DIR ".virtfs_metadata"
+typedef struct {
+    int mountfd;
+} LocalData;
 
-static char *local_mapped_attr_path(FsContext *ctx, const char *path)
+int local_open_nofollow(FsContext *fs_ctx, const char *path, int flags,
+                        mode_t mode)
 {
-    int dirlen;
-    const char *name = strrchr(path, '/');
-    if (name) {
-        dirlen = name - path;
-        ++name;
-    } else {
-        name = path;
-        dirlen = 0;
+    LocalData *data = fs_ctx->private;
+
+    /* All paths are relative to the path data->mountfd points to */
+    while (*path == '/') {
+        path++;
     }
-    return g_strdup_printf("%s/%.*s/%s/%s", ctx->fs_root,
-                           dirlen, path, VIRTFS_META_DIR, name);
+
+    return relative_openat_nofollow(data->mountfd, path, flags, mode);
+}
+
+int local_opendir_nofollow(FsContext *fs_ctx, const char *path)
+{
+    return local_open_nofollow(fs_ctx, path, O_DIRECTORY | O_RDONLY, 0);
+}
+
+static void renameat_preserve_errno(int odirfd, const char *opath, int ndirfd,
+                                    const char *npath)
+{
+    int serrno = errno;
+    renameat(odirfd, opath, ndirfd, npath);
+    errno = serrno;
 }
 
-static FILE *local_fopen(const char *path, const char *mode)
+static void unlinkat_preserve_errno(int dirfd, const char *path, int flags)
+{
+    int serrno = errno;
+    unlinkat(dirfd, path, flags);
+    errno = serrno;
+}
+
+#define VIRTFS_META_DIR ".virtfs_metadata"
+
+static FILE *local_fopenat(int dirfd, const char *name, const char *mode)
 {
     int fd, o_mode = 0;
     FILE *fp;
-    int flags = O_NOFOLLOW;
+    int flags;
     /*
      * only supports two modes
      */
     if (mode[0] == 'r') {
-        flags |= O_RDONLY;
+        flags = O_RDONLY;
     } else if (mode[0] == 'w') {
-        flags |= O_WRONLY | O_TRUNC | O_CREAT;
+        flags = O_WRONLY | O_TRUNC | O_CREAT;
         o_mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH;
     } else {
         return NULL;
     }
-    fd = open(path, flags, o_mode);
+    fd = openat_file(dirfd, name, flags, o_mode);
     if (fd == -1) {
         return NULL;
     }
@@ -88,16 +112,20 @@ static FILE *local_fopen(const char *path, const char *mode)
 }
 
 #define ATTR_MAX 100
-static void local_mapped_file_attr(FsContext *ctx, const char *path,
+static void local_mapped_file_attr(int dirfd, const char *name,
                                    struct stat *stbuf)
 {
     FILE *fp;
     char buf[ATTR_MAX];
-    char *attr_path;
+    int map_dirfd;
 
-    attr_path = local_mapped_attr_path(ctx, path);
-    fp = local_fopen(attr_path, "r");
-    g_free(attr_path);
+    map_dirfd = openat_dir(dirfd, VIRTFS_META_DIR);
+    if (map_dirfd == -1) {
+        return;
+    }
+
+    fp = local_fopenat(map_dirfd, name, "r");
+    close_preserve_errno(map_dirfd);
     if (!fp) {
         return;
     }
@@ -119,12 +147,17 @@ static void local_mapped_file_attr(FsContext *ctx, const char *path,
 
 static int local_lstat(FsContext *fs_ctx, V9fsPath *fs_path, struct stat *stbuf)
 {
-    int err;
-    char *buffer;
-    char *path = fs_path->data;
+    int err = -1;
+    char *dirpath = g_path_get_dirname(fs_path->data);
+    char *name = g_path_get_basename(fs_path->data);
+    int dirfd;
 
-    buffer = rpath(fs_ctx, path);
-    err =  lstat(buffer, stbuf);
+    dirfd = local_opendir_nofollow(fs_ctx, dirpath);
+    if (dirfd == -1) {
+        goto out;
+    }
+
+    err = fstatat(dirfd, name, stbuf, AT_SYMLINK_NOFOLLOW);
     if (err) {
         goto err_out;
     }
@@ -134,87 +167,83 @@ static int local_lstat(FsContext *fs_ctx, V9fsPath *fs_path, struct stat *stbuf)
         gid_t tmp_gid;
         mode_t tmp_mode;
         dev_t tmp_dev;
-        if (getxattr(buffer, "user.virtfs.uid", &tmp_uid, sizeof(uid_t)) > 0) {
+
+        if (fgetxattrat_nofollow(dirfd, name, "user.virtfs.uid", &tmp_uid,
+                                 sizeof(uid_t)) > 0) {
             stbuf->st_uid = le32_to_cpu(tmp_uid);
         }
-        if (getxattr(buffer, "user.virtfs.gid", &tmp_gid, sizeof(gid_t)) > 0) {
+        if (fgetxattrat_nofollow(dirfd, name, "user.virtfs.gid", &tmp_gid,
+                                 sizeof(gid_t)) > 0) {
             stbuf->st_gid = le32_to_cpu(tmp_gid);
         }
-        if (getxattr(buffer, "user.virtfs.mode",
-                    &tmp_mode, sizeof(mode_t)) > 0) {
+        if (fgetxattrat_nofollow(dirfd, name, "user.virtfs.mode", &tmp_mode,
+                                 sizeof(mode_t)) > 0) {
             stbuf->st_mode = le32_to_cpu(tmp_mode);
         }
-        if (getxattr(buffer, "user.virtfs.rdev", &tmp_dev, sizeof(dev_t)) > 0) {
+        if (fgetxattrat_nofollow(dirfd, name, "user.virtfs.rdev", &tmp_dev,
+                                 sizeof(dev_t)) > 0) {
             stbuf->st_rdev = le64_to_cpu(tmp_dev);
         }
     } else if (fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) {
-        local_mapped_file_attr(fs_ctx, path, stbuf);
+        local_mapped_file_attr(dirfd, name, stbuf);
     }
 
 err_out:
-    g_free(buffer);
-    return err;
-}
-
-static int local_create_mapped_attr_dir(FsContext *ctx, const char *path)
-{
-    int err;
-    char *attr_dir;
-    char *tmp_path = g_strdup(path);
-
-    attr_dir = g_strdup_printf("%s/%s/%s",
-             ctx->fs_root, dirname(tmp_path), VIRTFS_META_DIR);
-
-    err = mkdir(attr_dir, 0700);
-    if (err < 0 && errno == EEXIST) {
-        err = 0;
-    }
-    g_free(attr_dir);
-    g_free(tmp_path);
+    close_preserve_errno(dirfd);
+out:
+    g_free(name);
+    g_free(dirpath);
     return err;
 }
 
-static int local_set_mapped_file_attr(FsContext *ctx,
-                                      const char *path, FsCred *credp)
+static int local_set_mapped_file_attrat(int dirfd, const char *name,
+                                        FsCred *credp)
 {
     FILE *fp;
-    int ret = 0;
+    int ret;
     char buf[ATTR_MAX];
-    char *attr_path;
     int uid = -1, gid = -1, mode = -1, rdev = -1;
+    int map_dirfd;
+
+    ret = mkdirat(dirfd, VIRTFS_META_DIR, 0700);
+    if (ret < 0 && errno != EEXIST) {
+        return -1;
+    }
 
-    attr_path = local_mapped_attr_path(ctx, path);
-    fp = local_fopen(attr_path, "r");
+    map_dirfd = openat_dir(dirfd, VIRTFS_META_DIR);
+    if (map_dirfd == -1) {
+        return -1;
+    }
+
+    fp = local_fopenat(map_dirfd, name, "r");
     if (!fp) {
-        goto create_map_file;
+        if (errno == ENOENT) {
+            goto update_map_file;
+        } else {
+            close_preserve_errno(map_dirfd);
+            return -1;
+        }
     }
     memset(buf, 0, ATTR_MAX);
     while (fgets(buf, ATTR_MAX, fp)) {
         if (!strncmp(buf, "virtfs.uid", 10)) {
-            uid = atoi(buf+11);
+            uid = atoi(buf + 11);
         } else if (!strncmp(buf, "virtfs.gid", 10)) {
-            gid = atoi(buf+11);
+            gid = atoi(buf + 11);
         } else if (!strncmp(buf, "virtfs.mode", 11)) {
-            mode = atoi(buf+12);
+            mode = atoi(buf + 12);
         } else if (!strncmp(buf, "virtfs.rdev", 11)) {
-            rdev = atoi(buf+12);
+            rdev = atoi(buf + 12);
         }
         memset(buf, 0, ATTR_MAX);
     }
     fclose(fp);
-    goto update_map_file;
-
-create_map_file:
-    ret = local_create_mapped_attr_dir(ctx, path);
-    if (ret < 0) {
-        goto err_out;
-    }
 
 update_map_file:
-    fp = local_fopen(attr_path, "w");
+    fp = local_fopenat(map_dirfd, name, "w");
+    close_preserve_errno(map_dirfd);
     if (!fp) {
-        ret = -1;
-        goto err_out;
+        return -1;
     }
 
     if (credp->fc_uid != -1) {
@@ -230,7 +259,6 @@ update_map_file:
         rdev = credp->fc_rdev;
     }
 
-
     if (uid != -1) {
         fprintf(fp, "virtfs.uid=%d\n", uid);
     }
@@ -245,39 +273,71 @@ update_map_file:
     }
     fclose(fp);
 
-err_out:
-    g_free(attr_path);
+    return 0;
+}
+
+static int fchmodat_nofollow(int dirfd, const char *name, mode_t mode)
+{
+    int fd, ret;
+
+    /* FIXME: this should be handled with fchmodat(AT_SYMLINK_NOFOLLOW).
+     * Unfortunately, the linux kernel doesn't implement it yet. As an
+     * alternative, let's open the file and use fchmod() instead. This
+     * may fail depending on the permissions of the file, but it is the
+     * best we can do to avoid TOCTTOU. We first try to open read-only
+     * in case name points to a directory. If that fails, we try write-only
+     * in case name doesn't point to a directory.
+     */
+    fd = openat_file(dirfd, name, O_RDONLY, 0);
+    if (fd == -1) {
+        /* In case the file is writable-only and isn't a directory. */
+        if (errno == EACCES) {
+            fd = openat_file(dirfd, name, O_WRONLY, 0);
+        }
+        if (fd == -1 && errno == EISDIR) {
+            errno = EACCES;
+        }
+    }
+    if (fd == -1) {
+        return -1;
+    }
+    ret = fchmod(fd, mode);
+    close_preserve_errno(fd);
     return ret;
 }
 
-static int local_set_xattr(const char *path, FsCred *credp)
+static int local_set_xattrat(int dirfd, const char *path, FsCred *credp)
 {
     int err;
 
     if (credp->fc_uid != -1) {
         uint32_t tmp_uid = cpu_to_le32(credp->fc_uid);
-        err = setxattr(path, "user.virtfs.uid", &tmp_uid, sizeof(uid_t), 0);
+        err = fsetxattrat_nofollow(dirfd, path, "user.virtfs.uid", &tmp_uid,
+                                   sizeof(uid_t), 0);
         if (err) {
             return err;
         }
     }
     if (credp->fc_gid != -1) {
         uint32_t tmp_gid = cpu_to_le32(credp->fc_gid);
-        err = setxattr(path, "user.virtfs.gid", &tmp_gid, sizeof(gid_t), 0);
+        err = fsetxattrat_nofollow(dirfd, path, "user.virtfs.gid", &tmp_gid,
+                                   sizeof(gid_t), 0);
         if (err) {
             return err;
         }
     }
     if (credp->fc_mode != -1) {
         uint32_t tmp_mode = cpu_to_le32(credp->fc_mode);
-        err = setxattr(path, "user.virtfs.mode", &tmp_mode, sizeof(mode_t), 0);
+        err = fsetxattrat_nofollow(dirfd, path, "user.virtfs.mode", &tmp_mode,
+                                   sizeof(mode_t), 0);
         if (err) {
             return err;
         }
     }
     if (credp->fc_rdev != -1) {
         uint64_t tmp_rdev = cpu_to_le64(credp->fc_rdev);
-        err = setxattr(path, "user.virtfs.rdev", &tmp_rdev, sizeof(dev_t), 0);
+        err = fsetxattrat_nofollow(dirfd, path, "user.virtfs.rdev", &tmp_rdev,
+                                   sizeof(dev_t), 0);
         if (err) {
             return err;
         }
@@ -285,58 +345,56 @@ static int local_set_xattr(const char *path, FsCred *credp)
     return 0;
 }
 
-static int local_post_create_passthrough(FsContext *fs_ctx, const char *path,
-                                         FsCred *credp)
+static int local_set_cred_passthrough(FsContext *fs_ctx, int dirfd,
+                                      const char *name, FsCred *credp)
 {
-    char *buffer;
-
-    buffer = rpath(fs_ctx, path);
-    if (lchown(buffer, credp->fc_uid, credp->fc_gid) < 0) {
+    if (fchownat(dirfd, name, credp->fc_uid, credp->fc_gid,
+                 AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH) < 0) {
         /*
          * If we fail to change ownership and if we are
          * using security model none. Ignore the error
          */
         if ((fs_ctx->export_flags & V9FS_SEC_MASK) != V9FS_SM_NONE) {
-            goto err;
+            return -1;
         }
     }
 
-    if (chmod(buffer, credp->fc_mode & 07777) < 0) {
-        goto err;
-    }
-
-    g_free(buffer);
-    return 0;
-err:
-    g_free(buffer);
-    return -1;
+    return fchmodat_nofollow(dirfd, name, credp->fc_mode & 07777);
 }
 
 static ssize_t local_readlink(FsContext *fs_ctx, V9fsPath *fs_path,
                               char *buf, size_t bufsz)
 {
     ssize_t tsize = -1;
-    char *buffer;
-    char *path = fs_path->data;
 
     if ((fs_ctx->export_flags & V9FS_SM_MAPPED) ||
         (fs_ctx->export_flags & V9FS_SM_MAPPED_FILE)) {
         int fd;
-        buffer = rpath(fs_ctx, path);
-        fd = open(buffer, O_RDONLY | O_NOFOLLOW);
-        g_free(buffer);
+
+        fd = local_open_nofollow(fs_ctx, fs_path->data, O_RDONLY, 0);
         if (fd == -1) {
             return -1;
         }
         do {
             tsize = read(fd, (void *)buf, bufsz);
         } while (tsize == -1 && errno == EINTR);
-        close(fd);
+        close_preserve_errno(fd);
     } else if ((fs_ctx->export_flags & V9FS_SM_PASSTHROUGH) ||
                (fs_ctx->export_flags & V9FS_SM_NONE)) {
-        buffer = rpath(fs_ctx, path);
-        tsize = readlink(buffer, buf, bufsz);
-        g_free(buffer);
+        char *dirpath = g_path_get_dirname(fs_path->data);
+        char *name = g_path_get_basename(fs_path->data);
+        int dirfd;
+
+        dirfd = local_opendir_nofollow(fs_ctx, dirpath);
+        if (dirfd == -1) {
+            goto out;
+        }
+
+        tsize = readlinkat(dirfd, name, buf, bufsz);
+        close_preserve_errno(dirfd);
+    out:
+        g_free(name);
+        g_free(dirpath);
     }
     return tsize;
 }
@@ -354,27 +412,32 @@ static int local_closedir(FsContext *ctx, V9fsFidOpenState *fs)
 static int local_open(FsContext *ctx, V9fsPath *fs_path,
                       int flags, V9fsFidOpenState *fs)
 {
-    char *buffer;
-    char *path = fs_path->data;
+    int fd;
 
-    buffer = rpath(ctx, path);
-    fs->fd = open(buffer, flags | O_NOFOLLOW);
-    g_free(buffer);
+    fd = local_open_nofollow(ctx, fs_path->data, flags, 0);
+    if (fd == -1) {
+        return -1;
+    }
+    fs->fd = fd;
     return fs->fd;
 }
 
 static int local_opendir(FsContext *ctx,
                          V9fsPath *fs_path, V9fsFidOpenState *fs)
 {
-    char *buffer;
-    char *path = fs_path->data;
+    int dirfd;
+    DIR *stream;
+
+    dirfd = local_opendir_nofollow(ctx, fs_path->data);
+    if (dirfd == -1) {
+        return -1;
+    }
 
-    buffer = rpath(ctx, path);
-    fs->dir.stream = opendir(buffer);
-    g_free(buffer);
-    if (!fs->dir.stream) {
+    stream = fdopendir(dirfd);
+    if (!stream) {
         return -1;
     }
+    fs->dir.stream = stream;
     return 0;
 }
 
@@ -463,145 +526,122 @@ static ssize_t local_pwritev(FsContext *ctx, V9fsFidOpenState *fs,
 
 static int local_chmod(FsContext *fs_ctx, V9fsPath *fs_path, FsCred *credp)
 {
-    char *buffer;
+    char *dirpath = g_path_get_dirname(fs_path->data);
+    char *name = g_path_get_basename(fs_path->data);
     int ret = -1;
-    char *path = fs_path->data;
+    int dirfd;
+
+    dirfd = local_opendir_nofollow(fs_ctx, dirpath);
+    if (dirfd == -1) {
+        goto out;
+    }
 
     if (fs_ctx->export_flags & V9FS_SM_MAPPED) {
-        buffer = rpath(fs_ctx, path);
-        ret = local_set_xattr(buffer, credp);
-        g_free(buffer);
+        ret = local_set_xattrat(dirfd, name, credp);
     } else if (fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) {
-        return local_set_mapped_file_attr(fs_ctx, path, credp);
-    } else if ((fs_ctx->export_flags & V9FS_SM_PASSTHROUGH) ||
-               (fs_ctx->export_flags & V9FS_SM_NONE)) {
-        buffer = rpath(fs_ctx, path);
-        ret = chmod(buffer, credp->fc_mode);
-        g_free(buffer);
+        ret = local_set_mapped_file_attrat(dirfd, name, credp);
+    } else if (fs_ctx->export_flags & V9FS_SM_PASSTHROUGH ||
+               fs_ctx->export_flags & V9FS_SM_NONE) {
+        ret = fchmodat_nofollow(dirfd, name, credp->fc_mode);
     }
+    close_preserve_errno(dirfd);
+
+out:
+    g_free(dirpath);
+    g_free(name);
     return ret;
 }
 
 static int local_mknod(FsContext *fs_ctx, V9fsPath *dir_path,
                        const char *name, FsCred *credp)
 {
-    char *path;
     int err = -1;
-    int serrno = 0;
-    V9fsString fullname;
-    char *buffer = NULL;
+    int dirfd;
 
-    v9fs_string_init(&fullname);
-    v9fs_string_sprintf(&fullname, "%s/%s", dir_path->data, name);
-    path = fullname.data;
+    dirfd = local_opendir_nofollow(fs_ctx, dir_path->data);
+    if (dirfd == -1) {
+        return -1;
+    }
 
-    /* Determine the security model */
-    if (fs_ctx->export_flags & V9FS_SM_MAPPED) {
-        buffer = rpath(fs_ctx, path);
-        err = mknod(buffer, SM_LOCAL_MODE_BITS|S_IFREG, 0);
+    if (fs_ctx->export_flags & V9FS_SM_MAPPED ||
+        fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) {
+        err = mknodat(dirfd, name, SM_LOCAL_MODE_BITS | S_IFREG, 0);
         if (err == -1) {
             goto out;
         }
-        err = local_set_xattr(buffer, credp);
-        if (err == -1) {
-            serrno = errno;
-            goto err_end;
-        }
-    } else if (fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) {
 
-        buffer = rpath(fs_ctx, path);
-        err = mknod(buffer, SM_LOCAL_MODE_BITS|S_IFREG, 0);
-        if (err == -1) {
-            goto out;
+        if (fs_ctx->export_flags & V9FS_SM_MAPPED) {
+            err = local_set_xattrat(dirfd, name, credp);
+        } else {
+            err = local_set_mapped_file_attrat(dirfd, name, credp);
         }
-        err = local_set_mapped_file_attr(fs_ctx, path, credp);
         if (err == -1) {
-            serrno = errno;
             goto err_end;
         }
-    } else if ((fs_ctx->export_flags & V9FS_SM_PASSTHROUGH) ||
-               (fs_ctx->export_flags & V9FS_SM_NONE)) {
-        buffer = rpath(fs_ctx, path);
-        err = mknod(buffer, credp->fc_mode, credp->fc_rdev);
+    } else if (fs_ctx->export_flags & V9FS_SM_PASSTHROUGH ||
+               fs_ctx->export_flags & V9FS_SM_NONE) {
+        err = mknodat(dirfd, name, credp->fc_mode, credp->fc_rdev);
         if (err == -1) {
             goto out;
         }
-        err = local_post_create_passthrough(fs_ctx, path, credp);
+        err = local_set_cred_passthrough(fs_ctx, dirfd, name, credp);
         if (err == -1) {
-            serrno = errno;
             goto err_end;
         }
     }
     goto out;
 
 err_end:
-    remove(buffer);
-    errno = serrno;
+    unlinkat_preserve_errno(dirfd, name, 0);
 out:
-    g_free(buffer);
-    v9fs_string_free(&fullname);
+    close_preserve_errno(dirfd);
     return err;
 }
 
 static int local_mkdir(FsContext *fs_ctx, V9fsPath *dir_path,
                        const char *name, FsCred *credp)
 {
-    char *path;
     int err = -1;
-    int serrno = 0;
-    V9fsString fullname;
-    char *buffer = NULL;
+    int dirfd;
 
-    v9fs_string_init(&fullname);
-    v9fs_string_sprintf(&fullname, "%s/%s", dir_path->data, name);
-    path = fullname.data;
+    dirfd = local_opendir_nofollow(fs_ctx, dir_path->data);
+    if (dirfd == -1) {
+        return -1;
+    }
 
-    /* Determine the security model */
-    if (fs_ctx->export_flags & V9FS_SM_MAPPED) {
-        buffer = rpath(fs_ctx, path);
-        err = mkdir(buffer, SM_LOCAL_DIR_MODE_BITS);
+    if (fs_ctx->export_flags & V9FS_SM_MAPPED ||
+        fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) {
+        err = mkdirat(dirfd, name, SM_LOCAL_DIR_MODE_BITS);
         if (err == -1) {
             goto out;
         }
-        credp->fc_mode = credp->fc_mode|S_IFDIR;
-        err = local_set_xattr(buffer, credp);
-        if (err == -1) {
-            serrno = errno;
-            goto err_end;
-        }
-    } else if (fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) {
-        buffer = rpath(fs_ctx, path);
-        err = mkdir(buffer, SM_LOCAL_DIR_MODE_BITS);
-        if (err == -1) {
-            goto out;
+        credp->fc_mode = credp->fc_mode | S_IFDIR;
+
+        if (fs_ctx->export_flags & V9FS_SM_MAPPED) {
+            err = local_set_xattrat(dirfd, name, credp);
+        } else {
+            err = local_set_mapped_file_attrat(dirfd, name, credp);
         }
-        credp->fc_mode = credp->fc_mode|S_IFDIR;
-        err = local_set_mapped_file_attr(fs_ctx, path, credp);
         if (err == -1) {
-            serrno = errno;
             goto err_end;
         }
-    } else if ((fs_ctx->export_flags & V9FS_SM_PASSTHROUGH) ||
-               (fs_ctx->export_flags & V9FS_SM_NONE)) {
-        buffer = rpath(fs_ctx, path);
-        err = mkdir(buffer, credp->fc_mode);
+    } else if (fs_ctx->export_flags & V9FS_SM_PASSTHROUGH ||
+               fs_ctx->export_flags & V9FS_SM_NONE) {
+        err = mkdirat(dirfd, name, credp->fc_mode);
         if (err == -1) {
             goto out;
         }
-        err = local_post_create_passthrough(fs_ctx, path, credp);
+        err = local_set_cred_passthrough(fs_ctx, dirfd, name, credp);
         if (err == -1) {
-            serrno = errno;
             goto err_end;
         }
     }
     goto out;
 
 err_end:
-    remove(buffer);
-    errno = serrno;
+    unlinkat_preserve_errno(dirfd, name, AT_REMOVEDIR);
 out:
-    g_free(buffer);
-    v9fs_string_free(&fullname);
+    close_preserve_errno(dirfd);
     return err;
 }
 
@@ -649,62 +689,45 @@ static int local_fstat(FsContext *fs_ctx, int fid_type,
 static int local_open2(FsContext *fs_ctx, V9fsPath *dir_path, const char *name,
                        int flags, FsCred *credp, V9fsFidOpenState *fs)
 {
-    char *path;
     int fd = -1;
     int err = -1;
-    int serrno = 0;
-    V9fsString fullname;
-    char *buffer = NULL;
+    int dirfd;
 
     /*
      * Mark all the open to not follow symlinks
      */
     flags |= O_NOFOLLOW;
 
-    v9fs_string_init(&fullname);
-    v9fs_string_sprintf(&fullname, "%s/%s", dir_path->data, name);
-    path = fullname.data;
+    dirfd = local_opendir_nofollow(fs_ctx, dir_path->data);
+    if (dirfd == -1) {
+        return -1;
+    }
 
     /* Determine the security model */
-    if (fs_ctx->export_flags & V9FS_SM_MAPPED) {
-        buffer = rpath(fs_ctx, path);
-        fd = open(buffer, flags, SM_LOCAL_MODE_BITS);
+    if (fs_ctx->export_flags & V9FS_SM_MAPPED ||
+        fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) {
+        fd = openat_file(dirfd, name, flags, SM_LOCAL_MODE_BITS);
         if (fd == -1) {
-            err = fd;
             goto out;
         }
         credp->fc_mode = credp->fc_mode|S_IFREG;
-        /* Set cleint credentials in xattr */
-        err = local_set_xattr(buffer, credp);
-        if (err == -1) {
-            serrno = errno;
-            goto err_end;
+        if (fs_ctx->export_flags & V9FS_SM_MAPPED) {
+            /* Set cleint credentials in xattr */
+            err = local_set_xattrat(dirfd, name, credp);
+        } else {
+            err = local_set_mapped_file_attrat(dirfd, name, credp);
         }
-    } else if (fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) {
-        buffer = rpath(fs_ctx, path);
-        fd = open(buffer, flags, SM_LOCAL_MODE_BITS);
-        if (fd == -1) {
-            err = fd;
-            goto out;
-        }
-        credp->fc_mode = credp->fc_mode|S_IFREG;
-        /* Set client credentials in .virtfs_metadata directory files */
-        err = local_set_mapped_file_attr(fs_ctx, path, credp);
         if (err == -1) {
-            serrno = errno;
             goto err_end;
         }
     } else if ((fs_ctx->export_flags & V9FS_SM_PASSTHROUGH) ||
                (fs_ctx->export_flags & V9FS_SM_NONE)) {
-        buffer = rpath(fs_ctx, path);
-        fd = open(buffer, flags, credp->fc_mode);
+        fd = openat_file(dirfd, name, flags, credp->fc_mode);
         if (fd == -1) {
-            err = fd;
             goto out;
         }
-        err = local_post_create_passthrough(fs_ctx, path, credp);
+        err = local_set_cred_passthrough(fs_ctx, dirfd, name, credp);
         if (err == -1) {
-            serrno = errno;
             goto err_end;
         }
     }
@@ -713,12 +736,11 @@ static int local_open2(FsContext *fs_ctx, V9fsPath *dir_path, const char *name,
     goto out;
 
 err_end:
-    close(fd);
-    remove(buffer);
-    errno = serrno;
+    unlinkat_preserve_errno(dirfd, name,
+                            flags & O_DIRECTORY ? AT_REMOVEDIR : 0);
+    close_preserve_errno(fd);
 out:
-    g_free(buffer);
-    v9fs_string_free(&fullname);
+    close_preserve_errno(dirfd);
     return err;
 }
 
@@ -727,23 +749,22 @@ static int local_symlink(FsContext *fs_ctx, const char *oldpath,
                          V9fsPath *dir_path, const char *name, FsCred *credp)
 {
     int err = -1;
-    int serrno = 0;
-    char *newpath;
-    V9fsString fullname;
-    char *buffer = NULL;
+    int dirfd;
 
-    v9fs_string_init(&fullname);
-    v9fs_string_sprintf(&fullname, "%s/%s", dir_path->data, name);
-    newpath = fullname.data;
+    dirfd = local_opendir_nofollow(fs_ctx, dir_path->data);
+    if (dirfd == -1) {
+        return -1;
+    }
 
     /* Determine the security model */
-    if (fs_ctx->export_flags & V9FS_SM_MAPPED) {
+    if (fs_ctx->export_flags & V9FS_SM_MAPPED ||
+        fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) {
         int fd;
         ssize_t oldpath_size, write_size;
-        buffer = rpath(fs_ctx, newpath);
-        fd = open(buffer, O_CREAT|O_EXCL|O_RDWR|O_NOFOLLOW, SM_LOCAL_MODE_BITS);
+
+        fd = openat_file(dirfd, name, O_CREAT | O_EXCL | O_RDWR,
+                         SM_LOCAL_MODE_BITS);
         if (fd == -1) {
-            err = fd;
             goto out;
         }
         /* Write the oldpath (target) to the file. */
@@ -751,218 +772,204 @@ static int local_symlink(FsContext *fs_ctx, const char *oldpath,
         do {
             write_size = write(fd, (void *)oldpath, oldpath_size);
         } while (write_size == -1 && errno == EINTR);
+        close_preserve_errno(fd);
 
         if (write_size != oldpath_size) {
-            serrno = errno;
-            close(fd);
-            err = -1;
             goto err_end;
         }
-        close(fd);
         /* Set cleint credentials in symlink's xattr */
-        credp->fc_mode = credp->fc_mode|S_IFLNK;
-        err = local_set_xattr(buffer, credp);
-        if (err == -1) {
-            serrno = errno;
-            goto err_end;
-        }
-    } else if (fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) {
-        int fd;
-        ssize_t oldpath_size, write_size;
-        buffer = rpath(fs_ctx, newpath);
-        fd = open(buffer, O_CREAT|O_EXCL|O_RDWR|O_NOFOLLOW, SM_LOCAL_MODE_BITS);
-        if (fd == -1) {
-            err = fd;
-            goto out;
-        }
-        /* Write the oldpath (target) to the file. */
-        oldpath_size = strlen(oldpath);
-        do {
-            write_size = write(fd, (void *)oldpath, oldpath_size);
-        } while (write_size == -1 && errno == EINTR);
+        credp->fc_mode = credp->fc_mode | S_IFLNK;
 
-        if (write_size != oldpath_size) {
-            serrno = errno;
-            close(fd);
-            err = -1;
-            goto err_end;
+        if (fs_ctx->export_flags & V9FS_SM_MAPPED) {
+            err = local_set_xattrat(dirfd, name, credp);
+        } else {
+            err = local_set_mapped_file_attrat(dirfd, name, credp);
         }
-        close(fd);
-        /* Set cleint credentials in symlink's xattr */
-        credp->fc_mode = credp->fc_mode|S_IFLNK;
-        err = local_set_mapped_file_attr(fs_ctx, newpath, credp);
         if (err == -1) {
-            serrno = errno;
             goto err_end;
         }
-    } else if ((fs_ctx->export_flags & V9FS_SM_PASSTHROUGH) ||
-               (fs_ctx->export_flags & V9FS_SM_NONE)) {
-        buffer = rpath(fs_ctx, newpath);
-        err = symlink(oldpath, buffer);
+    } else if (fs_ctx->export_flags & V9FS_SM_PASSTHROUGH ||
+               fs_ctx->export_flags & V9FS_SM_NONE) {
+        err = symlinkat(oldpath, dirfd, name);
         if (err) {
             goto out;
         }
-        err = lchown(buffer, credp->fc_uid, credp->fc_gid);
+        err = fchownat(dirfd, name, credp->fc_uid, credp->fc_gid,
+                       AT_SYMLINK_NOFOLLOW);
         if (err == -1) {
             /*
              * If we fail to change ownership and if we are
              * using security model none. Ignore the error
              */
             if ((fs_ctx->export_flags & V9FS_SEC_MASK) != V9FS_SM_NONE) {
-                serrno = errno;
                 goto err_end;
-            } else
+            } else {
                 err = 0;
+            }
         }
     }
     goto out;
 
 err_end:
-    remove(buffer);
-    errno = serrno;
+    unlinkat_preserve_errno(dirfd, name, 0);
 out:
-    g_free(buffer);
-    v9fs_string_free(&fullname);
+    close_preserve_errno(dirfd);
     return err;
 }
 
 static int local_link(FsContext *ctx, V9fsPath *oldpath,
                       V9fsPath *dirpath, const char *name)
 {
-    int ret;
-    V9fsString newpath;
-    char *buffer, *buffer1;
+    char *odirpath = g_path_get_dirname(oldpath->data);
+    char *oname = g_path_get_basename(oldpath->data);
+    int ret = -1;
+    int odirfd, ndirfd;
 
-    v9fs_string_init(&newpath);
-    v9fs_string_sprintf(&newpath, "%s/%s", dirpath->data, name);
+    odirfd = local_opendir_nofollow(ctx, odirpath);
+    if (odirfd == -1) {
+        goto out;
+    }
+
+    ndirfd = local_opendir_nofollow(ctx, dirpath->data);
+    if (ndirfd == -1) {
+        close_preserve_errno(odirfd);
+        goto out;
+    }
 
-    buffer = rpath(ctx, oldpath->data);
-    buffer1 = rpath(ctx, newpath.data);
-    ret = link(buffer, buffer1);
-    g_free(buffer);
-    g_free(buffer1);
+    ret = linkat(odirfd, oname, ndirfd, name, 0);
+    if (ret < 0) {
+        goto out_close;
+    }
 
     /* now link the virtfs_metadata files */
-    if (!ret && (ctx->export_flags & V9FS_SM_MAPPED_FILE)) {
-        /* Link the .virtfs_metadata files. Create the metada directory */
-        ret = local_create_mapped_attr_dir(ctx, newpath.data);
-        if (ret < 0) {
-            goto err_out;
+    if (ctx->export_flags & V9FS_SM_MAPPED_FILE) {
+        int omap_dirfd, nmap_dirfd;
+
+        ret = mkdirat(ndirfd, VIRTFS_META_DIR, 0700);
+        if (ret < 0 && errno != EEXIST) {
+            goto err_undo_link;
+        }
+
+        omap_dirfd = openat_dir(odirfd, VIRTFS_META_DIR);
+        if (omap_dirfd == -1) {
+            goto err;
+        }
+
+        nmap_dirfd = openat_dir(ndirfd, VIRTFS_META_DIR);
+        if (nmap_dirfd == -1) {
+            close_preserve_errno(omap_dirfd);
+            goto err;
         }
-        buffer = local_mapped_attr_path(ctx, oldpath->data);
-        buffer1 = local_mapped_attr_path(ctx, newpath.data);
-        ret = link(buffer, buffer1);
-        g_free(buffer);
-        g_free(buffer1);
+
+        ret = linkat(omap_dirfd, oname, nmap_dirfd, name, 0);
+        close_preserve_errno(nmap_dirfd);
+        close_preserve_errno(omap_dirfd);
         if (ret < 0 && errno != ENOENT) {
-            goto err_out;
+            goto err_undo_link;
         }
-    }
-err_out:
-    v9fs_string_free(&newpath);
-    return ret;
-}
 
-static int local_truncate(FsContext *ctx, V9fsPath *fs_path, off_t size)
-{
-    char *buffer;
-    int ret;
-    char *path = fs_path->data;
+        ret = 0;
+    }
+    goto out_close;
 
-    buffer = rpath(ctx, path);
-    ret = truncate(buffer, size);
-    g_free(buffer);
+err:
+    ret = -1;
+err_undo_link:
+    unlinkat_preserve_errno(ndirfd, name, 0);
+out_close:
+    close_preserve_errno(ndirfd);
+    close_preserve_errno(odirfd);
+out:
+    g_free(oname);
+    g_free(odirpath);
     return ret;
 }
 
-static int local_rename(FsContext *ctx, const char *oldpath,
-                        const char *newpath)
+static int local_truncate(FsContext *ctx, V9fsPath *fs_path, off_t size)
 {
-    int err;
-    char *buffer, *buffer1;
+    int fd, ret;
 
-    if (ctx->export_flags & V9FS_SM_MAPPED_FILE) {
-        err = local_create_mapped_attr_dir(ctx, newpath);
-        if (err < 0) {
-            return err;
-        }
-        /* rename the .virtfs_metadata files */
-        buffer = local_mapped_attr_path(ctx, oldpath);
-        buffer1 = local_mapped_attr_path(ctx, newpath);
-        err = rename(buffer, buffer1);
-        g_free(buffer);
-        g_free(buffer1);
-        if (err < 0 && errno != ENOENT) {
-            return err;
-        }
+    fd = local_open_nofollow(ctx, fs_path->data, O_WRONLY, 0);
+    if (fd == -1) {
+        return -1;
     }
-
-    buffer = rpath(ctx, oldpath);
-    buffer1 = rpath(ctx, newpath);
-    err = rename(buffer, buffer1);
-    g_free(buffer);
-    g_free(buffer1);
-    return err;
+    ret = ftruncate(fd, size);
+    close_preserve_errno(fd);
+    return ret;
 }
 
 static int local_chown(FsContext *fs_ctx, V9fsPath *fs_path, FsCred *credp)
 {
-    char *buffer;
+    char *dirpath = g_path_get_dirname(fs_path->data);
+    char *name = g_path_get_basename(fs_path->data);
     int ret = -1;
-    char *path = fs_path->data;
+    int dirfd;
+
+    dirfd = local_opendir_nofollow(fs_ctx, dirpath);
+    if (dirfd == -1) {
+        goto out;
+    }
 
     if ((credp->fc_uid == -1 && credp->fc_gid == -1) ||
         (fs_ctx->export_flags & V9FS_SM_PASSTHROUGH) ||
         (fs_ctx->export_flags & V9FS_SM_NONE)) {
-        buffer = rpath(fs_ctx, path);
-        ret = lchown(buffer, credp->fc_uid, credp->fc_gid);
-        g_free(buffer);
+        ret = fchownat(dirfd, name, credp->fc_uid, credp->fc_gid,
+                       AT_SYMLINK_NOFOLLOW);
     } else if (fs_ctx->export_flags & V9FS_SM_MAPPED) {
-        buffer = rpath(fs_ctx, path);
-        ret = local_set_xattr(buffer, credp);
-        g_free(buffer);
+        ret = local_set_xattrat(dirfd, name, credp);
     } else if (fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) {
-        return local_set_mapped_file_attr(fs_ctx, path, credp);
+        ret = local_set_mapped_file_attrat(dirfd, name, credp);
     }
+
+    close_preserve_errno(dirfd);
+out:
+    g_free(name);
+    g_free(dirpath);
     return ret;
 }
 
 static int local_utimensat(FsContext *s, V9fsPath *fs_path,
                            const struct timespec *buf)
 {
-    char *buffer;
-    int ret;
-    char *path = fs_path->data;
+    char *dirpath = g_path_get_dirname(fs_path->data);
+    char *name = g_path_get_basename(fs_path->data);
+    int dirfd, ret = -1;
+
+    dirfd = local_opendir_nofollow(s, dirpath);
+    if (dirfd == -1) {
+        goto out;
+    }
 
-    buffer = rpath(s, path);
-    ret = qemu_utimens(buffer, buf);
-    g_free(buffer);
+    ret = utimensat(dirfd, name, buf, AT_SYMLINK_NOFOLLOW);
+    close_preserve_errno(dirfd);
+out:
+    g_free(dirpath);
+    g_free(name);
     return ret;
 }
 
-static int local_remove(FsContext *ctx, const char *path)
+static int local_unlinkat_common(FsContext *ctx, int dirfd, const char *name,
+                                 int flags)
 {
-    int err;
-    struct stat stbuf;
-    char *buffer;
+    int ret = -1;
 
     if (ctx->export_flags & V9FS_SM_MAPPED_FILE) {
-        buffer = rpath(ctx, path);
-        err =  lstat(buffer, &stbuf);
-        g_free(buffer);
-        if (err) {
-            goto err_out;
-        }
-        /*
-         * If directory remove .virtfs_metadata contained in the
-         * directory
-         */
-        if (S_ISDIR(stbuf.st_mode)) {
-            buffer = g_strdup_printf("%s/%s/%s", ctx->fs_root,
-                                     path, VIRTFS_META_DIR);
-            err = remove(buffer);
-            g_free(buffer);
-            if (err < 0 && errno != ENOENT) {
+        int map_dirfd;
+
+        if (flags == AT_REMOVEDIR) {
+            int fd;
+
+            fd = openat(dirfd, name, O_RDONLY | O_DIRECTORY | O_PATH);
+            if (fd == -1) {
+                goto err_out;
+            }
+            /*
+             * If directory remove .virtfs_metadata contained in the
+             * directory
+             */
+            ret = unlinkat(fd, VIRTFS_META_DIR, AT_REMOVEDIR);
+            close_preserve_errno(fd);
+            if (ret < 0 && errno != ENOENT) {
                 /*
                  * We didn't had the .virtfs_metadata file. May be file created
                  * in non-mapped mode ?. Ignore ENOENT.
@@ -972,12 +979,12 @@ static int local_remove(FsContext *ctx, const char *path)
         }
         /*
          * Now remove the name from parent directory
-         * .virtfs_metadata directory
+         * .virtfs_metadata directory.
          */
-        buffer = local_mapped_attr_path(ctx, path);
-        err = remove(buffer);
-        g_free(buffer);
-        if (err < 0 && errno != ENOENT) {
+        map_dirfd = openat_dir(dirfd, VIRTFS_META_DIR);
+        ret = unlinkat(map_dirfd, name, 0);
+        close_preserve_errno(map_dirfd);
+        if (ret < 0 && errno != ENOENT) {
             /*
              * We didn't had the .virtfs_metadata file. May be file created
              * in non-mapped mode ?. Ignore ENOENT.
@@ -986,10 +993,39 @@ static int local_remove(FsContext *ctx, const char *path)
         }
     }
 
-    buffer = rpath(ctx, path);
-    err = remove(buffer);
-    g_free(buffer);
+    ret = unlinkat(dirfd, name, flags);
+err_out:
+    return ret;
+}
+
+static int local_remove(FsContext *ctx, const char *path)
+{
+    struct stat stbuf;
+    char *dirpath = g_path_get_dirname(path);
+    char *name = g_path_get_basename(path);
+    int flags = 0;
+    int dirfd;
+    int err = -1;
+
+    dirfd = local_opendir_nofollow(ctx, dirpath);
+    if (dirfd) {
+        goto out;
+    }
+
+    if (fstatat(dirfd, path, &stbuf, AT_SYMLINK_NOFOLLOW) < 0) {
+        goto err_out;
+    }
+
+    if (S_ISDIR(stbuf.st_mode)) {
+        flags |= AT_REMOVEDIR;
+    }
+
+    err = local_unlinkat_common(ctx, dirfd, name, flags);
 err_out:
+    close_preserve_errno(dirfd);
+out:
+    g_free(name);
+    g_free(dirpath);
     return err;
 }
 
@@ -1013,13 +1049,11 @@ static int local_fsync(FsContext *ctx, int fid_type,
 
 static int local_statfs(FsContext *s, V9fsPath *fs_path, struct statfs *stbuf)
 {
-    char *buffer;
-    int ret;
-    char *path = fs_path->data;
+    int fd, ret;
 
-    buffer = rpath(s, path);
-    ret = statfs(buffer, stbuf);
-    g_free(buffer);
+    fd = local_open_nofollow(s, fs_path->data, O_RDONLY, 0);
+    ret = fstatfs(fd, stbuf);
+    close_preserve_errno(fd);
     return ret;
 }
 
@@ -1071,70 +1105,105 @@ static int local_renameat(FsContext *ctx, V9fsPath *olddir,
                           const char *new_name)
 {
     int ret;
-    V9fsString old_full_name, new_full_name;
+    int odirfd, ndirfd;
+
+    odirfd = local_opendir_nofollow(ctx, olddir->data);
+    if (odirfd == -1) {
+        return -1;
+    }
+
+    ndirfd = local_opendir_nofollow(ctx, newdir->data);
+    if (ndirfd == -1) {
+        close_preserve_errno(odirfd);
+        return -1;
+    }
+
+    ret = renameat(odirfd, old_name, ndirfd, new_name);
+    if (ret < 0) {
+        goto out;
+    }
+
+    if (ctx->export_flags & V9FS_SM_MAPPED_FILE) {
+        int omap_dirfd, nmap_dirfd;
 
-    v9fs_string_init(&old_full_name);
-    v9fs_string_init(&new_full_name);
+        ret = mkdirat(ndirfd, VIRTFS_META_DIR, 0700);
+        if (ret < 0 && errno != EEXIST) {
+            goto err_undo_rename;
+        }
+
+        omap_dirfd = openat_dir(odirfd, VIRTFS_META_DIR);
+        if (omap_dirfd == -1) {
+            goto err;
+        }
+
+        nmap_dirfd = openat_dir(ndirfd, VIRTFS_META_DIR);
+        if (nmap_dirfd == -1) {
+            close_preserve_errno(omap_dirfd);
+            goto err;
+        }
 
-    v9fs_string_sprintf(&old_full_name, "%s/%s", olddir->data, old_name);
-    v9fs_string_sprintf(&new_full_name, "%s/%s", newdir->data, new_name);
+        /* rename the .virtfs_metadata files */
+        ret = renameat(omap_dirfd, old_name, nmap_dirfd, new_name);
+        close_preserve_errno(nmap_dirfd);
+        close_preserve_errno(omap_dirfd);
+        if (ret < 0 && errno != ENOENT) {
+            goto err_undo_rename;
+        }
 
-    ret = local_rename(ctx, old_full_name.data, new_full_name.data);
-    v9fs_string_free(&old_full_name);
-    v9fs_string_free(&new_full_name);
+        ret = 0;
+    }
+    goto out;
+
+err:
+    ret = -1;
+err_undo_rename:
+    renameat_preserve_errno(ndirfd, new_name, odirfd, old_name);
+out:
+    close_preserve_errno(ndirfd);
+    close_preserve_errno(odirfd);
     return ret;
 }
 
+static void v9fs_path_init_dirname(V9fsPath *path, const char *str)
+{
+    path->data = g_path_get_dirname(str);
+    path->size = strlen(path->data) + 1;
+}
+
+static int local_rename(FsContext *ctx, const char *oldpath,
+                        const char *newpath)
+{
+    int err;
+    char *oname = g_path_get_basename(oldpath);
+    char *nname = g_path_get_basename(newpath);
+    V9fsPath olddir, newdir;
+
+    v9fs_path_init_dirname(&olddir, oldpath);
+    v9fs_path_init_dirname(&newdir, newpath);
+
+    err = local_renameat(ctx, &olddir, oname, &newdir, nname);
+
+    v9fs_path_free(&newdir);
+    v9fs_path_free(&olddir);
+    g_free(nname);
+    g_free(oname);
+
+    return err;
+}
+
 static int local_unlinkat(FsContext *ctx, V9fsPath *dir,
                           const char *name, int flags)
 {
     int ret;
-    V9fsString fullname;
-    char *buffer;
+    int dirfd;
 
-    v9fs_string_init(&fullname);
-
-    v9fs_string_sprintf(&fullname, "%s/%s", dir->data, name);
-    if (ctx->export_flags & V9FS_SM_MAPPED_FILE) {
-        if (flags == AT_REMOVEDIR) {
-            /*
-             * If directory remove .virtfs_metadata contained in the
-             * directory
-             */
-            buffer = g_strdup_printf("%s/%s/%s", ctx->fs_root,
-                                     fullname.data, VIRTFS_META_DIR);
-            ret = remove(buffer);
-            g_free(buffer);
-            if (ret < 0 && errno != ENOENT) {
-                /*
-                 * We didn't had the .virtfs_metadata file. May be file created
-                 * in non-mapped mode ?. Ignore ENOENT.
-                 */
-                goto err_out;
-            }
-        }
-        /*
-         * Now remove the name from parent directory
-         * .virtfs_metadata directory.
-         */
-        buffer = local_mapped_attr_path(ctx, fullname.data);
-        ret = remove(buffer);
-        g_free(buffer);
-        if (ret < 0 && errno != ENOENT) {
-            /*
-             * We didn't had the .virtfs_metadata file. May be file created
-             * in non-mapped mode ?. Ignore ENOENT.
-             */
-            goto err_out;
-        }
+    dirfd = local_opendir_nofollow(ctx, dir->data);
+    if (dirfd == -1) {
+        return -1;
     }
-    /* Remove the name finally */
-    buffer = rpath(ctx, fullname.data);
-    ret = remove(buffer);
-    g_free(buffer);
 
-err_out:
-    v9fs_string_free(&fullname);
+    ret = local_unlinkat_common(ctx, dirfd, name, flags);
+    close_preserve_errno(dirfd);
     return ret;
 }
 
@@ -1168,8 +1237,31 @@ static int local_ioc_getversion(FsContext *ctx, V9fsPath *path,
 
 static int local_init(FsContext *ctx)
 {
-    int err = 0;
     struct statfs stbuf;
+    LocalData *data = g_malloc(sizeof(*data));
+
+    data->mountfd = open(ctx->fs_root, O_DIRECTORY | O_RDONLY);
+    if (data->mountfd == -1) {
+        goto err;
+    }
+
+#ifdef FS_IOC_GETVERSION
+    /*
+     * use ioc_getversion only if the ioctl is definied
+     */
+    if (fstatfs(data->mountfd, &stbuf) < 0) {
+        close_preserve_errno(data->mountfd);
+        goto err;
+    }
+    switch (stbuf.f_type) {
+    case EXT2_SUPER_MAGIC:
+    case BTRFS_SUPER_MAGIC:
+    case REISERFS_SUPER_MAGIC:
+    case XFS_SUPER_MAGIC:
+        ctx->exops.get_st_gen = local_ioc_getversion;
+        break;
+    }
+#endif
 
     if (ctx->export_flags & V9FS_SM_PASSTHROUGH) {
         ctx->xops = passthrough_xattr_ops;
@@ -1185,29 +1277,28 @@ static int local_init(FsContext *ctx)
         ctx->xops = passthrough_xattr_ops;
     }
     ctx->export_flags |= V9FS_PATHNAME_FSCONTEXT;
-#ifdef FS_IOC_GETVERSION
-    /*
-     * use ioc_getversion only if the iocl is definied
-     */
-    err = statfs(ctx->fs_root, &stbuf);
-    if (!err) {
-        switch (stbuf.f_type) {
-        case EXT2_SUPER_MAGIC:
-        case BTRFS_SUPER_MAGIC:
-        case REISERFS_SUPER_MAGIC:
-        case XFS_SUPER_MAGIC:
-            ctx->exops.get_st_gen = local_ioc_getversion;
-            break;
-        }
-    }
-#endif
-    return err;
+
+    ctx->private = data;
+    return 0;
+
+err:
+    g_free(data);
+    return -1;
+}
+
+static void local_cleanup(FsContext *ctx)
+{
+    LocalData *data = ctx->private;
+
+    close(data->mountfd);
+    g_free(data);
 }
 
 static int local_parse_opts(QemuOpts *opts, struct FsDriverEntry *fse)
 {
     const char *sec_model = qemu_opt_get(opts, "security_model");
     const char *path = qemu_opt_get(opts, "path");
+    Error *err = NULL;
 
     if (!sec_model) {
         error_report("Security model not specified, local fs needs security model");
@@ -1236,6 +1327,13 @@ static int local_parse_opts(QemuOpts *opts, struct FsDriverEntry *fse)
         error_report("fsdev: No path specified");
         return -1;
     }
+
+    fsdev_throttle_parse_opts(opts, &fse->fst, &err);
+    if (err) {
+        error_reportf_err(err, "Throttle configuration is not valid: ");
+        return -1;
+    }
+
     fse->path = g_strdup(path);
 
     return 0;
@@ -1244,6 +1342,7 @@ static int local_parse_opts(QemuOpts *opts, struct FsDriverEntry *fse)
 FileOperations local_ops = {
     .parse_opts = local_parse_opts,
     .init  = local_init,
+    .cleanup = local_cleanup,
     .lstat = local_lstat,
     .readlink = local_readlink,
     .close = local_close,
diff --git a/hw/9pfs/9p-local.h b/hw/9pfs/9p-local.h
new file mode 100644
index 0000000000..32c72749d9
--- /dev/null
+++ b/hw/9pfs/9p-local.h
@@ -0,0 +1,20 @@
+/*
+ * 9p local backend utilities
+ *
+ * Copyright IBM, Corp. 2017
+ *
+ * Authors:
+ *  Greg Kurz <groug@kaod.org>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef QEMU_9P_LOCAL_H
+#define QEMU_9P_LOCAL_H
+
+int local_open_nofollow(FsContext *fs_ctx, const char *path, int flags,
+                        mode_t mode);
+int local_opendir_nofollow(FsContext *fs_ctx, const char *path);
+
+#endif
diff --git a/hw/9pfs/9p-posix-acl.c b/hw/9pfs/9p-posix-acl.c
index ec003181cd..bbf89064f7 100644
--- a/hw/9pfs/9p-posix-acl.c
+++ b/hw/9pfs/9p-posix-acl.c
@@ -25,13 +25,7 @@
 static ssize_t mp_pacl_getxattr(FsContext *ctx, const char *path,
                                 const char *name, void *value, size_t size)
 {
-    char *buffer;
-    ssize_t ret;
-
-    buffer = rpath(ctx, path);
-    ret = lgetxattr(buffer, MAP_ACL_ACCESS, value, size);
-    g_free(buffer);
-    return ret;
+    return local_getxattr_nofollow(ctx, path, MAP_ACL_ACCESS, value, size);
 }
 
 static ssize_t mp_pacl_listxattr(FsContext *ctx, const char *path,
@@ -56,23 +50,16 @@ static ssize_t mp_pacl_listxattr(FsContext *ctx, const char *path,
 static int mp_pacl_setxattr(FsContext *ctx, const char *path, const char *name,
                             void *value, size_t size, int flags)
 {
-    char *buffer;
-    int ret;
-
-    buffer = rpath(ctx, path);
-    ret = lsetxattr(buffer, MAP_ACL_ACCESS, value, size, flags);
-    g_free(buffer);
-    return ret;
+    return local_setxattr_nofollow(ctx, path, MAP_ACL_ACCESS, value, size,
+                                   flags);
 }
 
 static int mp_pacl_removexattr(FsContext *ctx,
                                const char *path, const char *name)
 {
     int ret;
-    char *buffer;
 
-    buffer = rpath(ctx, path);
-    ret  = lremovexattr(buffer, MAP_ACL_ACCESS);
+    ret = local_removexattr_nofollow(ctx, path, MAP_ACL_ACCESS);
     if (ret == -1 && errno == ENODATA) {
         /*
          * We don't get ENODATA error when trying to remove a
@@ -82,20 +69,13 @@ static int mp_pacl_removexattr(FsContext *ctx,
         errno = 0;
         ret = 0;
     }
-    g_free(buffer);
     return ret;
 }
 
 static ssize_t mp_dacl_getxattr(FsContext *ctx, const char *path,
                                 const char *name, void *value, size_t size)
 {
-    char *buffer;
-    ssize_t ret;
-
-    buffer = rpath(ctx, path);
-    ret = lgetxattr(buffer, MAP_ACL_DEFAULT, value, size);
-    g_free(buffer);
-    return ret;
+    return local_getxattr_nofollow(ctx, path, MAP_ACL_DEFAULT, value, size);
 }
 
 static ssize_t mp_dacl_listxattr(FsContext *ctx, const char *path,
@@ -120,23 +100,16 @@ static ssize_t mp_dacl_listxattr(FsContext *ctx, const char *path,
 static int mp_dacl_setxattr(FsContext *ctx, const char *path, const char *name,
                             void *value, size_t size, int flags)
 {
-    char *buffer;
-    int ret;
-
-    buffer = rpath(ctx, path);
-    ret = lsetxattr(buffer, MAP_ACL_DEFAULT, value, size, flags);
-    g_free(buffer);
-    return ret;
+    return local_setxattr_nofollow(ctx, path, MAP_ACL_DEFAULT, value, size,
+                                   flags);
 }
 
 static int mp_dacl_removexattr(FsContext *ctx,
                                const char *path, const char *name)
 {
     int ret;
-    char *buffer;
 
-    buffer = rpath(ctx, path);
-    ret  = lremovexattr(buffer, MAP_ACL_DEFAULT);
+    ret = local_removexattr_nofollow(ctx, path, MAP_ACL_DEFAULT);
     if (ret == -1 && errno == ENODATA) {
         /*
          * We don't get ENODATA error when trying to remove a
@@ -146,7 +119,6 @@ static int mp_dacl_removexattr(FsContext *ctx,
         errno = 0;
         ret = 0;
     }
-    g_free(buffer);
     return ret;
 }
 
diff --git a/hw/9pfs/9p-util.c b/hw/9pfs/9p-util.c
new file mode 100644
index 0000000000..fdb4d57376
--- /dev/null
+++ b/hw/9pfs/9p-util.c
@@ -0,0 +1,69 @@
+/*
+ * 9p utilities
+ *
+ * Copyright IBM, Corp. 2017
+ *
+ * Authors:
+ *  Greg Kurz <groug@kaod.org>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/xattr.h"
+#include "9p-util.h"
+
+int relative_openat_nofollow(int dirfd, const char *path, int flags,
+                             mode_t mode)
+{
+    int fd;
+
+    fd = dup(dirfd);
+    if (fd == -1) {
+        return -1;
+    }
+
+    while (*path) {
+        const char *c;
+        int next_fd;
+        char *head;
+
+        /* Only relative paths without consecutive slashes */
+        assert(path[0] != '/');
+
+        head = g_strdup(path);
+        c = strchr(path, '/');
+        if (c) {
+            head[c - path] = 0;
+            next_fd = openat_dir(fd, head);
+        } else {
+            next_fd = openat_file(fd, head, flags, mode);
+        }
+        g_free(head);
+        if (next_fd == -1) {
+            close_preserve_errno(fd);
+            return -1;
+        }
+        close(fd);
+        fd = next_fd;
+
+        if (!c) {
+            break;
+        }
+        path = c + 1;
+    }
+
+    return fd;
+}
+
+ssize_t fgetxattrat_nofollow(int dirfd, const char *filename, const char *name,
+                             void *value, size_t size)
+{
+    char *proc_path = g_strdup_printf("/proc/self/fd/%d/%s", dirfd, filename);
+    int ret;
+
+    ret = lgetxattr(proc_path, name, value, size);
+    g_free(proc_path);
+    return ret;
+}
diff --git a/hw/9pfs/9p-util.h b/hw/9pfs/9p-util.h
new file mode 100644
index 0000000000..091f3ce88e
--- /dev/null
+++ b/hw/9pfs/9p-util.h
@@ -0,0 +1,54 @@
+/*
+ * 9p utilities
+ *
+ * Copyright IBM, Corp. 2017
+ *
+ * Authors:
+ *  Greg Kurz <groug@kaod.org>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef QEMU_9P_UTIL_H
+#define QEMU_9P_UTIL_H
+
+static inline void close_preserve_errno(int fd)
+{
+    int serrno = errno;
+    close(fd);
+    errno = serrno;
+}
+
+static inline int openat_dir(int dirfd, const char *name)
+{
+    return openat(dirfd, name, O_DIRECTORY | O_RDONLY | O_PATH);
+}
+
+static inline int openat_file(int dirfd, const char *name, int flags,
+                              mode_t mode)
+{
+    int fd, serrno, ret;
+
+    fd = openat(dirfd, name, flags | O_NOFOLLOW | O_NOCTTY | O_NONBLOCK,
+                mode);
+    if (fd == -1) {
+        return -1;
+    }
+
+    serrno = errno;
+    /* O_NONBLOCK was only needed to open the file. Let's drop it. */
+    ret = fcntl(fd, F_SETFL, flags);
+    assert(!ret);
+    errno = serrno;
+    return fd;
+}
+
+int relative_openat_nofollow(int dirfd, const char *path, int flags,
+                             mode_t mode);
+ssize_t fgetxattrat_nofollow(int dirfd, const char *path, const char *name,
+                             void *value, size_t size);
+int fsetxattrat_nofollow(int dirfd, const char *path, const char *name,
+                         void *value, size_t size, int flags);
+
+#endif
diff --git a/hw/9pfs/9p-xattr-user.c b/hw/9pfs/9p-xattr-user.c
index f87530c8b5..2c90817b75 100644
--- a/hw/9pfs/9p-xattr-user.c
+++ b/hw/9pfs/9p-xattr-user.c
@@ -20,9 +20,6 @@
 static ssize_t mp_user_getxattr(FsContext *ctx, const char *path,
                                 const char *name, void *value, size_t size)
 {
-    char *buffer;
-    ssize_t ret;
-
     if (strncmp(name, "user.virtfs.", 12) == 0) {
         /*
          * Don't allow fetch of user.virtfs namesapce
@@ -31,10 +28,7 @@ static ssize_t mp_user_getxattr(FsContext *ctx, const char *path,
         errno = ENOATTR;
         return -1;
     }
-    buffer = rpath(ctx, path);
-    ret = lgetxattr(buffer, name, value, size);
-    g_free(buffer);
-    return ret;
+    return local_getxattr_nofollow(ctx, path, name, value, size);
 }
 
 static ssize_t mp_user_listxattr(FsContext *ctx, const char *path,
@@ -73,9 +67,6 @@ static ssize_t mp_user_listxattr(FsContext *ctx, const char *path,
 static int mp_user_setxattr(FsContext *ctx, const char *path, const char *name,
                             void *value, size_t size, int flags)
 {
-    char *buffer;
-    int ret;
-
     if (strncmp(name, "user.virtfs.", 12) == 0) {
         /*
          * Don't allow fetch of user.virtfs namesapce
@@ -84,18 +75,12 @@ static int mp_user_setxattr(FsContext *ctx, const char *path, const char *name,
         errno = EACCES;
         return -1;
     }
-    buffer = rpath(ctx, path);
-    ret = lsetxattr(buffer, name, value, size, flags);
-    g_free(buffer);
-    return ret;
+    return local_setxattr_nofollow(ctx, path, name, value, size, flags);
 }
 
 static int mp_user_removexattr(FsContext *ctx,
                                const char *path, const char *name)
 {
-    char *buffer;
-    int ret;
-
     if (strncmp(name, "user.virtfs.", 12) == 0) {
         /*
          * Don't allow fetch of user.virtfs namesapce
@@ -104,10 +89,7 @@ static int mp_user_removexattr(FsContext *ctx,
         errno = EACCES;
         return -1;
     }
-    buffer = rpath(ctx, path);
-    ret = lremovexattr(buffer, name);
-    g_free(buffer);
-    return ret;
+    return local_removexattr_nofollow(ctx, path, name);
 }
 
 XattrOperations mapped_user_xattr = {
diff --git a/hw/9pfs/9p-xattr.c b/hw/9pfs/9p-xattr.c
index 5d8595ed93..eec160b3c2 100644
--- a/hw/9pfs/9p-xattr.c
+++ b/hw/9pfs/9p-xattr.c
@@ -15,6 +15,8 @@
 #include "9p.h"
 #include "fsdev/file-op-9p.h"
 #include "9p-xattr.h"
+#include "9p-util.h"
+#include "9p-local.h"
 
 
 static XattrOperations *get_xattr_operations(XattrOperations **h,
@@ -58,6 +60,16 @@ ssize_t pt_listxattr(FsContext *ctx, const char *path,
     return name_size;
 }
 
+static ssize_t flistxattrat_nofollow(int dirfd, const char *filename,
+                                     char *list, size_t size)
+{
+    char *proc_path = g_strdup_printf("/proc/self/fd/%d/%s", dirfd, filename);
+    int ret;
+
+    ret = llistxattr(proc_path, list, size);
+    g_free(proc_path);
+    return ret;
+}
 
 /*
  * Get the list and pass to each layer to find out whether
@@ -67,24 +79,37 @@ ssize_t v9fs_list_xattr(FsContext *ctx, const char *path,
                         void *value, size_t vsize)
 {
     ssize_t size = 0;
-    char *buffer;
     void *ovalue = value;
     XattrOperations *xops;
     char *orig_value, *orig_value_start;
     ssize_t xattr_len, parsed_len = 0, attr_len;
+    char *dirpath, *name;
+    int dirfd;
 
     /* Get the actual len */
-    buffer = rpath(ctx, path);
-    xattr_len = llistxattr(buffer, value, 0);
+    dirpath = g_path_get_dirname(path);
+    dirfd = local_opendir_nofollow(ctx, dirpath);
+    g_free(dirpath);
+    if (dirfd == -1) {
+        return -1;
+    }
+
+    name = g_path_get_basename(path);
+    xattr_len = flistxattrat_nofollow(dirfd, name, value, 0);
     if (xattr_len <= 0) {
-        g_free(buffer);
+        g_free(name);
+        close_preserve_errno(dirfd);
         return xattr_len;
     }
 
     /* Now fetch the xattr and find the actual size */
     orig_value = g_malloc(xattr_len);
-    xattr_len = llistxattr(buffer, orig_value, xattr_len);
-    g_free(buffer);
+    xattr_len = flistxattrat_nofollow(dirfd, name, orig_value, xattr_len);
+    g_free(name);
+    close_preserve_errno(dirfd);
+    if (xattr_len < 0) {
+        return -1;
+    }
 
     /* store the orig pointer */
     orig_value_start = orig_value;
@@ -143,6 +168,135 @@ int v9fs_remove_xattr(FsContext *ctx,
 
 }
 
+ssize_t local_getxattr_nofollow(FsContext *ctx, const char *path,
+                                const char *name, void *value, size_t size)
+{
+    char *dirpath = g_path_get_dirname(path);
+    char *filename = g_path_get_basename(path);
+    int dirfd;
+    ssize_t ret = -1;
+
+    dirfd = local_opendir_nofollow(ctx, dirpath);
+    if (dirfd == -1) {
+        goto out;
+    }
+
+    ret = fgetxattrat_nofollow(dirfd, filename, name, value, size);
+    close_preserve_errno(dirfd);
+out:
+    g_free(dirpath);
+    g_free(filename);
+    return ret;
+}
+
+ssize_t pt_getxattr(FsContext *ctx, const char *path, const char *name,
+                    void *value, size_t size)
+{
+    return local_getxattr_nofollow(ctx, path, name, value, size);
+}
+
+int fsetxattrat_nofollow(int dirfd, const char *filename, const char *name,
+                         void *value, size_t size, int flags)
+{
+    char *proc_path = g_strdup_printf("/proc/self/fd/%d/%s", dirfd, filename);
+    int ret;
+
+    ret = lsetxattr(proc_path, name, value, size, flags);
+    g_free(proc_path);
+    return ret;
+}
+
+ssize_t local_setxattr_nofollow(FsContext *ctx, const char *path,
+                                const char *name, void *value, size_t size,
+                                int flags)
+{
+    char *dirpath = g_path_get_dirname(path);
+    char *filename = g_path_get_basename(path);
+    int dirfd;
+    ssize_t ret = -1;
+
+    dirfd = local_opendir_nofollow(ctx, dirpath);
+    if (dirfd == -1) {
+        goto out;
+    }
+
+    ret = fsetxattrat_nofollow(dirfd, filename, name, value, size, flags);
+    close_preserve_errno(dirfd);
+out:
+    g_free(dirpath);
+    g_free(filename);
+    return ret;
+}
+
+int pt_setxattr(FsContext *ctx, const char *path, const char *name, void *value,
+                size_t size, int flags)
+{
+    return local_setxattr_nofollow(ctx, path, name, value, size, flags);
+}
+
+static ssize_t fremovexattrat_nofollow(int dirfd, const char *filename,
+                                       const char *name)
+{
+    char *proc_path = g_strdup_printf("/proc/self/fd/%d/%s", dirfd, filename);
+    int ret;
+
+    ret = lremovexattr(proc_path, name);
+    g_free(proc_path);
+    return ret;
+}
+
+ssize_t local_removexattr_nofollow(FsContext *ctx, const char *path,
+                                   const char *name)
+{
+    char *dirpath = g_path_get_dirname(path);
+    char *filename = g_path_get_basename(path);
+    int dirfd;
+    ssize_t ret = -1;
+
+    dirfd = local_opendir_nofollow(ctx, dirpath);
+    if (dirfd == -1) {
+        goto out;
+    }
+
+    ret = fremovexattrat_nofollow(dirfd, filename, name);
+    close_preserve_errno(dirfd);
+out:
+    g_free(dirpath);
+    g_free(filename);
+    return ret;
+}
+
+int pt_removexattr(FsContext *ctx, const char *path, const char *name)
+{
+    return local_removexattr_nofollow(ctx, path, name);
+}
+
+ssize_t notsup_getxattr(FsContext *ctx, const char *path, const char *name,
+                        void *value, size_t size)
+{
+    errno = ENOTSUP;
+    return -1;
+}
+
+int notsup_setxattr(FsContext *ctx, const char *path, const char *name,
+                    void *value, size_t size, int flags)
+{
+    errno = ENOTSUP;
+    return -1;
+}
+
+ssize_t notsup_listxattr(FsContext *ctx, const char *path, char *name,
+                         void *value, size_t size)
+{
+    return 0;
+}
+
+int notsup_removexattr(FsContext *ctx, const char *path, const char *name)
+{
+    errno = ENOTSUP;
+    return -1;
+}
+
 XattrOperations *mapped_xattr_ops[] = {
     &mapped_user_xattr,
     &mapped_pacl_xattr,
diff --git a/hw/9pfs/9p-xattr.h b/hw/9pfs/9p-xattr.h
index a853ea641c..0d83996575 100644
--- a/hw/9pfs/9p-xattr.h
+++ b/hw/9pfs/9p-xattr.h
@@ -29,6 +29,13 @@ typedef struct xattr_operations
                        const char *path, const char *name);
 } XattrOperations;
 
+ssize_t local_getxattr_nofollow(FsContext *ctx, const char *path,
+                                const char *name, void *value, size_t size);
+ssize_t local_setxattr_nofollow(FsContext *ctx, const char *path,
+                                const char *name, void *value, size_t size,
+                                int flags);
+ssize_t local_removexattr_nofollow(FsContext *ctx, const char *path,
+                                   const char *name);
 
 extern XattrOperations mapped_user_xattr;
 extern XattrOperations passthrough_user_xattr;
@@ -49,73 +56,21 @@ ssize_t v9fs_list_xattr(FsContext *ctx, const char *path, void *value,
 int v9fs_set_xattr(FsContext *ctx, const char *path, const char *name,
                           void *value, size_t size, int flags);
 int v9fs_remove_xattr(FsContext *ctx, const char *path, const char *name);
+
 ssize_t pt_listxattr(FsContext *ctx, const char *path, char *name, void *value,
                      size_t size);
-
-static inline ssize_t pt_getxattr(FsContext *ctx, const char *path,
-                                  const char *name, void *value, size_t size)
-{
-    char *buffer;
-    ssize_t ret;
-
-    buffer = rpath(ctx, path);
-    ret = lgetxattr(buffer, name, value, size);
-    g_free(buffer);
-    return ret;
-}
-
-static inline int pt_setxattr(FsContext *ctx, const char *path,
-                              const char *name, void *value,
-                              size_t size, int flags)
-{
-    char *buffer;
-    int ret;
-
-    buffer = rpath(ctx, path);
-    ret = lsetxattr(buffer, name, value, size, flags);
-    g_free(buffer);
-    return ret;
-}
-
-static inline int pt_removexattr(FsContext *ctx,
-                                 const char *path, const char *name)
-{
-    char *buffer;
-    int ret;
-
-    buffer = rpath(ctx, path);
-    ret = lremovexattr(path, name);
-    g_free(buffer);
-    return ret;
-}
-
-static inline ssize_t notsup_getxattr(FsContext *ctx, const char *path,
-                                      const char *name, void *value,
-                                      size_t size)
-{
-    errno = ENOTSUP;
-    return -1;
-}
-
-static inline int notsup_setxattr(FsContext *ctx, const char *path,
-                                  const char *name, void *value,
-                                  size_t size, int flags)
-{
-    errno = ENOTSUP;
-    return -1;
-}
-
-static inline ssize_t notsup_listxattr(FsContext *ctx, const char *path,
-                                       char *name, void *value, size_t size)
-{
-    return 0;
-}
-
-static inline int notsup_removexattr(FsContext *ctx,
-                                     const char *path, const char *name)
-{
-    errno = ENOTSUP;
-    return -1;
-}
+ssize_t pt_getxattr(FsContext *ctx, const char *path, const char *name,
+                    void *value, size_t size);
+int pt_setxattr(FsContext *ctx, const char *path, const char *name, void *value,
+                size_t size, int flags);
+int pt_removexattr(FsContext *ctx, const char *path, const char *name);
+
+ssize_t notsup_getxattr(FsContext *ctx, const char *path, const char *name,
+                        void *value, size_t size);
+int notsup_setxattr(FsContext *ctx, const char *path, const char *name,
+                    void *value, size_t size, int flags);
+ssize_t notsup_listxattr(FsContext *ctx, const char *path, char *name,
+                         void *value, size_t size);
+int notsup_removexattr(FsContext *ctx, const char *path, const char *name);
 
 #endif
diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c
index 3af1c93dc8..76c9247c77 100644
--- a/hw/9pfs/9p.c
+++ b/hw/9pfs/9p.c
@@ -3010,7 +3010,6 @@ out_nofid:
  */
 static void coroutine_fn v9fs_lock(void *opaque)
 {
-    int8_t status;
     V9fsFlock flock;
     size_t offset = 7;
     struct stat stbuf;
@@ -3018,7 +3017,6 @@ static void coroutine_fn v9fs_lock(void *opaque)
     int32_t fid, err = 0;
     V9fsPDU *pdu = opaque;
 
-    status = P9_LOCK_ERROR;
     v9fs_string_init(&flock.client_id);
     err = pdu_unmarshal(pdu, offset, "dbdqqds", &fid, &flock.type,
                         &flock.flags, &flock.start, &flock.length,
@@ -3044,15 +3042,15 @@ static void coroutine_fn v9fs_lock(void *opaque)
     if (err < 0) {
         goto out;
     }
-    status = P9_LOCK_SUCCESS;
+    err = pdu_marshal(pdu, offset, "b", P9_LOCK_SUCCESS);
+    if (err < 0) {
+        goto out;
+    }
+    err += offset;
+    trace_v9fs_lock_return(pdu->tag, pdu->id, P9_LOCK_SUCCESS);
 out:
     put_fid(pdu, fidp);
 out_nofid:
-    err = pdu_marshal(pdu, offset, "b", status);
-    if (err > 0) {
-        err += offset;
-    }
-    trace_v9fs_lock_return(pdu->tag, pdu->id, status);
     pdu_complete(pdu, err);
     v9fs_string_free(&flock.client_id);
 }
@@ -3531,6 +3529,10 @@ int v9fs_device_realize_common(V9fsState *s, Error **errp)
         error_setg(errp, "share path %s is not a directory", fse->path);
         goto out;
     }
+
+    s->ctx.fst = &fse->fst;
+    fsdev_throttle_init(s->ctx.fst);
+
     v9fs_path_free(&path);
 
     rc = 0;
@@ -3551,6 +3553,7 @@ void v9fs_device_unrealize_common(V9fsState *s, Error **errp)
     if (s->ops->cleanup) {
         s->ops->cleanup(&s->ctx);
     }
+    fsdev_throttle_cleanup(s->ctx.fst);
     g_free(s->tag);
     g_free(s->ctx.fs_root);
 }
diff --git a/hw/9pfs/Makefile.objs b/hw/9pfs/Makefile.objs
index da0ae0cfdb..32197e6671 100644
--- a/hw/9pfs/Makefile.objs
+++ b/hw/9pfs/Makefile.objs
@@ -1,4 +1,4 @@
-common-obj-y  = 9p.o
+common-obj-y  = 9p.o 9p-util.o
 common-obj-y += 9p-local.o 9p-xattr.o
 common-obj-y += 9p-xattr-user.o 9p-posix-acl.o
 common-obj-y += coth.o cofs.o codir.o cofile.o
diff --git a/hw/9pfs/cofile.c b/hw/9pfs/cofile.c
index 120e267108..88791bc327 100644
--- a/hw/9pfs/cofile.c
+++ b/hw/9pfs/cofile.c
@@ -247,6 +247,7 @@ int coroutine_fn v9fs_co_pwritev(V9fsPDU *pdu, V9fsFidState *fidp,
     if (v9fs_request_cancelled(pdu)) {
         return -EINTR;
     }
+    fsdev_co_throttle_request(s->ctx.fst, true, iov, iovcnt);
     v9fs_co_run_in_worker(
         {
             err = s->ops->pwritev(&s->ctx, &fidp->fs, iov, iovcnt, offset);
@@ -266,6 +267,7 @@ int coroutine_fn v9fs_co_preadv(V9fsPDU *pdu, V9fsFidState *fidp,
     if (v9fs_request_cancelled(pdu)) {
         return -EINTR;
     }
+    fsdev_co_throttle_request(s->ctx.fst, false, iov, iovcnt);
     v9fs_co_run_in_worker(
         {
             err = s->ops->preadv(&s->ctx, &fidp->fs, iov, iovcnt, offset);
diff --git a/hw/acpi/Makefile.objs b/hw/acpi/Makefile.objs
index 6acf79860a..11c35bcb44 100644
--- a/hw/acpi/Makefile.objs
+++ b/hw/acpi/Makefile.objs
@@ -5,6 +5,7 @@ common-obj-$(CONFIG_ACPI_CPU_HOTPLUG) += cpu_hotplug.o
 common-obj-$(CONFIG_ACPI_MEMORY_HOTPLUG) += memory_hotplug.o
 common-obj-$(CONFIG_ACPI_CPU_HOTPLUG) += cpu.o
 common-obj-$(CONFIG_ACPI_NVDIMM) += nvdimm.o
+common-obj-$(CONFIG_ACPI_VMGENID) += vmgenid.o
 common-obj-$(call lnot,$(CONFIG_ACPI_X86)) += acpi-stub.o
 
 common-obj-y += acpi_interface.o
diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c
index b2a1e4033b..c6f2032dec 100644
--- a/hw/acpi/aml-build.c
+++ b/hw/acpi/aml-build.c
@@ -1559,6 +1559,7 @@ void acpi_build_tables_init(AcpiBuildTables *tables)
     tables->rsdp = g_array_new(false, true /* clear */, 1);
     tables->table_data = g_array_new(false, true /* clear */, 1);
     tables->tcpalog = g_array_new(false, true /* clear */, 1);
+    tables->vmgenid = g_array_new(false, true /* clear */, 1);
     tables->linker = bios_linker_loader_init();
 }
 
@@ -1568,6 +1569,7 @@ void acpi_build_tables_cleanup(AcpiBuildTables *tables, bool mfre)
     g_array_free(tables->rsdp, true);
     g_array_free(tables->table_data, true);
     g_array_free(tables->tcpalog, mfre);
+    g_array_free(tables->vmgenid, mfre);
 }
 
 /* Build rsdt table */
diff --git a/hw/acpi/bios-linker-loader.c b/hw/acpi/bios-linker-loader.c
index d963ebe241..046183a0f1 100644
--- a/hw/acpi/bios-linker-loader.c
+++ b/hw/acpi/bios-linker-loader.c
@@ -78,6 +78,21 @@ struct BiosLinkerLoaderEntry {
             uint32_t length;
         } cksum;
 
+        /*
+         * COMMAND_WRITE_POINTER - write the fw_cfg file (originating from
+         * @dest_file) at @wr_pointer.offset, by adding a pointer to
+         * @src_offset within the table originating from @src_file.
+         * 1,2,4 or 8 byte unsigned addition is used depending on
+         * @wr_pointer.size.
+         */
+        struct {
+            char dest_file[BIOS_LINKER_LOADER_FILESZ];
+            char src_file[BIOS_LINKER_LOADER_FILESZ];
+            uint32_t dst_offset;
+            uint32_t src_offset;
+            uint8_t size;
+        } wr_pointer;
+
         /* padding */
         char pad[124];
     };
@@ -85,9 +100,10 @@ struct BiosLinkerLoaderEntry {
 typedef struct BiosLinkerLoaderEntry BiosLinkerLoaderEntry;
 
 enum {
-    BIOS_LINKER_LOADER_COMMAND_ALLOCATE     = 0x1,
-    BIOS_LINKER_LOADER_COMMAND_ADD_POINTER  = 0x2,
-    BIOS_LINKER_LOADER_COMMAND_ADD_CHECKSUM = 0x3,
+    BIOS_LINKER_LOADER_COMMAND_ALLOCATE          = 0x1,
+    BIOS_LINKER_LOADER_COMMAND_ADD_POINTER       = 0x2,
+    BIOS_LINKER_LOADER_COMMAND_ADD_CHECKSUM      = 0x3,
+    BIOS_LINKER_LOADER_COMMAND_WRITE_POINTER     = 0x4,
 };
 
 enum {
@@ -278,3 +294,47 @@ void bios_linker_loader_add_pointer(BIOSLinker *linker,
 
     g_array_append_vals(linker->cmd_blob, &entry, sizeof entry);
 }
+
+/*
+ * bios_linker_loader_write_pointer: ask guest to write a pointer to the
+ * source file into the destination file, and write it back to QEMU via
+ * fw_cfg DMA.
+ *
+ * @linker: linker object instance
+ * @dest_file: destination file that must be written
+ * @dst_patched_offset: location within destination file blob to be patched
+ *                      with the pointer to @src_file, in bytes
+ * @dst_patched_offset_size: size of the pointer to be patched
+ *                      at @dst_patched_offset in @dest_file blob, in bytes
+ * @src_file: source file who's address must be taken
+ * @src_offset: location within source file blob to which
+ *              @dest_file+@dst_patched_offset will point to after
+ *              firmware's executed WRITE_POINTER command
+ */
+void bios_linker_loader_write_pointer(BIOSLinker *linker,
+                                    const char *dest_file,
+                                    uint32_t dst_patched_offset,
+                                    uint8_t dst_patched_size,
+                                    const char *src_file,
+                                    uint32_t src_offset)
+{
+    BiosLinkerLoaderEntry entry;
+    const BiosLinkerFileEntry *source_file =
+        bios_linker_find_file(linker, src_file);
+
+    assert(source_file);
+    assert(src_offset < source_file->blob->len);
+    memset(&entry, 0, sizeof entry);
+    strncpy(entry.wr_pointer.dest_file, dest_file,
+            sizeof entry.wr_pointer.dest_file - 1);
+    strncpy(entry.wr_pointer.src_file, src_file,
+            sizeof entry.wr_pointer.src_file - 1);
+    entry.command = cpu_to_le32(BIOS_LINKER_LOADER_COMMAND_WRITE_POINTER);
+    entry.wr_pointer.dst_offset = cpu_to_le32(dst_patched_offset);
+    entry.wr_pointer.src_offset = cpu_to_le32(src_offset);
+    entry.wr_pointer.size = dst_patched_size;
+    assert(dst_patched_size == 1 || dst_patched_size == 2 ||
+           dst_patched_size == 4 || dst_patched_size == 8);
+
+    g_array_append_vals(linker->cmd_blob, &entry, sizeof entry);
+}
diff --git a/hw/acpi/cpu.c b/hw/acpi/cpu.c
index 6017ca04bf..8c719d3f9d 100644
--- a/hw/acpi/cpu.c
+++ b/hw/acpi/cpu.c
@@ -198,7 +198,7 @@ void cpu_hotplug_hw_init(MemoryRegion *as, Object *owner,
     state->dev_count = id_list->len;
     state->devs = g_new0(typeof(*state->devs), state->dev_count);
     for (i = 0; i < id_list->len; i++) {
-        state->devs[i].cpu =  id_list->cpus[i].cpu;
+        state->devs[i].cpu =  CPU(id_list->cpus[i].cpu);
         state->devs[i].arch_id = id_list->cpus[i].arch_id;
     }
     memory_region_init_io(&state->ctrl_reg, owner, &cpu_hotplug_ops, state,
diff --git a/hw/acpi/pcihp.c b/hw/acpi/pcihp.c
index d957d1e30d..2b0f3e1bfb 100644
--- a/hw/acpi/pcihp.c
+++ b/hw/acpi/pcihp.c
@@ -49,7 +49,6 @@
 
 #define ACPI_PCIHP_ADDR 0xae00
 #define ACPI_PCIHP_SIZE 0x0014
-#define ACPI_PCIHP_LEGACY_SIZE 0x000f
 #define PCI_UP_BASE 0x0000
 #define PCI_DOWN_BASE 0x0004
 #define PCI_EJ_BASE 0x0008
@@ -302,16 +301,6 @@ void acpi_pcihp_init(Object *owner, AcpiPciHpState *s, PCIBus *root_bus,
     s->root= root_bus;
     s->legacy_piix = !bridges_enabled;
 
-    if (s->legacy_piix) {
-        unsigned *bus_bsel = g_malloc(sizeof *bus_bsel);
-
-        s->io_len = ACPI_PCIHP_LEGACY_SIZE;
-
-        *bus_bsel = ACPI_PCIHP_BSEL_DEFAULT;
-        object_property_add_uint32_ptr(OBJECT(root_bus), ACPI_PCIHP_PROP_BSEL,
-                                       bus_bsel, NULL);
-    }
-
     memory_region_init_io(&s->io, owner, &acpi_pcihp_io_ops, s,
                           "acpi-pci-hotplug", s->io_len);
     memory_region_add_subregion(address_space_io, s->io_base, &s->io);
diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c
index 6d99fe407c..a553a7e110 100644
--- a/hw/acpi/piix4.c
+++ b/hw/acpi/piix4.c
@@ -440,6 +440,8 @@ static void piix4_update_bus_hotplug(PCIBus *pci_bus, void *opaque)
 {
     PIIX4PMState *s = opaque;
 
+    /* pci_bus cannot outlive PIIX4PMState, because /machine keeps it alive
+     * and it's not hot-unpluggable */
     qbus_set_hotplug_handler(BUS(pci_bus), DEVICE(s), &error_abort);
 }
 
diff --git a/hw/acpi/tco.c b/hw/acpi/tco.c
index 8ce7daf23a..b4adac88cd 100644
--- a/hw/acpi/tco.c
+++ b/hw/acpi/tco.c
@@ -49,6 +49,7 @@ static inline void tco_timer_reload(TCOIORegs *tr)
 static inline void tco_timer_stop(TCOIORegs *tr)
 {
     tr->expire_time = -1;
+    timer_del(tr->tco_timer);
 }
 
 static void tco_timer_expired(void *opaque)
diff --git a/hw/acpi/vmgenid.c b/hw/acpi/vmgenid.c
new file mode 100644
index 0000000000..744f2847da
--- /dev/null
+++ b/hw/acpi/vmgenid.c
@@ -0,0 +1,258 @@
+/*
+ *  Virtual Machine Generation ID Device
+ *
+ *  Copyright (C) 2017 Skyport Systems.
+ *
+ *  Author: Ben Warren <ben@skyportsystems.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qmp-commands.h"
+#include "hw/acpi/acpi.h"
+#include "hw/acpi/aml-build.h"
+#include "hw/acpi/vmgenid.h"
+#include "hw/nvram/fw_cfg.h"
+#include "sysemu/sysemu.h"
+
+void vmgenid_build_acpi(VmGenIdState *vms, GArray *table_data, GArray *guid,
+                        BIOSLinker *linker)
+{
+    Aml *ssdt, *dev, *scope, *method, *addr, *if_ctx;
+    uint32_t vgia_offset;
+    QemuUUID guid_le;
+
+    /* Fill in the GUID values.  These need to be converted to little-endian
+     * first, since that's what the guest expects
+     */
+    g_array_set_size(guid, VMGENID_FW_CFG_SIZE - ARRAY_SIZE(guid_le.data));
+    guid_le = vms->guid;
+    qemu_uuid_bswap(&guid_le);
+    /* The GUID is written at a fixed offset into the fw_cfg file
+     * in order to implement the "OVMF SDT Header probe suppressor"
+     * see docs/specs/vmgenid.txt for more details
+     */
+    g_array_insert_vals(guid, VMGENID_GUID_OFFSET, guid_le.data,
+                        ARRAY_SIZE(guid_le.data));
+
+    /* Put this in a separate SSDT table */
+    ssdt = init_aml_allocator();
+
+    /* Reserve space for header */
+    acpi_data_push(ssdt->buf, sizeof(AcpiTableHeader));
+
+    /* Storage for the GUID address */
+    vgia_offset = table_data->len +
+        build_append_named_dword(ssdt->buf, "VGIA");
+    scope = aml_scope("\\_SB");
+    dev = aml_device("VGEN");
+    aml_append(dev, aml_name_decl("_HID", aml_string("QEMUVGID")));
+    aml_append(dev, aml_name_decl("_CID", aml_string("VM_Gen_Counter")));
+    aml_append(dev, aml_name_decl("_DDN", aml_string("VM_Gen_Counter")));
+
+    /* Simple status method to check that address is linked and non-zero */
+    method = aml_method("_STA", 0, AML_NOTSERIALIZED);
+    addr = aml_local(0);
+    aml_append(method, aml_store(aml_int(0xf), addr));
+    if_ctx = aml_if(aml_equal(aml_name("VGIA"), aml_int(0)));
+    aml_append(if_ctx, aml_store(aml_int(0), addr));
+    aml_append(method, if_ctx);
+    aml_append(method, aml_return(addr));
+    aml_append(dev, method);
+
+    /* the ADDR method returns two 32-bit words representing the lower and
+     * upper halves * of the physical address of the fw_cfg blob
+     * (holding the GUID)
+     */
+    method = aml_method("ADDR", 0, AML_NOTSERIALIZED);
+
+    addr = aml_local(0);
+    aml_append(method, aml_store(aml_package(2), addr));
+
+    aml_append(method, aml_store(aml_add(aml_name("VGIA"),
+                                         aml_int(VMGENID_GUID_OFFSET), NULL),
+                                 aml_index(addr, aml_int(0))));
+    aml_append(method, aml_store(aml_int(0), aml_index(addr, aml_int(1))));
+    aml_append(method, aml_return(addr));
+
+    aml_append(dev, method);
+    aml_append(scope, dev);
+    aml_append(ssdt, scope);
+
+    /* attach an ACPI notify */
+    method = aml_method("\\_GPE._E05", 0, AML_NOTSERIALIZED);
+    aml_append(method, aml_notify(aml_name("\\_SB.VGEN"), aml_int(0x80)));
+    aml_append(ssdt, method);
+
+    g_array_append_vals(table_data, ssdt->buf->data, ssdt->buf->len);
+
+    /* Allocate guest memory for the Data fw_cfg blob */
+    bios_linker_loader_alloc(linker, VMGENID_GUID_FW_CFG_FILE, guid, 4096,
+                             false /* page boundary, high memory */);
+
+    /* Patch address of GUID fw_cfg blob into the ADDR fw_cfg blob
+     * so QEMU can write the GUID there.  The address is expected to be
+     * < 4GB, but write 64 bits anyway.
+     * The address that is patched in is offset in order to implement
+     * the "OVMF SDT Header probe suppressor"
+     * see docs/specs/vmgenid.txt for more details.
+     */
+    bios_linker_loader_write_pointer(linker,
+        VMGENID_ADDR_FW_CFG_FILE, 0, sizeof(uint64_t),
+        VMGENID_GUID_FW_CFG_FILE, VMGENID_GUID_OFFSET);
+
+    /* Patch address of GUID fw_cfg blob into the AML so OSPM can retrieve
+     * and read it.  Note that while we provide storage for 64 bits, only
+     * the least-signficant 32 get patched into AML.
+     */
+    bios_linker_loader_add_pointer(linker,
+        ACPI_BUILD_TABLE_FILE, vgia_offset, sizeof(uint32_t),
+        VMGENID_GUID_FW_CFG_FILE, 0);
+
+    build_header(linker, table_data,
+        (void *)(table_data->data + table_data->len - ssdt->buf->len),
+        "SSDT", ssdt->buf->len, 1, NULL, "VMGENID");
+    free_aml_allocator();
+}
+
+void vmgenid_add_fw_cfg(VmGenIdState *vms, FWCfgState *s, GArray *guid)
+{
+    /* Create a read-only fw_cfg file for GUID */
+    fw_cfg_add_file(s, VMGENID_GUID_FW_CFG_FILE, guid->data,
+                    VMGENID_FW_CFG_SIZE);
+    /* Create a read-write fw_cfg file for Address */
+    fw_cfg_add_file_callback(s, VMGENID_ADDR_FW_CFG_FILE, NULL, NULL,
+                             vms->vmgenid_addr_le,
+                             ARRAY_SIZE(vms->vmgenid_addr_le), false);
+}
+
+static void vmgenid_update_guest(VmGenIdState *vms)
+{
+    Object *obj = object_resolve_path_type("", TYPE_ACPI_DEVICE_IF, NULL);
+    uint32_t vmgenid_addr;
+    QemuUUID guid_le;
+
+    if (obj) {
+        /* Write the GUID to guest memory */
+        memcpy(&vmgenid_addr, vms->vmgenid_addr_le, sizeof(vmgenid_addr));
+        vmgenid_addr = le32_to_cpu(vmgenid_addr);
+        /* A zero value in vmgenid_addr means that BIOS has not yet written
+         * the address
+         */
+        if (vmgenid_addr) {
+            /* QemuUUID has the first three words as big-endian, and expect
+             * that any GUIDs passed in will always be BE.  The guest,
+             * however, will expect the fields to be little-endian.
+             * Perform a byte swap immediately before writing.
+             */
+            guid_le = vms->guid;
+            qemu_uuid_bswap(&guid_le);
+            /* The GUID is written at a fixed offset into the fw_cfg file
+             * in order to implement the "OVMF SDT Header probe suppressor"
+             * see docs/specs/vmgenid.txt for more details.
+             */
+            cpu_physical_memory_write(vmgenid_addr, guid_le.data,
+                                      sizeof(guid_le.data));
+            /* Send _GPE.E05 event */
+            acpi_send_event(DEVICE(obj), ACPI_VMGENID_CHANGE_STATUS);
+        }
+    }
+}
+
+static void vmgenid_set_guid(Object *obj, const char *value, Error **errp)
+{
+    VmGenIdState *vms = VMGENID(obj);
+
+    if (!strcmp(value, "auto")) {
+        qemu_uuid_generate(&vms->guid);
+    } else if (qemu_uuid_parse(value, &vms->guid) < 0) {
+        error_setg(errp, "'%s. %s': Failed to parse GUID string: %s",
+                   object_get_typename(OBJECT(vms)), VMGENID_GUID, value);
+        return;
+    }
+
+    vmgenid_update_guest(vms);
+}
+
+/* After restoring an image, we need to update the guest memory and notify
+ * it of a potential change to VM Generation ID
+ */
+static int vmgenid_post_load(void *opaque, int version_id)
+{
+    VmGenIdState *vms = opaque;
+    vmgenid_update_guest(vms);
+    return 0;
+}
+
+static const VMStateDescription vmstate_vmgenid = {
+    .name = "vmgenid",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .post_load = vmgenid_post_load,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT8_ARRAY(vmgenid_addr_le, VmGenIdState, sizeof(uint64_t)),
+        VMSTATE_END_OF_LIST()
+    },
+};
+
+static void vmgenid_handle_reset(void *opaque)
+{
+    VmGenIdState *vms = VMGENID(opaque);
+    /* Clear the guest-allocated GUID address when the VM resets */
+    memset(vms->vmgenid_addr_le, 0, ARRAY_SIZE(vms->vmgenid_addr_le));
+}
+
+static void vmgenid_realize(DeviceState *dev, Error **errp)
+{
+    VmGenIdState *vms = VMGENID(dev);
+    qemu_register_reset(vmgenid_handle_reset, vms);
+}
+
+static void vmgenid_device_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+
+    dc->vmsd = &vmstate_vmgenid;
+    dc->realize = vmgenid_realize;
+    dc->hotpluggable = false;
+
+    object_class_property_add_str(klass, VMGENID_GUID, NULL,
+                                  vmgenid_set_guid, NULL);
+    object_class_property_set_description(klass, VMGENID_GUID,
+                                    "Set Global Unique Identifier "
+                                    "(big-endian) or auto for random value",
+                                    NULL);
+}
+
+static const TypeInfo vmgenid_device_info = {
+    .name          = VMGENID_DEVICE,
+    .parent        = TYPE_DEVICE,
+    .instance_size = sizeof(VmGenIdState),
+    .class_init    = vmgenid_device_class_init,
+};
+
+static void vmgenid_register_types(void)
+{
+    type_register_static(&vmgenid_device_info);
+}
+
+type_init(vmgenid_register_types)
+
+GuidInfo *qmp_query_vm_generation_id(Error **errp)
+{
+    GuidInfo *info;
+    VmGenIdState *vms;
+    Object *obj = find_vmgenid_dev();
+
+    if (!obj) {
+        return NULL;
+    }
+    vms = VMGENID(obj);
+
+    info = g_malloc0(sizeof(*info));
+    info->guid = qemu_uuid_unparse_strdup(&vms->guid);
+    return info;
+}
diff --git a/hw/arm/armv7m.c b/hw/arm/armv7m.c
index 0c9ca7bfa0..c8a11f2b53 100644
--- a/hw/arm/armv7m.c
+++ b/hw/arm/armv7m.c
@@ -8,6 +8,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "hw/arm/armv7m.h"
 #include "qapi/error.h"
 #include "qemu-common.h"
 #include "cpu.h"
@@ -17,147 +18,260 @@
 #include "elf.h"
 #include "sysemu/qtest.h"
 #include "qemu/error-report.h"
+#include "exec/address-spaces.h"
 
 /* Bitbanded IO.  Each word corresponds to a single bit.  */
 
 /* Get the byte address of the real memory for a bitband access.  */
-static inline uint32_t bitband_addr(void * opaque, uint32_t addr)
+static inline hwaddr bitband_addr(BitBandState *s, hwaddr offset)
 {
-    uint32_t res;
-
-    res = *(uint32_t *)opaque;
-    res |= (addr & 0x1ffffff) >> 5;
-    return res;
-
+    return s->base | (offset & 0x1ffffff) >> 5;
 }
 
-static uint32_t bitband_readb(void *opaque, hwaddr offset)
+static MemTxResult bitband_read(void *opaque, hwaddr offset,
+                                uint64_t *data, unsigned size, MemTxAttrs attrs)
 {
-    uint8_t v;
-    cpu_physical_memory_read(bitband_addr(opaque, offset), &v, 1);
-    return (v & (1 << ((offset >> 2) & 7))) != 0;
+    BitBandState *s = opaque;
+    uint8_t buf[4];
+    MemTxResult res;
+    int bitpos, bit;
+    hwaddr addr;
+
+    assert(size <= 4);
+
+    /* Find address in underlying memory and round down to multiple of size */
+    addr = bitband_addr(s, offset) & (-size);
+    res = address_space_read(s->source_as, addr, attrs, buf, size);
+    if (res) {
+        return res;
+    }
+    /* Bit position in the N bytes read... */
+    bitpos = (offset >> 2) & ((size * 8) - 1);
+    /* ...converted to byte in buffer and bit in byte */
+    bit = (buf[bitpos >> 3] >> (bitpos & 7)) & 1;
+    *data = bit;
+    return MEMTX_OK;
 }
 
-static void bitband_writeb(void *opaque, hwaddr offset,
-                           uint32_t value)
+static MemTxResult bitband_write(void *opaque, hwaddr offset, uint64_t value,
+                                 unsigned size, MemTxAttrs attrs)
 {
-    uint32_t addr;
-    uint8_t mask;
-    uint8_t v;
-    addr = bitband_addr(opaque, offset);
-    mask = (1 << ((offset >> 2) & 7));
-    cpu_physical_memory_read(addr, &v, 1);
-    if (value & 1)
-        v |= mask;
-    else
-        v &= ~mask;
-    cpu_physical_memory_write(addr, &v, 1);
+    BitBandState *s = opaque;
+    uint8_t buf[4];
+    MemTxResult res;
+    int bitpos, bit;
+    hwaddr addr;
+
+    assert(size <= 4);
+
+    /* Find address in underlying memory and round down to multiple of size */
+    addr = bitband_addr(s, offset) & (-size);
+    res = address_space_read(s->source_as, addr, attrs, buf, size);
+    if (res) {
+        return res;
+    }
+    /* Bit position in the N bytes read... */
+    bitpos = (offset >> 2) & ((size * 8) - 1);
+    /* ...converted to byte in buffer and bit in byte */
+    bit = 1 << (bitpos & 7);
+    if (value & 1) {
+        buf[bitpos >> 3] |= bit;
+    } else {
+        buf[bitpos >> 3] &= ~bit;
+    }
+    return address_space_write(s->source_as, addr, attrs, buf, size);
 }
 
-static uint32_t bitband_readw(void *opaque, hwaddr offset)
+static const MemoryRegionOps bitband_ops = {
+    .read_with_attrs = bitband_read,
+    .write_with_attrs = bitband_write,
+    .endianness = DEVICE_NATIVE_ENDIAN,
+    .impl.min_access_size = 1,
+    .impl.max_access_size = 4,
+    .valid.min_access_size = 1,
+    .valid.max_access_size = 4,
+};
+
+static void bitband_init(Object *obj)
 {
-    uint32_t addr;
-    uint16_t mask;
-    uint16_t v;
-    addr = bitband_addr(opaque, offset) & ~1;
-    mask = (1 << ((offset >> 2) & 15));
-    mask = tswap16(mask);
-    cpu_physical_memory_read(addr, &v, 2);
-    return (v & mask) != 0;
+    BitBandState *s = BITBAND(obj);
+    SysBusDevice *dev = SYS_BUS_DEVICE(obj);
+
+    object_property_add_link(obj, "source-memory",
+                             TYPE_MEMORY_REGION,
+                             (Object **)&s->source_memory,
+                             qdev_prop_allow_set_link_before_realize,
+                             OBJ_PROP_LINK_UNREF_ON_RELEASE,
+                             &error_abort);
+    memory_region_init_io(&s->iomem, obj, &bitband_ops, s,
+                          "bitband", 0x02000000);
+    sysbus_init_mmio(dev, &s->iomem);
 }
 
-static void bitband_writew(void *opaque, hwaddr offset,
-                           uint32_t value)
+static void bitband_realize(DeviceState *dev, Error **errp)
 {
-    uint32_t addr;
-    uint16_t mask;
-    uint16_t v;
-    addr = bitband_addr(opaque, offset) & ~1;
-    mask = (1 << ((offset >> 2) & 15));
-    mask = tswap16(mask);
-    cpu_physical_memory_read(addr, &v, 2);
-    if (value & 1)
-        v |= mask;
-    else
-        v &= ~mask;
-    cpu_physical_memory_write(addr, &v, 2);
+    BitBandState *s = BITBAND(dev);
+
+    if (!s->source_memory) {
+        error_setg(errp, "source-memory property not set");
+        return;
+    }
+
+    s->source_as = address_space_init_shareable(s->source_memory,
+                                                "bitband-source");
 }
 
-static uint32_t bitband_readl(void *opaque, hwaddr offset)
+/* Board init.  */
+
+static const hwaddr bitband_input_addr[ARMV7M_NUM_BITBANDS] = {
+    0x20000000, 0x40000000
+};
+
+static const hwaddr bitband_output_addr[ARMV7M_NUM_BITBANDS] = {
+    0x22000000, 0x42000000
+};
+
+static void armv7m_instance_init(Object *obj)
 {
-    uint32_t addr;
-    uint32_t mask;
-    uint32_t v;
-    addr = bitband_addr(opaque, offset) & ~3;
-    mask = (1 << ((offset >> 2) & 31));
-    mask = tswap32(mask);
-    cpu_physical_memory_read(addr, &v, 4);
-    return (v & mask) != 0;
+    ARMv7MState *s = ARMV7M(obj);
+    int i;
+
+    /* Can't init the cpu here, we don't yet know which model to use */
+
+    object_property_add_link(obj, "memory",
+                             TYPE_MEMORY_REGION,
+                             (Object **)&s->board_memory,
+                             qdev_prop_allow_set_link_before_realize,
+                             OBJ_PROP_LINK_UNREF_ON_RELEASE,
+                             &error_abort);
+    memory_region_init(&s->container, obj, "armv7m-container", UINT64_MAX);
+
+    object_initialize(&s->nvic, sizeof(s->nvic), "armv7m_nvic");
+    qdev_set_parent_bus(DEVICE(&s->nvic), sysbus_get_default());
+    object_property_add_alias(obj, "num-irq",
+                              OBJECT(&s->nvic), "num-irq", &error_abort);
+
+    for (i = 0; i < ARRAY_SIZE(s->bitband); i++) {
+        object_initialize(&s->bitband[i], sizeof(s->bitband[i]), TYPE_BITBAND);
+        qdev_set_parent_bus(DEVICE(&s->bitband[i]), sysbus_get_default());
+    }
 }
 
-static void bitband_writel(void *opaque, hwaddr offset,
-                           uint32_t value)
+static void armv7m_realize(DeviceState *dev, Error **errp)
 {
-    uint32_t addr;
-    uint32_t mask;
-    uint32_t v;
-    addr = bitband_addr(opaque, offset) & ~3;
-    mask = (1 << ((offset >> 2) & 31));
-    mask = tswap32(mask);
-    cpu_physical_memory_read(addr, &v, 4);
-    if (value & 1)
-        v |= mask;
-    else
-        v &= ~mask;
-    cpu_physical_memory_write(addr, &v, 4);
-}
+    ARMv7MState *s = ARMV7M(dev);
+    SysBusDevice *sbd;
+    Error *err = NULL;
+    int i;
+    char **cpustr;
+    ObjectClass *oc;
+    const char *typename;
+    CPUClass *cc;
+
+    if (!s->board_memory) {
+        error_setg(errp, "memory property was not set");
+        return;
+    }
 
-static const MemoryRegionOps bitband_ops = {
-    .old_mmio = {
-        .read = { bitband_readb, bitband_readw, bitband_readl, },
-        .write = { bitband_writeb, bitband_writew, bitband_writel, },
-    },
-    .endianness = DEVICE_NATIVE_ENDIAN,
-};
+    memory_region_add_subregion_overlap(&s->container, 0, s->board_memory, -1);
 
-#define TYPE_BITBAND "ARM,bitband-memory"
-#define BITBAND(obj) OBJECT_CHECK(BitBandState, (obj), TYPE_BITBAND)
+    cpustr = g_strsplit(s->cpu_model, ",", 2);
 
-typedef struct {
-    /*< private >*/
-    SysBusDevice parent_obj;
-    /*< public >*/
+    oc = cpu_class_by_name(TYPE_ARM_CPU, cpustr[0]);
+    if (!oc) {
+        error_setg(errp, "Unknown CPU model %s", cpustr[0]);
+        g_strfreev(cpustr);
+        return;
+    }
 
-    MemoryRegion iomem;
-    uint32_t base;
-} BitBandState;
+    cc = CPU_CLASS(oc);
+    typename = object_class_get_name(oc);
+    cc->parse_features(typename, cpustr[1], &err);
+    g_strfreev(cpustr);
+    if (err) {
+        error_propagate(errp, err);
+        return;
+    }
 
-static void bitband_init(Object *obj)
-{
-    BitBandState *s = BITBAND(obj);
-    SysBusDevice *dev = SYS_BUS_DEVICE(obj);
+    s->cpu = ARM_CPU(object_new(typename));
+    if (!s->cpu) {
+        error_setg(errp, "Unknown CPU model %s", s->cpu_model);
+        return;
+    }
 
-    memory_region_init_io(&s->iomem, obj, &bitband_ops, &s->base,
-                          "bitband", 0x02000000);
-    sysbus_init_mmio(dev, &s->iomem);
+    object_property_set_link(OBJECT(s->cpu), OBJECT(&s->container), "memory",
+                             &error_abort);
+    object_property_set_bool(OBJECT(s->cpu), true, "realized", &err);
+    if (err != NULL) {
+        error_propagate(errp, err);
+        return;
+    }
+
+    /* Note that we must realize the NVIC after the CPU */
+    object_property_set_bool(OBJECT(&s->nvic), true, "realized", &err);
+    if (err != NULL) {
+        error_propagate(errp, err);
+        return;
+    }
+
+    /* Alias the NVIC's input and output GPIOs as our own so the board
+     * code can wire them up. (We do this in realize because the
+     * NVIC doesn't create the input GPIO array until realize.)
+     */
+    qdev_pass_gpios(DEVICE(&s->nvic), dev, NULL);
+    qdev_pass_gpios(DEVICE(&s->nvic), dev, "SYSRESETREQ");
+
+    /* Wire the NVIC up to the CPU */
+    sbd = SYS_BUS_DEVICE(&s->nvic);
+    sysbus_connect_irq(sbd, 0,
+                       qdev_get_gpio_in(DEVICE(s->cpu), ARM_CPU_IRQ));
+    s->cpu->env.nvic = &s->nvic;
+
+    memory_region_add_subregion(&s->container, 0xe000e000,
+                                sysbus_mmio_get_region(sbd, 0));
+
+    for (i = 0; i < ARRAY_SIZE(s->bitband); i++) {
+        Object *obj = OBJECT(&s->bitband[i]);
+        SysBusDevice *sbd = SYS_BUS_DEVICE(&s->bitband[i]);
+
+        object_property_set_int(obj, bitband_input_addr[i], "base", &err);
+        if (err != NULL) {
+            error_propagate(errp, err);
+            return;
+        }
+        object_property_set_link(obj, OBJECT(s->board_memory),
+                                 "source-memory", &error_abort);
+        object_property_set_bool(obj, true, "realized", &err);
+        if (err != NULL) {
+            error_propagate(errp, err);
+            return;
+        }
+
+        memory_region_add_subregion(&s->container, bitband_output_addr[i],
+                                    sysbus_mmio_get_region(sbd, 0));
+    }
 }
 
-static void armv7m_bitband_init(void)
-{
-    DeviceState *dev;
+static Property armv7m_properties[] = {
+    DEFINE_PROP_STRING("cpu-model", ARMv7MState, cpu_model),
+    DEFINE_PROP_END_OF_LIST(),
+};
 
-    dev = qdev_create(NULL, TYPE_BITBAND);
-    qdev_prop_set_uint32(dev, "base", 0x20000000);
-    qdev_init_nofail(dev);
-    sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, 0x22000000);
+static void armv7m_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
 
-    dev = qdev_create(NULL, TYPE_BITBAND);
-    qdev_prop_set_uint32(dev, "base", 0x40000000);
-    qdev_init_nofail(dev);
-    sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, 0x42000000);
+    dc->realize = armv7m_realize;
+    dc->props = armv7m_properties;
 }
 
-/* Board init.  */
+static const TypeInfo armv7m_info = {
+    .name = TYPE_ARMV7M,
+    .parent = TYPE_SYS_BUS_DEVICE,
+    .instance_size = sizeof(ARMv7MState),
+    .instance_init = armv7m_instance_init,
+    .class_init = armv7m_class_init,
+};
 
 static void armv7m_reset(void *opaque)
 {
@@ -168,37 +282,35 @@ static void armv7m_reset(void *opaque)
 
 /* Init CPU and memory for a v7-M based board.
    mem_size is in bytes.
-   Returns the NVIC array.  */
+   Returns the ARMv7M device.  */
 
 DeviceState *armv7m_init(MemoryRegion *system_memory, int mem_size, int num_irq,
                       const char *kernel_filename, const char *cpu_model)
 {
-    ARMCPU *cpu;
-    CPUARMState *env;
-    DeviceState *nvic;
-    int image_size;
-    uint64_t entry;
-    uint64_t lowaddr;
-    int big_endian;
+    DeviceState *armv7m;
 
     if (cpu_model == NULL) {
-	cpu_model = "cortex-m3";
-    }
-    cpu = cpu_arm_init(cpu_model);
-    if (cpu == NULL) {
-        fprintf(stderr, "Unable to find CPU definition\n");
-        exit(1);
+        cpu_model = "cortex-m3";
     }
-    env = &cpu->env;
 
-    armv7m_bitband_init();
+    armv7m = qdev_create(NULL, "armv7m");
+    qdev_prop_set_uint32(armv7m, "num-irq", num_irq);
+    qdev_prop_set_string(armv7m, "cpu-model", cpu_model);
+    object_property_set_link(OBJECT(armv7m), OBJECT(get_system_memory()),
+                                     "memory", &error_abort);
+    /* This will exit with an error if the user passed us a bad cpu_model */
+    qdev_init_nofail(armv7m);
+
+    armv7m_load_kernel(ARM_CPU(first_cpu), kernel_filename, mem_size);
+    return armv7m;
+}
 
-    nvic = qdev_create(NULL, "armv7m_nvic");
-    qdev_prop_set_uint32(nvic, "num-irq", num_irq);
-    env->nvic = nvic;
-    qdev_init_nofail(nvic);
-    sysbus_connect_irq(SYS_BUS_DEVICE(nvic), 0,
-                       qdev_get_gpio_in(DEVICE(cpu), ARM_CPU_IRQ));
+void armv7m_load_kernel(ARMCPU *cpu, const char *kernel_filename, int mem_size)
+{
+    int image_size;
+    uint64_t entry;
+    uint64_t lowaddr;
+    int big_endian;
 
 #ifdef TARGET_WORDS_BIGENDIAN
     big_endian = 1;
@@ -224,8 +336,15 @@ DeviceState *armv7m_init(MemoryRegion *system_memory, int mem_size, int num_irq,
         }
     }
 
+    /* CPU objects (unlike devices) are not automatically reset on system
+     * reset, so we must always register a handler to do so. Unlike
+     * A-profile CPUs, we don't need to do anything special in the
+     * handler to arrange that it starts correctly.
+     * This is arguably the wrong place to do this, but it matches the
+     * way A-profile does it. Note that this means that every M profile
+     * board must call this function!
+     */
     qemu_register_reset(armv7m_reset, cpu);
-    return nvic;
 }
 
 static Property bitband_properties[] = {
@@ -237,6 +356,7 @@ static void bitband_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
 
+    dc->realize = bitband_realize;
     dc->props = bitband_properties;
 }
 
@@ -251,6 +371,7 @@ static const TypeInfo bitband_info = {
 static void armv7m_register_types(void)
 {
     type_register_static(&bitband_info);
+    type_register_static(&armv7m_info);
 }
 
 type_init(armv7m_register_types)
diff --git a/hw/arm/bcm2835_peripherals.c b/hw/arm/bcm2835_peripherals.c
index 2e641a3989..369ef1e3bd 100644
--- a/hw/arm/bcm2835_peripherals.c
+++ b/hw/arm/bcm2835_peripherals.c
@@ -86,11 +86,21 @@ static void bcm2835_peripherals_init(Object *obj)
     object_property_add_const_link(OBJECT(&s->property), "dma-mr",
                                    OBJECT(&s->gpu_bus_mr), &error_abort);
 
+    /* Random Number Generator */
+    object_initialize(&s->rng, sizeof(s->rng), TYPE_BCM2835_RNG);
+    object_property_add_child(obj, "rng", OBJECT(&s->rng), NULL);
+    qdev_set_parent_bus(DEVICE(&s->rng), sysbus_get_default());
+
     /* Extended Mass Media Controller */
     object_initialize(&s->sdhci, sizeof(s->sdhci), TYPE_SYSBUS_SDHCI);
     object_property_add_child(obj, "sdhci", OBJECT(&s->sdhci), NULL);
     qdev_set_parent_bus(DEVICE(&s->sdhci), sysbus_get_default());
 
+    /* SDHOST */
+    object_initialize(&s->sdhost, sizeof(s->sdhost), TYPE_BCM2835_SDHOST);
+    object_property_add_child(obj, "sdhost", OBJECT(&s->sdhost), NULL);
+    qdev_set_parent_bus(DEVICE(&s->sdhost), sysbus_get_default());
+
     /* DMA Channels */
     object_initialize(&s->dma, sizeof(s->dma), TYPE_BCM2835_DMA);
     object_property_add_child(obj, "dma", OBJECT(&s->dma), NULL);
@@ -98,6 +108,16 @@ static void bcm2835_peripherals_init(Object *obj)
 
     object_property_add_const_link(OBJECT(&s->dma), "dma-mr",
                                    OBJECT(&s->gpu_bus_mr), &error_abort);
+
+    /* GPIO */
+    object_initialize(&s->gpio, sizeof(s->gpio), TYPE_BCM2835_GPIO);
+    object_property_add_child(obj, "gpio", OBJECT(&s->gpio), NULL);
+    qdev_set_parent_bus(DEVICE(&s->gpio), sysbus_get_default());
+
+    object_property_add_const_link(OBJECT(&s->gpio), "sdbus-sdhci",
+                                   OBJECT(&s->sdhci.sdbus), &error_abort);
+    object_property_add_const_link(OBJECT(&s->gpio), "sdbus-sdhost",
+                                   OBJECT(&s->sdhost.sdbus), &error_abort);
 }
 
 static void bcm2835_peripherals_realize(DeviceState *dev, Error **errp)
@@ -226,6 +246,16 @@ static void bcm2835_peripherals_realize(DeviceState *dev, Error **errp)
     sysbus_connect_irq(SYS_BUS_DEVICE(&s->property), 0,
                       qdev_get_gpio_in(DEVICE(&s->mboxes), MBOX_CHAN_PROPERTY));
 
+    /* Random Number Generator */
+    object_property_set_bool(OBJECT(&s->rng), true, "realized", &err);
+    if (err) {
+        error_propagate(errp, err);
+        return;
+    }
+
+    memory_region_add_subregion(&s->peri_mr, RNG_OFFSET,
+                sysbus_mmio_get_region(SYS_BUS_DEVICE(&s->rng), 0));
+
     /* Extended Mass Media Controller */
     object_property_set_int(OBJECT(&s->sdhci), BCM2835_SDHC_CAPAREG, "capareg",
                             &err);
@@ -252,13 +282,20 @@ static void bcm2835_peripherals_realize(DeviceState *dev, Error **errp)
     sysbus_connect_irq(SYS_BUS_DEVICE(&s->sdhci), 0,
         qdev_get_gpio_in_named(DEVICE(&s->ic), BCM2835_IC_GPU_IRQ,
                                INTERRUPT_ARASANSDIO));
-    object_property_add_alias(OBJECT(s), "sd-bus", OBJECT(&s->sdhci), "sd-bus",
-                              &err);
+
+    /* SDHOST */
+    object_property_set_bool(OBJECT(&s->sdhost), true, "realized", &err);
     if (err) {
         error_propagate(errp, err);
         return;
     }
 
+    memory_region_add_subregion(&s->peri_mr, MMCI0_OFFSET,
+                sysbus_mmio_get_region(SYS_BUS_DEVICE(&s->sdhost), 0));
+    sysbus_connect_irq(SYS_BUS_DEVICE(&s->sdhost), 0,
+        qdev_get_gpio_in_named(DEVICE(&s->ic), BCM2835_IC_GPU_IRQ,
+                               INTERRUPT_SDIO));
+
     /* DMA Channels */
     object_property_set_bool(OBJECT(&s->dma), true, "realized", &err);
     if (err) {
@@ -277,6 +314,23 @@ static void bcm2835_peripherals_realize(DeviceState *dev, Error **errp)
                                                   BCM2835_IC_GPU_IRQ,
                                                   INTERRUPT_DMA0 + n));
     }
+
+    /* GPIO */
+    object_property_set_bool(OBJECT(&s->gpio), true, "realized", &err);
+    if (err) {
+        error_propagate(errp, err);
+        return;
+    }
+
+    memory_region_add_subregion(&s->peri_mr, GPIO_OFFSET,
+                sysbus_mmio_get_region(SYS_BUS_DEVICE(&s->gpio), 0));
+
+    object_property_add_alias(OBJECT(s), "sd-bus", OBJECT(&s->gpio), "sd-bus",
+                              &err);
+    if (err) {
+        error_propagate(errp, err);
+        return;
+    }
 }
 
 static void bcm2835_peripherals_class_init(ObjectClass *oc, void *data)
diff --git a/hw/arm/exynos4210.c b/hw/arm/exynos4210.c
index be3c96d21e..1d2b50cc4e 100644
--- a/hw/arm/exynos4210.c
+++ b/hw/arm/exynos4210.c
@@ -24,6 +24,7 @@
 #include "qemu/osdep.h"
 #include "qapi/error.h"
 #include "qemu-common.h"
+#include "qemu/log.h"
 #include "cpu.h"
 #include "hw/boards.h"
 #include "sysemu/sysemu.h"
@@ -74,6 +75,9 @@
 /* PMU SFR base address */
 #define EXYNOS4210_PMU_BASE_ADDR            0x10020000
 
+/* Clock controller SFR base address */
+#define EXYNOS4210_CLK_BASE_ADDR            0x10030000
+
 /* Display controllers (FIMD) */
 #define EXYNOS4210_FIMD0_BASE_ADDR          0x11C00000
 
@@ -138,6 +142,16 @@ void exynos4210_write_secondary(ARMCPU *cpu,
                        info->smp_loader_start);
 }
 
+static uint64_t exynos4210_calc_affinity(int cpu)
+{
+    uint64_t mp_affinity;
+
+    /* Exynos4210 has 0x9 as cluster ID */
+    mp_affinity = (0x9 << ARM_AFF1_SHIFT) | cpu;
+
+    return mp_affinity;
+}
+
 Exynos4210State *exynos4210_init(MemoryRegion *system_mem,
         unsigned long ram_size)
 {
@@ -163,6 +177,8 @@ Exynos4210State *exynos4210_init(MemoryRegion *system_mem,
         }
 
         s->cpu[n] = ARM_CPU(cpuobj);
+        object_property_set_int(cpuobj, exynos4210_calc_affinity(n),
+                                "mp-affinity", &error_abort);
         object_property_set_int(cpuobj, EXYNOS4210_SMP_PRIVATE_BASE_ADDR,
                                 "reset-cbar", &error_abort);
         object_property_set_bool(cpuobj, true, "realized", &error_fatal);
@@ -297,6 +313,8 @@ Exynos4210State *exynos4210_init(MemoryRegion *system_mem,
     */
     sysbus_create_simple("exynos4210.pmu", EXYNOS4210_PMU_BASE_ADDR, NULL);
 
+    sysbus_create_simple("exynos4210.clk", EXYNOS4210_CLK_BASE_ADDR, NULL);
+
     /* PWM */
     sysbus_create_varargs("exynos4210.pwm", EXYNOS4210_PWM_BASE_ADDR,
                           s->irq_table[exynos4210_get_irq(22, 0)],
diff --git a/hw/arm/netduino2.c b/hw/arm/netduino2.c
index 23d792837f..3cfe332dd1 100644
--- a/hw/arm/netduino2.c
+++ b/hw/arm/netduino2.c
@@ -27,17 +27,18 @@
 #include "hw/boards.h"
 #include "qemu/error-report.h"
 #include "hw/arm/stm32f205_soc.h"
+#include "hw/arm/arm.h"
 
 static void netduino2_init(MachineState *machine)
 {
     DeviceState *dev;
 
     dev = qdev_create(NULL, TYPE_STM32F205_SOC);
-    if (machine->kernel_filename) {
-        qdev_prop_set_string(dev, "kernel-filename", machine->kernel_filename);
-    }
     qdev_prop_set_string(dev, "cpu-model", "cortex-m3");
     object_property_set_bool(OBJECT(dev), true, "realized", &error_fatal);
+
+    armv7m_load_kernel(ARM_CPU(first_cpu), machine->kernel_filename,
+                       FLASH_SIZE);
 }
 
 static void netduino2_machine_init(MachineClass *mc)
diff --git a/hw/arm/stm32f205_soc.c b/hw/arm/stm32f205_soc.c
index 38425bda6c..6e1260d2ed 100644
--- a/hw/arm/stm32f205_soc.c
+++ b/hw/arm/stm32f205_soc.c
@@ -49,6 +49,9 @@ static void stm32f205_soc_initfn(Object *obj)
     STM32F205State *s = STM32F205_SOC(obj);
     int i;
 
+    object_initialize(&s->armv7m, sizeof(s->armv7m), TYPE_ARMV7M);
+    qdev_set_parent_bus(DEVICE(&s->armv7m), sysbus_get_default());
+
     object_initialize(&s->syscfg, sizeof(s->syscfg), TYPE_STM32F2XX_SYSCFG);
     qdev_set_parent_bus(DEVICE(&s->syscfg), sysbus_get_default());
 
@@ -82,7 +85,7 @@ static void stm32f205_soc_initfn(Object *obj)
 static void stm32f205_soc_realize(DeviceState *dev_soc, Error **errp)
 {
     STM32F205State *s = STM32F205_SOC(dev_soc);
-    DeviceState *dev, *nvic;
+    DeviceState *dev, *armv7m;
     SysBusDevice *busdev;
     Error *err = NULL;
     int i;
@@ -110,8 +113,16 @@ static void stm32f205_soc_realize(DeviceState *dev_soc, Error **errp)
     vmstate_register_ram_global(sram);
     memory_region_add_subregion(system_memory, SRAM_BASE_ADDRESS, sram);
 
-    nvic = armv7m_init(get_system_memory(), FLASH_SIZE, 96,
-                       s->kernel_filename, s->cpu_model);
+    armv7m = DEVICE(&s->armv7m);
+    qdev_prop_set_uint32(armv7m, "num-irq", 96);
+    qdev_prop_set_string(armv7m, "cpu-model", s->cpu_model);
+    object_property_set_link(OBJECT(&s->armv7m), OBJECT(get_system_memory()),
+                                     "memory", &error_abort);
+    object_property_set_bool(OBJECT(&s->armv7m), true, "realized", &err);
+    if (err != NULL) {
+        error_propagate(errp, err);
+        return;
+    }
 
     /* System configuration controller */
     dev = DEVICE(&s->syscfg);
@@ -122,7 +133,7 @@ static void stm32f205_soc_realize(DeviceState *dev_soc, Error **errp)
     }
     busdev = SYS_BUS_DEVICE(dev);
     sysbus_mmio_map(busdev, 0, 0x40013800);
-    sysbus_connect_irq(busdev, 0, qdev_get_gpio_in(nvic, 71));
+    sysbus_connect_irq(busdev, 0, qdev_get_gpio_in(armv7m, 71));
 
     /* Attach UART (uses USART registers) and USART controllers */
     for (i = 0; i < STM_NUM_USARTS; i++) {
@@ -136,7 +147,7 @@ static void stm32f205_soc_realize(DeviceState *dev_soc, Error **errp)
         }
         busdev = SYS_BUS_DEVICE(dev);
         sysbus_mmio_map(busdev, 0, usart_addr[i]);
-        sysbus_connect_irq(busdev, 0, qdev_get_gpio_in(nvic, usart_irq[i]));
+        sysbus_connect_irq(busdev, 0, qdev_get_gpio_in(armv7m, usart_irq[i]));
     }
 
     /* Timer 2 to 5 */
@@ -150,7 +161,7 @@ static void stm32f205_soc_realize(DeviceState *dev_soc, Error **errp)
         }
         busdev = SYS_BUS_DEVICE(dev);
         sysbus_mmio_map(busdev, 0, timer_addr[i]);
-        sysbus_connect_irq(busdev, 0, qdev_get_gpio_in(nvic, timer_irq[i]));
+        sysbus_connect_irq(busdev, 0, qdev_get_gpio_in(armv7m, timer_irq[i]));
     }
 
     /* ADC 1 to 3 */
@@ -162,7 +173,7 @@ static void stm32f205_soc_realize(DeviceState *dev_soc, Error **errp)
         return;
     }
     qdev_connect_gpio_out(DEVICE(s->adc_irqs), 0,
-                          qdev_get_gpio_in(nvic, ADC_IRQ));
+                          qdev_get_gpio_in(armv7m, ADC_IRQ));
 
     for (i = 0; i < STM_NUM_ADCS; i++) {
         dev = DEVICE(&(s->adc[i]));
@@ -187,12 +198,11 @@ static void stm32f205_soc_realize(DeviceState *dev_soc, Error **errp)
         }
         busdev = SYS_BUS_DEVICE(dev);
         sysbus_mmio_map(busdev, 0, spi_addr[i]);
-        sysbus_connect_irq(busdev, 0, qdev_get_gpio_in(nvic, spi_irq[i]));
+        sysbus_connect_irq(busdev, 0, qdev_get_gpio_in(armv7m, spi_irq[i]));
     }
 }
 
 static Property stm32f205_soc_properties[] = {
-    DEFINE_PROP_STRING("kernel-filename", STM32F205State, kernel_filename),
     DEFINE_PROP_STRING("cpu-model", STM32F205State, cpu_model),
     DEFINE_PROP_END_OF_LIST(),
 };
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index f3440f2ccb..5f62a0321e 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -535,7 +535,6 @@ static void create_v2m(VirtMachineState *vms, qemu_irq *pic)
 static void create_gic(VirtMachineState *vms, qemu_irq *pic)
 {
     /* We create a standalone GIC */
-    VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms);
     DeviceState *gicdev;
     SysBusDevice *gicbusdev;
     const char *gictype;
@@ -605,7 +604,7 @@ static void create_gic(VirtMachineState *vms, qemu_irq *pic)
 
     fdt_add_gic_node(vms);
 
-    if (type == 3 && !vmc->no_its) {
+    if (type == 3 && vms->its) {
         create_its(vms, gicdev);
     } else if (type == 2) {
         create_v2m(vms, pic);
@@ -1378,6 +1377,7 @@ static void machvirt_init(MachineState *machine)
         }
 
         object_property_set_bool(cpuobj, true, "realized", NULL);
+        object_unref(cpuobj);
     }
     fdt_add_timer_nodes(vms);
     fdt_add_cpu_nodes(vms);
@@ -1480,6 +1480,20 @@ static void virt_set_highmem(Object *obj, bool value, Error **errp)
     vms->highmem = value;
 }
 
+static bool virt_get_its(Object *obj, Error **errp)
+{
+    VirtMachineState *vms = VIRT_MACHINE(obj);
+
+    return vms->its;
+}
+
+static void virt_set_its(Object *obj, bool value, Error **errp)
+{
+    VirtMachineState *vms = VIRT_MACHINE(obj);
+
+    vms->its = value;
+}
+
 static char *virt_get_gic_version(Object *obj, Error **errp)
 {
     VirtMachineState *vms = VIRT_MACHINE(obj);
@@ -1540,6 +1554,7 @@ type_init(machvirt_machine_init);
 static void virt_2_9_instance_init(Object *obj)
 {
     VirtMachineState *vms = VIRT_MACHINE(obj);
+    VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms);
 
     /* EL3 is disabled by default on virt: this makes us consistent
      * between KVM and TCG for this board, and it also allows us to
@@ -1579,6 +1594,19 @@ static void virt_2_9_instance_init(Object *obj)
                                     "Set GIC version. "
                                     "Valid values are 2, 3 and host", NULL);
 
+    if (vmc->no_its) {
+        vms->its = false;
+    } else {
+        /* Default allows ITS instantiation */
+        vms->its = true;
+        object_property_add_bool(obj, "its", virt_get_its,
+                                 virt_set_its, NULL);
+        object_property_set_description(obj, "its",
+                                        "Set on/off to enable/disable "
+                                        "ITS instantiation",
+                                        NULL);
+    }
+
     vms->memmap = a15memmap;
     vms->irqmap = a15irqmap;
 }
diff --git a/hw/block/block.c b/hw/block/block.c
index 8dc9d84a39..27878d0087 100644
--- a/hw/block/block.c
+++ b/hw/block/block.c
@@ -51,11 +51,33 @@ void blkconf_blocksizes(BlockConf *conf)
     }
 }
 
-void blkconf_apply_backend_options(BlockConf *conf)
+void blkconf_apply_backend_options(BlockConf *conf, bool readonly,
+                                   bool resizable, Error **errp)
 {
     BlockBackend *blk = conf->blk;
     BlockdevOnError rerror, werror;
+    uint64_t perm, shared_perm;
     bool wce;
+    int ret;
+
+    perm = BLK_PERM_CONSISTENT_READ;
+    if (!readonly) {
+        perm |= BLK_PERM_WRITE;
+    }
+
+    shared_perm = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED |
+                  BLK_PERM_GRAPH_MOD;
+    if (resizable) {
+        shared_perm |= BLK_PERM_RESIZE;
+    }
+    if (conf->share_rw) {
+        shared_perm |= BLK_PERM_WRITE;
+    }
+
+    ret = blk_set_perm(blk, perm, shared_perm, errp);
+    if (ret < 0) {
+        return;
+    }
 
     switch (conf->wce) {
     case ON_OFF_AUTO_ON:    wce = true; break;
diff --git a/hw/block/fdc.c b/hw/block/fdc.c
index 17d29e7bc5..a328693d15 100644
--- a/hw/block/fdc.c
+++ b/hw/block/fdc.c
@@ -186,6 +186,7 @@ typedef enum FDiskFlags {
 struct FDrive {
     FDCtrl *fdctrl;
     BlockBackend *blk;
+    BlockConf *conf;
     /* Drive status */
     FloppyDriveType drive;    /* CMOS drive type        */
     uint8_t perpendicular;    /* 2.88 MB access mode    */
@@ -469,9 +470,22 @@ static void fd_revalidate(FDrive *drv)
     }
 }
 
-static void fd_change_cb(void *opaque, bool load)
+static void fd_change_cb(void *opaque, bool load, Error **errp)
 {
     FDrive *drive = opaque;
+    Error *local_err = NULL;
+
+    if (!load) {
+        blk_set_perm(drive->blk, 0, BLK_PERM_ALL, &error_abort);
+    } else {
+        blkconf_apply_backend_options(drive->conf,
+                                      blk_is_read_only(drive->blk), false,
+                                      &local_err);
+        if (local_err) {
+            error_propagate(errp, local_err);
+            return;
+        }
+    }
 
     drive->media_changed = 1;
     drive->media_validated = false;
@@ -508,6 +522,7 @@ static int floppy_drive_init(DeviceState *qdev)
     FloppyDrive *dev = FLOPPY_DRIVE(qdev);
     FloppyBus *bus = FLOPPY_BUS(qdev->parent_bus);
     FDrive *drive;
+    Error *local_err = NULL;
     int ret;
 
     if (dev->unit == -1) {
@@ -533,7 +548,7 @@ static int floppy_drive_init(DeviceState *qdev)
 
     if (!dev->conf.blk) {
         /* Anonymous BlockBackend for an empty drive */
-        dev->conf.blk = blk_new();
+        dev->conf.blk = blk_new(0, BLK_PERM_ALL);
         ret = blk_attach_dev(dev->conf.blk, qdev);
         assert(ret == 0);
     }
@@ -551,7 +566,13 @@ static int floppy_drive_init(DeviceState *qdev)
      * blkconf_apply_backend_options(). */
     dev->conf.rerror = BLOCKDEV_ON_ERROR_AUTO;
     dev->conf.werror = BLOCKDEV_ON_ERROR_AUTO;
-    blkconf_apply_backend_options(&dev->conf);
+
+    blkconf_apply_backend_options(&dev->conf, blk_is_read_only(dev->conf.blk),
+                                  false, &local_err);
+    if (local_err) {
+        error_report_err(local_err);
+        return -1;
+    }
 
     /* 'enospc' is the default for -drive, 'report' is what blk_new() gives us
      * for empty drives. */
@@ -565,6 +586,7 @@ static int floppy_drive_init(DeviceState *qdev)
         return -1;
     }
 
+    drive->conf = &dev->conf;
     drive->blk = dev->conf.blk;
     drive->fdctrl = bus->fdc;
 
diff --git a/hw/block/m25p80.c b/hw/block/m25p80.c
index 2d6eb46a04..190573cefa 100644
--- a/hw/block/m25p80.c
+++ b/hw/block/m25p80.c
@@ -1215,6 +1215,7 @@ static void m25p80_realize(SSISlave *ss, Error **errp)
 {
     Flash *s = M25P80(ss);
     M25P80Class *mc = M25P80_GET_CLASS(s);
+    int ret;
 
     s->pi = mc->pi;
 
@@ -1222,6 +1223,13 @@ static void m25p80_realize(SSISlave *ss, Error **errp)
     s->dirty_page = -1;
 
     if (s->blk) {
+        uint64_t perm = BLK_PERM_CONSISTENT_READ |
+                        (blk_is_read_only(s->blk) ? 0 : BLK_PERM_WRITE);
+        ret = blk_set_perm(s->blk, perm, BLK_PERM_ALL, errp);
+        if (ret < 0) {
+            return;
+        }
+
         DB_PRINT_L(0, "Binding to IF_MTD drive\n");
         s->storage = blk_blockalign(s->blk, s->size);
 
diff --git a/hw/block/nand.c b/hw/block/nand.c
index c69e6755d9..0d33ac281f 100644
--- a/hw/block/nand.c
+++ b/hw/block/nand.c
@@ -373,6 +373,8 @@ static void nand_realize(DeviceState *dev, Error **errp)
 {
     int pagesize;
     NANDFlashState *s = NAND(dev);
+    int ret;
+
 
     s->buswidth = nand_flash_ids[s->chip_id].width >> 3;
     s->size = nand_flash_ids[s->chip_id].size << 20;
@@ -407,6 +409,11 @@ static void nand_realize(DeviceState *dev, Error **errp)
             error_setg(errp, "Can't use a read-only drive");
             return;
         }
+        ret = blk_set_perm(s->blk, BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE,
+                           BLK_PERM_ALL, errp);
+        if (ret < 0) {
+            return;
+        }
         if (blk_getlength(s->blk) >=
                 (s->pages << s->page_shift) + (s->pages << s->oob_shift)) {
             pagesize = 0;
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index ae91a18f17..ae303d44e5 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -835,6 +835,7 @@ static int nvme_init(PCIDevice *pci_dev)
     int i;
     int64_t bs_size;
     uint8_t *pci_conf;
+    Error *local_err = NULL;
 
     if (!n->conf.blk) {
         return -1;
@@ -850,7 +851,12 @@ static int nvme_init(PCIDevice *pci_dev)
         return -1;
     }
     blkconf_blocksizes(&n->conf);
-    blkconf_apply_backend_options(&n->conf);
+    blkconf_apply_backend_options(&n->conf, blk_is_read_only(n->conf.blk),
+                                  false, &local_err);
+    if (local_err) {
+        error_report_err(local_err);
+        return -1;
+    }
 
     pci_conf = pci_dev->config;
     pci_conf[PCI_INTERRUPT_PIN] = 1;
diff --git a/hw/block/onenand.c b/hw/block/onenand.c
index 8d8422739e..ddf5492426 100644
--- a/hw/block/onenand.c
+++ b/hw/block/onenand.c
@@ -778,6 +778,7 @@ static int onenand_initfn(SysBusDevice *sbd)
     OneNANDState *s = ONE_NAND(dev);
     uint32_t size = 1 << (24 + ((s->id.dev >> 4) & 7));
     void *ram;
+    Error *local_err = NULL;
 
     s->base = (hwaddr)-1;
     s->rdy = NULL;
@@ -796,6 +797,12 @@ static int onenand_initfn(SysBusDevice *sbd)
             error_report("Can't use a read-only drive");
             return -1;
         }
+        blk_set_perm(s->blk, BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE,
+                     BLK_PERM_ALL, &local_err);
+        if (local_err) {
+            error_report_err(local_err);
+            return -1;
+        }
         s->blk_cur = s->blk;
     }
     s->otp = memset(g_malloc((64 + 2) << PAGE_SHIFT),
diff --git a/hw/block/pflash_cfi01.c b/hw/block/pflash_cfi01.c
index 71b98a3eef..594d4cf6fe 100644
--- a/hw/block/pflash_cfi01.c
+++ b/hw/block/pflash_cfi01.c
@@ -758,6 +758,18 @@ static void pflash_cfi01_realize(DeviceState *dev, Error **errp)
     sysbus_init_mmio(SYS_BUS_DEVICE(dev), &pfl->mem);
 
     if (pfl->blk) {
+        uint64_t perm;
+        pfl->ro = blk_is_read_only(pfl->blk);
+        perm = BLK_PERM_CONSISTENT_READ | (pfl->ro ? 0 : BLK_PERM_WRITE);
+        ret = blk_set_perm(pfl->blk, perm, BLK_PERM_ALL, errp);
+        if (ret < 0) {
+            return;
+        }
+    } else {
+        pfl->ro = 0;
+    }
+
+    if (pfl->blk) {
         /* read the initial flash content */
         ret = blk_pread(pfl->blk, 0, pfl->storage, total_len);
 
@@ -768,12 +780,6 @@ static void pflash_cfi01_realize(DeviceState *dev, Error **errp)
         }
     }
 
-    if (pfl->blk) {
-        pfl->ro = blk_is_read_only(pfl->blk);
-    } else {
-        pfl->ro = 0;
-    }
-
     /* Default to devices being used at their maximum device width. This was
      * assumed before the device_width support was added.
      */
diff --git a/hw/block/pflash_cfi02.c b/hw/block/pflash_cfi02.c
index ef71322759..e6c5c6c25d 100644
--- a/hw/block/pflash_cfi02.c
+++ b/hw/block/pflash_cfi02.c
@@ -632,6 +632,19 @@ static void pflash_cfi02_realize(DeviceState *dev, Error **errp)
     vmstate_register_ram(&pfl->orig_mem, DEVICE(pfl));
     pfl->storage = memory_region_get_ram_ptr(&pfl->orig_mem);
     pfl->chip_len = chip_len;
+
+    if (pfl->blk) {
+        uint64_t perm;
+        pfl->ro = blk_is_read_only(pfl->blk);
+        perm = BLK_PERM_CONSISTENT_READ | (pfl->ro ? 0 : BLK_PERM_WRITE);
+        ret = blk_set_perm(pfl->blk, perm, BLK_PERM_ALL, errp);
+        if (ret < 0) {
+            return;
+        }
+    } else {
+        pfl->ro = 0;
+    }
+
     if (pfl->blk) {
         /* read the initial flash content */
         ret = blk_pread(pfl->blk, 0, pfl->storage, chip_len);
@@ -646,12 +659,6 @@ static void pflash_cfi02_realize(DeviceState *dev, Error **errp)
     pfl->rom_mode = 1;
     sysbus_init_mmio(SYS_BUS_DEVICE(dev), &pfl->mem);
 
-    if (pfl->blk) {
-        pfl->ro = blk_is_read_only(pfl->blk);
-    } else {
-        pfl->ro = 0;
-    }
-
     pfl->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, pflash_timer, pfl);
     pfl->wcycle = 0;
     pfl->cmd = 0;
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index 843bd2fa73..98c16a7a9a 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -928,7 +928,13 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp)
     }
 
     blkconf_serial(&conf->conf, &conf->serial);
-    blkconf_apply_backend_options(&conf->conf);
+    blkconf_apply_backend_options(&conf->conf,
+                                  blk_is_read_only(conf->conf.blk), true,
+                                  &err);
+    if (err) {
+        error_propagate(errp, err);
+        return;
+    }
     s->original_wce = blk_enable_write_cache(conf->conf.blk);
     blkconf_geometry(&conf->conf, NULL, 65535, 255, 255, &err);
     if (err) {
diff --git a/hw/core/Makefile.objs b/hw/core/Makefile.objs
index 7f8c9dc659..91450b2eab 100644
--- a/hw/core/Makefile.objs
+++ b/hw/core/Makefile.objs
@@ -13,6 +13,7 @@ common-obj-$(CONFIG_PTIMER) += ptimer.o
 common-obj-$(CONFIG_SOFTMMU) += sysbus.o
 common-obj-$(CONFIG_SOFTMMU) += machine.o
 common-obj-$(CONFIG_SOFTMMU) += loader.o
+common-obj-$(CONFIG_FITLOADER) += loader-fit.o
 common-obj-$(CONFIG_SOFTMMU) += qdev-properties-system.o
 common-obj-$(CONFIG_SOFTMMU) += register.o
 common-obj-$(CONFIG_SOFTMMU) += or-irq.o
diff --git a/hw/core/bus.c b/hw/core/bus.c
index cf383fc1af..4651f24486 100644
--- a/hw/core/bus.c
+++ b/hw/core/bus.c
@@ -197,7 +197,7 @@ static void qbus_initfn(Object *obj)
                              TYPE_HOTPLUG_HANDLER,
                              (Object **)&bus->hotplug_handler,
                              object_property_allow_set_link,
-                             OBJ_PROP_LINK_UNREF_ON_RELEASE,
+                             0,
                              NULL);
     object_property_add_bool(obj, "realized",
                              bus_get_realized, bus_set_realized, NULL);
diff --git a/hw/core/irq.c b/hw/core/irq.c
index 49ff2e64fe..b98d1d69f5 100644
--- a/hw/core/irq.c
+++ b/hw/core/irq.c
@@ -22,6 +22,7 @@
  * THE SOFTWARE.
  */
 #include "qemu/osdep.h"
+#include "qemu/main-loop.h"
 #include "qemu-common.h"
 #include "hw/irq.h"
 #include "qom/object.h"
diff --git a/hw/core/loader-fit.c b/hw/core/loader-fit.c
new file mode 100644
index 0000000000..0c4a7207f4
--- /dev/null
+++ b/hw/core/loader-fit.c
@@ -0,0 +1,325 @@
+/*
+ * Flattened Image Tree loader.
+ *
+ * Copyright (c) 2016 Imagination Technologies
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "exec/address-spaces.h"
+#include "exec/memory.h"
+#include "hw/loader.h"
+#include "hw/loader-fit.h"
+#include "qemu/cutils.h"
+#include "qemu/error-report.h"
+#include "sysemu/device_tree.h"
+#include "sysemu/sysemu.h"
+
+#include <libfdt.h>
+#include <zlib.h>
+
+#define FIT_LOADER_MAX_PATH (128)
+
+static const void *fit_load_image_alloc(const void *itb, const char *name,
+                                        int *poff, size_t *psz)
+{
+    const void *data;
+    const char *comp;
+    void *uncomp_data;
+    char path[FIT_LOADER_MAX_PATH];
+    int off, sz;
+    ssize_t uncomp_len;
+
+    snprintf(path, sizeof(path), "/images/%s", name);
+
+    off = fdt_path_offset(itb, path);
+    if (off < 0) {
+        return NULL;
+    }
+    if (poff) {
+        *poff = off;
+    }
+
+    data = fdt_getprop(itb, off, "data", &sz);
+    if (!data) {
+        return NULL;
+    }
+
+    comp = fdt_getprop(itb, off, "compression", NULL);
+    if (!comp || !strcmp(comp, "none")) {
+        if (psz) {
+            *psz = sz;
+        }
+        uncomp_data = g_malloc(sz);
+        memmove(uncomp_data, data, sz);
+        return uncomp_data;
+    }
+
+    if (!strcmp(comp, "gzip")) {
+        uncomp_len = UBOOT_MAX_GUNZIP_BYTES;
+        uncomp_data = g_malloc(uncomp_len);
+
+        uncomp_len = gunzip(uncomp_data, uncomp_len, (void *) data, sz);
+        if (uncomp_len < 0) {
+            error_printf("unable to decompress %s image\n", name);
+            g_free(uncomp_data);
+            return NULL;
+        }
+
+        data = g_realloc(uncomp_data, uncomp_len);
+        if (psz) {
+            *psz = uncomp_len;
+        }
+        return data;
+    }
+
+    error_printf("unknown compression '%s'\n", comp);
+    return NULL;
+}
+
+static int fit_image_addr(const void *itb, int img, const char *name,
+                          hwaddr *addr)
+{
+    const void *prop;
+    int len;
+
+    prop = fdt_getprop(itb, img, name, &len);
+    if (!prop) {
+        return -ENOENT;
+    }
+
+    switch (len) {
+    case 4:
+        *addr = fdt32_to_cpu(*(fdt32_t *)prop);
+        return 0;
+    case 8:
+        *addr = fdt64_to_cpu(*(fdt64_t *)prop);
+        return 0;
+    default:
+        error_printf("invalid %s address length %d\n", name, len);
+        return -EINVAL;
+    }
+}
+
+static int fit_load_kernel(const struct fit_loader *ldr, const void *itb,
+                           int cfg, void *opaque, hwaddr *pend)
+{
+    const char *name;
+    const void *data;
+    const void *load_data;
+    hwaddr load_addr, entry_addr;
+    int img_off, err;
+    size_t sz;
+    int ret;
+
+    name = fdt_getprop(itb, cfg, "kernel", NULL);
+    if (!name) {
+        error_printf("no kernel specified by FIT configuration\n");
+        return -EINVAL;
+    }
+
+    load_data = data = fit_load_image_alloc(itb, name, &img_off, &sz);
+    if (!data) {
+        error_printf("unable to load kernel image from FIT\n");
+        return -EINVAL;
+    }
+
+    err = fit_image_addr(itb, img_off, "load", &load_addr);
+    if (err) {
+        error_printf("unable to read kernel load address from FIT\n");
+        ret = err;
+        goto out;
+    }
+
+    err = fit_image_addr(itb, img_off, "entry", &entry_addr);
+    if (err) {
+        error_printf("unable to read kernel entry address from FIT\n");
+        ret = err;
+        goto out;
+    }
+
+    if (ldr->kernel_filter) {
+        load_data = ldr->kernel_filter(opaque, data, &load_addr, &entry_addr);
+    }
+
+    if (pend) {
+        *pend = load_addr + sz;
+    }
+
+    load_addr = ldr->addr_to_phys(opaque, load_addr);
+    rom_add_blob_fixed(name, load_data, sz, load_addr);
+
+    ret = 0;
+out:
+    g_free((void *) data);
+    if (data != load_data) {
+        g_free((void *) load_data);
+    }
+    return ret;
+}
+
+static int fit_load_fdt(const struct fit_loader *ldr, const void *itb,
+                        int cfg, void *opaque, const void *match_data,
+                        hwaddr kernel_end)
+{
+    const char *name;
+    const void *data;
+    const void *load_data;
+    hwaddr load_addr;
+    int img_off, err;
+    size_t sz;
+    int ret;
+
+    name = fdt_getprop(itb, cfg, "fdt", NULL);
+    if (!name) {
+        return 0;
+    }
+
+    load_data = data = fit_load_image_alloc(itb, name, &img_off, &sz);
+    if (!data) {
+        error_printf("unable to load FDT image from FIT\n");
+        return -EINVAL;
+    }
+
+    err = fit_image_addr(itb, img_off, "load", &load_addr);
+    if (err == -ENOENT) {
+        load_addr = ROUND_UP(kernel_end, 64 * K_BYTE) + (10 * M_BYTE);
+    } else if (err) {
+        ret = err;
+        goto out;
+    }
+
+    if (ldr->fdt_filter) {
+        load_data = ldr->fdt_filter(opaque, data, match_data, &load_addr);
+    }
+
+    load_addr = ldr->addr_to_phys(opaque, load_addr);
+    sz = fdt_totalsize(load_data);
+    rom_add_blob_fixed(name, load_data, sz, load_addr);
+
+    ret = 0;
+out:
+    g_free((void *) data);
+    if (data != load_data) {
+        g_free((void *) load_data);
+    }
+    return ret;
+}
+
+static bool fit_cfg_compatible(const void *itb, int cfg, const char *compat)
+{
+    const void *fdt;
+    const char *fdt_name;
+    bool ret;
+
+    fdt_name = fdt_getprop(itb, cfg, "fdt", NULL);
+    if (!fdt_name) {
+        return false;
+    }
+
+    fdt = fit_load_image_alloc(itb, fdt_name, NULL, NULL);
+    if (!fdt) {
+        return false;
+    }
+
+    if (fdt_check_header(fdt)) {
+        ret = false;
+        goto out;
+    }
+
+    if (fdt_node_check_compatible(fdt, 0, compat)) {
+        ret = false;
+        goto out;
+    }
+
+    ret = true;
+out:
+    g_free((void *) fdt);
+    return ret;
+}
+
+int load_fit(const struct fit_loader *ldr, const char *filename, void *opaque)
+{
+    const struct fit_loader_match *match;
+    const void *itb, *match_data = NULL;
+    const char *def_cfg_name;
+    char path[FIT_LOADER_MAX_PATH];
+    int itb_size, configs, cfg_off, off, err;
+    hwaddr kernel_end;
+    int ret;
+
+    itb = load_device_tree(filename, &itb_size);
+    if (!itb) {
+        return -EINVAL;
+    }
+
+    configs = fdt_path_offset(itb, "/configurations");
+    if (configs < 0) {
+        ret = configs;
+        goto out;
+    }
+
+    cfg_off = -FDT_ERR_NOTFOUND;
+
+    if (ldr->matches) {
+        for (match = ldr->matches; match->compatible; match++) {
+            off = fdt_first_subnode(itb, configs);
+            while (off >= 0) {
+                if (fit_cfg_compatible(itb, off, match->compatible)) {
+                    cfg_off = off;
+                    match_data = match->data;
+                    break;
+                }
+
+                off = fdt_next_subnode(itb, off);
+            }
+
+            if (cfg_off >= 0) {
+                break;
+            }
+        }
+    }
+
+    if (cfg_off < 0) {
+        def_cfg_name = fdt_getprop(itb, configs, "default", NULL);
+        if (def_cfg_name) {
+            snprintf(path, sizeof(path), "/configurations/%s", def_cfg_name);
+            cfg_off = fdt_path_offset(itb, path);
+        }
+    }
+
+    if (cfg_off < 0) {
+        /* couldn't find a configuration to use */
+        ret = cfg_off;
+        goto out;
+    }
+
+    err = fit_load_kernel(ldr, itb, cfg_off, opaque, &kernel_end);
+    if (err) {
+        ret = err;
+        goto out;
+    }
+
+    err = fit_load_fdt(ldr, itb, cfg_off, opaque, match_data, kernel_end);
+    if (err) {
+        ret = err;
+        goto out;
+    }
+
+    ret = 0;
+out:
+    g_free((void *) itb);
+    return ret;
+}
diff --git a/hw/core/loader.c b/hw/core/loader.c
index ee5abd6eb7..bf17b42cbe 100644
--- a/hw/core/loader.c
+++ b/hw/core/loader.c
@@ -435,6 +435,19 @@ int load_elf_as(const char *filename,
                 uint64_t *highaddr, int big_endian, int elf_machine,
                 int clear_lsb, int data_swab, AddressSpace *as)
 {
+    return load_elf_ram(filename, translate_fn, translate_opaque,
+                        pentry, lowaddr, highaddr, big_endian, elf_machine,
+                        clear_lsb, data_swab, as, true);
+}
+
+/* return < 0 if error, otherwise the number of bytes loaded in memory */
+int load_elf_ram(const char *filename,
+                 uint64_t (*translate_fn)(void *, uint64_t),
+                 void *translate_opaque, uint64_t *pentry, uint64_t *lowaddr,
+                 uint64_t *highaddr, int big_endian, int elf_machine,
+                 int clear_lsb, int data_swab, AddressSpace *as,
+                 bool load_rom)
+{
     int fd, data_order, target_data_order, must_swab, ret = ELF_LOAD_FAILED;
     uint8_t e_ident[EI_NIDENT];
 
@@ -473,11 +486,11 @@ int load_elf_as(const char *filename,
     if (e_ident[EI_CLASS] == ELFCLASS64) {
         ret = load_elf64(filename, fd, translate_fn, translate_opaque, must_swab,
                          pentry, lowaddr, highaddr, elf_machine, clear_lsb,
-                         data_swab, as);
+                         data_swab, as, load_rom);
     } else {
         ret = load_elf32(filename, fd, translate_fn, translate_opaque, must_swab,
                          pentry, lowaddr, highaddr, elf_machine, clear_lsb,
-                         data_swab, as);
+                         data_swab, as, load_rom);
     }
 
  fail:
@@ -527,12 +540,7 @@ static void zfree(void *x, void *addr)
 
 #define DEFLATED	8
 
-/* This is the usual maximum in uboot, so if a uImage overflows this, it would
- * overflow on real hardware too. */
-#define UBOOT_MAX_GUNZIP_BYTES (64 << 20)
-
-static ssize_t gunzip(void *dst, size_t dstlen, uint8_t *src,
-                      size_t srclen)
+ssize_t gunzip(void *dst, size_t dstlen, uint8_t *src, size_t srclen)
 {
     z_stream s;
     ssize_t dstbytes;
diff --git a/hw/core/machine.c b/hw/core/machine.c
index b0fd91f6cd..0699750336 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -357,6 +357,37 @@ static void machine_init_notify(Notifier *notifier, void *data)
     foreach_dynamic_sysbus_device(error_on_sysbus_device, NULL);
 }
 
+HotpluggableCPUList *machine_query_hotpluggable_cpus(MachineState *machine)
+{
+    int i;
+    Object *cpu;
+    HotpluggableCPUList *head = NULL;
+    const char *cpu_type;
+
+    cpu = machine->possible_cpus->cpus[0].cpu;
+    assert(cpu); /* Boot cpu is always present */
+    cpu_type = object_get_typename(cpu);
+    for (i = 0; i < machine->possible_cpus->len; i++) {
+        HotpluggableCPUList *list_item = g_new0(typeof(*list_item), 1);
+        HotpluggableCPU *cpu_item = g_new0(typeof(*cpu_item), 1);
+
+        cpu_item->type = g_strdup(cpu_type);
+        cpu_item->vcpus_count = machine->possible_cpus->cpus[i].vcpus_count;
+        cpu_item->props = g_memdup(&machine->possible_cpus->cpus[i].props,
+                                   sizeof(*cpu_item->props));
+
+        cpu = machine->possible_cpus->cpus[i].cpu;
+        if (cpu) {
+            cpu_item->has_qom_path = true;
+            cpu_item->qom_path = object_get_canonical_path(cpu);
+        }
+        list_item->value = cpu_item;
+        list_item->next = head;
+        head = list_item;
+    }
+    return head;
+}
+
 static void machine_class_init(ObjectClass *oc, void *data)
 {
     MachineClass *mc = MACHINE_CLASS(oc);
diff --git a/hw/core/or-irq.c b/hw/core/or-irq.c
index 1ac090d1a4..1485d5b285 100644
--- a/hw/core/or-irq.c
+++ b/hw/core/or-irq.c
@@ -89,6 +89,9 @@ static void or_irq_class_init(ObjectClass *klass, void *data)
     dc->props = or_irq_properties;
     dc->realize = or_irq_realize;
     dc->vmsd = &vmstate_or_irq;
+
+    /* Reason: Needs to be wired up to work, e.g. see stm32f205_soc.c */
+    dc->cannot_instantiate_with_device_add_yet = true;
 }
 
 static const TypeInfo or_irq_type_info = {
diff --git a/hw/core/ptimer.c b/hw/core/ptimer.c
index 3af82afe78..59ccb00550 100644
--- a/hw/core/ptimer.c
+++ b/hw/core/ptimer.c
@@ -12,6 +12,7 @@
 #include "qemu/host-utils.h"
 #include "sysemu/replay.h"
 #include "sysemu/qtest.h"
+#include "block/aio.h"
 
 #define DELTA_ADJUST     1
 #define DELTA_NO_ADJUST -1
@@ -353,3 +354,10 @@ ptimer_state *ptimer_init(QEMUBH *bh, uint8_t policy_mask)
     s->policy_mask = policy_mask;
     return s;
 }
+
+void ptimer_free(ptimer_state *s)
+{
+    qemu_bh_delete(s->bh);
+    timer_free(s->timer);
+    g_free(s);
+}
diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c
index 94f4d8bde4..c34be1c1ba 100644
--- a/hw/core/qdev-properties-system.c
+++ b/hw/core/qdev-properties-system.c
@@ -73,14 +73,19 @@ static void parse_drive(DeviceState *dev, const char *str, void **ptr,
 {
     BlockBackend *blk;
     bool blk_created = false;
+    int ret;
 
     blk = blk_by_name(str);
     if (!blk) {
         BlockDriverState *bs = bdrv_lookup_bs(NULL, str, NULL);
         if (bs) {
-            blk = blk_new();
-            blk_insert_bs(blk, bs);
+            blk = blk_new(0, BLK_PERM_ALL);
             blk_created = true;
+
+            ret = blk_insert_bs(blk, bs, errp);
+            if (ret < 0) {
+                goto fail;
+            }
         }
     }
     if (!blk) {
diff --git a/hw/core/qdev.c b/hw/core/qdev.c
index 06ba02e2a3..1e7fb33246 100644
--- a/hw/core/qdev.c
+++ b/hw/core/qdev.c
@@ -37,6 +37,7 @@
 #include "hw/boards.h"
 #include "hw/sysbus.h"
 #include "qapi-event.h"
+#include "migration/migration.h"
 
 int qdev_hotplug = 0;
 static bool qdev_hot_added = false;
@@ -102,9 +103,23 @@ static void bus_add_child(BusState *bus, DeviceState *child)
 
 void qdev_set_parent_bus(DeviceState *dev, BusState *bus)
 {
+    bool replugging = dev->parent_bus != NULL;
+
+    if (replugging) {
+        /* Keep a reference to the device while it's not plugged into
+         * any bus, to avoid it potentially evaporating when it is
+         * dereffed in bus_remove_child().
+         */
+        object_ref(OBJECT(dev));
+        bus_remove_child(dev->parent_bus, dev);
+        object_unref(OBJECT(dev->parent_bus));
+    }
     dev->parent_bus = bus;
     object_ref(OBJECT(bus));
     bus_add_child(bus, dev);
+    if (replugging) {
+        object_unref(OBJECT(dev));
+    }
 }
 
 /* Create a new device.  This only initializes the device state
@@ -889,6 +904,7 @@ static void device_set_realized(Object *obj, bool value, Error **errp)
     Error *local_err = NULL;
     bool unattached_parent = false;
     static int unattached_count;
+    int ret;
 
     if (dev->hotplugged && !dc->hotpluggable) {
         error_setg(errp, QERR_DEVICE_NO_HOTPLUG, object_get_typename(obj));
@@ -896,6 +912,11 @@ static void device_set_realized(Object *obj, bool value, Error **errp)
     }
 
     if (value && !dev->realized) {
+        ret = check_migratable(obj, &local_err);
+        if (ret < 0) {
+            goto fail;
+        }
+
         if (!obj->parent) {
             gchar *name = g_strdup_printf("device[%d]", unattached_count++);
 
diff --git a/hw/core/register.c b/hw/core/register.c
index 4bfbc508de..dc335a79a9 100644
--- a/hw/core/register.c
+++ b/hw/core/register.c
@@ -59,6 +59,15 @@ static inline uint64_t register_read_val(RegisterInfo *reg)
     return 0; /* unreachable */
 }
 
+static inline uint64_t register_enabled_mask(int data_size, unsigned size)
+{
+    if (data_size < size) {
+        size = data_size;
+    }
+
+    return MAKE_64BIT_MASK(0, size * 8);
+}
+
 void register_write(RegisterInfo *reg, uint64_t val, uint64_t we,
                     const char *prefix, bool debug)
 {
@@ -192,11 +201,7 @@ void register_write_memory(void *opaque, hwaddr addr,
     }
 
     /* Generate appropriate write enable mask */
-    if (reg->data_size < size) {
-        we = MAKE_64BIT_MASK(0, reg->data_size * 8);
-    } else {
-        we = MAKE_64BIT_MASK(0, size * 8);
-    }
+    we = register_enabled_mask(reg->data_size, size);
 
     register_write(reg, value, we, reg_array->prefix,
                    reg_array->debug);
@@ -208,6 +213,7 @@ uint64_t register_read_memory(void *opaque, hwaddr addr,
     RegisterInfoArray *reg_array = opaque;
     RegisterInfo *reg = NULL;
     uint64_t read_val;
+    uint64_t re;
     int i;
 
     for (i = 0; i < reg_array->num_elements; i++) {
@@ -223,7 +229,10 @@ uint64_t register_read_memory(void *opaque, hwaddr addr,
         return 0;
     }
 
-    read_val = register_read(reg, size * 8, reg_array->prefix,
+    /* Generate appropriate read enable mask */
+    re = register_enabled_mask(reg->data_size, size);
+
+    read_val = register_read(reg, re, reg_array->prefix,
                              reg_array->debug);
 
     return extract64(read_val, 0, size * 8);
@@ -274,9 +283,18 @@ void register_finalize_block(RegisterInfoArray *r_array)
     g_free(r_array);
 }
 
+static void register_class_init(ObjectClass *oc, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(oc);
+
+    /* Reason: needs to be wired up to work */
+    dc->cannot_instantiate_with_device_add_yet = true;
+}
+
 static const TypeInfo register_info = {
     .name  = TYPE_REGISTER,
     .parent = TYPE_DEVICE,
+    .class_init = register_class_init,
 };
 
 static void register_register_types(void)
diff --git a/hw/display/cirrus_vga.c b/hw/display/cirrus_vga.c
index 1deb52070a..b9e7cb1df1 100644
--- a/hw/display/cirrus_vga.c
+++ b/hw/display/cirrus_vga.c
@@ -900,6 +900,10 @@ static int cirrus_bitblt_cputovideo(CirrusVGAState * s)
 {
     int w;
 
+    if (blit_is_unsafe(s, true)) {
+        return 0;
+    }
+
     s->cirrus_blt_mode &= ~CIRRUS_BLTMODE_MEMSYSSRC;
     s->cirrus_srcptr = &s->cirrus_bltbuf[0];
     s->cirrus_srcptr_end = &s->cirrus_bltbuf[0];
@@ -925,6 +929,10 @@ static int cirrus_bitblt_cputovideo(CirrusVGAState * s)
 	}
         s->cirrus_srccounter = s->cirrus_blt_srcpitch * s->cirrus_blt_height;
     }
+
+    /* the blit_is_unsafe call above should catch this */
+    assert(s->cirrus_blt_srcpitch <= CIRRUS_BLTBUFSIZE);
+
     s->cirrus_srcptr = s->cirrus_bltbuf;
     s->cirrus_srcptr_end = s->cirrus_bltbuf + s->cirrus_blt_srcpitch;
     cirrus_update_memory_access(s);
diff --git a/hw/display/milkymist-tmu2.c b/hw/display/milkymist-tmu2.c
index 7528665510..59120ddb67 100644
--- a/hw/display/milkymist-tmu2.c
+++ b/hw/display/milkymist-tmu2.c
@@ -293,7 +293,7 @@ static void tmu2_start(MilkymistTMU2State *s)
     cpu_physical_memory_unmap(mesh, mesh_len, 0, mesh_len);
 
     /* Write back the OpenGL framebuffer to the QEMU framebuffer */
-    fb_len = 2 * s->regs[R_DSTHRES] * s->regs[R_DSTVRES];
+    fb_len = 2ULL * s->regs[R_DSTHRES] * s->regs[R_DSTVRES];
     fb = cpu_physical_memory_map(s->regs[R_DSTFBUF], &fb_len, 1);
     if (fb == NULL) {
         glDeleteTextures(1, &texture);
diff --git a/hw/display/virtio-gpu-3d.c b/hw/display/virtio-gpu-3d.c
index ecb09d17a1..f49b7fe8cd 100644
--- a/hw/display/virtio-gpu-3d.c
+++ b/hw/display/virtio-gpu-3d.c
@@ -177,16 +177,15 @@ static void virgl_cmd_set_scanout(VirtIOGPU *g,
         qemu_console_resize(g->scanout[ss.scanout_id].con,
                             ss.r.width, ss.r.height);
         virgl_renderer_force_ctx_0();
-        dpy_gl_scanout(g->scanout[ss.scanout_id].con, info.tex_id,
-                       info.flags & 1 /* FIXME: Y_0_TOP */,
-                       info.width, info.height,
-                       ss.r.x, ss.r.y, ss.r.width, ss.r.height);
+        dpy_gl_scanout_texture(g->scanout[ss.scanout_id].con, info.tex_id,
+                               info.flags & 1 /* FIXME: Y_0_TOP */,
+                               info.width, info.height,
+                               ss.r.x, ss.r.y, ss.r.width, ss.r.height);
     } else {
         if (ss.scanout_id != 0) {
             dpy_gfx_replace_surface(g->scanout[ss.scanout_id].con, NULL);
         }
-        dpy_gl_scanout(g->scanout[ss.scanout_id].con, 0, false,
-                       0, 0, 0, 0, 0, 0);
+        dpy_gl_scanout_disable(g->scanout[ss.scanout_id].con);
     }
     g->scanout[ss.scanout_id].resource_id = ss.resource_id;
 }
@@ -597,7 +596,7 @@ void virtio_gpu_virgl_reset(VirtIOGPU *g)
         if (i != 0) {
             dpy_gfx_replace_surface(g->scanout[i].con, NULL);
         }
-        dpy_gl_scanout(g->scanout[i].con, 0, false, 0, 0, 0, 0, 0, 0);
+        dpy_gl_scanout_disable(g->scanout[i].con);
     }
 }
 
diff --git a/hw/gpio/Makefile.objs b/hw/gpio/Makefile.objs
index a43c7cf442..fa0a72e6d0 100644
--- a/hw/gpio/Makefile.objs
+++ b/hw/gpio/Makefile.objs
@@ -7,3 +7,4 @@ common-obj-$(CONFIG_GPIO_KEY) += gpio_key.o
 
 obj-$(CONFIG_OMAP) += omap_gpio.o
 obj-$(CONFIG_IMX) += imx_gpio.o
+obj-$(CONFIG_RASPI) += bcm2835_gpio.o
diff --git a/hw/gpio/bcm2835_gpio.c b/hw/gpio/bcm2835_gpio.c
new file mode 100644
index 0000000000..acc2e3cf9e
--- /dev/null
+++ b/hw/gpio/bcm2835_gpio.c
@@ -0,0 +1,353 @@
+/*
+ * Raspberry Pi (BCM2835) GPIO Controller
+ *
+ * Copyright (c) 2017 Antfield SAS
+ *
+ * Authors:
+ *  Clement Deschamps <clement.deschamps@antfield.fr>
+ *  Luc Michel <luc.michel@antfield.fr>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/log.h"
+#include "qemu/timer.h"
+#include "qapi/error.h"
+#include "hw/sysbus.h"
+#include "hw/sd/sd.h"
+#include "hw/gpio/bcm2835_gpio.h"
+
+#define GPFSEL0   0x00
+#define GPFSEL1   0x04
+#define GPFSEL2   0x08
+#define GPFSEL3   0x0C
+#define GPFSEL4   0x10
+#define GPFSEL5   0x14
+#define GPSET0    0x1C
+#define GPSET1    0x20
+#define GPCLR0    0x28
+#define GPCLR1    0x2C
+#define GPLEV0    0x34
+#define GPLEV1    0x38
+#define GPEDS0    0x40
+#define GPEDS1    0x44
+#define GPREN0    0x4C
+#define GPREN1    0x50
+#define GPFEN0    0x58
+#define GPFEN1    0x5C
+#define GPHEN0    0x64
+#define GPHEN1    0x68
+#define GPLEN0    0x70
+#define GPLEN1    0x74
+#define GPAREN0   0x7C
+#define GPAREN1   0x80
+#define GPAFEN0   0x88
+#define GPAFEN1   0x8C
+#define GPPUD     0x94
+#define GPPUDCLK0 0x98
+#define GPPUDCLK1 0x9C
+
+static uint32_t gpfsel_get(BCM2835GpioState *s, uint8_t reg)
+{
+    int i;
+    uint32_t value = 0;
+    for (i = 0; i < 10; i++) {
+        uint32_t index = 10 * reg + i;
+        if (index < sizeof(s->fsel)) {
+            value |= (s->fsel[index] & 0x7) << (3 * i);
+        }
+    }
+    return value;
+}
+
+static void gpfsel_set(BCM2835GpioState *s, uint8_t reg, uint32_t value)
+{
+    int i;
+    for (i = 0; i < 10; i++) {
+        uint32_t index = 10 * reg + i;
+        if (index < sizeof(s->fsel)) {
+            int fsel = (value >> (3 * i)) & 0x7;
+            s->fsel[index] = fsel;
+        }
+    }
+
+    /* SD controller selection (48-53) */
+    if (s->sd_fsel != 0
+            && (s->fsel[48] == 0) /* SD_CLK_R */
+            && (s->fsel[49] == 0) /* SD_CMD_R */
+            && (s->fsel[50] == 0) /* SD_DATA0_R */
+            && (s->fsel[51] == 0) /* SD_DATA1_R */
+            && (s->fsel[52] == 0) /* SD_DATA2_R */
+            && (s->fsel[53] == 0) /* SD_DATA3_R */
+            ) {
+        /* SDHCI controller selected */
+        sdbus_reparent_card(s->sdbus_sdhost, s->sdbus_sdhci);
+        s->sd_fsel = 0;
+    } else if (s->sd_fsel != 4
+            && (s->fsel[48] == 4) /* SD_CLK_R */
+            && (s->fsel[49] == 4) /* SD_CMD_R */
+            && (s->fsel[50] == 4) /* SD_DATA0_R */
+            && (s->fsel[51] == 4) /* SD_DATA1_R */
+            && (s->fsel[52] == 4) /* SD_DATA2_R */
+            && (s->fsel[53] == 4) /* SD_DATA3_R */
+            ) {
+        /* SDHost controller selected */
+        sdbus_reparent_card(s->sdbus_sdhci, s->sdbus_sdhost);
+        s->sd_fsel = 4;
+    }
+}
+
+static int gpfsel_is_out(BCM2835GpioState *s, int index)
+{
+    if (index >= 0 && index < 54) {
+        return s->fsel[index] == 1;
+    }
+    return 0;
+}
+
+static void gpset(BCM2835GpioState *s,
+        uint32_t val, uint8_t start, uint8_t count, uint32_t *lev)
+{
+    uint32_t changes = val & ~*lev;
+    uint32_t cur = 1;
+
+    int i;
+    for (i = 0; i < count; i++) {
+        if ((changes & cur) && (gpfsel_is_out(s, start + i))) {
+            qemu_set_irq(s->out[start + i], 1);
+        }
+        cur <<= 1;
+    }
+
+    *lev |= val;
+}
+
+static void gpclr(BCM2835GpioState *s,
+        uint32_t val, uint8_t start, uint8_t count, uint32_t *lev)
+{
+    uint32_t changes = val & *lev;
+    uint32_t cur = 1;
+
+    int i;
+    for (i = 0; i < count; i++) {
+        if ((changes & cur) && (gpfsel_is_out(s, start + i))) {
+            qemu_set_irq(s->out[start + i], 0);
+        }
+        cur <<= 1;
+    }
+
+    *lev &= ~val;
+}
+
+static uint64_t bcm2835_gpio_read(void *opaque, hwaddr offset,
+        unsigned size)
+{
+    BCM2835GpioState *s = (BCM2835GpioState *)opaque;
+
+    switch (offset) {
+    case GPFSEL0:
+    case GPFSEL1:
+    case GPFSEL2:
+    case GPFSEL3:
+    case GPFSEL4:
+    case GPFSEL5:
+        return gpfsel_get(s, offset / 4);
+    case GPSET0:
+    case GPSET1:
+        /* Write Only */
+        return 0;
+    case GPCLR0:
+    case GPCLR1:
+        /* Write Only */
+        return 0;
+    case GPLEV0:
+        return s->lev0;
+    case GPLEV1:
+        return s->lev1;
+    case GPEDS0:
+    case GPEDS1:
+    case GPREN0:
+    case GPREN1:
+    case GPFEN0:
+    case GPFEN1:
+    case GPHEN0:
+    case GPHEN1:
+    case GPLEN0:
+    case GPLEN1:
+    case GPAREN0:
+    case GPAREN1:
+    case GPAFEN0:
+    case GPAFEN1:
+    case GPPUD:
+    case GPPUDCLK0:
+    case GPPUDCLK1:
+        /* Not implemented */
+        return 0;
+    default:
+        qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad offset %"HWADDR_PRIx"\n",
+                __func__, offset);
+        break;
+    }
+
+    return 0;
+}
+
+static void bcm2835_gpio_write(void *opaque, hwaddr offset,
+        uint64_t value, unsigned size)
+{
+    BCM2835GpioState *s = (BCM2835GpioState *)opaque;
+
+    switch (offset) {
+    case GPFSEL0:
+    case GPFSEL1:
+    case GPFSEL2:
+    case GPFSEL3:
+    case GPFSEL4:
+    case GPFSEL5:
+        gpfsel_set(s, offset / 4, value);
+        break;
+    case GPSET0:
+        gpset(s, value, 0, 32, &s->lev0);
+        break;
+    case GPSET1:
+        gpset(s, value, 32, 22, &s->lev1);
+        break;
+    case GPCLR0:
+        gpclr(s, value, 0, 32, &s->lev0);
+        break;
+    case GPCLR1:
+        gpclr(s, value, 32, 22, &s->lev1);
+        break;
+    case GPLEV0:
+    case GPLEV1:
+        /* Read Only */
+        break;
+    case GPEDS0:
+    case GPEDS1:
+    case GPREN0:
+    case GPREN1:
+    case GPFEN0:
+    case GPFEN1:
+    case GPHEN0:
+    case GPHEN1:
+    case GPLEN0:
+    case GPLEN1:
+    case GPAREN0:
+    case GPAREN1:
+    case GPAFEN0:
+    case GPAFEN1:
+    case GPPUD:
+    case GPPUDCLK0:
+    case GPPUDCLK1:
+        /* Not implemented */
+        break;
+    default:
+        goto err_out;
+    }
+    return;
+
+err_out:
+    qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad offset %"HWADDR_PRIx"\n",
+            __func__, offset);
+}
+
+static void bcm2835_gpio_reset(DeviceState *dev)
+{
+    BCM2835GpioState *s = BCM2835_GPIO(dev);
+
+    int i;
+    for (i = 0; i < 6; i++) {
+        gpfsel_set(s, i, 0);
+    }
+
+    s->sd_fsel = 0;
+
+    /* SDHCI is selected by default */
+    sdbus_reparent_card(&s->sdbus, s->sdbus_sdhci);
+
+    s->lev0 = 0;
+    s->lev1 = 0;
+}
+
+static const MemoryRegionOps bcm2835_gpio_ops = {
+    .read = bcm2835_gpio_read,
+    .write = bcm2835_gpio_write,
+    .endianness = DEVICE_NATIVE_ENDIAN,
+};
+
+static const VMStateDescription vmstate_bcm2835_gpio = {
+    .name = "bcm2835_gpio",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT8_ARRAY(fsel, BCM2835GpioState, 54),
+        VMSTATE_UINT32(lev0, BCM2835GpioState),
+        VMSTATE_UINT32(lev1, BCM2835GpioState),
+        VMSTATE_UINT8(sd_fsel, BCM2835GpioState),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static void bcm2835_gpio_init(Object *obj)
+{
+    BCM2835GpioState *s = BCM2835_GPIO(obj);
+    DeviceState *dev = DEVICE(obj);
+    SysBusDevice *sbd = SYS_BUS_DEVICE(obj);
+
+    qbus_create_inplace(&s->sdbus, sizeof(s->sdbus),
+                        TYPE_SD_BUS, DEVICE(s), "sd-bus");
+
+    memory_region_init_io(&s->iomem, obj,
+            &bcm2835_gpio_ops, s, "bcm2835_gpio", 0x1000);
+    sysbus_init_mmio(sbd, &s->iomem);
+    qdev_init_gpio_out(dev, s->out, 54);
+}
+
+static void bcm2835_gpio_realize(DeviceState *dev, Error **errp)
+{
+    BCM2835GpioState *s = BCM2835_GPIO(dev);
+    Object *obj;
+    Error *err = NULL;
+
+    obj = object_property_get_link(OBJECT(dev), "sdbus-sdhci", &err);
+    if (obj == NULL) {
+        error_setg(errp, "%s: required sdhci link not found: %s",
+                __func__, error_get_pretty(err));
+        return;
+    }
+    s->sdbus_sdhci = SD_BUS(obj);
+
+    obj = object_property_get_link(OBJECT(dev), "sdbus-sdhost", &err);
+    if (obj == NULL) {
+        error_setg(errp, "%s: required sdhost link not found: %s",
+                __func__, error_get_pretty(err));
+        return;
+    }
+    s->sdbus_sdhost = SD_BUS(obj);
+}
+
+static void bcm2835_gpio_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+
+    dc->vmsd = &vmstate_bcm2835_gpio;
+    dc->realize = &bcm2835_gpio_realize;
+    dc->reset = &bcm2835_gpio_reset;
+}
+
+static const TypeInfo bcm2835_gpio_info = {
+    .name          = TYPE_BCM2835_GPIO,
+    .parent        = TYPE_SYS_BUS_DEVICE,
+    .instance_size = sizeof(BCM2835GpioState),
+    .instance_init = bcm2835_gpio_init,
+    .class_init    = bcm2835_gpio_class_init,
+};
+
+static void bcm2835_gpio_register_types(void)
+{
+    type_register_static(&bcm2835_gpio_info);
+}
+
+type_init(bcm2835_gpio_register_types)
diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index 1c928abb28..2073108577 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -42,6 +42,7 @@
 #include "hw/acpi/memory_hotplug.h"
 #include "sysemu/tpm.h"
 #include "hw/acpi/tpm.h"
+#include "hw/acpi/vmgenid.h"
 #include "sysemu/tpm_backend.h"
 #include "hw/timer/mc146818rtc_regs.h"
 #include "sysemu/numa.h"
@@ -462,7 +463,7 @@ static void *acpi_set_bsel(PCIBus *bus, void *opaque)
 
         *bus_bsel = (*bsel_alloc)++;
         object_property_add_uint32_ptr(OBJECT(bus), ACPI_PCIHP_PROP_BSEL,
-                                       bus_bsel, NULL);
+                                       bus_bsel, &error_abort);
     }
 
     return bsel_alloc;
@@ -471,7 +472,7 @@ static void *acpi_set_bsel(PCIBus *bus, void *opaque)
 static void acpi_set_pci_info(void)
 {
     PCIBus *bus = find_i440fx(); /* TODO: Q35 support */
-    unsigned bsel_alloc = 0;
+    unsigned bsel_alloc = ACPI_PCIHP_BSEL_DEFAULT;
 
     if (bus) {
         /* Scan all PCI buses. Set property to enable acpi based hotplug. */
@@ -1803,7 +1804,7 @@ static Aml *build_q35_osc_method(void)
     Aml *else_ctx;
     Aml *method;
     Aml *a_cwd1 = aml_name("CDW1");
-    Aml *a_ctrl = aml_name("CTRL");
+    Aml *a_ctrl = aml_local(0);
 
     method = aml_method("_OSC", 4, AML_NOTSERIALIZED);
     aml_append(method, aml_create_dword_field(aml_arg(3), aml_int(0), "CDW1"));
@@ -1813,7 +1814,6 @@ static Aml *build_q35_osc_method(void)
     aml_append(if_ctx, aml_create_dword_field(aml_arg(3), aml_int(4), "CDW2"));
     aml_append(if_ctx, aml_create_dword_field(aml_arg(3), aml_int(8), "CDW3"));
 
-    aml_append(if_ctx, aml_store(aml_name("CDW2"), aml_name("SUPP")));
     aml_append(if_ctx, aml_store(aml_name("CDW3"), a_ctrl));
 
     /*
@@ -1898,8 +1898,6 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
         aml_append(dev, aml_name_decl("_CID", aml_eisaid("PNP0A03")));
         aml_append(dev, aml_name_decl("_ADR", aml_int(0)));
         aml_append(dev, aml_name_decl("_UID", aml_int(1)));
-        aml_append(dev, aml_name_decl("SUPP", aml_int(0)));
-        aml_append(dev, aml_name_decl("CTRL", aml_int(0)));
         aml_append(dev, build_q35_osc_method());
         aml_append(sb_scope, dev);
         aml_append(dsdt, sb_scope);
@@ -1964,6 +1962,9 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
             aml_append(dev, aml_name_decl("_UID", aml_int(bus_num)));
             aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0A03")));
             aml_append(dev, aml_name_decl("_BBN", aml_int(bus_num)));
+            if (pci_bus_is_express(bus)) {
+                aml_append(dev, build_q35_osc_method());
+            }
 
             if (numa_node != NUMA_NODE_UNASSIGNED) {
                 aml_append(dev, aml_name_decl("_PXM", aml_int(numa_node)));
@@ -2610,6 +2611,7 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine)
     size_t aml_len = 0;
     GArray *tables_blob = tables->table_data;
     AcpiSlicOem slic_oem = { .id = NULL, .table_id = NULL };
+    Object *vmgenid_dev;
 
     acpi_get_pm_info(&pm);
     acpi_get_misc_info(&misc);
@@ -2653,6 +2655,13 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine)
     acpi_add_table(table_offsets, tables_blob);
     build_madt(tables_blob, tables->linker, pcms);
 
+    vmgenid_dev = find_vmgenid_dev();
+    if (vmgenid_dev) {
+        acpi_add_table(table_offsets, tables_blob);
+        vmgenid_build_acpi(VMGENID(vmgenid_dev), tables_blob,
+                           tables->vmgenid, tables->linker);
+    }
+
     if (misc.has_hpet) {
         acpi_add_table(table_offsets, tables_blob);
         build_hpet(tables_blob, tables->linker);
@@ -2823,6 +2832,7 @@ void acpi_setup(void)
     PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
     AcpiBuildTables tables;
     AcpiBuildState *build_state;
+    Object *vmgenid_dev;
 
     if (!pcms->fw_cfg) {
         ACPI_BUILD_DPRINTF("No fw cfg. Bailing out.\n");
@@ -2859,6 +2869,12 @@ void acpi_setup(void)
     fw_cfg_add_file(pcms->fw_cfg, ACPI_BUILD_TPMLOG_FILE,
                     tables.tcpalog->data, acpi_data_len(tables.tcpalog));
 
+    vmgenid_dev = find_vmgenid_dev();
+    if (vmgenid_dev) {
+        vmgenid_add_fw_cfg(VMGENID(vmgenid_dev), pcms->fw_cfg,
+                           tables.vmgenid);
+    }
+
     if (!pcmc->rsdp_in_ram) {
         /*
          * Keep for compatibility with old machine types.
diff --git a/hw/i386/kvmvapic.c b/hw/i386/kvmvapic.c
index 7135633863..82a49556af 100644
--- a/hw/i386/kvmvapic.c
+++ b/hw/i386/kvmvapic.c
@@ -457,8 +457,8 @@ static void patch_instruction(VAPICROMState *s, X86CPU *cpu, target_ulong ip)
     resume_all_vcpus();
 
     if (!kvm_enabled()) {
-        /* tb_lock will be reset when cpu_loop_exit_noexc longjmps
-         * back into the cpu_exec loop. */
+        /* Both tb_lock and iothread_mutex will be reset when
+         *  longjmps back into the cpu_exec loop. */
         tb_lock();
         tb_gen_code(cs, current_pc, current_cs_base, current_flags, 1);
         cpu_loop_exit_noexc(cs);
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 60b0946be3..d24388e05f 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -707,7 +707,8 @@ static void pc_build_smbios(PCMachineState *pcms)
     size_t smbios_tables_len, smbios_anchor_len;
     struct smbios_phys_mem_area *mem_array;
     unsigned i, array_count;
-    X86CPU *cpu = X86_CPU(pcms->possible_cpus->cpus[0].cpu);
+    MachineState *ms = MACHINE(pcms);
+    X86CPU *cpu = X86_CPU(ms->possible_cpus->cpus[0].cpu);
 
     /* tell smbios about cpuid version and features */
     smbios_set_cpuid(cpu->env.cpuid_version, cpu->env.features[FEAT_1_EDX]);
@@ -1111,7 +1112,7 @@ static void pc_new_cpu(const char *typename, int64_t apic_id, Error **errp)
 void pc_hot_add_cpu(const int64_t id, Error **errp)
 {
     ObjectClass *oc;
-    PCMachineState *pcms = PC_MACHINE(qdev_get_machine());
+    MachineState *ms = MACHINE(qdev_get_machine());
     int64_t apic_id = x86_cpu_apic_id_from_index(id);
     Error *local_err = NULL;
 
@@ -1127,8 +1128,8 @@ void pc_hot_add_cpu(const int64_t id, Error **errp)
         return;
     }
 
-    assert(pcms->possible_cpus->cpus[0].cpu); /* BSP is always present */
-    oc = OBJECT_CLASS(CPU_GET_CLASS(pcms->possible_cpus->cpus[0].cpu));
+    assert(ms->possible_cpus->cpus[0].cpu); /* BSP is always present */
+    oc = OBJECT_CLASS(CPU_GET_CLASS(ms->possible_cpus->cpus[0].cpu));
     pc_new_cpu(object_class_get_name(oc), apic_id, &local_err);
     if (local_err) {
         error_propagate(errp, local_err);
@@ -1143,7 +1144,9 @@ void pc_cpus_init(PCMachineState *pcms)
     ObjectClass *oc;
     const char *typename;
     gchar **model_pieces;
+    const CPUArchIdList *possible_cpus;
     MachineState *machine = MACHINE(pcms);
+    MachineClass *mc = MACHINE_GET_CLASS(pcms);
 
     /* init CPUs */
     if (machine->cpu_model == NULL) {
@@ -1178,20 +1181,16 @@ void pc_cpus_init(PCMachineState *pcms)
      * This is used for FW_CFG_MAX_CPUS. See comments on bochs_bios_init().
      */
     pcms->apic_id_limit = x86_cpu_apic_id_from_index(max_cpus - 1) + 1;
-    pcms->possible_cpus = g_malloc0(sizeof(CPUArchIdList) +
-                                    sizeof(CPUArchId) * max_cpus);
-    for (i = 0; i < max_cpus; i++) {
-        pcms->possible_cpus->cpus[i].arch_id = x86_cpu_apic_id_from_index(i);
-        pcms->possible_cpus->len++;
-        if (i < smp_cpus) {
-            pc_new_cpu(typename, x86_cpu_apic_id_from_index(i), &error_fatal);
-        }
+    possible_cpus = mc->possible_cpu_arch_ids(machine);
+    for (i = 0; i < smp_cpus; i++) {
+        pc_new_cpu(typename, possible_cpus->cpus[i].arch_id, &error_fatal);
     }
 }
 
 static void pc_build_feature_control_file(PCMachineState *pcms)
 {
-    X86CPU *cpu = X86_CPU(pcms->possible_cpus->cpus[0].cpu);
+    MachineState *ms = MACHINE(pcms);
+    X86CPU *cpu = X86_CPU(ms->possible_cpus->cpus[0].cpu);
     CPUX86State *env = &cpu->env;
     uint32_t unused, ecx, edx;
     uint64_t feature_control_bits = 0;
@@ -1787,21 +1786,19 @@ static int pc_apic_cmp(const void *a, const void *b)
 }
 
 /* returns pointer to CPUArchId descriptor that matches CPU's apic_id
- * in pcms->possible_cpus->cpus, if pcms->possible_cpus->cpus has no
+ * in ms->possible_cpus->cpus, if ms->possible_cpus->cpus has no
  * entry corresponding to CPU's apic_id returns NULL.
  */
-static CPUArchId *pc_find_cpu_slot(PCMachineState *pcms, CPUState *cpu,
-                                   int *idx)
+static CPUArchId *pc_find_cpu_slot(MachineState *ms, uint32_t id, int *idx)
 {
-    CPUClass *cc = CPU_GET_CLASS(cpu);
     CPUArchId apic_id, *found_cpu;
 
-    apic_id.arch_id = cc->get_arch_id(CPU(cpu));
-    found_cpu = bsearch(&apic_id, pcms->possible_cpus->cpus,
-        pcms->possible_cpus->len, sizeof(*pcms->possible_cpus->cpus),
+    apic_id.arch_id = id;
+    found_cpu = bsearch(&apic_id, ms->possible_cpus->cpus,
+        ms->possible_cpus->len, sizeof(*ms->possible_cpus->cpus),
         pc_apic_cmp);
     if (found_cpu && idx) {
-        *idx = found_cpu - pcms->possible_cpus->cpus;
+        *idx = found_cpu - ms->possible_cpus->cpus;
     }
     return found_cpu;
 }
@@ -1812,6 +1809,7 @@ static void pc_cpu_plug(HotplugHandler *hotplug_dev,
     CPUArchId *found_cpu;
     HotplugHandlerClass *hhc;
     Error *local_err = NULL;
+    X86CPU *cpu = X86_CPU(dev);
     PCMachineState *pcms = PC_MACHINE(hotplug_dev);
 
     if (pcms->acpi_dev) {
@@ -1831,8 +1829,8 @@ static void pc_cpu_plug(HotplugHandler *hotplug_dev,
         fw_cfg_modify_i16(pcms->fw_cfg, FW_CFG_NB_CPUS, pcms->boot_cpus);
     }
 
-    found_cpu = pc_find_cpu_slot(pcms, CPU(dev), NULL);
-    found_cpu->cpu = CPU(dev);
+    found_cpu = pc_find_cpu_slot(MACHINE(pcms), cpu->apic_id, NULL);
+    found_cpu->cpu = OBJECT(dev);
 out:
     error_propagate(errp, local_err);
 }
@@ -1842,9 +1840,10 @@ static void pc_cpu_unplug_request_cb(HotplugHandler *hotplug_dev,
     int idx = -1;
     HotplugHandlerClass *hhc;
     Error *local_err = NULL;
+    X86CPU *cpu = X86_CPU(dev);
     PCMachineState *pcms = PC_MACHINE(hotplug_dev);
 
-    pc_find_cpu_slot(pcms, CPU(dev), &idx);
+    pc_find_cpu_slot(MACHINE(pcms), cpu->apic_id, &idx);
     assert(idx != -1);
     if (idx == 0) {
         error_setg(&local_err, "Boot CPU is unpluggable");
@@ -1869,6 +1868,7 @@ static void pc_cpu_unplug_cb(HotplugHandler *hotplug_dev,
     CPUArchId *found_cpu;
     HotplugHandlerClass *hhc;
     Error *local_err = NULL;
+    X86CPU *cpu = X86_CPU(dev);
     PCMachineState *pcms = PC_MACHINE(hotplug_dev);
 
     hhc = HOTPLUG_HANDLER_GET_CLASS(pcms->acpi_dev);
@@ -1878,7 +1878,7 @@ static void pc_cpu_unplug_cb(HotplugHandler *hotplug_dev,
         goto out;
     }
 
-    found_cpu = pc_find_cpu_slot(pcms, CPU(dev), NULL);
+    found_cpu = pc_find_cpu_slot(MACHINE(pcms), cpu->apic_id, NULL);
     found_cpu->cpu = NULL;
     object_unparent(OBJECT(dev));
 
@@ -1936,13 +1936,15 @@ static void pc_cpu_pre_plug(HotplugHandler *hotplug_dev,
         cpu->apic_id = apicid_from_topo_ids(smp_cores, smp_threads, &topo);
     }
 
-    cpu_slot = pc_find_cpu_slot(pcms, CPU(dev), &idx);
+    cpu_slot = pc_find_cpu_slot(MACHINE(pcms), cpu->apic_id, &idx);
     if (!cpu_slot) {
+        MachineState *ms = MACHINE(pcms);
+
         x86_topo_ids_from_apicid(cpu->apic_id, smp_cores, smp_threads, &topo);
         error_setg(errp, "Invalid CPU [socket: %u, core: %u, thread: %u] with"
                   " APIC ID %" PRIu32 ", valid index range 0:%d",
                    topo.pkg_id, topo.core_id, topo.smt_id, cpu->apic_id,
-                   pcms->possible_cpus->len - 1);
+                   ms->possible_cpus->len - 1);
         return;
     }
 
@@ -1953,7 +1955,7 @@ static void pc_cpu_pre_plug(HotplugHandler *hotplug_dev,
     }
 
     /* if 'address' properties socket-id/core-id/thread-id are not set, set them
-     * so that query_hotpluggable_cpus would show correct values
+     * so that machine_query_hotpluggable_cpus would show correct values
      */
     /* TODO: move socket_id/core_id/thread_id checks into x86_cpu_realizefn()
      * once -smp refactoring is complete and there will be CPU private
@@ -2251,55 +2253,37 @@ static unsigned pc_cpu_index_to_socket_id(unsigned cpu_index)
     return topo.pkg_id;
 }
 
-static const CPUArchIdList *pc_possible_cpu_arch_ids(MachineState *machine)
-{
-    PCMachineState *pcms = PC_MACHINE(machine);
-    assert(pcms->possible_cpus);
-    return pcms->possible_cpus;
-}
-
-static HotpluggableCPUList *pc_query_hotpluggable_cpus(MachineState *machine)
+static const CPUArchIdList *pc_possible_cpu_arch_ids(MachineState *ms)
 {
     int i;
-    CPUState *cpu;
-    HotpluggableCPUList *head = NULL;
-    PCMachineState *pcms = PC_MACHINE(machine);
-    const char *cpu_type;
 
-    cpu = pcms->possible_cpus->cpus[0].cpu;
-    assert(cpu); /* BSP is always present */
-    cpu_type = object_class_get_name(OBJECT_CLASS(CPU_GET_CLASS(cpu)));
+    if (ms->possible_cpus) {
+        /*
+         * make sure that max_cpus hasn't changed since the first use, i.e.
+         * -smp hasn't been parsed after it
+        */
+        assert(ms->possible_cpus->len == max_cpus);
+        return ms->possible_cpus;
+    }
 
-    for (i = 0; i < pcms->possible_cpus->len; i++) {
+    ms->possible_cpus = g_malloc0(sizeof(CPUArchIdList) +
+                                  sizeof(CPUArchId) * max_cpus);
+    ms->possible_cpus->len = max_cpus;
+    for (i = 0; i < ms->possible_cpus->len; i++) {
         X86CPUTopoInfo topo;
-        HotpluggableCPUList *list_item = g_new0(typeof(*list_item), 1);
-        HotpluggableCPU *cpu_item = g_new0(typeof(*cpu_item), 1);
-        CpuInstanceProperties *cpu_props = g_new0(typeof(*cpu_props), 1);
-        const uint32_t apic_id = pcms->possible_cpus->cpus[i].arch_id;
-
-        x86_topo_ids_from_apicid(apic_id, smp_cores, smp_threads, &topo);
-
-        cpu_item->type = g_strdup(cpu_type);
-        cpu_item->vcpus_count = 1;
-        cpu_props->has_socket_id = true;
-        cpu_props->socket_id = topo.pkg_id;
-        cpu_props->has_core_id = true;
-        cpu_props->core_id = topo.core_id;
-        cpu_props->has_thread_id = true;
-        cpu_props->thread_id = topo.smt_id;
-        cpu_item->props = cpu_props;
-
-        cpu = pcms->possible_cpus->cpus[i].cpu;
-        if (cpu) {
-            cpu_item->has_qom_path = true;
-            cpu_item->qom_path = object_get_canonical_path(OBJECT(cpu));
-        }
 
-        list_item->value = cpu_item;
-        list_item->next = head;
-        head = list_item;
+        ms->possible_cpus->cpus[i].vcpus_count = 1;
+        ms->possible_cpus->cpus[i].arch_id = x86_cpu_apic_id_from_index(i);
+        x86_topo_ids_from_apicid(ms->possible_cpus->cpus[i].arch_id,
+                                 smp_cores, smp_threads, &topo);
+        ms->possible_cpus->cpus[i].props.has_socket_id = true;
+        ms->possible_cpus->cpus[i].props.socket_id = topo.pkg_id;
+        ms->possible_cpus->cpus[i].props.has_core_id = true;
+        ms->possible_cpus->cpus[i].props.core_id = topo.core_id;
+        ms->possible_cpus->cpus[i].props.has_thread_id = true;
+        ms->possible_cpus->cpus[i].props.thread_id = topo.smt_id;
     }
-    return head;
+    return ms->possible_cpus;
 }
 
 static void x86_nmi(NMIState *n, int cpu_index, Error **errp)
@@ -2342,7 +2326,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data)
     mc->get_hotplug_handler = pc_get_hotpug_handler;
     mc->cpu_index_to_socket_id = pc_cpu_index_to_socket_id;
     mc->possible_cpu_arch_ids = pc_possible_cpu_arch_ids;
-    mc->query_hotpluggable_cpus = pc_query_hotpluggable_cpus;
+    mc->has_hotpluggable_cpus = true;
     mc->default_boot_order = "cad";
     mc->hot_add_cpu = pc_hot_add_cpu;
     mc->block_default_type = IF_IDE;
diff --git a/hw/ide/core.c b/hw/ide/core.c
index cfa5de6ebf..db509b3e15 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -1120,7 +1120,7 @@ static void ide_cfata_metadata_write(IDEState *s)
 }
 
 /* called when the inserted state of the media has changed */
-static void ide_cd_change_cb(void *opaque, bool load)
+static void ide_cd_change_cb(void *opaque, bool load, Error **errp)
 {
     IDEState *s = opaque;
     uint64_t nb_sectors;
diff --git a/hw/ide/qdev.c b/hw/ide/qdev.c
index dbaa75cf59..4383cd111d 100644
--- a/hw/ide/qdev.c
+++ b/hw/ide/qdev.c
@@ -170,7 +170,7 @@ static int ide_dev_initfn(IDEDevice *dev, IDEDriveKind kind)
             return -1;
         } else {
             /* Anonymous BlockBackend for an empty drive */
-            dev->conf.blk = blk_new();
+            dev->conf.blk = blk_new(0, BLK_PERM_ALL);
         }
     }
 
@@ -196,7 +196,12 @@ static int ide_dev_initfn(IDEDevice *dev, IDEDriveKind kind)
             return -1;
         }
     }
-    blkconf_apply_backend_options(&dev->conf);
+    blkconf_apply_backend_options(&dev->conf, kind == IDE_CD, kind != IDE_CD,
+                                  &err);
+    if (err) {
+        error_report_err(err);
+        return -1;
+    }
 
     if (ide_init_drive(s, dev->conf.blk, kind,
                        dev->version, dev->serial, dev->model, dev->wwn,
diff --git a/hw/intc/Makefile.objs b/hw/intc/Makefile.objs
index 8948106ac4..adedd0da5f 100644
--- a/hw/intc/Makefile.objs
+++ b/hw/intc/Makefile.objs
@@ -24,7 +24,7 @@ obj-$(CONFIG_APIC) += apic.o apic_common.o
 obj-$(CONFIG_ARM_GIC_KVM) += arm_gic_kvm.o
 obj-$(call land,$(CONFIG_ARM_GIC_KVM),$(TARGET_AARCH64)) += arm_gicv3_kvm.o
 obj-$(call land,$(CONFIG_ARM_GIC_KVM),$(TARGET_AARCH64)) += arm_gicv3_its_kvm.o
-obj-$(CONFIG_STELLARIS) += armv7m_nvic.o
+obj-$(CONFIG_ARM_V7M) += armv7m_nvic.o
 obj-$(CONFIG_EXYNOS4) += exynos4210_gic.o exynos4210_combiner.o
 obj-$(CONFIG_GRLIB) += grlib_irqmp.o
 obj-$(CONFIG_IOAPIC) += ioapic.o
diff --git a/hw/intc/arm_gic.c b/hw/intc/arm_gic.c
index 521aac3cc6..8e5a9d8a3e 100644
--- a/hw/intc/arm_gic.c
+++ b/hw/intc/arm_gic.c
@@ -156,17 +156,6 @@ static void gic_set_irq_11mpcore(GICState *s, int irq, int level,
     }
 }
 
-static void gic_set_irq_nvic(GICState *s, int irq, int level,
-                                 int cm, int target)
-{
-    if (level) {
-        GIC_SET_LEVEL(irq, cm);
-        GIC_SET_PENDING(irq, target);
-    } else {
-        GIC_CLEAR_LEVEL(irq, cm);
-    }
-}
-
 static void gic_set_irq_generic(GICState *s, int irq, int level,
                                 int cm, int target)
 {
@@ -214,8 +203,6 @@ static void gic_set_irq(void *opaque, int irq, int level)
 
     if (s->revision == REV_11MPCORE) {
         gic_set_irq_11mpcore(s, irq, level, cm, target);
-    } else if (s->revision == REV_NVIC) {
-        gic_set_irq_nvic(s, irq, level, cm, target);
     } else {
         gic_set_irq_generic(s, irq, level, cm, target);
     }
@@ -367,7 +354,7 @@ uint32_t gic_acknowledge_irq(GICState *s, int cpu, MemTxAttrs attrs)
         return 1023;
     }
 
-    if (s->revision == REV_11MPCORE || s->revision == REV_NVIC) {
+    if (s->revision == REV_11MPCORE) {
         /* Clear pending flags for both level and edge triggered interrupts.
          * Level triggered IRQs will be reasserted once they become inactive.
          */
@@ -589,11 +576,6 @@ void gic_complete_irq(GICState *s, int cpu, int irq, MemTxAttrs attrs)
             DPRINTF("Set %d pending mask %x\n", irq, cm);
             GIC_SET_PENDING(irq, cm);
         }
-    } else if (s->revision == REV_NVIC) {
-        if (GIC_TEST_LEVEL(irq, cm)) {
-            DPRINTF("Set nvic %d pending mask %x\n", irq, cm);
-            GIC_SET_PENDING(irq, cm);
-        }
     }
 
     group = gic_has_groups(s) && GIC_TEST_GROUP(irq, cm);
@@ -768,7 +750,7 @@ static uint32_t gic_dist_readb(void *opaque, hwaddr offset, MemTxAttrs attrs)
     } else if (offset < 0xf10) {
         goto bad_reg;
     } else if (offset < 0xf30) {
-        if (s->revision == REV_11MPCORE || s->revision == REV_NVIC) {
+        if (s->revision == REV_11MPCORE) {
             goto bad_reg;
         }
 
@@ -802,9 +784,6 @@ static uint32_t gic_dist_readb(void *opaque, hwaddr offset, MemTxAttrs attrs)
             case 2:
                 res = gic_id_gicv2[(offset - 0xfd0) >> 2];
                 break;
-            case REV_NVIC:
-                /* Shouldn't be able to get here */
-                abort();
             default:
                 res = 0;
             }
@@ -1028,7 +1007,7 @@ static void gic_dist_writeb(void *opaque, hwaddr offset,
                 continue; /* Ignore Non-secure access of Group0 IRQ */
             }
 
-            if (s->revision == REV_11MPCORE || s->revision == REV_NVIC) {
+            if (s->revision == REV_11MPCORE) {
                 if (value & (1 << (i * 2))) {
                     GIC_SET_MODEL(irq + i);
                 } else {
@@ -1046,7 +1025,7 @@ static void gic_dist_writeb(void *opaque, hwaddr offset,
         goto bad_reg;
     } else if (offset < 0xf20) {
         /* GICD_CPENDSGIRn */
-        if (s->revision == REV_11MPCORE || s->revision == REV_NVIC) {
+        if (s->revision == REV_11MPCORE) {
             goto bad_reg;
         }
         irq = (offset - 0xf10);
@@ -1060,7 +1039,7 @@ static void gic_dist_writeb(void *opaque, hwaddr offset,
         }
     } else if (offset < 0xf30) {
         /* GICD_SPENDSGIRn */
-        if (s->revision == REV_11MPCORE || s->revision == REV_NVIC) {
+        if (s->revision == REV_11MPCORE) {
             goto bad_reg;
         }
         irq = (offset - 0xf20);
diff --git a/hw/intc/arm_gic_common.c b/hw/intc/arm_gic_common.c
index 4a8df44fb1..70f1134823 100644
--- a/hw/intc/arm_gic_common.c
+++ b/hw/intc/arm_gic_common.c
@@ -99,9 +99,7 @@ void gic_init_irqs_and_mmio(GICState *s, qemu_irq_handler handler,
      *  [N+32..N+63] PPIs for CPU 1
      *   ...
      */
-    if (s->revision != REV_NVIC) {
-        i += (GIC_INTERNAL * s->num_cpu);
-    }
+    i += (GIC_INTERNAL * s->num_cpu);
     qdev_init_gpio_in(DEVICE(s), handler, i);
 
     for (i = 0; i < s->num_cpu; i++) {
@@ -121,16 +119,12 @@ void gic_init_irqs_and_mmio(GICState *s, qemu_irq_handler handler,
     memory_region_init_io(&s->iomem, OBJECT(s), ops, s, "gic_dist", 0x1000);
     sysbus_init_mmio(sbd, &s->iomem);
 
-    if (s->revision != REV_NVIC) {
-        /* This is the main CPU interface "for this core". It is always
-         * present because it is required by both software emulation and KVM.
-         * NVIC is not handled here because its CPU interface is different,
-         * neither it can use KVM.
-         */
-        memory_region_init_io(&s->cpuiomem[0], OBJECT(s), ops ? &ops[1] : NULL,
-                              s, "gic_cpu", s->revision == 2 ? 0x2000 : 0x100);
-        sysbus_init_mmio(sbd, &s->cpuiomem[0]);
-    }
+    /* This is the main CPU interface "for this core". It is always
+     * present because it is required by both software emulation and KVM.
+     */
+    memory_region_init_io(&s->cpuiomem[0], OBJECT(s), ops ? &ops[1] : NULL,
+                          s, "gic_cpu", s->revision == 2 ? 0x2000 : 0x100);
+    sysbus_init_mmio(sbd, &s->cpuiomem[0]);
 }
 
 static void arm_gic_common_realize(DeviceState *dev, Error **errp)
@@ -162,7 +156,7 @@ static void arm_gic_common_realize(DeviceState *dev, Error **errp)
     }
 
     if (s->security_extn &&
-        (s->revision == REV_11MPCORE || s->revision == REV_NVIC)) {
+        (s->revision == REV_11MPCORE)) {
         error_setg(errp, "this GIC revision does not implement "
                    "the security extensions");
         return;
@@ -255,7 +249,6 @@ static Property arm_gic_common_properties[] = {
     DEFINE_PROP_UINT32("num-irq", GICState, num_irq, 32),
     /* Revision can be 1 or 2 for GIC architecture specification
      * versions 1 or 2, or 0 to indicate the legacy 11MPCore GIC.
-     * (Internally, 0xffffffff also indicates "not a GIC but an NVIC".)
      */
     DEFINE_PROP_UINT32("revision", GICState, revision, 1),
     /* True if the GIC should implement the security extensions */
diff --git a/hw/intc/arm_gicv3_common.c b/hw/intc/arm_gicv3_common.c
index 16b9b0f7eb..c6493d6c07 100644
--- a/hw/intc/arm_gicv3_common.c
+++ b/hw/intc/arm_gicv3_common.c
@@ -70,6 +70,38 @@ static const VMStateDescription vmstate_gicv3_cpu_virt = {
     }
 };
 
+static int icc_sre_el1_reg_pre_load(void *opaque)
+{
+    GICv3CPUState *cs = opaque;
+
+   /*
+    * If the sre_el1 subsection is not transferred this
+    * means SRE_EL1 is 0x7 (which might not be the same as
+    * our reset value).
+    */
+    cs->icc_sre_el1 = 0x7;
+    return 0;
+}
+
+static bool icc_sre_el1_reg_needed(void *opaque)
+{
+    GICv3CPUState *cs = opaque;
+
+    return cs->icc_sre_el1 != 7;
+}
+
+const VMStateDescription vmstate_gicv3_cpu_sre_el1 = {
+    .name = "arm_gicv3_cpu/sre_el1",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .pre_load = icc_sre_el1_reg_pre_load,
+    .needed = icc_sre_el1_reg_needed,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT64(icc_sre_el1, GICv3CPUState),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
 static const VMStateDescription vmstate_gicv3_cpu = {
     .name = "arm_gicv3_cpu",
     .version_id = 1,
@@ -100,6 +132,10 @@ static const VMStateDescription vmstate_gicv3_cpu = {
     .subsections = (const VMStateDescription * []) {
         &vmstate_gicv3_cpu_virt,
         NULL
+    },
+    .subsections = (const VMStateDescription * []) {
+        &vmstate_gicv3_cpu_sre_el1,
+        NULL
     }
 };
 
@@ -216,6 +252,8 @@ static void arm_gicv3_common_realize(DeviceState *dev, Error **errp)
 
         s->cpu[i].cpu = cpu;
         s->cpu[i].gic = s;
+        /* Store GICv3CPUState in CPUARMState gicv3state pointer */
+        gicv3_set_gicv3state(cpu, &s->cpu[i]);
 
         /* Pre-construct the GICR_TYPER:
          * For our implementation:
diff --git a/hw/intc/arm_gicv3_cpuif.c b/hw/intc/arm_gicv3_cpuif.c
index c25ee03556..0b208560bd 100644
--- a/hw/intc/arm_gicv3_cpuif.c
+++ b/hw/intc/arm_gicv3_cpuif.c
@@ -14,10 +14,19 @@
 
 #include "qemu/osdep.h"
 #include "qemu/bitops.h"
+#include "qemu/main-loop.h"
 #include "trace.h"
 #include "gicv3_internal.h"
 #include "cpu.h"
 
+void gicv3_set_gicv3state(CPUState *cpu, GICv3CPUState *s)
+{
+    ARMCPU *arm_cpu = ARM_CPU(cpu);
+    CPUARMState *env = &arm_cpu->env;
+
+    env->gicv3state = (void *)s;
+};
+
 static GICv3CPUState *icc_cs_from_env(CPUARMState *env)
 {
     /* Given the CPU, find the right GICv3CPUState struct.
@@ -733,6 +742,8 @@ void gicv3_cpuif_update(GICv3CPUState *cs)
     ARMCPU *cpu = ARM_CPU(cs->cpu);
     CPUARMState *env = &cpu->env;
 
+    g_assert(qemu_mutex_iothread_locked());
+
     trace_gicv3_cpuif_update(gicv3_redist_affid(cs), cs->hppi.irq,
                              cs->hppi.grp, cs->hppi.prio);
 
diff --git a/hw/intc/arm_gicv3_kvm.c b/hw/intc/arm_gicv3_kvm.c
index d69dc47370..81f0403117 100644
--- a/hw/intc/arm_gicv3_kvm.c
+++ b/hw/intc/arm_gicv3_kvm.c
@@ -23,8 +23,10 @@
 #include "qapi/error.h"
 #include "hw/intc/arm_gicv3_common.h"
 #include "hw/sysbus.h"
+#include "qemu/error-report.h"
 #include "sysemu/kvm.h"
 #include "kvm_arm.h"
+#include "gicv3_internal.h"
 #include "vgic_common.h"
 #include "migration/migration.h"
 
@@ -44,6 +46,32 @@
 #define KVM_ARM_GICV3_GET_CLASS(obj) \
      OBJECT_GET_CLASS(KVMARMGICv3Class, (obj), TYPE_KVM_ARM_GICV3)
 
+#define   KVM_DEV_ARM_VGIC_SYSREG(op0, op1, crn, crm, op2)         \
+                             (ARM64_SYS_REG_SHIFT_MASK(op0, OP0) | \
+                              ARM64_SYS_REG_SHIFT_MASK(op1, OP1) | \
+                              ARM64_SYS_REG_SHIFT_MASK(crn, CRN) | \
+                              ARM64_SYS_REG_SHIFT_MASK(crm, CRM) | \
+                              ARM64_SYS_REG_SHIFT_MASK(op2, OP2))
+
+#define ICC_PMR_EL1     \
+    KVM_DEV_ARM_VGIC_SYSREG(3, 0, 4, 6, 0)
+#define ICC_BPR0_EL1    \
+    KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 8, 3)
+#define ICC_AP0R_EL1(n) \
+    KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 8, 4 | n)
+#define ICC_AP1R_EL1(n) \
+    KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 9, n)
+#define ICC_BPR1_EL1    \
+    KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 12, 3)
+#define ICC_CTLR_EL1    \
+    KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 12, 4)
+#define ICC_SRE_EL1 \
+    KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 12, 5)
+#define ICC_IGRPEN0_EL1 \
+    KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 12, 6)
+#define ICC_IGRPEN1_EL1 \
+    KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 12, 7)
+
 typedef struct KVMARMGICv3Class {
     ARMGICv3CommonClass parent_class;
     DeviceRealize parent_realize;
@@ -57,16 +85,549 @@ static void kvm_arm_gicv3_set_irq(void *opaque, int irq, int level)
     kvm_arm_gic_set_irq(s->num_irq, irq, level);
 }
 
+#define KVM_VGIC_ATTR(reg, typer) \
+    ((typer & KVM_DEV_ARM_VGIC_V3_MPIDR_MASK) | (reg))
+
+static inline void kvm_gicd_access(GICv3State *s, int offset,
+                                   uint32_t *val, bool write)
+{
+    kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_DIST_REGS,
+                      KVM_VGIC_ATTR(offset, 0),
+                      val, write);
+}
+
+static inline void kvm_gicr_access(GICv3State *s, int offset, int cpu,
+                                   uint32_t *val, bool write)
+{
+    kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_REDIST_REGS,
+                      KVM_VGIC_ATTR(offset, s->cpu[cpu].gicr_typer),
+                      val, write);
+}
+
+static inline void kvm_gicc_access(GICv3State *s, uint64_t reg, int cpu,
+                                   uint64_t *val, bool write)
+{
+    kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS,
+                      KVM_VGIC_ATTR(reg, s->cpu[cpu].gicr_typer),
+                      val, write);
+}
+
+static inline void kvm_gic_line_level_access(GICv3State *s, int irq, int cpu,
+                                             uint32_t *val, bool write)
+{
+    kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO,
+                      KVM_VGIC_ATTR(irq, s->cpu[cpu].gicr_typer) |
+                      (VGIC_LEVEL_INFO_LINE_LEVEL <<
+                       KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT),
+                      val, write);
+}
+
+/* Loop through each distributor IRQ related register; since bits
+ * corresponding to SPIs and PPIs are RAZ/WI when affinity routing
+ * is enabled, we skip those.
+ */
+#define for_each_dist_irq_reg(_irq, _max, _field_width) \
+    for (_irq = GIC_INTERNAL; _irq < _max; _irq += (32 / _field_width))
+
+static void kvm_dist_get_priority(GICv3State *s, uint32_t offset, uint8_t *bmp)
+{
+    uint32_t reg, *field;
+    int irq;
+
+    field = (uint32_t *)bmp;
+    for_each_dist_irq_reg(irq, s->num_irq, 8) {
+        kvm_gicd_access(s, offset, &reg, false);
+        *field = reg;
+        offset += 4;
+        field++;
+    }
+}
+
+static void kvm_dist_put_priority(GICv3State *s, uint32_t offset, uint8_t *bmp)
+{
+    uint32_t reg, *field;
+    int irq;
+
+    field = (uint32_t *)bmp;
+    for_each_dist_irq_reg(irq, s->num_irq, 8) {
+        reg = *field;
+        kvm_gicd_access(s, offset, &reg, true);
+        offset += 4;
+        field++;
+    }
+}
+
+static void kvm_dist_get_edge_trigger(GICv3State *s, uint32_t offset,
+                                      uint32_t *bmp)
+{
+    uint32_t reg;
+    int irq;
+
+    for_each_dist_irq_reg(irq, s->num_irq, 2) {
+        kvm_gicd_access(s, offset, &reg, false);
+        reg = half_unshuffle32(reg >> 1);
+        if (irq % 32 != 0) {
+            reg = (reg << 16);
+        }
+        *gic_bmp_ptr32(bmp, irq) |=  reg;
+        offset += 4;
+    }
+}
+
+static void kvm_dist_put_edge_trigger(GICv3State *s, uint32_t offset,
+                                      uint32_t *bmp)
+{
+    uint32_t reg;
+    int irq;
+
+    for_each_dist_irq_reg(irq, s->num_irq, 2) {
+        reg = *gic_bmp_ptr32(bmp, irq);
+        if (irq % 32 != 0) {
+            reg = (reg & 0xffff0000) >> 16;
+        } else {
+            reg = reg & 0xffff;
+        }
+        reg = half_shuffle32(reg) << 1;
+        kvm_gicd_access(s, offset, &reg, true);
+        offset += 4;
+    }
+}
+
+static void kvm_gic_get_line_level_bmp(GICv3State *s, uint32_t *bmp)
+{
+    uint32_t reg;
+    int irq;
+
+    for_each_dist_irq_reg(irq, s->num_irq, 1) {
+        kvm_gic_line_level_access(s, irq, 0, &reg, false);
+        *gic_bmp_ptr32(bmp, irq) = reg;
+    }
+}
+
+static void kvm_gic_put_line_level_bmp(GICv3State *s, uint32_t *bmp)
+{
+    uint32_t reg;
+    int irq;
+
+    for_each_dist_irq_reg(irq, s->num_irq, 1) {
+        reg = *gic_bmp_ptr32(bmp, irq);
+        kvm_gic_line_level_access(s, irq, 0, &reg, true);
+    }
+}
+
+/* Read a bitmap register group from the kernel VGIC. */
+static void kvm_dist_getbmp(GICv3State *s, uint32_t offset, uint32_t *bmp)
+{
+    uint32_t reg;
+    int irq;
+
+    for_each_dist_irq_reg(irq, s->num_irq, 1) {
+        kvm_gicd_access(s, offset, &reg, false);
+        *gic_bmp_ptr32(bmp, irq) = reg;
+        offset += 4;
+    }
+}
+
+static void kvm_dist_putbmp(GICv3State *s, uint32_t offset,
+                            uint32_t clroffset, uint32_t *bmp)
+{
+    uint32_t reg;
+    int irq;
+
+    for_each_dist_irq_reg(irq, s->num_irq, 1) {
+        /* If this bitmap is a set/clear register pair, first write to the
+         * clear-reg to clear all bits before using the set-reg to write
+         * the 1 bits.
+         */
+        if (clroffset != 0) {
+            reg = 0;
+            kvm_gicd_access(s, clroffset, &reg, true);
+        }
+        reg = *gic_bmp_ptr32(bmp, irq);
+        kvm_gicd_access(s, offset, &reg, true);
+        offset += 4;
+    }
+}
+
+static void kvm_arm_gicv3_check(GICv3State *s)
+{
+    uint32_t reg;
+    uint32_t num_irq;
+
+    /* Sanity checking s->num_irq */
+    kvm_gicd_access(s, GICD_TYPER, &reg, false);
+    num_irq = ((reg & 0x1f) + 1) * 32;
+
+    if (num_irq < s->num_irq) {
+        error_report("Model requests %u IRQs, but kernel supports max %u",
+                     s->num_irq, num_irq);
+        abort();
+    }
+}
+
 static void kvm_arm_gicv3_put(GICv3State *s)
 {
-    /* TODO */
-    DPRINTF("Cannot put kernel gic state, no kernel interface\n");
+    uint32_t regl, regh, reg;
+    uint64_t reg64, redist_typer;
+    int ncpu, i;
+
+    kvm_arm_gicv3_check(s);
+
+    kvm_gicr_access(s, GICR_TYPER, 0, &regl, false);
+    kvm_gicr_access(s, GICR_TYPER + 4, 0, &regh, false);
+    redist_typer = ((uint64_t)regh << 32) | regl;
+
+    reg = s->gicd_ctlr;
+    kvm_gicd_access(s, GICD_CTLR, &reg, true);
+
+    if (redist_typer & GICR_TYPER_PLPIS) {
+        /* Set base addresses before LPIs are enabled by GICR_CTLR write */
+        for (ncpu = 0; ncpu < s->num_cpu; ncpu++) {
+            GICv3CPUState *c = &s->cpu[ncpu];
+
+            reg64 = c->gicr_propbaser;
+            regl = (uint32_t)reg64;
+            kvm_gicr_access(s, GICR_PROPBASER, ncpu, &regl, true);
+            regh = (uint32_t)(reg64 >> 32);
+            kvm_gicr_access(s, GICR_PROPBASER + 4, ncpu, &regh, true);
+
+            reg64 = c->gicr_pendbaser;
+            if (!c->gicr_ctlr & GICR_CTLR_ENABLE_LPIS) {
+                /* Setting PTZ is advised if LPIs are disabled, to reduce
+                 * GIC initialization time.
+                 */
+                reg64 |= GICR_PENDBASER_PTZ;
+            }
+            regl = (uint32_t)reg64;
+            kvm_gicr_access(s, GICR_PENDBASER, ncpu, &regl, true);
+            regh = (uint32_t)(reg64 >> 32);
+            kvm_gicr_access(s, GICR_PENDBASER + 4, ncpu, &regh, true);
+        }
+    }
+
+    /* Redistributor state (one per CPU) */
+
+    for (ncpu = 0; ncpu < s->num_cpu; ncpu++) {
+        GICv3CPUState *c = &s->cpu[ncpu];
+
+        reg = c->gicr_ctlr;
+        kvm_gicr_access(s, GICR_CTLR, ncpu, &reg, true);
+
+        reg = c->gicr_statusr[GICV3_NS];
+        kvm_gicr_access(s, GICR_STATUSR, ncpu, &reg, true);
+
+        reg = c->gicr_waker;
+        kvm_gicr_access(s, GICR_WAKER, ncpu, &reg, true);
+
+        reg = c->gicr_igroupr0;
+        kvm_gicr_access(s, GICR_IGROUPR0, ncpu, &reg, true);
+
+        reg = ~0;
+        kvm_gicr_access(s, GICR_ICENABLER0, ncpu, &reg, true);
+        reg = c->gicr_ienabler0;
+        kvm_gicr_access(s, GICR_ISENABLER0, ncpu, &reg, true);
+
+        /* Restore config before pending so we treat level/edge correctly */
+        reg = half_shuffle32(c->edge_trigger >> 16) << 1;
+        kvm_gicr_access(s, GICR_ICFGR1, ncpu, &reg, true);
+
+        reg = c->level;
+        kvm_gic_line_level_access(s, 0, ncpu, &reg, true);
+
+        reg = ~0;
+        kvm_gicr_access(s, GICR_ICPENDR0, ncpu, &reg, true);
+        reg = c->gicr_ipendr0;
+        kvm_gicr_access(s, GICR_ISPENDR0, ncpu, &reg, true);
+
+        reg = ~0;
+        kvm_gicr_access(s, GICR_ICACTIVER0, ncpu, &reg, true);
+        reg = c->gicr_iactiver0;
+        kvm_gicr_access(s, GICR_ISACTIVER0, ncpu, &reg, true);
+
+        for (i = 0; i < GIC_INTERNAL; i += 4) {
+            reg = c->gicr_ipriorityr[i] |
+                (c->gicr_ipriorityr[i + 1] << 8) |
+                (c->gicr_ipriorityr[i + 2] << 16) |
+                (c->gicr_ipriorityr[i + 3] << 24);
+            kvm_gicr_access(s, GICR_IPRIORITYR + i, ncpu, &reg, true);
+        }
+    }
+
+    /* Distributor state (shared between all CPUs */
+    reg = s->gicd_statusr[GICV3_NS];
+    kvm_gicd_access(s, GICD_STATUSR, &reg, true);
+
+    /* s->enable bitmap -> GICD_ISENABLERn */
+    kvm_dist_putbmp(s, GICD_ISENABLER, GICD_ICENABLER, s->enabled);
+
+    /* s->group bitmap -> GICD_IGROUPRn */
+    kvm_dist_putbmp(s, GICD_IGROUPR, 0, s->group);
+
+    /* Restore targets before pending to ensure the pending state is set on
+     * the appropriate CPU interfaces in the kernel
+     */
+
+    /* s->gicd_irouter[irq] -> GICD_IROUTERn
+     * We can't use kvm_dist_put() here because the registers are 64-bit
+     */
+    for (i = GIC_INTERNAL; i < s->num_irq; i++) {
+        uint32_t offset;
+
+        offset = GICD_IROUTER + (sizeof(uint32_t) * i);
+        reg = (uint32_t)s->gicd_irouter[i];
+        kvm_gicd_access(s, offset, &reg, true);
+
+        offset = GICD_IROUTER + (sizeof(uint32_t) * i) + 4;
+        reg = (uint32_t)(s->gicd_irouter[i] >> 32);
+        kvm_gicd_access(s, offset, &reg, true);
+    }
+
+    /* s->trigger bitmap -> GICD_ICFGRn
+     * (restore configuration registers before pending IRQs so we treat
+     * level/edge correctly)
+     */
+    kvm_dist_put_edge_trigger(s, GICD_ICFGR, s->edge_trigger);
+
+    /* s->level bitmap ->  line_level */
+    kvm_gic_put_line_level_bmp(s, s->level);
+
+    /* s->pending bitmap -> GICD_ISPENDRn */
+    kvm_dist_putbmp(s, GICD_ISPENDR, GICD_ICPENDR, s->pending);
+
+    /* s->active bitmap -> GICD_ISACTIVERn */
+    kvm_dist_putbmp(s, GICD_ISACTIVER, GICD_ICACTIVER, s->active);
+
+    /* s->gicd_ipriority[] -> GICD_IPRIORITYRn */
+    kvm_dist_put_priority(s, GICD_IPRIORITYR, s->gicd_ipriority);
+
+    /* CPU Interface state (one per CPU) */
+
+    for (ncpu = 0; ncpu < s->num_cpu; ncpu++) {
+        GICv3CPUState *c = &s->cpu[ncpu];
+        int num_pri_bits;
+
+        kvm_gicc_access(s, ICC_SRE_EL1, ncpu, &c->icc_sre_el1, true);
+        kvm_gicc_access(s, ICC_CTLR_EL1, ncpu,
+                        &c->icc_ctlr_el1[GICV3_NS], true);
+        kvm_gicc_access(s, ICC_IGRPEN0_EL1, ncpu,
+                        &c->icc_igrpen[GICV3_G0], true);
+        kvm_gicc_access(s, ICC_IGRPEN1_EL1, ncpu,
+                        &c->icc_igrpen[GICV3_G1NS], true);
+        kvm_gicc_access(s, ICC_PMR_EL1, ncpu, &c->icc_pmr_el1, true);
+        kvm_gicc_access(s, ICC_BPR0_EL1, ncpu, &c->icc_bpr[GICV3_G0], true);
+        kvm_gicc_access(s, ICC_BPR1_EL1, ncpu, &c->icc_bpr[GICV3_G1NS], true);
+
+        num_pri_bits = ((c->icc_ctlr_el1[GICV3_NS] &
+                        ICC_CTLR_EL1_PRIBITS_MASK) >>
+                        ICC_CTLR_EL1_PRIBITS_SHIFT) + 1;
+
+        switch (num_pri_bits) {
+        case 7:
+            reg64 = c->icc_apr[GICV3_G0][3];
+            kvm_gicc_access(s, ICC_AP0R_EL1(3), ncpu, &reg64, true);
+            reg64 = c->icc_apr[GICV3_G0][2];
+            kvm_gicc_access(s, ICC_AP0R_EL1(2), ncpu, &reg64, true);
+        case 6:
+            reg64 = c->icc_apr[GICV3_G0][1];
+            kvm_gicc_access(s, ICC_AP0R_EL1(1), ncpu, &reg64, true);
+        default:
+            reg64 = c->icc_apr[GICV3_G0][0];
+            kvm_gicc_access(s, ICC_AP0R_EL1(0), ncpu, &reg64, true);
+        }
+
+        switch (num_pri_bits) {
+        case 7:
+            reg64 = c->icc_apr[GICV3_G1NS][3];
+            kvm_gicc_access(s, ICC_AP1R_EL1(3), ncpu, &reg64, true);
+            reg64 = c->icc_apr[GICV3_G1NS][2];
+            kvm_gicc_access(s, ICC_AP1R_EL1(2), ncpu, &reg64, true);
+        case 6:
+            reg64 = c->icc_apr[GICV3_G1NS][1];
+            kvm_gicc_access(s, ICC_AP1R_EL1(1), ncpu, &reg64, true);
+        default:
+            reg64 = c->icc_apr[GICV3_G1NS][0];
+            kvm_gicc_access(s, ICC_AP1R_EL1(0), ncpu, &reg64, true);
+        }
+    }
 }
 
 static void kvm_arm_gicv3_get(GICv3State *s)
 {
-    /* TODO */
-    DPRINTF("Cannot get kernel gic state, no kernel interface\n");
+    uint32_t regl, regh, reg;
+    uint64_t reg64, redist_typer;
+    int ncpu, i;
+
+    kvm_arm_gicv3_check(s);
+
+    kvm_gicr_access(s, GICR_TYPER, 0, &regl, false);
+    kvm_gicr_access(s, GICR_TYPER + 4, 0, &regh, false);
+    redist_typer = ((uint64_t)regh << 32) | regl;
+
+    kvm_gicd_access(s, GICD_CTLR, &reg, false);
+    s->gicd_ctlr = reg;
+
+    /* Redistributor state (one per CPU) */
+
+    for (ncpu = 0; ncpu < s->num_cpu; ncpu++) {
+        GICv3CPUState *c = &s->cpu[ncpu];
+
+        kvm_gicr_access(s, GICR_CTLR, ncpu, &reg, false);
+        c->gicr_ctlr = reg;
+
+        kvm_gicr_access(s, GICR_STATUSR, ncpu, &reg, false);
+        c->gicr_statusr[GICV3_NS] = reg;
+
+        kvm_gicr_access(s, GICR_WAKER, ncpu, &reg, false);
+        c->gicr_waker = reg;
+
+        kvm_gicr_access(s, GICR_IGROUPR0, ncpu, &reg, false);
+        c->gicr_igroupr0 = reg;
+        kvm_gicr_access(s, GICR_ISENABLER0, ncpu, &reg, false);
+        c->gicr_ienabler0 = reg;
+        kvm_gicr_access(s, GICR_ICFGR1, ncpu, &reg, false);
+        c->edge_trigger = half_unshuffle32(reg >> 1) << 16;
+        kvm_gic_line_level_access(s, 0, ncpu, &reg, false);
+        c->level = reg;
+        kvm_gicr_access(s, GICR_ISPENDR0, ncpu, &reg, false);
+        c->gicr_ipendr0 = reg;
+        kvm_gicr_access(s, GICR_ISACTIVER0, ncpu, &reg, false);
+        c->gicr_iactiver0 = reg;
+
+        for (i = 0; i < GIC_INTERNAL; i += 4) {
+            kvm_gicr_access(s, GICR_IPRIORITYR + i, ncpu, &reg, false);
+            c->gicr_ipriorityr[i] = extract32(reg, 0, 8);
+            c->gicr_ipriorityr[i + 1] = extract32(reg, 8, 8);
+            c->gicr_ipriorityr[i + 2] = extract32(reg, 16, 8);
+            c->gicr_ipriorityr[i + 3] = extract32(reg, 24, 8);
+        }
+    }
+
+    if (redist_typer & GICR_TYPER_PLPIS) {
+        for (ncpu = 0; ncpu < s->num_cpu; ncpu++) {
+            GICv3CPUState *c = &s->cpu[ncpu];
+
+            kvm_gicr_access(s, GICR_PROPBASER, ncpu, &regl, false);
+            kvm_gicr_access(s, GICR_PROPBASER + 4, ncpu, &regh, false);
+            c->gicr_propbaser = ((uint64_t)regh << 32) | regl;
+
+            kvm_gicr_access(s, GICR_PENDBASER, ncpu, &regl, false);
+            kvm_gicr_access(s, GICR_PENDBASER + 4, ncpu, &regh, false);
+            c->gicr_pendbaser = ((uint64_t)regh << 32) | regl;
+        }
+    }
+
+    /* Distributor state (shared between all CPUs */
+
+    kvm_gicd_access(s, GICD_STATUSR, &reg, false);
+    s->gicd_statusr[GICV3_NS] = reg;
+
+    /* GICD_IGROUPRn -> s->group bitmap */
+    kvm_dist_getbmp(s, GICD_IGROUPR, s->group);
+
+    /* GICD_ISENABLERn -> s->enabled bitmap */
+    kvm_dist_getbmp(s, GICD_ISENABLER, s->enabled);
+
+    /* Line level of irq */
+    kvm_gic_get_line_level_bmp(s, s->level);
+    /* GICD_ISPENDRn -> s->pending bitmap */
+    kvm_dist_getbmp(s, GICD_ISPENDR, s->pending);
+
+    /* GICD_ISACTIVERn -> s->active bitmap */
+    kvm_dist_getbmp(s, GICD_ISACTIVER, s->active);
+
+    /* GICD_ICFGRn -> s->trigger bitmap */
+    kvm_dist_get_edge_trigger(s, GICD_ICFGR, s->edge_trigger);
+
+    /* GICD_IPRIORITYRn -> s->gicd_ipriority[] */
+    kvm_dist_get_priority(s, GICD_IPRIORITYR, s->gicd_ipriority);
+
+    /* GICD_IROUTERn -> s->gicd_irouter[irq] */
+    for (i = GIC_INTERNAL; i < s->num_irq; i++) {
+        uint32_t offset;
+
+        offset = GICD_IROUTER + (sizeof(uint32_t) * i);
+        kvm_gicd_access(s, offset, &regl, false);
+        offset = GICD_IROUTER + (sizeof(uint32_t) * i) + 4;
+        kvm_gicd_access(s, offset, &regh, false);
+        s->gicd_irouter[i] = ((uint64_t)regh << 32) | regl;
+    }
+
+    /*****************************************************************
+     * CPU Interface(s) State
+     */
+
+    for (ncpu = 0; ncpu < s->num_cpu; ncpu++) {
+        GICv3CPUState *c = &s->cpu[ncpu];
+        int num_pri_bits;
+
+        kvm_gicc_access(s, ICC_SRE_EL1, ncpu, &c->icc_sre_el1, false);
+        kvm_gicc_access(s, ICC_CTLR_EL1, ncpu,
+                        &c->icc_ctlr_el1[GICV3_NS], false);
+        kvm_gicc_access(s, ICC_IGRPEN0_EL1, ncpu,
+                        &c->icc_igrpen[GICV3_G0], false);
+        kvm_gicc_access(s, ICC_IGRPEN1_EL1, ncpu,
+                        &c->icc_igrpen[GICV3_G1NS], false);
+        kvm_gicc_access(s, ICC_PMR_EL1, ncpu, &c->icc_pmr_el1, false);
+        kvm_gicc_access(s, ICC_BPR0_EL1, ncpu, &c->icc_bpr[GICV3_G0], false);
+        kvm_gicc_access(s, ICC_BPR1_EL1, ncpu, &c->icc_bpr[GICV3_G1NS], false);
+        num_pri_bits = ((c->icc_ctlr_el1[GICV3_NS] &
+                        ICC_CTLR_EL1_PRIBITS_MASK) >>
+                        ICC_CTLR_EL1_PRIBITS_SHIFT) + 1;
+
+        switch (num_pri_bits) {
+        case 7:
+            kvm_gicc_access(s, ICC_AP0R_EL1(3), ncpu, &reg64, false);
+            c->icc_apr[GICV3_G0][3] = reg64;
+            kvm_gicc_access(s, ICC_AP0R_EL1(2), ncpu, &reg64, false);
+            c->icc_apr[GICV3_G0][2] = reg64;
+        case 6:
+            kvm_gicc_access(s, ICC_AP0R_EL1(1), ncpu, &reg64, false);
+            c->icc_apr[GICV3_G0][1] = reg64;
+        default:
+            kvm_gicc_access(s, ICC_AP0R_EL1(0), ncpu, &reg64, false);
+            c->icc_apr[GICV3_G0][0] = reg64;
+        }
+
+        switch (num_pri_bits) {
+        case 7:
+            kvm_gicc_access(s, ICC_AP1R_EL1(3), ncpu, &reg64, false);
+            c->icc_apr[GICV3_G1NS][3] = reg64;
+            kvm_gicc_access(s, ICC_AP1R_EL1(2), ncpu, &reg64, false);
+            c->icc_apr[GICV3_G1NS][2] = reg64;
+        case 6:
+            kvm_gicc_access(s, ICC_AP1R_EL1(1), ncpu, &reg64, false);
+            c->icc_apr[GICV3_G1NS][1] = reg64;
+        default:
+            kvm_gicc_access(s, ICC_AP1R_EL1(0), ncpu, &reg64, false);
+            c->icc_apr[GICV3_G1NS][0] = reg64;
+        }
+    }
+}
+
+static void arm_gicv3_icc_reset(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+    ARMCPU *cpu;
+    GICv3State *s;
+    GICv3CPUState *c;
+
+    c = (GICv3CPUState *)env->gicv3state;
+    s = c->gic;
+    cpu = ARM_CPU(c->cpu);
+
+    /* Initialize to actual HW supported configuration */
+    kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS,
+                      KVM_VGIC_ATTR(ICC_CTLR_EL1, cpu->mp_affinity),
+                      &c->icc_ctlr_el1[GICV3_NS], false);
+
+    c->icc_ctlr_el1[GICV3_S] = c->icc_ctlr_el1[GICV3_NS];
+    c->icc_pmr_el1 = 0;
+    c->icc_bpr[GICV3_G0] = GIC_MIN_BPR;
+    c->icc_bpr[GICV3_G1] = GIC_MIN_BPR;
+    c->icc_bpr[GICV3_G1NS] = GIC_MIN_BPR;
+
+    c->icc_sre_el1 = 0x7;
+    memset(c->icc_apr, 0, sizeof(c->icc_apr));
+    memset(c->icc_igrpen, 0, sizeof(c->icc_igrpen));
 }
 
 static void kvm_arm_gicv3_reset(DeviceState *dev)
@@ -77,9 +638,43 @@ static void kvm_arm_gicv3_reset(DeviceState *dev)
     DPRINTF("Reset\n");
 
     kgc->parent_reset(dev);
+
+    if (s->migration_blocker) {
+        DPRINTF("Cannot put kernel gic state, no kernel interface\n");
+        return;
+    }
+
     kvm_arm_gicv3_put(s);
 }
 
+/*
+ * CPU interface registers of GIC needs to be reset on CPU reset.
+ * For the calling arm_gicv3_icc_reset() on CPU reset, we register
+ * below ARMCPRegInfo. As we reset the whole cpu interface under single
+ * register reset, we define only one register of CPU interface instead
+ * of defining all the registers.
+ */
+static const ARMCPRegInfo gicv3_cpuif_reginfo[] = {
+    { .name = "ICC_CTLR_EL1", .state = ARM_CP_STATE_BOTH,
+      .opc0 = 3, .opc1 = 0, .crn = 12, .crm = 12, .opc2 = 4,
+      /*
+       * If ARM_CP_NOP is used, resetfn is not called,
+       * So ARM_CP_NO_RAW is appropriate type.
+       */
+      .type = ARM_CP_NO_RAW,
+      .access = PL1_RW,
+      .readfn = arm_cp_read_zero,
+      .writefn = arm_cp_write_ignore,
+      /*
+       * We hang the whole cpu interface reset routine off here
+       * rather than parcelling it out into one little function
+       * per register
+       */
+      .resetfn = arm_gicv3_icc_reset,
+    },
+    REGINFO_SENTINEL
+};
+
 static void kvm_arm_gicv3_realize(DeviceState *dev, Error **errp)
 {
     GICv3State *s = KVM_ARM_GICV3(dev);
@@ -103,16 +698,10 @@ static void kvm_arm_gicv3_realize(DeviceState *dev, Error **errp)
 
     gicv3_init_irqs_and_mmio(s, kvm_arm_gicv3_set_irq, NULL);
 
-    /* Block migration of a KVM GICv3 device: the API for saving and restoring
-     * the state in the kernel is not yet finalised in the kernel or
-     * implemented in QEMU.
-     */
-    error_setg(&s->migration_blocker, "vGICv3 migration is not implemented");
-    migrate_add_blocker(s->migration_blocker, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        error_free(s->migration_blocker);
-        return;
+    for (i = 0; i < s->num_cpu; i++) {
+        ARMCPU *cpu = ARM_CPU(qemu_get_cpu(i));
+
+        define_arm_cp_regs(cpu, gicv3_cpuif_reginfo);
     }
 
     /* Try to create the device via the device control API */
@@ -145,6 +734,18 @@ static void kvm_arm_gicv3_realize(DeviceState *dev, Error **errp)
 
         kvm_irqchip_commit_routes(kvm_state);
     }
+
+    if (!kvm_device_check_attr(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_DIST_REGS,
+                               GICD_CTLR)) {
+        error_setg(&s->migration_blocker, "This operating system kernel does "
+                                          "not support vGICv3 migration");
+        migrate_add_blocker(s->migration_blocker, &local_err);
+        if (local_err) {
+            error_propagate(errp, local_err);
+            error_free(s->migration_blocker);
+            return;
+        }
+    }
 }
 
 static void kvm_arm_gicv3_class_init(ObjectClass *klass, void *data)
diff --git a/hw/intc/armv7m_nvic.c b/hw/intc/armv7m_nvic.c
index fe5c303de9..32ffa0bf35 100644
--- a/hw/intc/armv7m_nvic.c
+++ b/hw/intc/armv7m_nvic.c
@@ -17,213 +17,425 @@
 #include "hw/sysbus.h"
 #include "qemu/timer.h"
 #include "hw/arm/arm.h"
-#include "exec/address-spaces.h"
-#include "gic_internal.h"
+#include "hw/arm/armv7m_nvic.h"
+#include "target/arm/cpu.h"
 #include "qemu/log.h"
+#include "trace.h"
 
-typedef struct {
-    GICState gic;
-    ARMCPU *cpu;
-    struct {
-        uint32_t control;
-        uint32_t reload;
-        int64_t tick;
-        QEMUTimer *timer;
-    } systick;
-    MemoryRegion sysregmem;
-    MemoryRegion gic_iomem_alias;
-    MemoryRegion container;
-    uint32_t num_irq;
-    qemu_irq sysresetreq;
-} nvic_state;
-
-#define TYPE_NVIC "armv7m_nvic"
-/**
- * NVICClass:
- * @parent_reset: the parent class' reset handler.
+/* IRQ number counting:
  *
- * A model of the v7M NVIC and System Controller
+ * the num-irq property counts the number of external IRQ lines
+ *
+ * NVICState::num_irq counts the total number of exceptions
+ * (external IRQs, the 15 internal exceptions including reset,
+ * and one for the unused exception number 0).
+ *
+ * NVIC_MAX_IRQ is the highest permitted number of external IRQ lines.
+ *
+ * NVIC_MAX_VECTORS is the highest permitted number of exceptions.
+ *
+ * Iterating through all exceptions should typically be done with
+ * for (i = 1; i < s->num_irq; i++) to avoid the unused slot 0.
+ *
+ * The external qemu_irq lines are the NVIC's external IRQ lines,
+ * so line 0 is exception 16.
+ *
+ * In the terminology of the architecture manual, "interrupts" are
+ * a subcategory of exception referring to the external interrupts
+ * (which are exception numbers NVIC_FIRST_IRQ and upward).
+ * For historical reasons QEMU tends to use "interrupt" and
+ * "exception" more or less interchangeably.
+ */
+#define NVIC_FIRST_IRQ 16
+#define NVIC_MAX_IRQ (NVIC_MAX_VECTORS - NVIC_FIRST_IRQ)
+
+/* Effective running priority of the CPU when no exception is active
+ * (higher than the highest possible priority value)
  */
-typedef struct NVICClass {
-    /*< private >*/
-    ARMGICClass parent_class;
-    /*< public >*/
-    DeviceRealize parent_realize;
-    void (*parent_reset)(DeviceState *dev);
-} NVICClass;
-
-#define NVIC_CLASS(klass) \
-    OBJECT_CLASS_CHECK(NVICClass, (klass), TYPE_NVIC)
-#define NVIC_GET_CLASS(obj) \
-    OBJECT_GET_CLASS(NVICClass, (obj), TYPE_NVIC)
-#define NVIC(obj) \
-    OBJECT_CHECK(nvic_state, (obj), TYPE_NVIC)
+#define NVIC_NOEXC_PRIO 0x100
 
 static const uint8_t nvic_id[] = {
     0x00, 0xb0, 0x1b, 0x00, 0x0d, 0xe0, 0x05, 0xb1
 };
 
-/* qemu timers run at 1GHz.   We want something closer to 1MHz.  */
-#define SYSTICK_SCALE 1000ULL
+static int nvic_pending_prio(NVICState *s)
+{
+    /* return the priority of the current pending interrupt,
+     * or NVIC_NOEXC_PRIO if no interrupt is pending
+     */
+    return s->vectpending ? s->vectors[s->vectpending].prio : NVIC_NOEXC_PRIO;
+}
 
-#define SYSTICK_ENABLE    (1 << 0)
-#define SYSTICK_TICKINT   (1 << 1)
-#define SYSTICK_CLKSOURCE (1 << 2)
-#define SYSTICK_COUNTFLAG (1 << 16)
+/* Return the value of the ISCR RETTOBASE bit:
+ * 1 if there is exactly one active exception
+ * 0 if there is more than one active exception
+ * UNKNOWN if there are no active exceptions (we choose 1,
+ * which matches the choice Cortex-M3 is documented as making).
+ *
+ * NB: some versions of the documentation talk about this
+ * counting "active exceptions other than the one shown by IPSR";
+ * this is only different in the obscure corner case where guest
+ * code has manually deactivated an exception and is about
+ * to fail an exception-return integrity check. The definition
+ * above is the one from the v8M ARM ARM and is also in line
+ * with the behaviour documented for the Cortex-M3.
+ */
+static bool nvic_rettobase(NVICState *s)
+{
+    int irq, nhand = 0;
 
-int system_clock_scale;
+    for (irq = ARMV7M_EXCP_RESET; irq < s->num_irq; irq++) {
+        if (s->vectors[irq].active) {
+            nhand++;
+            if (nhand == 2) {
+                return 0;
+            }
+        }
+    }
 
-/* Conversion factor from qemu timer to SysTick frequencies.  */
-static inline int64_t systick_scale(nvic_state *s)
-{
-    if (s->systick.control & SYSTICK_CLKSOURCE)
-        return system_clock_scale;
-    else
-        return 1000;
+    return 1;
 }
 
-static void systick_reload(nvic_state *s, int reset)
+/* Return the value of the ISCR ISRPENDING bit:
+ * 1 if an external interrupt is pending
+ * 0 if no external interrupt is pending
+ */
+static bool nvic_isrpending(NVICState *s)
 {
-    /* The Cortex-M3 Devices Generic User Guide says that "When the
-     * ENABLE bit is set to 1, the counter loads the RELOAD value from the
-     * SYST RVR register and then counts down". So, we need to check the
-     * ENABLE bit before reloading the value.
+    int irq;
+
+    /* We can shortcut if the highest priority pending interrupt
+     * happens to be external or if there is nothing pending.
      */
-    if ((s->systick.control & SYSTICK_ENABLE) == 0) {
-        return;
+    if (s->vectpending > NVIC_FIRST_IRQ) {
+        return true;
+    }
+    if (s->vectpending == 0) {
+        return false;
+    }
+
+    for (irq = NVIC_FIRST_IRQ; irq < s->num_irq; irq++) {
+        if (s->vectors[irq].pending) {
+            return true;
+        }
     }
+    return false;
+}
 
-    if (reset)
-        s->systick.tick = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
-    s->systick.tick += (s->systick.reload + 1) * systick_scale(s);
-    timer_mod(s->systick.timer, s->systick.tick);
+/* Return a mask word which clears the subpriority bits from
+ * a priority value for an M-profile exception, leaving only
+ * the group priority.
+ */
+static inline uint32_t nvic_gprio_mask(NVICState *s)
+{
+    return ~0U << (s->prigroup + 1);
 }
 
-static void systick_timer_tick(void * opaque)
+/* Recompute vectpending and exception_prio */
+static void nvic_recompute_state(NVICState *s)
 {
-    nvic_state *s = (nvic_state *)opaque;
-    s->systick.control |= SYSTICK_COUNTFLAG;
-    if (s->systick.control & SYSTICK_TICKINT) {
-        /* Trigger the interrupt.  */
-        armv7m_nvic_set_pending(s, ARMV7M_EXCP_SYSTICK);
+    int i;
+    int pend_prio = NVIC_NOEXC_PRIO;
+    int active_prio = NVIC_NOEXC_PRIO;
+    int pend_irq = 0;
+
+    for (i = 1; i < s->num_irq; i++) {
+        VecInfo *vec = &s->vectors[i];
+
+        if (vec->enabled && vec->pending && vec->prio < pend_prio) {
+            pend_prio = vec->prio;
+            pend_irq = i;
+        }
+        if (vec->active && vec->prio < active_prio) {
+            active_prio = vec->prio;
+        }
     }
-    if (s->systick.reload == 0) {
-        s->systick.control &= ~SYSTICK_ENABLE;
+
+    s->vectpending = pend_irq;
+    s->exception_prio = active_prio & nvic_gprio_mask(s);
+
+    trace_nvic_recompute_state(s->vectpending, s->exception_prio);
+}
+
+/* Return the current execution priority of the CPU
+ * (equivalent to the pseudocode ExecutionPriority function).
+ * This is a value between -2 (NMI priority) and NVIC_NOEXC_PRIO.
+ */
+static inline int nvic_exec_prio(NVICState *s)
+{
+    CPUARMState *env = &s->cpu->env;
+    int running;
+
+    if (env->daif & PSTATE_F) { /* FAULTMASK */
+        running = -1;
+    } else if (env->daif & PSTATE_I) { /* PRIMASK */
+        running = 0;
+    } else if (env->v7m.basepri > 0) {
+        running = env->v7m.basepri & nvic_gprio_mask(s);
     } else {
-        systick_reload(s, 0);
+        running = NVIC_NOEXC_PRIO; /* lower than any possible priority */
     }
+    /* consider priority of active handler */
+    return MIN(running, s->exception_prio);
 }
 
-static void systick_reset(nvic_state *s)
+bool armv7m_nvic_can_take_pending_exception(void *opaque)
 {
-    s->systick.control = 0;
-    s->systick.reload = 0;
-    s->systick.tick = 0;
-    timer_del(s->systick.timer);
+    NVICState *s = opaque;
+
+    return nvic_exec_prio(s) > nvic_pending_prio(s);
+}
+
+/* caller must call nvic_irq_update() after this */
+static void set_prio(NVICState *s, unsigned irq, uint8_t prio)
+{
+    assert(irq > ARMV7M_EXCP_NMI); /* only use for configurable prios */
+    assert(irq < s->num_irq);
+
+    s->vectors[irq].prio = prio;
+
+    trace_nvic_set_prio(irq, prio);
+}
+
+/* Recompute state and assert irq line accordingly.
+ * Must be called after changes to:
+ *  vec->active, vec->enabled, vec->pending or vec->prio for any vector
+ *  prigroup
+ */
+static void nvic_irq_update(NVICState *s)
+{
+    int lvl;
+    int pend_prio;
+
+    nvic_recompute_state(s);
+    pend_prio = nvic_pending_prio(s);
+
+    /* Raise NVIC output if this IRQ would be taken, except that we
+     * ignore the effects of the BASEPRI, FAULTMASK and PRIMASK (which
+     * will be checked for in arm_v7m_cpu_exec_interrupt()); changes
+     * to those CPU registers don't cause us to recalculate the NVIC
+     * pending info.
+     */
+    lvl = (pend_prio < s->exception_prio);
+    trace_nvic_irq_update(s->vectpending, pend_prio, s->exception_prio, lvl);
+    qemu_set_irq(s->excpout, lvl);
+}
+
+static void armv7m_nvic_clear_pending(void *opaque, int irq)
+{
+    NVICState *s = (NVICState *)opaque;
+    VecInfo *vec;
+
+    assert(irq > ARMV7M_EXCP_RESET && irq < s->num_irq);
+
+    vec = &s->vectors[irq];
+    trace_nvic_clear_pending(irq, vec->enabled, vec->prio);
+    if (vec->pending) {
+        vec->pending = 0;
+        nvic_irq_update(s);
+    }
 }
 
-/* The external routines use the hardware vector numbering, ie. the first
-   IRQ is #16.  The internal GIC routines use #32 as the first IRQ.  */
 void armv7m_nvic_set_pending(void *opaque, int irq)
 {
-    nvic_state *s = (nvic_state *)opaque;
-    if (irq >= 16)
-        irq += 16;
-    gic_set_pending_private(&s->gic, 0, irq);
+    NVICState *s = (NVICState *)opaque;
+    VecInfo *vec;
+
+    assert(irq > ARMV7M_EXCP_RESET && irq < s->num_irq);
+
+    vec = &s->vectors[irq];
+    trace_nvic_set_pending(irq, vec->enabled, vec->prio);
+
+
+    if (irq >= ARMV7M_EXCP_HARD && irq < ARMV7M_EXCP_PENDSV) {
+        /* If a synchronous exception is pending then it may be
+         * escalated to HardFault if:
+         *  * it is equal or lower priority to current execution
+         *  * it is disabled
+         * (ie we need to take it immediately but we can't do so).
+         * Asynchronous exceptions (and interrupts) simply remain pending.
+         *
+         * For QEMU, we don't have any imprecise (asynchronous) faults,
+         * so we can assume that PREFETCH_ABORT and DATA_ABORT are always
+         * synchronous.
+         * Debug exceptions are awkward because only Debug exceptions
+         * resulting from the BKPT instruction should be escalated,
+         * but we don't currently implement any Debug exceptions other
+         * than those that result from BKPT, so we treat all debug exceptions
+         * as needing escalation.
+         *
+         * This all means we can identify whether to escalate based only on
+         * the exception number and don't (yet) need the caller to explicitly
+         * tell us whether this exception is synchronous or not.
+         */
+        int running = nvic_exec_prio(s);
+        bool escalate = false;
+
+        if (vec->prio >= running) {
+            trace_nvic_escalate_prio(irq, vec->prio, running);
+            escalate = true;
+        } else if (!vec->enabled) {
+            trace_nvic_escalate_disabled(irq);
+            escalate = true;
+        }
+
+        if (escalate) {
+            if (running < 0) {
+                /* We want to escalate to HardFault but we can't take a
+                 * synchronous HardFault at this point either. This is a
+                 * Lockup condition due to a guest bug. We don't model
+                 * Lockup, so report via cpu_abort() instead.
+                 */
+                cpu_abort(&s->cpu->parent_obj,
+                          "Lockup: can't escalate %d to HardFault "
+                          "(current priority %d)\n", irq, running);
+            }
+
+            /* We can do the escalation, so we take HardFault instead */
+            irq = ARMV7M_EXCP_HARD;
+            vec = &s->vectors[irq];
+            s->cpu->env.v7m.hfsr |= R_V7M_HFSR_FORCED_MASK;
+        }
+    }
+
+    if (!vec->pending) {
+        vec->pending = 1;
+        nvic_irq_update(s);
+    }
 }
 
 /* Make pending IRQ active.  */
-int armv7m_nvic_acknowledge_irq(void *opaque)
+void armv7m_nvic_acknowledge_irq(void *opaque)
 {
-    nvic_state *s = (nvic_state *)opaque;
-    uint32_t irq;
-
-    irq = gic_acknowledge_irq(&s->gic, 0, MEMTXATTRS_UNSPECIFIED);
-    if (irq == 1023)
-        hw_error("Interrupt but no vector\n");
-    if (irq >= 32)
-        irq -= 16;
-    return irq;
+    NVICState *s = (NVICState *)opaque;
+    CPUARMState *env = &s->cpu->env;
+    const int pending = s->vectpending;
+    const int running = nvic_exec_prio(s);
+    int pendgroupprio;
+    VecInfo *vec;
+
+    assert(pending > ARMV7M_EXCP_RESET && pending < s->num_irq);
+
+    vec = &s->vectors[pending];
+
+    assert(vec->enabled);
+    assert(vec->pending);
+
+    pendgroupprio = vec->prio & nvic_gprio_mask(s);
+    assert(pendgroupprio < running);
+
+    trace_nvic_acknowledge_irq(pending, vec->prio);
+
+    vec->active = 1;
+    vec->pending = 0;
+
+    env->v7m.exception = s->vectpending;
+
+    nvic_irq_update(s);
 }
 
-void armv7m_nvic_complete_irq(void *opaque, int irq)
+int armv7m_nvic_complete_irq(void *opaque, int irq)
 {
-    nvic_state *s = (nvic_state *)opaque;
-    if (irq >= 16)
-        irq += 16;
-    gic_complete_irq(&s->gic, 0, irq, MEMTXATTRS_UNSPECIFIED);
+    NVICState *s = (NVICState *)opaque;
+    VecInfo *vec;
+    int ret;
+
+    assert(irq > ARMV7M_EXCP_RESET && irq < s->num_irq);
+
+    vec = &s->vectors[irq];
+
+    trace_nvic_complete_irq(irq);
+
+    if (!vec->active) {
+        /* Tell the caller this was an illegal exception return */
+        return -1;
+    }
+
+    ret = nvic_rettobase(s);
+
+    vec->active = 0;
+    if (vec->level) {
+        /* Re-pend the exception if it's still held high; only
+         * happens for extenal IRQs
+         */
+        assert(irq >= NVIC_FIRST_IRQ);
+        vec->pending = 1;
+    }
+
+    nvic_irq_update(s);
+
+    return ret;
+}
+
+/* callback when external interrupt line is changed */
+static void set_irq_level(void *opaque, int n, int level)
+{
+    NVICState *s = opaque;
+    VecInfo *vec;
+
+    n += NVIC_FIRST_IRQ;
+
+    assert(n >= NVIC_FIRST_IRQ && n < s->num_irq);
+
+    trace_nvic_set_irq_level(n, level);
+
+    /* The pending status of an external interrupt is
+     * latched on rising edge and exception handler return.
+     *
+     * Pulsing the IRQ will always run the handler
+     * once, and the handler will re-run until the
+     * level is low when the handler completes.
+     */
+    vec = &s->vectors[n];
+    if (level != vec->level) {
+        vec->level = level;
+        if (level) {
+            armv7m_nvic_set_pending(s, n);
+        }
+    }
 }
 
-static uint32_t nvic_readl(nvic_state *s, uint32_t offset)
+static uint32_t nvic_readl(NVICState *s, uint32_t offset)
 {
     ARMCPU *cpu = s->cpu;
     uint32_t val;
-    int irq;
 
     switch (offset) {
     case 4: /* Interrupt Control Type.  */
-        return (s->num_irq / 32) - 1;
-    case 0x10: /* SysTick Control and Status.  */
-        val = s->systick.control;
-        s->systick.control &= ~SYSTICK_COUNTFLAG;
-        return val;
-    case 0x14: /* SysTick Reload Value.  */
-        return s->systick.reload;
-    case 0x18: /* SysTick Current Value.  */
-        {
-            int64_t t;
-            if ((s->systick.control & SYSTICK_ENABLE) == 0)
-                return 0;
-            t = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
-            if (t >= s->systick.tick)
-                return 0;
-            val = ((s->systick.tick - (t + 1)) / systick_scale(s)) + 1;
-            /* The interrupt in triggered when the timer reaches zero.
-               However the counter is not reloaded until the next clock
-               tick.  This is a hack to return zero during the first tick.  */
-            if (val > s->systick.reload)
-                val = 0;
-            return val;
-        }
-    case 0x1c: /* SysTick Calibration Value.  */
-        return 10000;
+        return ((s->num_irq - NVIC_FIRST_IRQ) / 32) - 1;
     case 0xd00: /* CPUID Base.  */
         return cpu->midr;
     case 0xd04: /* Interrupt Control State.  */
         /* VECTACTIVE */
         val = cpu->env.v7m.exception;
-        if (val == 1023) {
-            val = 0;
-        } else if (val >= 32) {
-            val -= 16;
-        }
         /* VECTPENDING */
-        if (s->gic.current_pending[0] != 1023)
-            val |= (s->gic.current_pending[0] << 12);
-        /* ISRPENDING and RETTOBASE */
-        for (irq = 32; irq < s->num_irq; irq++) {
-            if (s->gic.irq_state[irq].pending) {
-                val |= (1 << 22);
-                break;
-            }
-            if (irq != cpu->env.v7m.exception && s->gic.irq_state[irq].active) {
-                val |= (1 << 11);
-            }
+        val |= (s->vectpending & 0xff) << 12;
+        /* ISRPENDING - set if any external IRQ is pending */
+        if (nvic_isrpending(s)) {
+            val |= (1 << 22);
+        }
+        /* RETTOBASE - set if only one handler is active */
+        if (nvic_rettobase(s)) {
+            val |= (1 << 11);
         }
         /* PENDSTSET */
-        if (s->gic.irq_state[ARMV7M_EXCP_SYSTICK].pending)
+        if (s->vectors[ARMV7M_EXCP_SYSTICK].pending) {
             val |= (1 << 26);
+        }
         /* PENDSVSET */
-        if (s->gic.irq_state[ARMV7M_EXCP_PENDSV].pending)
+        if (s->vectors[ARMV7M_EXCP_PENDSV].pending) {
             val |= (1 << 28);
+        }
         /* NMIPENDSET */
-        if (s->gic.irq_state[ARMV7M_EXCP_NMI].pending)
+        if (s->vectors[ARMV7M_EXCP_NMI].pending) {
             val |= (1 << 31);
+        }
+        /* ISRPREEMPT not implemented */
         return val;
     case 0xd08: /* Vector Table Offset.  */
         return cpu->env.v7m.vecbase;
     case 0xd0c: /* Application Interrupt/Reset Control.  */
-        return 0xfa050000;
+        return 0xfa050000 | (s->prigroup << 8);
     case 0xd10: /* System Control.  */
         /* TODO: Implement SLEEPONEXIT.  */
         return 0;
@@ -231,20 +443,48 @@ static uint32_t nvic_readl(nvic_state *s, uint32_t offset)
         return cpu->env.v7m.ccr;
     case 0xd24: /* System Handler Status.  */
         val = 0;
-        if (s->gic.irq_state[ARMV7M_EXCP_MEM].active) val |= (1 << 0);
-        if (s->gic.irq_state[ARMV7M_EXCP_BUS].active) val |= (1 << 1);
-        if (s->gic.irq_state[ARMV7M_EXCP_USAGE].active) val |= (1 << 3);
-        if (s->gic.irq_state[ARMV7M_EXCP_SVC].active) val |= (1 << 7);
-        if (s->gic.irq_state[ARMV7M_EXCP_DEBUG].active) val |= (1 << 8);
-        if (s->gic.irq_state[ARMV7M_EXCP_PENDSV].active) val |= (1 << 10);
-        if (s->gic.irq_state[ARMV7M_EXCP_SYSTICK].active) val |= (1 << 11);
-        if (s->gic.irq_state[ARMV7M_EXCP_USAGE].pending) val |= (1 << 12);
-        if (s->gic.irq_state[ARMV7M_EXCP_MEM].pending) val |= (1 << 13);
-        if (s->gic.irq_state[ARMV7M_EXCP_BUS].pending) val |= (1 << 14);
-        if (s->gic.irq_state[ARMV7M_EXCP_SVC].pending) val |= (1 << 15);
-        if (s->gic.irq_state[ARMV7M_EXCP_MEM].enabled) val |= (1 << 16);
-        if (s->gic.irq_state[ARMV7M_EXCP_BUS].enabled) val |= (1 << 17);
-        if (s->gic.irq_state[ARMV7M_EXCP_USAGE].enabled) val |= (1 << 18);
+        if (s->vectors[ARMV7M_EXCP_MEM].active) {
+            val |= (1 << 0);
+        }
+        if (s->vectors[ARMV7M_EXCP_BUS].active) {
+            val |= (1 << 1);
+        }
+        if (s->vectors[ARMV7M_EXCP_USAGE].active) {
+            val |= (1 << 3);
+        }
+        if (s->vectors[ARMV7M_EXCP_SVC].active) {
+            val |= (1 << 7);
+        }
+        if (s->vectors[ARMV7M_EXCP_DEBUG].active) {
+            val |= (1 << 8);
+        }
+        if (s->vectors[ARMV7M_EXCP_PENDSV].active) {
+            val |= (1 << 10);
+        }
+        if (s->vectors[ARMV7M_EXCP_SYSTICK].active) {
+            val |= (1 << 11);
+        }
+        if (s->vectors[ARMV7M_EXCP_USAGE].pending) {
+            val |= (1 << 12);
+        }
+        if (s->vectors[ARMV7M_EXCP_MEM].pending) {
+            val |= (1 << 13);
+        }
+        if (s->vectors[ARMV7M_EXCP_BUS].pending) {
+            val |= (1 << 14);
+        }
+        if (s->vectors[ARMV7M_EXCP_SVC].pending) {
+            val |= (1 << 15);
+        }
+        if (s->vectors[ARMV7M_EXCP_MEM].enabled) {
+            val |= (1 << 16);
+        }
+        if (s->vectors[ARMV7M_EXCP_BUS].enabled) {
+            val |= (1 << 17);
+        }
+        if (s->vectors[ARMV7M_EXCP_USAGE].enabled) {
+            val |= (1 << 18);
+        }
         return val;
     case 0xd28: /* Configurable Fault Status.  */
         return cpu->env.v7m.cfsr;
@@ -294,43 +534,11 @@ static uint32_t nvic_readl(nvic_state *s, uint32_t offset)
     }
 }
 
-static void nvic_writel(nvic_state *s, uint32_t offset, uint32_t value)
+static void nvic_writel(NVICState *s, uint32_t offset, uint32_t value)
 {
     ARMCPU *cpu = s->cpu;
-    uint32_t oldval;
+
     switch (offset) {
-    case 0x10: /* SysTick Control and Status.  */
-        oldval = s->systick.control;
-        s->systick.control &= 0xfffffff8;
-        s->systick.control |= value & 7;
-        if ((oldval ^ value) & SYSTICK_ENABLE) {
-            int64_t now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
-            if (value & SYSTICK_ENABLE) {
-                if (s->systick.tick) {
-                    s->systick.tick += now;
-                    timer_mod(s->systick.timer, s->systick.tick);
-                } else {
-                    systick_reload(s, 1);
-                }
-            } else {
-                timer_del(s->systick.timer);
-                s->systick.tick -= now;
-                if (s->systick.tick < 0)
-                  s->systick.tick = 0;
-            }
-        } else if ((oldval ^ value) & SYSTICK_CLKSOURCE) {
-            /* This is a hack. Force the timer to be reloaded
-               when the reference clock is changed.  */
-            systick_reload(s, 1);
-        }
-        break;
-    case 0x14: /* SysTick Reload Value.  */
-        s->systick.reload = value;
-        break;
-    case 0x18: /* SysTick Current Value.  Writes reload the timer.  */
-        systick_reload(s, 1);
-        s->systick.control &= ~SYSTICK_COUNTFLAG;
-        break;
     case 0xd04: /* Interrupt Control State.  */
         if (value & (1 << 31)) {
             armv7m_nvic_set_pending(s, ARMV7M_EXCP_NMI);
@@ -338,14 +546,12 @@ static void nvic_writel(nvic_state *s, uint32_t offset, uint32_t value)
         if (value & (1 << 28)) {
             armv7m_nvic_set_pending(s, ARMV7M_EXCP_PENDSV);
         } else if (value & (1 << 27)) {
-            s->gic.irq_state[ARMV7M_EXCP_PENDSV].pending = 0;
-            gic_update(&s->gic);
+            armv7m_nvic_clear_pending(s, ARMV7M_EXCP_PENDSV);
         }
         if (value & (1 << 26)) {
             armv7m_nvic_set_pending(s, ARMV7M_EXCP_SYSTICK);
         } else if (value & (1 << 25)) {
-            s->gic.irq_state[ARMV7M_EXCP_SYSTICK].pending = 0;
-            gic_update(&s->gic);
+            armv7m_nvic_clear_pending(s, ARMV7M_EXCP_SYSTICK);
         }
         break;
     case 0xd08: /* Vector Table Offset.  */
@@ -357,14 +563,17 @@ static void nvic_writel(nvic_state *s, uint32_t offset, uint32_t value)
                 qemu_irq_pulse(s->sysresetreq);
             }
             if (value & 2) {
-                qemu_log_mask(LOG_UNIMP, "VECTCLRACTIVE unimplemented\n");
+                qemu_log_mask(LOG_GUEST_ERROR,
+                              "Setting VECTCLRACTIVE when not in DEBUG mode "
+                              "is UNPREDICTABLE\n");
             }
             if (value & 1) {
-                qemu_log_mask(LOG_UNIMP, "AIRCR system reset unimplemented\n");
-            }
-            if (value & 0x700) {
-                qemu_log_mask(LOG_UNIMP, "PRIGROUP unimplemented\n");
+                qemu_log_mask(LOG_GUEST_ERROR,
+                              "Setting VECTRESET when not in DEBUG mode "
+                              "is UNPREDICTABLE\n");
             }
+            s->prigroup = extract32(value, 8, 3);
+            nvic_irq_update(s);
         }
         break;
     case 0xd10: /* System Control.  */
@@ -383,11 +592,21 @@ static void nvic_writel(nvic_state *s, uint32_t offset, uint32_t value)
         cpu->env.v7m.ccr = value;
         break;
     case 0xd24: /* System Handler Control.  */
-        /* TODO: Real hardware allows you to set/clear the active bits
-           under some circumstances.  We don't implement this.  */
-        s->gic.irq_state[ARMV7M_EXCP_MEM].enabled = (value & (1 << 16)) != 0;
-        s->gic.irq_state[ARMV7M_EXCP_BUS].enabled = (value & (1 << 17)) != 0;
-        s->gic.irq_state[ARMV7M_EXCP_USAGE].enabled = (value & (1 << 18)) != 0;
+        s->vectors[ARMV7M_EXCP_MEM].active = (value & (1 << 0)) != 0;
+        s->vectors[ARMV7M_EXCP_BUS].active = (value & (1 << 1)) != 0;
+        s->vectors[ARMV7M_EXCP_USAGE].active = (value & (1 << 3)) != 0;
+        s->vectors[ARMV7M_EXCP_SVC].active = (value & (1 << 7)) != 0;
+        s->vectors[ARMV7M_EXCP_DEBUG].active = (value & (1 << 8)) != 0;
+        s->vectors[ARMV7M_EXCP_PENDSV].active = (value & (1 << 10)) != 0;
+        s->vectors[ARMV7M_EXCP_SYSTICK].active = (value & (1 << 11)) != 0;
+        s->vectors[ARMV7M_EXCP_USAGE].pending = (value & (1 << 12)) != 0;
+        s->vectors[ARMV7M_EXCP_MEM].pending = (value & (1 << 13)) != 0;
+        s->vectors[ARMV7M_EXCP_BUS].pending = (value & (1 << 14)) != 0;
+        s->vectors[ARMV7M_EXCP_SVC].pending = (value & (1 << 15)) != 0;
+        s->vectors[ARMV7M_EXCP_MEM].enabled = (value & (1 << 16)) != 0;
+        s->vectors[ARMV7M_EXCP_BUS].enabled = (value & (1 << 17)) != 0;
+        s->vectors[ARMV7M_EXCP_USAGE].enabled = (value & (1 << 18)) != 0;
+        nvic_irq_update(s);
         break;
     case 0xd28: /* Configurable Fault Status.  */
         cpu->env.v7m.cfsr &= ~value; /* W1C */
@@ -409,13 +628,16 @@ static void nvic_writel(nvic_state *s, uint32_t offset, uint32_t value)
                       "NVIC: Aux fault status registers unimplemented\n");
         break;
     case 0xf00: /* Software Triggered Interrupt Register */
+    {
         /* user mode can only write to STIR if CCR.USERSETMPEND permits it */
-        if ((value & 0x1ff) < s->num_irq &&
+        int excnum = (value & 0x1ff) + NVIC_FIRST_IRQ;
+        if (excnum < s->num_irq &&
             (arm_current_el(&cpu->env) ||
              (cpu->env.v7m.ccr & R_V7M_CCR_USERSETMPEND_MASK))) {
-            gic_set_pending_private(&s->gic, 0, value & 0x1ff);
+            armv7m_nvic_set_pending(s, excnum);
         }
         break;
+    }
     default:
         qemu_log_mask(LOG_GUEST_ERROR,
                       "NVIC: Bad write offset 0x%x\n", offset);
@@ -425,46 +647,142 @@ static void nvic_writel(nvic_state *s, uint32_t offset, uint32_t value)
 static uint64_t nvic_sysreg_read(void *opaque, hwaddr addr,
                                  unsigned size)
 {
-    nvic_state *s = (nvic_state *)opaque;
+    NVICState *s = (NVICState *)opaque;
     uint32_t offset = addr;
-    int i;
+    unsigned i, startvec, end;
     uint32_t val;
 
     switch (offset) {
+    /* reads of set and clear both return the status */
+    case 0x100 ... 0x13f: /* NVIC Set enable */
+        offset += 0x80;
+        /* fall through */
+    case 0x180 ... 0x1bf: /* NVIC Clear enable */
+        val = 0;
+        startvec = offset - 0x180 + NVIC_FIRST_IRQ; /* vector # */
+
+        for (i = 0, end = size * 8; i < end && startvec + i < s->num_irq; i++) {
+            if (s->vectors[startvec + i].enabled) {
+                val |= (1 << i);
+            }
+        }
+        break;
+    case 0x200 ... 0x23f: /* NVIC Set pend */
+        offset += 0x80;
+        /* fall through */
+    case 0x280 ... 0x2bf: /* NVIC Clear pend */
+        val = 0;
+        startvec = offset - 0x280 + NVIC_FIRST_IRQ; /* vector # */
+        for (i = 0, end = size * 8; i < end && startvec + i < s->num_irq; i++) {
+            if (s->vectors[startvec + i].pending) {
+                val |= (1 << i);
+            }
+        }
+        break;
+    case 0x300 ... 0x33f: /* NVIC Active */
+        val = 0;
+        startvec = offset - 0x300 + NVIC_FIRST_IRQ; /* vector # */
+
+        for (i = 0, end = size * 8; i < end && startvec + i < s->num_irq; i++) {
+            if (s->vectors[startvec + i].active) {
+                val |= (1 << i);
+            }
+        }
+        break;
+    case 0x400 ... 0x5ef: /* NVIC Priority */
+        val = 0;
+        startvec = offset - 0x400 + NVIC_FIRST_IRQ; /* vector # */
+
+        for (i = 0; i < size && startvec + i < s->num_irq; i++) {
+            val |= s->vectors[startvec + i].prio << (8 * i);
+        }
+        break;
     case 0xd18 ... 0xd23: /* System Handler Priority.  */
         val = 0;
         for (i = 0; i < size; i++) {
-            val |= s->gic.priority1[(offset - 0xd14) + i][0] << (i * 8);
+            val |= s->vectors[(offset - 0xd14) + i].prio << (i * 8);
         }
-        return val;
+        break;
     case 0xfe0 ... 0xfff: /* ID.  */
         if (offset & 3) {
-            return 0;
+            val = 0;
+        } else {
+            val = nvic_id[(offset - 0xfe0) >> 2];
+        }
+        break;
+    default:
+        if (size == 4) {
+            val = nvic_readl(s, offset);
+        } else {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "NVIC: Bad read of size %d at offset 0x%x\n",
+                          size, offset);
+            val = 0;
         }
-        return nvic_id[(offset - 0xfe0) >> 2];
-    }
-    if (size == 4) {
-        return nvic_readl(s, offset);
     }
-    qemu_log_mask(LOG_GUEST_ERROR,
-                  "NVIC: Bad read of size %d at offset 0x%x\n", size, offset);
-    return 0;
+
+    trace_nvic_sysreg_read(addr, val, size);
+    return val;
 }
 
 static void nvic_sysreg_write(void *opaque, hwaddr addr,
                               uint64_t value, unsigned size)
 {
-    nvic_state *s = (nvic_state *)opaque;
+    NVICState *s = (NVICState *)opaque;
     uint32_t offset = addr;
-    int i;
+    unsigned i, startvec, end;
+    unsigned setval = 0;
+
+    trace_nvic_sysreg_write(addr, value, size);
 
     switch (offset) {
+    case 0x100 ... 0x13f: /* NVIC Set enable */
+        offset += 0x80;
+        setval = 1;
+        /* fall through */
+    case 0x180 ... 0x1bf: /* NVIC Clear enable */
+        startvec = 8 * (offset - 0x180) + NVIC_FIRST_IRQ;
+
+        for (i = 0, end = size * 8; i < end && startvec + i < s->num_irq; i++) {
+            if (value & (1 << i)) {
+                s->vectors[startvec + i].enabled = setval;
+            }
+        }
+        nvic_irq_update(s);
+        return;
+    case 0x200 ... 0x23f: /* NVIC Set pend */
+        /* the special logic in armv7m_nvic_set_pending()
+         * is not needed since IRQs are never escalated
+         */
+        offset += 0x80;
+        setval = 1;
+        /* fall through */
+    case 0x280 ... 0x2bf: /* NVIC Clear pend */
+        startvec = 8 * (offset - 0x280) + NVIC_FIRST_IRQ; /* vector # */
+
+        for (i = 0, end = size * 8; i < end && startvec + i < s->num_irq; i++) {
+            if (value & (1 << i)) {
+                s->vectors[startvec + i].pending = setval;
+            }
+        }
+        nvic_irq_update(s);
+        return;
+    case 0x300 ... 0x33f: /* NVIC Active */
+        return; /* R/O */
+    case 0x400 ... 0x5ef: /* NVIC Priority */
+        startvec = 8 * (offset - 0x400) + NVIC_FIRST_IRQ; /* vector # */
+
+        for (i = 0; i < size && startvec + i < s->num_irq; i++) {
+            set_prio(s, startvec + i, (value >> (i * 8)) & 0xff);
+        }
+        nvic_irq_update(s);
+        return;
     case 0xd18 ... 0xd23: /* System Handler Priority.  */
         for (i = 0; i < size; i++) {
-            s->gic.priority1[(offset - 0xd14) + i][0] =
-                (value >> (i * 8)) & 0xff;
+            unsigned hdlidx = (offset - 0xd14) + i;
+            set_prio(s, hdlidx, (value >> (i * 8)) & 0xff);
         }
-        gic_update(&s->gic);
+        nvic_irq_update(s);
         return;
     }
     if (size == 4) {
@@ -481,61 +799,143 @@ static const MemoryRegionOps nvic_sysreg_ops = {
     .endianness = DEVICE_NATIVE_ENDIAN,
 };
 
-static const VMStateDescription vmstate_nvic = {
-    .name = "armv7m_nvic",
+static int nvic_post_load(void *opaque, int version_id)
+{
+    NVICState *s = opaque;
+    unsigned i;
+
+    /* Check for out of range priority settings */
+    if (s->vectors[ARMV7M_EXCP_RESET].prio != -3 ||
+        s->vectors[ARMV7M_EXCP_NMI].prio != -2 ||
+        s->vectors[ARMV7M_EXCP_HARD].prio != -1) {
+        return 1;
+    }
+    for (i = ARMV7M_EXCP_MEM; i < s->num_irq; i++) {
+        if (s->vectors[i].prio & ~0xff) {
+            return 1;
+        }
+    }
+
+    nvic_recompute_state(s);
+
+    return 0;
+}
+
+static const VMStateDescription vmstate_VecInfo = {
+    .name = "armv7m_nvic_info",
     .version_id = 1,
     .minimum_version_id = 1,
     .fields = (VMStateField[]) {
-        VMSTATE_UINT32(systick.control, nvic_state),
-        VMSTATE_UINT32(systick.reload, nvic_state),
-        VMSTATE_INT64(systick.tick, nvic_state),
-        VMSTATE_TIMER_PTR(systick.timer, nvic_state),
+        VMSTATE_INT16(prio, VecInfo),
+        VMSTATE_UINT8(enabled, VecInfo),
+        VMSTATE_UINT8(pending, VecInfo),
+        VMSTATE_UINT8(active, VecInfo),
+        VMSTATE_UINT8(level, VecInfo),
         VMSTATE_END_OF_LIST()
     }
 };
 
+static const VMStateDescription vmstate_nvic = {
+    .name = "armv7m_nvic",
+    .version_id = 4,
+    .minimum_version_id = 4,
+    .post_load = &nvic_post_load,
+    .fields = (VMStateField[]) {
+        VMSTATE_STRUCT_ARRAY(vectors, NVICState, NVIC_MAX_VECTORS, 1,
+                             vmstate_VecInfo, VecInfo),
+        VMSTATE_UINT32(prigroup, NVICState),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static Property props_nvic[] = {
+    /* Number of external IRQ lines (so excluding the 16 internal exceptions) */
+    DEFINE_PROP_UINT32("num-irq", NVICState, num_irq, 64),
+    DEFINE_PROP_END_OF_LIST()
+};
+
 static void armv7m_nvic_reset(DeviceState *dev)
 {
-    nvic_state *s = NVIC(dev);
-    NVICClass *nc = NVIC_GET_CLASS(s);
-    nc->parent_reset(dev);
-    /* Common GIC reset resets to disabled; the NVIC doesn't have
-     * per-CPU interfaces so mark our non-existent CPU interface
-     * as enabled by default, and with a priority mask which allows
-     * all interrupts through.
+    NVICState *s = NVIC(dev);
+
+    s->vectors[ARMV7M_EXCP_NMI].enabled = 1;
+    s->vectors[ARMV7M_EXCP_HARD].enabled = 1;
+    /* MEM, BUS, and USAGE are enabled through
+     * the System Handler Control register
+     */
+    s->vectors[ARMV7M_EXCP_SVC].enabled = 1;
+    s->vectors[ARMV7M_EXCP_DEBUG].enabled = 1;
+    s->vectors[ARMV7M_EXCP_PENDSV].enabled = 1;
+    s->vectors[ARMV7M_EXCP_SYSTICK].enabled = 1;
+
+    s->vectors[ARMV7M_EXCP_RESET].prio = -3;
+    s->vectors[ARMV7M_EXCP_NMI].prio = -2;
+    s->vectors[ARMV7M_EXCP_HARD].prio = -1;
+
+    /* Strictly speaking the reset handler should be enabled.
+     * However, we don't simulate soft resets through the NVIC,
+     * and the reset vector should never be pended.
+     * So we leave it disabled to catch logic errors.
      */
-    s->gic.cpu_ctlr[0] = GICC_CTLR_EN_GRP0;
-    s->gic.priority_mask[0] = 0x100;
-    /* The NVIC as a whole is always enabled. */
-    s->gic.ctlr = 1;
-    systick_reset(s);
+
+    s->exception_prio = NVIC_NOEXC_PRIO;
+    s->vectpending = 0;
+}
+
+static void nvic_systick_trigger(void *opaque, int n, int level)
+{
+    NVICState *s = opaque;
+
+    if (level) {
+        /* SysTick just asked us to pend its exception.
+         * (This is different from an external interrupt line's
+         * behaviour.)
+         */
+        armv7m_nvic_set_pending(s, ARMV7M_EXCP_SYSTICK);
+    }
 }
 
 static void armv7m_nvic_realize(DeviceState *dev, Error **errp)
 {
-    nvic_state *s = NVIC(dev);
-    NVICClass *nc = NVIC_GET_CLASS(s);
-    Error *local_err = NULL;
+    NVICState *s = NVIC(dev);
+    SysBusDevice *systick_sbd;
+    Error *err = NULL;
 
     s->cpu = ARM_CPU(qemu_get_cpu(0));
     assert(s->cpu);
-    /* The NVIC always has only one CPU */
-    s->gic.num_cpu = 1;
-    /* Tell the common code we're an NVIC */
-    s->gic.revision = 0xffffffff;
-    s->num_irq = s->gic.num_irq;
-    nc->parent_realize(dev, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
+
+    if (s->num_irq > NVIC_MAX_IRQ) {
+        error_setg(errp, "num-irq %d exceeds NVIC maximum", s->num_irq);
+        return;
+    }
+
+    qdev_init_gpio_in(dev, set_irq_level, s->num_irq);
+
+    /* include space for internal exception vectors */
+    s->num_irq += NVIC_FIRST_IRQ;
+
+    object_property_set_bool(OBJECT(&s->systick), true, "realized", &err);
+    if (err != NULL) {
+        error_propagate(errp, err);
         return;
     }
-    gic_init_irqs_and_distributor(&s->gic);
-    /* The NVIC and system controller register area looks like this:
-     *  0..0xff : system control registers, including systick
-     *  0x100..0xcff : GIC-like registers
-     *  0xd00..0xfff : system control registers
-     * We use overlaying to put the GIC like registers
-     * over the top of the system control register region.
+    systick_sbd = SYS_BUS_DEVICE(&s->systick);
+    sysbus_connect_irq(systick_sbd, 0,
+                       qdev_get_gpio_in_named(dev, "systick-trigger", 0));
+
+    /* The NVIC and System Control Space (SCS) starts at 0xe000e000
+     * and looks like this:
+     *  0x004 - ICTR
+     *  0x010 - 0xff - systick
+     *  0x100..0x7ec - NVIC
+     *  0x7f0..0xcff - Reserved
+     *  0xd00..0xd3c - SCS registers
+     *  0xd40..0xeff - Reserved or Not implemented
+     *  0xf00 - STIR
+     *
+     * At the moment there is only one thing in the container region,
+     * but we leave it in place to allow us to pull systick out into
+     * its own device object later.
      */
     memory_region_init(&s->container, OBJECT(s), "nvic", 0x1000);
     /* The system register region goes at the bottom of the priority
@@ -544,19 +944,11 @@ static void armv7m_nvic_realize(DeviceState *dev, Error **errp)
     memory_region_init_io(&s->sysregmem, OBJECT(s), &nvic_sysreg_ops, s,
                           "nvic_sysregs", 0x1000);
     memory_region_add_subregion(&s->container, 0, &s->sysregmem);
-    /* Alias the GIC region so we can get only the section of it
-     * we need, and layer it on top of the system register region.
-     */
-    memory_region_init_alias(&s->gic_iomem_alias, OBJECT(s),
-                             "nvic-gic", &s->gic.iomem,
-                             0x100, 0xc00);
-    memory_region_add_subregion_overlap(&s->container, 0x100,
-                                        &s->gic_iomem_alias, 1);
-    /* Map the whole thing into system memory at the location required
-     * by the v7M architecture.
-     */
-    memory_region_add_subregion(get_system_memory(), 0xe000e000, &s->container);
-    s->systick.timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, systick_timer_tick, s);
+    memory_region_add_subregion_overlap(&s->container, 0x10,
+                                        sysbus_mmio_get_region(systick_sbd, 0),
+                                        1);
+
+    sysbus_init_mmio(SYS_BUS_DEVICE(dev), &s->container);
 }
 
 static void armv7m_nvic_instance_init(Object *obj)
@@ -567,36 +959,35 @@ static void armv7m_nvic_instance_init(Object *obj)
      * any user-specified property setting, so just modify the
      * value in the GICState struct.
      */
-    GICState *s = ARM_GIC_COMMON(obj);
     DeviceState *dev = DEVICE(obj);
-    nvic_state *nvic = NVIC(obj);
-    /* The ARM v7m may have anything from 0 to 496 external interrupt
-     * IRQ lines. We default to 64. Other boards may differ and should
-     * set the num-irq property appropriately.
-     */
-    s->num_irq = 64;
+    NVICState *nvic = NVIC(obj);
+    SysBusDevice *sbd = SYS_BUS_DEVICE(obj);
+
+    object_initialize(&nvic->systick, sizeof(nvic->systick), TYPE_SYSTICK);
+    qdev_set_parent_bus(DEVICE(&nvic->systick), sysbus_get_default());
+
+    sysbus_init_irq(sbd, &nvic->excpout);
     qdev_init_gpio_out_named(dev, &nvic->sysresetreq, "SYSRESETREQ", 1);
+    qdev_init_gpio_in_named(dev, nvic_systick_trigger, "systick-trigger", 1);
 }
 
 static void armv7m_nvic_class_init(ObjectClass *klass, void *data)
 {
-    NVICClass *nc = NVIC_CLASS(klass);
     DeviceClass *dc = DEVICE_CLASS(klass);
 
-    nc->parent_reset = dc->reset;
-    nc->parent_realize = dc->realize;
     dc->vmsd  = &vmstate_nvic;
+    dc->props = props_nvic;
     dc->reset = armv7m_nvic_reset;
     dc->realize = armv7m_nvic_realize;
 }
 
 static const TypeInfo armv7m_nvic_info = {
     .name          = TYPE_NVIC,
-    .parent        = TYPE_ARM_GIC_COMMON,
+    .parent        = TYPE_SYS_BUS_DEVICE,
     .instance_init = armv7m_nvic_instance_init,
-    .instance_size = sizeof(nvic_state),
+    .instance_size = sizeof(NVICState),
     .class_init    = armv7m_nvic_class_init,
-    .class_size    = sizeof(NVICClass),
+    .class_size    = sizeof(SysBusDeviceClass),
 };
 
 static void armv7m_nvic_register_types(void)
diff --git a/hw/intc/gic_internal.h b/hw/intc/gic_internal.h
index 3f311740da..7fe87b13de 100644
--- a/hw/intc/gic_internal.h
+++ b/hw/intc/gic_internal.h
@@ -25,9 +25,7 @@
 
 #define ALL_CPU_MASK ((unsigned)(((1 << GIC_NCPU) - 1)))
 
-/* The NVIC has 16 internal vectors.  However these are not exposed
-   through the normal GIC interface.  */
-#define GIC_BASE_IRQ ((s->revision == REV_NVIC) ? 32 : 0)
+#define GIC_BASE_IRQ 0
 
 #define GIC_SET_ENABLED(irq, cm) s->irq_state[irq].enabled |= (cm)
 #define GIC_CLEAR_ENABLED(irq, cm) s->irq_state[irq].enabled &= ~(cm)
@@ -75,7 +73,6 @@
 
 /* The special cases for the revision property: */
 #define REV_11MPCORE 0
-#define REV_NVIC 0xffffffff
 
 void gic_set_pending_private(GICState *s, int cpu, int irq);
 uint32_t gic_acknowledge_irq(GICState *s, int cpu, MemTxAttrs attrs);
@@ -87,7 +84,7 @@ void gic_set_priority(GICState *s, int cpu, int irq, uint8_t val,
 
 static inline bool gic_test_pending(GICState *s, int irq, int cm)
 {
-    if (s->revision == REV_NVIC || s->revision == REV_11MPCORE) {
+    if (s->revision == REV_11MPCORE) {
         return s->irq_state[irq].pending & cm;
     } else {
         /* Edge-triggered interrupts are marked pending on a rising edge, but
diff --git a/hw/intc/gicv3_internal.h b/hw/intc/gicv3_internal.h
index aeb801d133..05303a55c8 100644
--- a/hw/intc/gicv3_internal.h
+++ b/hw/intc/gicv3_internal.h
@@ -138,6 +138,7 @@
 #define ICC_CTLR_EL1_EOIMODE        (1U << 1)
 #define ICC_CTLR_EL1_PMHE           (1U << 6)
 #define ICC_CTLR_EL1_PRIBITS_SHIFT 8
+#define ICC_CTLR_EL1_PRIBITS_MASK   (7U << ICC_CTLR_EL1_PRIBITS_SHIFT)
 #define ICC_CTLR_EL1_IDBITS_SHIFT 11
 #define ICC_CTLR_EL1_SEIS           (1U << 14)
 #define ICC_CTLR_EL1_A3V            (1U << 15)
@@ -407,4 +408,6 @@ static inline void gicv3_cache_all_target_cpustates(GICv3State *s)
     }
 }
 
+void gicv3_set_gicv3state(CPUState *cpu, GICv3CPUState *s);
+
 #endif /* QEMU_ARM_GICV3_INTERNAL_H */
diff --git a/hw/intc/mips_gic.c b/hw/intc/mips_gic.c
index 6e257730f8..15e6e40f9f 100644
--- a/hw/intc/mips_gic.c
+++ b/hw/intc/mips_gic.c
@@ -20,31 +20,29 @@
 #include "kvm_mips.h"
 #include "hw/intc/mips_gic.h"
 
-static void mips_gic_set_vp_irq(MIPSGICState *gic, int vp, int pin, int level)
+static void mips_gic_set_vp_irq(MIPSGICState *gic, int vp, int pin)
 {
-    int ored_level = level;
+    int ored_level = 0;
     int i;
 
     /* ORing pending registers sharing same pin */
-    if (!ored_level) {
-        for (i = 0; i < gic->num_irq; i++) {
-            if ((gic->irq_state[i].map_pin & GIC_MAP_MSK) == pin &&
-                    gic->irq_state[i].map_vp == vp &&
-                    gic->irq_state[i].enabled) {
-                ored_level |= gic->irq_state[i].pending;
-            }
-            if (ored_level) {
-                /* no need to iterate all interrupts */
-                break;
-            }
+    for (i = 0; i < gic->num_irq; i++) {
+        if ((gic->irq_state[i].map_pin & GIC_MAP_MSK) == pin &&
+                gic->irq_state[i].map_vp == vp &&
+                gic->irq_state[i].enabled) {
+            ored_level |= gic->irq_state[i].pending;
         }
-        if (((gic->vps[vp].compare_map & GIC_MAP_MSK) == pin) &&
-                (gic->vps[vp].mask & GIC_VP_MASK_CMP_MSK)) {
-            /* ORing with local pending register (count/compare) */
-            ored_level |= (gic->vps[vp].pend & GIC_VP_MASK_CMP_MSK) >>
-                          GIC_VP_MASK_CMP_SHF;
+        if (ored_level) {
+            /* no need to iterate all interrupts */
+            break;
         }
     }
+    if (((gic->vps[vp].compare_map & GIC_MAP_MSK) == pin) &&
+            (gic->vps[vp].mask & GIC_VP_MASK_CMP_MSK)) {
+        /* ORing with local pending register (count/compare) */
+        ored_level |= (gic->vps[vp].pend & GIC_VP_MASK_CMP_MSK) >>
+                      GIC_VP_MASK_CMP_SHF;
+    }
     if (kvm_enabled())  {
         kvm_mips_set_ipi_interrupt(mips_env_get_cpu(gic->vps[vp].env),
                                    pin + GIC_CPU_PIN_OFFSET,
@@ -55,21 +53,27 @@ static void mips_gic_set_vp_irq(MIPSGICState *gic, int vp, int pin, int level)
     }
 }
 
-static void gic_set_irq(void *opaque, int n_IRQ, int level)
+static void gic_update_pin_for_irq(MIPSGICState *gic, int n_IRQ)
 {
-    MIPSGICState *gic = (MIPSGICState *) opaque;
     int vp = gic->irq_state[n_IRQ].map_vp;
     int pin = gic->irq_state[n_IRQ].map_pin & GIC_MAP_MSK;
 
+    if (vp < 0 || vp >= gic->num_vps) {
+        return;
+    }
+    mips_gic_set_vp_irq(gic, vp, pin);
+}
+
+static void gic_set_irq(void *opaque, int n_IRQ, int level)
+{
+    MIPSGICState *gic = (MIPSGICState *) opaque;
+
     gic->irq_state[n_IRQ].pending = (uint8_t) level;
     if (!gic->irq_state[n_IRQ].enabled) {
         /* GIC interrupt source disabled */
         return;
     }
-    if (vp < 0 || vp >= gic->num_vps) {
-        return;
-    }
-    mips_gic_set_vp_irq(gic, vp, pin, level);
+    gic_update_pin_for_irq(gic, n_IRQ);
 }
 
 #define OFFSET_CHECK(c)                         \
@@ -209,7 +213,7 @@ static void gic_timer_store_vp_compare(MIPSGICState *gic, uint32_t vp_index,
     gic->vps[vp_index].pend &= ~(1 << GIC_LOCAL_INT_COMPARE);
     if (gic->vps[vp_index].compare_map & GIC_MAP_TO_PIN_MSK) {
         uint32_t pin = (gic->vps[vp_index].compare_map & GIC_MAP_MSK);
-        mips_gic_set_vp_irq(gic, vp_index, pin, 0);
+        mips_gic_set_vp_irq(gic, vp_index, pin);
     }
     mips_gictimer_store_vp_compare(gic->gic_timer, vp_index, compare);
 }
@@ -286,6 +290,7 @@ static void gic_write(void *opaque, hwaddr addr, uint64_t data, unsigned size)
         OFFSET_CHECK((base + size * 8) <= gic->num_irq);
         for (i = 0; i < size * 8; i++) {
             gic->irq_state[base + i].enabled &= !((data >> i) & 1);
+            gic_update_pin_for_irq(gic, base + i);
         }
         break;
     case GIC_SH_WEDGE_OFS:
@@ -305,6 +310,7 @@ static void gic_write(void *opaque, hwaddr addr, uint64_t data, unsigned size)
         OFFSET_CHECK((base + size * 8) <= gic->num_irq);
         for (i = 0; i < size * 8; i++) {
             gic->irq_state[base + i].enabled |= (data >> i) & 1;
+            gic_update_pin_for_irq(gic, base + i);
         }
         break;
     case GIC_SH_MAP0_PIN_OFS ... GIC_SH_MAP255_PIN_OFS:
diff --git a/hw/intc/s390_flic.c b/hw/intc/s390_flic.c
index 6ab29efc65..bef4caf980 100644
--- a/hw/intc/s390_flic.c
+++ b/hw/intc/s390_flic.c
@@ -16,6 +16,8 @@
 #include "migration/qemu-file.h"
 #include "hw/s390x/s390_flic.h"
 #include "trace.h"
+#include "hw/qdev.h"
+#include "qapi/error.h"
 
 S390FLICState *s390_get_flic(void)
 {
@@ -85,6 +87,30 @@ static void qemu_s390_flic_class_init(ObjectClass *oc, void *data)
     fsc->clear_io_irq = qemu_s390_clear_io_flic;
 }
 
+static Property s390_flic_common_properties[] = {
+    DEFINE_PROP_UINT32("adapter_routes_max_batch", S390FLICState,
+                       adapter_routes_max_batch, ADAPTER_ROUTES_MAX_GSI),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static void s390_flic_common_realize(DeviceState *dev, Error **errp)
+{
+    uint32_t max_batch = S390_FLIC_COMMON(dev)->adapter_routes_max_batch;
+
+    if (max_batch > ADAPTER_ROUTES_MAX_GSI) {
+        error_setg(errp, "flic adapter_routes_max_batch too big"
+                   "%d (%d allowed)", max_batch, ADAPTER_ROUTES_MAX_GSI);
+    }
+}
+
+static void s390_flic_class_init(ObjectClass *oc, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(oc);
+
+    dc->props = s390_flic_common_properties;
+    dc->realize = s390_flic_common_realize;
+}
+
 static const TypeInfo qemu_s390_flic_info = {
     .name          = TYPE_QEMU_S390_FLIC,
     .parent        = TYPE_S390_FLIC_COMMON,
@@ -92,10 +118,12 @@ static const TypeInfo qemu_s390_flic_info = {
     .class_init    = qemu_s390_flic_class_init,
 };
 
+
 static const TypeInfo s390_flic_common_info = {
     .name          = TYPE_S390_FLIC_COMMON,
     .parent        = TYPE_SYS_BUS_DEVICE,
     .instance_size = sizeof(S390FLICState),
+    .class_init    = s390_flic_class_init,
     .class_size    = sizeof(S390FLICStateClass),
 };
 
diff --git a/hw/intc/s390_flic_kvm.c b/hw/intc/s390_flic_kvm.c
index e86a84e49a..cc44bc4e1e 100644
--- a/hw/intc/s390_flic_kvm.c
+++ b/hw/intc/s390_flic_kvm.c
@@ -293,6 +293,7 @@ static int kvm_flic_save(QEMUFile *f, void *opaque, size_t size,
     int len = FLIC_SAVE_INITIAL_SIZE;
     void *buf;
     int count;
+    int r = 0;
 
     flic_disable_wait_pfault((struct KVMS390FLICState *) opaque);
 
@@ -303,7 +304,7 @@ static int kvm_flic_save(QEMUFile *f, void *opaque, size_t size,
          * migration state */
         error_report("flic: couldn't allocate memory");
         qemu_put_be64(f, FLIC_FAILED);
-        return 0;
+        return -ENOMEM;
     }
 
     count = __get_all_irqs(flic, &buf, len);
@@ -314,6 +315,7 @@ static int kvm_flic_save(QEMUFile *f, void *opaque, size_t size,
          * target system to fail when attempting to load irqs from the
          * migration state */
         qemu_put_be64(f, FLIC_FAILED);
+        r = count;
     } else {
         qemu_put_be64(f, count);
         qemu_put_buffer(f, (uint8_t *) buf,
@@ -321,7 +323,7 @@ static int kvm_flic_save(QEMUFile *f, void *opaque, size_t size,
     }
     g_free(buf);
 
-    return 0;
+    return r;
 }
 
 /**
diff --git a/hw/intc/trace-events b/hw/intc/trace-events
index 39a538d048..729c1288f1 100644
--- a/hw/intc/trace-events
+++ b/hw/intc/trace-events
@@ -161,3 +161,18 @@ gicv3_redist_write(uint32_t cpu, uint64_t offset, uint64_t data, unsigned size,
 gicv3_redist_badwrite(uint32_t cpu, uint64_t offset, uint64_t data, unsigned size, bool secure) "GICv3 redistributor %x write: offset 0x%" PRIx64 " data 0x%" PRIx64 " size %u secure %d: error"
 gicv3_redist_set_irq(uint32_t cpu, int irq, int level) "GICv3 redistributor %x interrupt %d level changed to %d"
 gicv3_redist_send_sgi(uint32_t cpu, int irq) "GICv3 redistributor %x pending SGI %d"
+
+# hw/intc/armv7m_nvic.c
+nvic_recompute_state(int vectpending, int exception_prio) "NVIC state recomputed: vectpending %d exception_prio %d"
+nvic_set_prio(int irq, uint8_t prio) "NVIC set irq %d priority %d"
+nvic_irq_update(int vectpending, int pendprio, int exception_prio, int level) "NVIC vectpending %d pending prio %d exception_prio %d: setting irq line to %d"
+nvic_escalate_prio(int irq, int irqprio, int runprio) "NVIC escalating irq %d to HardFault: insufficient priority %d >= %d"
+nvic_escalate_disabled(int irq) "NVIC escalating irq %d to HardFault: disabled"
+nvic_set_pending(int irq, int en, int prio) "NVIC set pending irq %d (enabled: %d priority %d)"
+nvic_clear_pending(int irq, int en, int prio) "NVIC clear pending irq %d (enabled: %d priority %d)"
+nvic_set_pending_level(int irq) "NVIC set pending: irq %d higher prio than vectpending: setting irq line to 1"
+nvic_acknowledge_irq(int irq, int prio) "NVIC acknowledge IRQ: %d now active (prio %d)"
+nvic_complete_irq(int irq) "NVIC complete IRQ %d"
+nvic_set_irq_level(int irq, int level) "NVIC external irq %d level set to %d"
+nvic_sysreg_read(uint64_t addr, uint32_t value, unsigned size) "NVIC sysreg read addr 0x%" PRIx64 " data 0x%" PRIx32 " size %u"
+nvic_sysreg_write(uint64_t addr, uint32_t value, unsigned size) "NVIC sysreg write addr 0x%" PRIx64 " data 0x%" PRIx32 " size %u"
diff --git a/hw/intc/xics.c b/hw/intc/xics.c
index 095c16a300..ffc0747c7f 100644
--- a/hw/intc/xics.c
+++ b/hw/intc/xics.c
@@ -49,40 +49,41 @@ int xics_get_cpu_index_by_dt_id(int cpu_dt_id)
     return -1;
 }
 
-void xics_cpu_destroy(XICSState *xics, PowerPCCPU *cpu)
+void xics_cpu_destroy(XICSFabric *xi, PowerPCCPU *cpu)
 {
     CPUState *cs = CPU(cpu);
-    ICPState *ss = &xics->ss[cs->cpu_index];
+    ICPState *icp = xics_icp_get(xi, cs->cpu_index);
 
-    assert(cs->cpu_index < xics->nr_servers);
-    assert(cs == ss->cs);
+    assert(icp);
+    assert(cs == icp->cs);
 
-    ss->output = NULL;
-    ss->cs = NULL;
+    icp->output = NULL;
+    icp->cs = NULL;
 }
 
-void xics_cpu_setup(XICSState *xics, PowerPCCPU *cpu)
+void xics_cpu_setup(XICSFabric *xi, PowerPCCPU *cpu)
 {
     CPUState *cs = CPU(cpu);
     CPUPPCState *env = &cpu->env;
-    ICPState *ss = &xics->ss[cs->cpu_index];
-    XICSStateClass *info = XICS_COMMON_GET_CLASS(xics);
+    ICPState *icp = xics_icp_get(xi, cs->cpu_index);
+    ICPStateClass *icpc;
 
-    assert(cs->cpu_index < xics->nr_servers);
+    assert(icp);
 
-    ss->cs = cs;
+    icp->cs = cs;
 
-    if (info->cpu_setup) {
-        info->cpu_setup(xics, cpu);
+    icpc = ICP_GET_CLASS(icp);
+    if (icpc->cpu_setup) {
+        icpc->cpu_setup(icp, cpu);
     }
 
     switch (PPC_INPUT(env)) {
     case PPC_FLAGS_INPUT_POWER7:
-        ss->output = env->irq_inputs[POWER7_INPUT_INT];
+        icp->output = env->irq_inputs[POWER7_INPUT_INT];
         break;
 
     case PPC_FLAGS_INPUT_970:
-        ss->output = env->irq_inputs[PPC970_INPUT_INT];
+        icp->output = env->irq_inputs[PPC970_INPUT_INT];
         break;
 
     default:
@@ -92,185 +93,43 @@ void xics_cpu_setup(XICSState *xics, PowerPCCPU *cpu)
     }
 }
 
-static void xics_common_pic_print_info(InterruptStatsProvider *obj,
-                                       Monitor *mon)
+void icp_pic_print_info(ICPState *icp, Monitor *mon)
 {
-    XICSState *xics = XICS_COMMON(obj);
-    ICSState *ics;
-    uint32_t i;
-
-    for (i = 0; i < xics->nr_servers; i++) {
-        ICPState *icp = &xics->ss[i];
-
-        if (!icp->output) {
-            continue;
-        }
-        monitor_printf(mon, "CPU %d XIRR=%08x (%p) PP=%02x MFRR=%02x\n",
-                       i, icp->xirr, icp->xirr_owner,
-                       icp->pending_priority, icp->mfrr);
-    }
-
-    QLIST_FOREACH(ics, &xics->ics, list) {
-        monitor_printf(mon, "ICS %4x..%4x %p\n",
-                       ics->offset, ics->offset + ics->nr_irqs - 1, ics);
-
-        if (!ics->irqs) {
-            continue;
-        }
-
-        for (i = 0; i < ics->nr_irqs; i++) {
-            ICSIRQState *irq = ics->irqs + i;
-
-            if (!(irq->flags & XICS_FLAGS_IRQ_MASK)) {
-                continue;
-            }
-            monitor_printf(mon, "  %4x %s %02x %02x\n",
-                           ics->offset + i,
-                           (irq->flags & XICS_FLAGS_IRQ_LSI) ?
-                           "LSI" : "MSI",
-                           irq->priority, irq->status);
-        }
-    }
-}
-
-/*
- * XICS Common class - parent for emulated XICS and KVM-XICS
- */
-static void xics_common_reset(DeviceState *d)
-{
-    XICSState *xics = XICS_COMMON(d);
-    ICSState *ics;
-    int i;
-
-    for (i = 0; i < xics->nr_servers; i++) {
-        device_reset(DEVICE(&xics->ss[i]));
-    }
-
-    QLIST_FOREACH(ics, &xics->ics, list) {
-        device_reset(DEVICE(ics));
-    }
-}
-
-static void xics_prop_get_nr_irqs(Object *obj, Visitor *v, const char *name,
-                                  void *opaque, Error **errp)
-{
-    XICSState *xics = XICS_COMMON(obj);
-    int64_t value = xics->nr_irqs;
+    int cpu_index = icp->cs ? icp->cs->cpu_index : -1;
 
-    visit_type_int(v, name, &value, errp);
-}
-
-static void xics_prop_set_nr_irqs(Object *obj, Visitor *v, const char *name,
-                                  void *opaque, Error **errp)
-{
-    XICSState *xics = XICS_COMMON(obj);
-    XICSStateClass *info = XICS_COMMON_GET_CLASS(xics);
-    Error *error = NULL;
-    int64_t value;
-
-    visit_type_int(v, name, &value, &error);
-    if (error) {
-        error_propagate(errp, error);
+    if (!icp->output) {
         return;
     }
-    if (xics->nr_irqs) {
-        error_setg(errp, "Number of interrupts is already set to %u",
-                   xics->nr_irqs);
-        return;
-    }
-
-    assert(info->set_nr_irqs);
-    info->set_nr_irqs(xics, value, errp);
-}
-
-void xics_set_nr_servers(XICSState *xics, uint32_t nr_servers,
-                         const char *typename, Error **errp)
-{
-    int i;
-
-    xics->nr_servers = nr_servers;
-
-    xics->ss = g_malloc0(xics->nr_servers * sizeof(ICPState));
-    for (i = 0; i < xics->nr_servers; i++) {
-        char name[32];
-        ICPState *icp = &xics->ss[i];
-
-        object_initialize(icp, sizeof(*icp), typename);
-        snprintf(name, sizeof(name), "icp[%d]", i);
-        object_property_add_child(OBJECT(xics), name, OBJECT(icp), errp);
-        icp->xics = xics;
-    }
+    monitor_printf(mon, "CPU %d XIRR=%08x (%p) PP=%02x MFRR=%02x\n",
+                   cpu_index, icp->xirr, icp->xirr_owner,
+                   icp->pending_priority, icp->mfrr);
 }
 
-static void xics_prop_get_nr_servers(Object *obj, Visitor *v,
-                                     const char *name, void *opaque,
-                                     Error **errp)
+void ics_pic_print_info(ICSState *ics, Monitor *mon)
 {
-    XICSState *xics = XICS_COMMON(obj);
-    int64_t value = xics->nr_servers;
-
-    visit_type_int(v, name, &value, errp);
-}
+    uint32_t i;
 
-static void xics_prop_set_nr_servers(Object *obj, Visitor *v,
-                                     const char *name, void *opaque,
-                                     Error **errp)
-{
-    XICSState *xics = XICS_COMMON(obj);
-    XICSStateClass *xsc = XICS_COMMON_GET_CLASS(xics);
-    Error *error = NULL;
-    int64_t value;
+    monitor_printf(mon, "ICS %4x..%4x %p\n",
+                   ics->offset, ics->offset + ics->nr_irqs - 1, ics);
 
-    visit_type_int(v, name, &value, &error);
-    if (error) {
-        error_propagate(errp, error);
+    if (!ics->irqs) {
         return;
     }
-    if (xics->nr_servers) {
-        error_setg(errp, "Number of servers is already set to %u",
-                   xics->nr_servers);
-        return;
-    }
-
-    assert(xsc->set_nr_servers);
-    xsc->set_nr_servers(xics, value, errp);
-}
-
-static void xics_common_initfn(Object *obj)
-{
-    XICSState *xics = XICS_COMMON(obj);
 
-    QLIST_INIT(&xics->ics);
-    object_property_add(obj, "nr_irqs", "int",
-                        xics_prop_get_nr_irqs, xics_prop_set_nr_irqs,
-                        NULL, NULL, NULL);
-    object_property_add(obj, "nr_servers", "int",
-                        xics_prop_get_nr_servers, xics_prop_set_nr_servers,
-                        NULL, NULL, NULL);
-}
-
-static void xics_common_class_init(ObjectClass *oc, void *data)
-{
-    DeviceClass *dc = DEVICE_CLASS(oc);
-    InterruptStatsProviderClass *ic = INTERRUPT_STATS_PROVIDER_CLASS(oc);
+    for (i = 0; i < ics->nr_irqs; i++) {
+        ICSIRQState *irq = ics->irqs + i;
 
-    dc->reset = xics_common_reset;
-    ic->print_info = xics_common_pic_print_info;
+        if (!(irq->flags & XICS_FLAGS_IRQ_MASK)) {
+            continue;
+        }
+        monitor_printf(mon, "  %4x %s %02x %02x\n",
+                       ics->offset + i,
+                       (irq->flags & XICS_FLAGS_IRQ_LSI) ?
+                       "LSI" : "MSI",
+                       irq->priority, irq->status);
+    }
 }
 
-static const TypeInfo xics_common_info = {
-    .name          = TYPE_XICS_COMMON,
-    .parent        = TYPE_SYS_BUS_DEVICE,
-    .instance_size = sizeof(XICSState),
-    .class_size    = sizeof(XICSStateClass),
-    .instance_init = xics_common_initfn,
-    .class_init    = xics_common_class_init,
-    .interfaces = (InterfaceInfo[]) {
-        { TYPE_INTERRUPT_STATS_PROVIDER },
-        { }
-    },
-};
-
 /*
  * ICP: Presentation layer
  */
@@ -278,8 +137,8 @@ static const TypeInfo xics_common_info = {
 #define XISR_MASK  0x00ffffff
 #define CPPR_MASK  0xff000000
 
-#define XISR(ss)   (((ss)->xirr) & XISR_MASK)
-#define CPPR(ss)   (((ss)->xirr) >> 24)
+#define XISR(icp)   (((icp)->xirr) & XISR_MASK)
+#define CPPR(icp)   (((icp)->xirr) >> 24)
 
 static void ics_reject(ICSState *ics, uint32_t nr)
 {
@@ -290,7 +149,7 @@ static void ics_reject(ICSState *ics, uint32_t nr)
     }
 }
 
-static void ics_resend(ICSState *ics)
+void ics_resend(ICSState *ics)
 {
     ICSStateClass *k = ICS_BASE_GET_CLASS(ics);
 
@@ -308,151 +167,152 @@ static void ics_eoi(ICSState *ics, int nr)
     }
 }
 
-static void icp_check_ipi(ICPState *ss)
+static void icp_check_ipi(ICPState *icp)
 {
-    if (XISR(ss) && (ss->pending_priority <= ss->mfrr)) {
+    if (XISR(icp) && (icp->pending_priority <= icp->mfrr)) {
         return;
     }
 
-    trace_xics_icp_check_ipi(ss->cs->cpu_index, ss->mfrr);
+    trace_xics_icp_check_ipi(icp->cs->cpu_index, icp->mfrr);
 
-    if (XISR(ss) && ss->xirr_owner) {
-        ics_reject(ss->xirr_owner, XISR(ss));
+    if (XISR(icp) && icp->xirr_owner) {
+        ics_reject(icp->xirr_owner, XISR(icp));
     }
 
-    ss->xirr = (ss->xirr & ~XISR_MASK) | XICS_IPI;
-    ss->pending_priority = ss->mfrr;
-    ss->xirr_owner = NULL;
-    qemu_irq_raise(ss->output);
+    icp->xirr = (icp->xirr & ~XISR_MASK) | XICS_IPI;
+    icp->pending_priority = icp->mfrr;
+    icp->xirr_owner = NULL;
+    qemu_irq_raise(icp->output);
 }
 
-static void icp_resend(ICPState *ss)
+void icp_resend(ICPState *icp)
 {
-    ICSState *ics;
+    XICSFabric *xi = icp->xics;
+    XICSFabricClass *xic = XICS_FABRIC_GET_CLASS(xi);
 
-    if (ss->mfrr < CPPR(ss)) {
-        icp_check_ipi(ss);
-    }
-    QLIST_FOREACH(ics, &ss->xics->ics, list) {
-        ics_resend(ics);
+    if (icp->mfrr < CPPR(icp)) {
+        icp_check_ipi(icp);
     }
+
+    xic->ics_resend(xi);
 }
 
-void icp_set_cppr(ICPState *ss, uint8_t cppr)
+void icp_set_cppr(ICPState *icp, uint8_t cppr)
 {
     uint8_t old_cppr;
     uint32_t old_xisr;
 
-    old_cppr = CPPR(ss);
-    ss->xirr = (ss->xirr & ~CPPR_MASK) | (cppr << 24);
+    old_cppr = CPPR(icp);
+    icp->xirr = (icp->xirr & ~CPPR_MASK) | (cppr << 24);
 
     if (cppr < old_cppr) {
-        if (XISR(ss) && (cppr <= ss->pending_priority)) {
-            old_xisr = XISR(ss);
-            ss->xirr &= ~XISR_MASK; /* Clear XISR */
-            ss->pending_priority = 0xff;
-            qemu_irq_lower(ss->output);
-            if (ss->xirr_owner) {
-                ics_reject(ss->xirr_owner, old_xisr);
-                ss->xirr_owner = NULL;
+        if (XISR(icp) && (cppr <= icp->pending_priority)) {
+            old_xisr = XISR(icp);
+            icp->xirr &= ~XISR_MASK; /* Clear XISR */
+            icp->pending_priority = 0xff;
+            qemu_irq_lower(icp->output);
+            if (icp->xirr_owner) {
+                ics_reject(icp->xirr_owner, old_xisr);
+                icp->xirr_owner = NULL;
             }
         }
     } else {
-        if (!XISR(ss)) {
-            icp_resend(ss);
+        if (!XISR(icp)) {
+            icp_resend(icp);
         }
     }
 }
 
-void icp_set_mfrr(ICPState *ss, uint8_t mfrr)
+void icp_set_mfrr(ICPState *icp, uint8_t mfrr)
 {
-    ss->mfrr = mfrr;
-    if (mfrr < CPPR(ss)) {
-        icp_check_ipi(ss);
+    icp->mfrr = mfrr;
+    if (mfrr < CPPR(icp)) {
+        icp_check_ipi(icp);
     }
 }
 
-uint32_t icp_accept(ICPState *ss)
+uint32_t icp_accept(ICPState *icp)
 {
-    uint32_t xirr = ss->xirr;
+    uint32_t xirr = icp->xirr;
 
-    qemu_irq_lower(ss->output);
-    ss->xirr = ss->pending_priority << 24;
-    ss->pending_priority = 0xff;
-    ss->xirr_owner = NULL;
+    qemu_irq_lower(icp->output);
+    icp->xirr = icp->pending_priority << 24;
+    icp->pending_priority = 0xff;
+    icp->xirr_owner = NULL;
 
-    trace_xics_icp_accept(xirr, ss->xirr);
+    trace_xics_icp_accept(xirr, icp->xirr);
 
     return xirr;
 }
 
-uint32_t icp_ipoll(ICPState *ss, uint32_t *mfrr)
+uint32_t icp_ipoll(ICPState *icp, uint32_t *mfrr)
 {
     if (mfrr) {
-        *mfrr = ss->mfrr;
+        *mfrr = icp->mfrr;
     }
-    return ss->xirr;
+    return icp->xirr;
 }
 
-void icp_eoi(ICPState *ss, uint32_t xirr)
+void icp_eoi(ICPState *icp, uint32_t xirr)
 {
+    XICSFabric *xi = icp->xics;
+    XICSFabricClass *xic = XICS_FABRIC_GET_CLASS(xi);
     ICSState *ics;
     uint32_t irq;
 
     /* Send EOI -> ICS */
-    ss->xirr = (ss->xirr & ~CPPR_MASK) | (xirr & CPPR_MASK);
-    trace_xics_icp_eoi(ss->cs->cpu_index, xirr, ss->xirr);
+    icp->xirr = (icp->xirr & ~CPPR_MASK) | (xirr & CPPR_MASK);
+    trace_xics_icp_eoi(icp->cs->cpu_index, xirr, icp->xirr);
     irq = xirr & XISR_MASK;
-    QLIST_FOREACH(ics, &ss->xics->ics, list) {
-        if (ics_valid_irq(ics, irq)) {
-            ics_eoi(ics, irq);
-        }
+
+    ics = xic->ics_get(xi, irq);
+    if (ics) {
+        ics_eoi(ics, irq);
     }
-    if (!XISR(ss)) {
-        icp_resend(ss);
+    if (!XISR(icp)) {
+        icp_resend(icp);
     }
 }
 
 static void icp_irq(ICSState *ics, int server, int nr, uint8_t priority)
 {
-    XICSState *xics = ics->xics;
-    ICPState *ss = xics->ss + server;
+    ICPState *icp = xics_icp_get(ics->xics, server);
 
     trace_xics_icp_irq(server, nr, priority);
 
-    if ((priority >= CPPR(ss))
-        || (XISR(ss) && (ss->pending_priority <= priority))) {
+    if ((priority >= CPPR(icp))
+        || (XISR(icp) && (icp->pending_priority <= priority))) {
         ics_reject(ics, nr);
     } else {
-        if (XISR(ss) && ss->xirr_owner) {
-            ics_reject(ss->xirr_owner, XISR(ss));
-            ss->xirr_owner = NULL;
+        if (XISR(icp) && icp->xirr_owner) {
+            ics_reject(icp->xirr_owner, XISR(icp));
+            icp->xirr_owner = NULL;
         }
-        ss->xirr = (ss->xirr & ~XISR_MASK) | (nr & XISR_MASK);
-        ss->xirr_owner = ics;
-        ss->pending_priority = priority;
-        trace_xics_icp_raise(ss->xirr, ss->pending_priority);
-        qemu_irq_raise(ss->output);
+        icp->xirr = (icp->xirr & ~XISR_MASK) | (nr & XISR_MASK);
+        icp->xirr_owner = ics;
+        icp->pending_priority = priority;
+        trace_xics_icp_raise(icp->xirr, icp->pending_priority);
+        qemu_irq_raise(icp->output);
     }
 }
 
 static void icp_dispatch_pre_save(void *opaque)
 {
-    ICPState *ss = opaque;
-    ICPStateClass *info = ICP_GET_CLASS(ss);
+    ICPState *icp = opaque;
+    ICPStateClass *info = ICP_GET_CLASS(icp);
 
     if (info->pre_save) {
-        info->pre_save(ss);
+        info->pre_save(icp);
     }
 }
 
 static int icp_dispatch_post_load(void *opaque, int version_id)
 {
-    ICPState *ss = opaque;
-    ICPStateClass *info = ICP_GET_CLASS(ss);
+    ICPState *icp = opaque;
+    ICPStateClass *info = ICP_GET_CLASS(icp);
 
     if (info->post_load) {
-        return info->post_load(ss, version_id);
+        return info->post_load(icp, version_id);
     }
 
     return 0;
@@ -485,12 +345,30 @@ static void icp_reset(DeviceState *dev)
     qemu_set_irq(icp->output, 0);
 }
 
+static void icp_realize(DeviceState *dev, Error **errp)
+{
+    ICPState *icp = ICP(dev);
+    Object *obj;
+    Error *err = NULL;
+
+    obj = object_property_get_link(OBJECT(dev), "xics", &err);
+    if (!obj) {
+        error_setg(errp, "%s: required link 'xics' not found: %s",
+                   __func__, error_get_pretty(err));
+        return;
+    }
+
+    icp->xics = XICS_FABRIC(obj);
+}
+
+
 static void icp_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
 
     dc->reset = icp_reset;
     dc->vmsd = &vmstate_icp_server;
+    dc->realize = icp_realize;
 }
 
 static const TypeInfo icp_info = {
@@ -663,17 +541,6 @@ static void ics_simple_reset(DeviceState *dev)
     }
 }
 
-static int ics_simple_post_load(ICSState *ics, int version_id)
-{
-    int i;
-
-    for (i = 0; i < ics->xics->nr_servers; i++) {
-        icp_resend(&ics->xics->ss[i]);
-    }
-
-    return 0;
-}
-
 static void ics_simple_dispatch_pre_save(void *opaque)
 {
     ICSState *ics = opaque;
@@ -746,15 +613,20 @@ static void ics_simple_realize(DeviceState *dev, Error **errp)
     ics->qirqs = qemu_allocate_irqs(ics_simple_set_irq, ics, ics->nr_irqs);
 }
 
+static Property ics_simple_properties[] = {
+    DEFINE_PROP_UINT32("nr-irqs", ICSState, nr_irqs, 0),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
 static void ics_simple_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
     ICSStateClass *isc = ICS_BASE_CLASS(klass);
 
-    dc->realize = ics_simple_realize;
+    isc->realize = ics_simple_realize;
+    dc->props = ics_simple_properties;
     dc->vmsd = &vmstate_ics_simple;
     dc->reset = ics_simple_reset;
-    isc->post_load = ics_simple_post_load;
     isc->reject = ics_simple_reject;
     isc->resend = ics_simple_resend;
     isc->eoi = ics_simple_eoi;
@@ -769,38 +641,69 @@ static const TypeInfo ics_simple_info = {
     .instance_init = ics_simple_initfn,
 };
 
+static void ics_base_realize(DeviceState *dev, Error **errp)
+{
+    ICSStateClass *icsc = ICS_BASE_GET_CLASS(dev);
+    ICSState *ics = ICS_BASE(dev);
+    Object *obj;
+    Error *err = NULL;
+
+    obj = object_property_get_link(OBJECT(dev), "xics", &err);
+    if (!obj) {
+        error_setg(errp, "%s: required link 'xics' not found: %s",
+                   __func__, error_get_pretty(err));
+        return;
+    }
+    ics->xics = XICS_FABRIC(obj);
+
+
+    if (icsc->realize) {
+        icsc->realize(dev, errp);
+    }
+}
+
+static void ics_base_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+
+    dc->realize = ics_base_realize;
+}
+
 static const TypeInfo ics_base_info = {
     .name = TYPE_ICS_BASE,
     .parent = TYPE_DEVICE,
     .abstract = true,
     .instance_size = sizeof(ICSState),
+    .class_init = ics_base_class_init,
     .class_size = sizeof(ICSStateClass),
 };
 
+static const TypeInfo xics_fabric_info = {
+    .name = TYPE_XICS_FABRIC,
+    .parent = TYPE_INTERFACE,
+    .class_size = sizeof(XICSFabricClass),
+};
+
 /*
  * Exported functions
  */
-ICSState *xics_find_source(XICSState *xics, int irq)
+qemu_irq xics_get_qirq(XICSFabric *xi, int irq)
 {
-    ICSState *ics;
+    XICSFabricClass *xic = XICS_FABRIC_GET_CLASS(xi);
+    ICSState *ics = xic->ics_get(xi, irq);
 
-    QLIST_FOREACH(ics, &xics->ics, list) {
-        if (ics_valid_irq(ics, irq)) {
-            return ics;
-        }
+    if (ics) {
+        return ics->qirqs[irq - ics->offset];
     }
+
     return NULL;
 }
 
-qemu_irq xics_get_qirq(XICSState *xics, int irq)
+ICPState *xics_icp_get(XICSFabric *xi, int server)
 {
-    ICSState *ics = xics_find_source(xics, irq);
+    XICSFabricClass *xic = XICS_FABRIC_GET_CLASS(xi);
 
-    if (ics) {
-        return ics->qirqs[irq - ics->offset];
-    }
-
-    return NULL;
+    return xic->icp_get(xi, server);
 }
 
 void ics_set_irq_type(ICSState *ics, int srcno, bool lsi)
@@ -813,10 +716,10 @@ void ics_set_irq_type(ICSState *ics, int srcno, bool lsi)
 
 static void xics_register_types(void)
 {
-    type_register_static(&xics_common_info);
     type_register_static(&ics_simple_info);
     type_register_static(&ics_base_info);
     type_register_static(&icp_info);
+    type_register_static(&xics_fabric_info);
 }
 
 type_init(xics_register_types)
diff --git a/hw/intc/xics_kvm.c b/hw/intc/xics_kvm.c
index 17694eaa87..0a3daca3bb 100644
--- a/hw/intc/xics_kvm.c
+++ b/hw/intc/xics_kvm.c
@@ -40,16 +40,12 @@
 
 #include <sys/ioctl.h>
 
-typedef struct KVMXICSState {
-    XICSState parent_obj;
-
-    int kernel_xics_fd;
-} KVMXICSState;
+static int kernel_xics_fd = -1;
 
 /*
  * ICP-KVM
  */
-static void icp_get_kvm_state(ICPState *ss)
+static void icp_get_kvm_state(ICPState *icp)
 {
     uint64_t state;
     struct kvm_one_reg reg = {
@@ -59,25 +55,25 @@ static void icp_get_kvm_state(ICPState *ss)
     int ret;
 
     /* ICP for this CPU thread is not in use, exiting */
-    if (!ss->cs) {
+    if (!icp->cs) {
         return;
     }
 
-    ret = kvm_vcpu_ioctl(ss->cs, KVM_GET_ONE_REG, &reg);
+    ret = kvm_vcpu_ioctl(icp->cs, KVM_GET_ONE_REG, &reg);
     if (ret != 0) {
         error_report("Unable to retrieve KVM interrupt controller state"
-                " for CPU %ld: %s", kvm_arch_vcpu_id(ss->cs), strerror(errno));
+                " for CPU %ld: %s", kvm_arch_vcpu_id(icp->cs), strerror(errno));
         exit(1);
     }
 
-    ss->xirr = state >> KVM_REG_PPC_ICP_XISR_SHIFT;
-    ss->mfrr = (state >> KVM_REG_PPC_ICP_MFRR_SHIFT)
+    icp->xirr = state >> KVM_REG_PPC_ICP_XISR_SHIFT;
+    icp->mfrr = (state >> KVM_REG_PPC_ICP_MFRR_SHIFT)
         & KVM_REG_PPC_ICP_MFRR_MASK;
-    ss->pending_priority = (state >> KVM_REG_PPC_ICP_PPRI_SHIFT)
+    icp->pending_priority = (state >> KVM_REG_PPC_ICP_PPRI_SHIFT)
         & KVM_REG_PPC_ICP_PPRI_MASK;
 }
 
-static int icp_set_kvm_state(ICPState *ss, int version_id)
+static int icp_set_kvm_state(ICPState *icp, int version_id)
 {
     uint64_t state;
     struct kvm_one_reg reg = {
@@ -87,18 +83,18 @@ static int icp_set_kvm_state(ICPState *ss, int version_id)
     int ret;
 
     /* ICP for this CPU thread is not in use, exiting */
-    if (!ss->cs) {
+    if (!icp->cs) {
         return 0;
     }
 
-    state = ((uint64_t)ss->xirr << KVM_REG_PPC_ICP_XISR_SHIFT)
-        | ((uint64_t)ss->mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT)
-        | ((uint64_t)ss->pending_priority << KVM_REG_PPC_ICP_PPRI_SHIFT);
+    state = ((uint64_t)icp->xirr << KVM_REG_PPC_ICP_XISR_SHIFT)
+        | ((uint64_t)icp->mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT)
+        | ((uint64_t)icp->pending_priority << KVM_REG_PPC_ICP_PPRI_SHIFT);
 
-    ret = kvm_vcpu_ioctl(ss->cs, KVM_SET_ONE_REG, &reg);
+    ret = kvm_vcpu_ioctl(icp->cs, KVM_SET_ONE_REG, &reg);
     if (ret != 0) {
         error_report("Unable to restore KVM interrupt controller state (0x%"
-                PRIx64 ") for CPU %ld: %s", state, kvm_arch_vcpu_id(ss->cs),
+                PRIx64 ") for CPU %ld: %s", state, kvm_arch_vcpu_id(icp->cs),
                 strerror(errno));
         return ret;
     }
@@ -122,6 +118,34 @@ static void icp_kvm_reset(DeviceState *dev)
     icp_set_kvm_state(icp, 1);
 }
 
+static void icp_kvm_cpu_setup(ICPState *icp, PowerPCCPU *cpu)
+{
+    CPUState *cs = CPU(cpu);
+    int ret;
+
+    if (kernel_xics_fd == -1) {
+        abort();
+    }
+
+    /*
+     * If we are reusing a parked vCPU fd corresponding to the CPU
+     * which was hot-removed earlier we don't have to renable
+     * KVM_CAP_IRQ_XICS capability again.
+     */
+    if (icp->cap_irq_xics_enabled) {
+        return;
+    }
+
+    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_IRQ_XICS, 0, kernel_xics_fd,
+                              kvm_arch_vcpu_id(cs));
+    if (ret < 0) {
+        error_report("Unable to connect CPU%ld to kernel XICS: %s",
+                     kvm_arch_vcpu_id(cs), strerror(errno));
+        exit(1);
+    }
+    icp->cap_irq_xics_enabled = true;
+}
+
 static void icp_kvm_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
@@ -130,6 +154,7 @@ static void icp_kvm_class_init(ObjectClass *klass, void *data)
     dc->reset = icp_kvm_reset;
     icpc->pre_save = icp_get_kvm_state;
     icpc->post_load = icp_set_kvm_state;
+    icpc->cpu_setup = icp_kvm_cpu_setup;
 }
 
 static const TypeInfo icp_kvm_info = {
@@ -145,7 +170,6 @@ static const TypeInfo icp_kvm_info = {
  */
 static void ics_get_kvm_state(ICSState *ics)
 {
-    KVMXICSState *xicskvm = XICS_SPAPR_KVM(ics->xics);
     uint64_t state;
     struct kvm_device_attr attr = {
         .flags = 0,
@@ -160,7 +184,7 @@ static void ics_get_kvm_state(ICSState *ics)
 
         attr.attr = i + ics->offset;
 
-        ret = ioctl(xicskvm->kernel_xics_fd, KVM_GET_DEVICE_ATTR, &attr);
+        ret = ioctl(kernel_xics_fd, KVM_GET_DEVICE_ATTR, &attr);
         if (ret != 0) {
             error_report("Unable to retrieve KVM interrupt controller state"
                     " for IRQ %d: %s", i + ics->offset, strerror(errno));
@@ -204,7 +228,6 @@ static void ics_get_kvm_state(ICSState *ics)
 
 static int ics_set_kvm_state(ICSState *ics, int version_id)
 {
-    KVMXICSState *xicskvm = XICS_SPAPR_KVM(ics->xics);
     uint64_t state;
     struct kvm_device_attr attr = {
         .flags = 0,
@@ -238,7 +261,7 @@ static int ics_set_kvm_state(ICSState *ics, int version_id)
             }
         }
 
-        ret = ioctl(xicskvm->kernel_xics_fd, KVM_SET_DEVICE_ATTR, &attr);
+        ret = ioctl(kernel_xics_fd, KVM_SET_DEVICE_ATTR, &attr);
         if (ret != 0) {
             error_report("Unable to restore KVM interrupt controller state"
                     " for IRQs %d: %s", i + ics->offset, strerror(errno));
@@ -308,7 +331,7 @@ static void ics_kvm_class_init(ObjectClass *klass, void *data)
     DeviceClass *dc = DEVICE_CLASS(klass);
     ICSStateClass *icsc = ICS_BASE_CLASS(klass);
 
-    dc->realize = ics_kvm_realize;
+    icsc->realize = ics_kvm_realize;
     dc->reset = ics_kvm_reset;
     icsc->pre_save = ics_get_kvm_state;
     icsc->post_load = ics_set_kvm_state;
@@ -324,57 +347,6 @@ static const TypeInfo ics_kvm_info = {
 /*
  * XICS-KVM
  */
-static void xics_kvm_cpu_setup(XICSState *xics, PowerPCCPU *cpu)
-{
-    CPUState *cs;
-    ICPState *ss;
-    KVMXICSState *xicskvm = XICS_SPAPR_KVM(xics);
-    int ret;
-
-    cs = CPU(cpu);
-    ss = &xics->ss[cs->cpu_index];
-
-    assert(cs->cpu_index < xics->nr_servers);
-    if (xicskvm->kernel_xics_fd == -1) {
-        abort();
-    }
-
-    /*
-     * If we are reusing a parked vCPU fd corresponding to the CPU
-     * which was hot-removed earlier we don't have to renable
-     * KVM_CAP_IRQ_XICS capability again.
-     */
-    if (ss->cap_irq_xics_enabled) {
-        return;
-    }
-
-    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_IRQ_XICS, 0, xicskvm->kernel_xics_fd,
-                              kvm_arch_vcpu_id(cs));
-    if (ret < 0) {
-        error_report("Unable to connect CPU%ld to kernel XICS: %s",
-                     kvm_arch_vcpu_id(cs), strerror(errno));
-        exit(1);
-    }
-    ss->cap_irq_xics_enabled = true;
-}
-
-static void xics_kvm_set_nr_irqs(XICSState *xics, uint32_t nr_irqs,
-                                 Error **errp)
-{
-    ICSState *ics = QLIST_FIRST(&xics->ics);
-
-    /* This needs to be deprecated ... */
-    xics->nr_irqs = nr_irqs;
-    if (ics) {
-        ics->nr_irqs = nr_irqs;
-    }
-}
-
-static void xics_kvm_set_nr_servers(XICSState *xics, uint32_t nr_servers,
-                                    Error **errp)
-{
-    xics_set_nr_servers(xics, nr_servers, TYPE_KVM_ICP, errp);
-}
 
 static void rtas_dummy(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                        uint32_t token,
@@ -385,13 +357,9 @@ static void rtas_dummy(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                  __func__);
 }
 
-static void xics_kvm_realize(DeviceState *dev, Error **errp)
+int xics_kvm_init(sPAPRMachineState *spapr, Error **errp)
 {
-    KVMXICSState *xicskvm = XICS_SPAPR_KVM(dev);
-    XICSState *xics = XICS_COMMON(dev);
-    ICSState *ics;
-    int i, rc;
-    Error *error = NULL;
+    int rc;
     struct kvm_create_device xics_create_device = {
         .type = KVM_DEV_TYPE_XICS,
         .flags = 0,
@@ -439,72 +407,24 @@ static void xics_kvm_realize(DeviceState *dev, Error **errp)
         goto fail;
     }
 
-    xicskvm->kernel_xics_fd = xics_create_device.fd;
-
-    QLIST_FOREACH(ics, &xics->ics, list) {
-        object_property_set_bool(OBJECT(ics), true, "realized", &error);
-        if (error) {
-            error_propagate(errp, error);
-            goto fail;
-        }
-    }
-
-    assert(xics->nr_servers);
-    for (i = 0; i < xics->nr_servers; i++) {
-        object_property_set_bool(OBJECT(&xics->ss[i]), true, "realized",
-                                 &error);
-        if (error) {
-            error_propagate(errp, error);
-            goto fail;
-        }
-    }
+    kernel_xics_fd = xics_create_device.fd;
 
     kvm_kernel_irqchip = true;
     kvm_msi_via_irqfd_allowed = true;
     kvm_gsi_direct_mapping = true;
 
-    return;
+    return rc;
 
 fail:
     kvmppc_define_rtas_kernel_token(0, "ibm,set-xive");
     kvmppc_define_rtas_kernel_token(0, "ibm,get-xive");
     kvmppc_define_rtas_kernel_token(0, "ibm,int-on");
     kvmppc_define_rtas_kernel_token(0, "ibm,int-off");
+    return -1;
 }
 
-static void xics_kvm_initfn(Object *obj)
-{
-    XICSState *xics = XICS_COMMON(obj);
-    ICSState *ics;
-
-    ics = ICS_SIMPLE(object_new(TYPE_ICS_KVM));
-    object_property_add_child(obj, "ics", OBJECT(ics), NULL);
-    ics->xics = xics;
-    QLIST_INSERT_HEAD(&xics->ics, ics, list);
-}
-
-static void xics_kvm_class_init(ObjectClass *oc, void *data)
-{
-    DeviceClass *dc = DEVICE_CLASS(oc);
-    XICSStateClass *xsc = XICS_COMMON_CLASS(oc);
-
-    dc->realize = xics_kvm_realize;
-    xsc->cpu_setup = xics_kvm_cpu_setup;
-    xsc->set_nr_irqs = xics_kvm_set_nr_irqs;
-    xsc->set_nr_servers = xics_kvm_set_nr_servers;
-}
-
-static const TypeInfo xics_spapr_kvm_info = {
-    .name          = TYPE_XICS_SPAPR_KVM,
-    .parent        = TYPE_XICS_COMMON,
-    .instance_size = sizeof(KVMXICSState),
-    .class_init    = xics_kvm_class_init,
-    .instance_init = xics_kvm_initfn,
-};
-
 static void xics_kvm_register_types(void)
 {
-    type_register_static(&xics_spapr_kvm_info);
     type_register_static(&ics_kvm_info);
     type_register_static(&icp_kvm_info);
 }
diff --git a/hw/intc/xics_spapr.c b/hw/intc/xics_spapr.c
index 2e3f1c5e95..84d24b2837 100644
--- a/hw/intc/xics_spapr.c
+++ b/hw/intc/xics_spapr.c
@@ -44,7 +44,7 @@ static target_ulong h_cppr(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                            target_ulong opcode, target_ulong *args)
 {
     CPUState *cs = CPU(cpu);
-    ICPState *icp = &spapr->xics->ss[cs->cpu_index];
+    ICPState *icp = xics_icp_get(XICS_FABRIC(spapr), cs->cpu_index);
     target_ulong cppr = args[0];
 
     icp_set_cppr(icp, cppr);
@@ -56,12 +56,13 @@ static target_ulong h_ipi(PowerPCCPU *cpu, sPAPRMachineState *spapr,
 {
     target_ulong server = xics_get_cpu_index_by_dt_id(args[0]);
     target_ulong mfrr = args[1];
+    ICPState *icp = xics_icp_get(XICS_FABRIC(spapr), server);
 
-    if (server >= spapr->xics->nr_servers) {
+    if (!icp) {
         return H_PARAMETER;
     }
 
-    icp_set_mfrr(spapr->xics->ss + server, mfrr);
+    icp_set_mfrr(icp, mfrr);
     return H_SUCCESS;
 }
 
@@ -69,7 +70,7 @@ static target_ulong h_xirr(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                            target_ulong opcode, target_ulong *args)
 {
     CPUState *cs = CPU(cpu);
-    ICPState *icp = &spapr->xics->ss[cs->cpu_index];
+    ICPState *icp = xics_icp_get(XICS_FABRIC(spapr), cs->cpu_index);
     uint32_t xirr = icp_accept(icp);
 
     args[0] = xirr;
@@ -80,7 +81,7 @@ static target_ulong h_xirr_x(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                              target_ulong opcode, target_ulong *args)
 {
     CPUState *cs = CPU(cpu);
-    ICPState *icp = &spapr->xics->ss[cs->cpu_index];
+    ICPState *icp = xics_icp_get(XICS_FABRIC(spapr), cs->cpu_index);
     uint32_t xirr = icp_accept(icp);
 
     args[0] = xirr;
@@ -92,7 +93,7 @@ static target_ulong h_eoi(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                           target_ulong opcode, target_ulong *args)
 {
     CPUState *cs = CPU(cpu);
-    ICPState *icp = &spapr->xics->ss[cs->cpu_index];
+    ICPState *icp = xics_icp_get(XICS_FABRIC(spapr), cs->cpu_index);
     target_ulong xirr = args[0];
 
     icp_eoi(icp, xirr);
@@ -103,7 +104,7 @@ static target_ulong h_ipoll(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                             target_ulong opcode, target_ulong *args)
 {
     CPUState *cs = CPU(cpu);
-    ICPState *icp = &spapr->xics->ss[cs->cpu_index];
+    ICPState *icp = xics_icp_get(XICS_FABRIC(spapr), cs->cpu_index);
     uint32_t mfrr;
     uint32_t xirr = icp_ipoll(icp, &mfrr);
 
@@ -118,7 +119,7 @@ static void rtas_set_xive(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                           uint32_t nargs, target_ulong args,
                           uint32_t nret, target_ulong rets)
 {
-    ICSState *ics = QLIST_FIRST(&spapr->xics->ics);
+    ICSState *ics = spapr->ics;
     uint32_t nr, srcno, server, priority;
 
     if ((nargs != 3) || (nret != 1)) {
@@ -134,7 +135,7 @@ static void rtas_set_xive(PowerPCCPU *cpu, sPAPRMachineState *spapr,
     server = xics_get_cpu_index_by_dt_id(rtas_ld(args, 1));
     priority = rtas_ld(args, 2);
 
-    if (!ics_valid_irq(ics, nr) || (server >= ics->xics->nr_servers)
+    if (!ics_valid_irq(ics, nr) || !xics_icp_get(XICS_FABRIC(spapr), server)
         || (priority > 0xff)) {
         rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
         return;
@@ -151,7 +152,7 @@ static void rtas_get_xive(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                           uint32_t nargs, target_ulong args,
                           uint32_t nret, target_ulong rets)
 {
-    ICSState *ics = QLIST_FIRST(&spapr->xics->ics);
+    ICSState *ics = spapr->ics;
     uint32_t nr, srcno;
 
     if ((nargs != 1) || (nret != 3)) {
@@ -181,7 +182,7 @@ static void rtas_int_off(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                          uint32_t nargs, target_ulong args,
                          uint32_t nret, target_ulong rets)
 {
-    ICSState *ics = QLIST_FIRST(&spapr->xics->ics);
+    ICSState *ics = spapr->ics;
     uint32_t nr, srcno;
 
     if ((nargs != 1) || (nret != 1)) {
@@ -212,7 +213,7 @@ static void rtas_int_on(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                         uint32_t nargs, target_ulong args,
                         uint32_t nret, target_ulong rets)
 {
-    ICSState *ics = QLIST_FIRST(&spapr->xics->ics);
+    ICSState *ics = spapr->ics;
     uint32_t nr, srcno;
 
     if ((nargs != 1) || (nret != 1)) {
@@ -239,36 +240,8 @@ static void rtas_int_on(PowerPCCPU *cpu, sPAPRMachineState *spapr,
     rtas_st(rets, 0, RTAS_OUT_SUCCESS);
 }
 
-static void xics_spapr_set_nr_irqs(XICSState *xics, uint32_t nr_irqs,
-                                   Error **errp)
+int xics_spapr_init(sPAPRMachineState *spapr, Error **errp)
 {
-    ICSState *ics = QLIST_FIRST(&xics->ics);
-
-    /* This needs to be deprecated ... */
-    xics->nr_irqs = nr_irqs;
-    if (ics) {
-        ics->nr_irqs = nr_irqs;
-    }
-}
-
-static void xics_spapr_set_nr_servers(XICSState *xics, uint32_t nr_servers,
-                                      Error **errp)
-{
-    xics_set_nr_servers(xics, nr_servers, TYPE_ICP, errp);
-}
-
-static void xics_spapr_realize(DeviceState *dev, Error **errp)
-{
-    XICSState *xics = XICS_SPAPR(dev);
-    ICSState *ics;
-    Error *error = NULL;
-    int i;
-
-    if (!xics->nr_servers) {
-        error_setg(errp, "Number of servers needs to be greater 0");
-        return;
-    }
-
     /* Registration of global state belongs into realize */
     spapr_rtas_register(RTAS_IBM_SET_XIVE, "ibm,set-xive", rtas_set_xive);
     spapr_rtas_register(RTAS_IBM_GET_XIVE, "ibm,get-xive", rtas_get_xive);
@@ -281,55 +254,9 @@ static void xics_spapr_realize(DeviceState *dev, Error **errp)
     spapr_register_hypercall(H_XIRR_X, h_xirr_x);
     spapr_register_hypercall(H_EOI, h_eoi);
     spapr_register_hypercall(H_IPOLL, h_ipoll);
-
-    QLIST_FOREACH(ics, &xics->ics, list) {
-        object_property_set_bool(OBJECT(ics), true, "realized", &error);
-        if (error) {
-            error_propagate(errp, error);
-            return;
-        }
-    }
-
-    for (i = 0; i < xics->nr_servers; i++) {
-        object_property_set_bool(OBJECT(&xics->ss[i]), true, "realized",
-                                 &error);
-        if (error) {
-            error_propagate(errp, error);
-            return;
-        }
-    }
-}
-
-static void xics_spapr_initfn(Object *obj)
-{
-    XICSState *xics = XICS_SPAPR(obj);
-    ICSState *ics;
-
-    ics = ICS_SIMPLE(object_new(TYPE_ICS_SIMPLE));
-    object_property_add_child(obj, "ics", OBJECT(ics), NULL);
-    ics->xics = xics;
-    QLIST_INSERT_HEAD(&xics->ics, ics, list);
-}
-
-static void xics_spapr_class_init(ObjectClass *oc, void *data)
-{
-    DeviceClass *dc = DEVICE_CLASS(oc);
-    XICSStateClass *xsc = XICS_SPAPR_CLASS(oc);
-
-    dc->realize = xics_spapr_realize;
-    xsc->set_nr_irqs = xics_spapr_set_nr_irqs;
-    xsc->set_nr_servers = xics_spapr_set_nr_servers;
+    return 0;
 }
 
-static const TypeInfo xics_spapr_info = {
-    .name          = TYPE_XICS_SPAPR,
-    .parent        = TYPE_XICS_COMMON,
-    .instance_size = sizeof(XICSState),
-    .class_size = sizeof(XICSStateClass),
-    .class_init    = xics_spapr_class_init,
-    .instance_init = xics_spapr_initfn,
-};
-
 #define ICS_IRQ_FREE(ics, srcno)   \
     (!((ics)->irqs[(srcno)].flags & (XICS_FLAGS_IRQ_MASK)))
 
@@ -354,9 +281,8 @@ static int ics_find_free_block(ICSState *ics, int num, int alignnum)
     return -1;
 }
 
-int xics_spapr_alloc(XICSState *xics, int irq_hint, bool lsi, Error **errp)
+int spapr_ics_alloc(ICSState *ics, int irq_hint, bool lsi, Error **errp)
 {
-    ICSState *ics = QLIST_FIRST(&xics->ics);
     int irq;
 
     if (!ics) {
@@ -387,10 +313,9 @@ int xics_spapr_alloc(XICSState *xics, int irq_hint, bool lsi, Error **errp)
  * Allocate block of consecutive IRQs, and return the number of the first IRQ in
  * the block. If align==true, aligns the first IRQ number to num.
  */
-int xics_spapr_alloc_block(XICSState *xics, int num, bool lsi, bool align,
-                           Error **errp)
+int spapr_ics_alloc_block(ICSState *ics, int num, bool lsi,
+                          bool align, Error **errp)
 {
-    ICSState *ics = QLIST_FIRST(&xics->ics);
     int i, first = -1;
 
     if (!ics) {
@@ -440,20 +365,18 @@ static void ics_free(ICSState *ics, int srcno, int num)
     }
 }
 
-void xics_spapr_free(XICSState *xics, int irq, int num)
+void spapr_ics_free(ICSState *ics, int irq, int num)
 {
-    ICSState *ics = xics_find_source(xics, irq);
-
-    if (ics) {
+    if (ics_valid_irq(ics, irq)) {
         trace_xics_ics_free(0, irq, num);
         ics_free(ics, irq - ics->offset, num);
     }
 }
 
-void spapr_dt_xics(XICSState *xics, void *fdt, uint32_t phandle)
+void spapr_dt_xics(int nr_servers, void *fdt, uint32_t phandle)
 {
     uint32_t interrupt_server_ranges_prop[] = {
-        0, cpu_to_be32(xics->nr_servers),
+        0, cpu_to_be32(nr_servers),
     };
     int node;
 
@@ -470,10 +393,3 @@ void spapr_dt_xics(XICSState *xics, void *fdt, uint32_t phandle)
     _FDT(fdt_setprop_cell(fdt, node, "linux,phandle", phandle));
     _FDT(fdt_setprop_cell(fdt, node, "phandle", phandle));
 }
-
-static void xics_spapr_register_types(void)
-{
-    type_register_static(&xics_spapr_info);
-}
-
-type_init(xics_spapr_register_types)
diff --git a/hw/mips/Makefile.objs b/hw/mips/Makefile.objs
index 9352a1c062..48cd2ef50e 100644
--- a/hw/mips/Makefile.objs
+++ b/hw/mips/Makefile.objs
@@ -4,3 +4,4 @@ obj-$(CONFIG_JAZZ) += mips_jazz.o
 obj-$(CONFIG_FULONG) += mips_fulong2e.o
 obj-y += gt64xxx_pci.o
 obj-$(CONFIG_MIPS_CPS) += cps.o
+obj-$(CONFIG_MIPS_BOSTON) += boston.o
diff --git a/hw/mips/boston.c b/hw/mips/boston.c
new file mode 100644
index 0000000000..83f7b82386
--- /dev/null
+++ b/hw/mips/boston.c
@@ -0,0 +1,577 @@
+/*
+ * MIPS Boston development board emulation.
+ *
+ * Copyright (c) 2016 Imagination Technologies
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+
+#include "exec/address-spaces.h"
+#include "hw/boards.h"
+#include "hw/char/serial.h"
+#include "hw/hw.h"
+#include "hw/ide/pci.h"
+#include "hw/ide/ahci.h"
+#include "hw/loader.h"
+#include "hw/loader-fit.h"
+#include "hw/mips/cps.h"
+#include "hw/mips/cpudevs.h"
+#include "hw/pci-host/xilinx-pcie.h"
+#include "qapi/error.h"
+#include "qemu/cutils.h"
+#include "qemu/error-report.h"
+#include "qemu/log.h"
+#include "sysemu/char.h"
+#include "sysemu/device_tree.h"
+#include "sysemu/sysemu.h"
+#include "sysemu/qtest.h"
+
+#include <libfdt.h>
+
+#define TYPE_MIPS_BOSTON "mips-boston"
+#define BOSTON(obj) OBJECT_CHECK(BostonState, (obj), TYPE_MIPS_BOSTON)
+
+typedef struct {
+    SysBusDevice parent_obj;
+
+    MachineState *mach;
+    MIPSCPSState *cps;
+    SerialState *uart;
+
+    CharBackend lcd_display;
+    char lcd_content[8];
+    bool lcd_inited;
+
+    hwaddr kernel_entry;
+    hwaddr fdt_base;
+} BostonState;
+
+enum boston_plat_reg {
+    PLAT_FPGA_BUILD     = 0x00,
+    PLAT_CORE_CL        = 0x04,
+    PLAT_WRAPPER_CL     = 0x08,
+    PLAT_SYSCLK_STATUS  = 0x0c,
+    PLAT_SOFTRST_CTL    = 0x10,
+#define PLAT_SOFTRST_CTL_SYSRESET       (1 << 4)
+    PLAT_DDR3_STATUS    = 0x14,
+#define PLAT_DDR3_STATUS_LOCKED         (1 << 0)
+#define PLAT_DDR3_STATUS_CALIBRATED     (1 << 2)
+    PLAT_PCIE_STATUS    = 0x18,
+#define PLAT_PCIE_STATUS_PCIE0_LOCKED   (1 << 0)
+#define PLAT_PCIE_STATUS_PCIE1_LOCKED   (1 << 8)
+#define PLAT_PCIE_STATUS_PCIE2_LOCKED   (1 << 16)
+    PLAT_FLASH_CTL      = 0x1c,
+    PLAT_SPARE0         = 0x20,
+    PLAT_SPARE1         = 0x24,
+    PLAT_SPARE2         = 0x28,
+    PLAT_SPARE3         = 0x2c,
+    PLAT_MMCM_DIV       = 0x30,
+#define PLAT_MMCM_DIV_CLK0DIV_SHIFT     0
+#define PLAT_MMCM_DIV_INPUT_SHIFT       8
+#define PLAT_MMCM_DIV_MUL_SHIFT         16
+#define PLAT_MMCM_DIV_CLK1DIV_SHIFT     24
+    PLAT_BUILD_CFG      = 0x34,
+#define PLAT_BUILD_CFG_IOCU_EN          (1 << 0)
+#define PLAT_BUILD_CFG_PCIE0_EN         (1 << 1)
+#define PLAT_BUILD_CFG_PCIE1_EN         (1 << 2)
+#define PLAT_BUILD_CFG_PCIE2_EN         (1 << 3)
+    PLAT_DDR_CFG        = 0x38,
+#define PLAT_DDR_CFG_SIZE               (0xf << 0)
+#define PLAT_DDR_CFG_MHZ                (0xfff << 4)
+    PLAT_NOC_PCIE0_ADDR = 0x3c,
+    PLAT_NOC_PCIE1_ADDR = 0x40,
+    PLAT_NOC_PCIE2_ADDR = 0x44,
+    PLAT_SYS_CTL        = 0x48,
+};
+
+static void boston_lcd_event(void *opaque, int event)
+{
+    BostonState *s = opaque;
+    if (event == CHR_EVENT_OPENED && !s->lcd_inited) {
+        qemu_chr_fe_printf(&s->lcd_display, "        ");
+        s->lcd_inited = true;
+    }
+}
+
+static uint64_t boston_lcd_read(void *opaque, hwaddr addr,
+                                unsigned size)
+{
+    BostonState *s = opaque;
+    uint64_t val = 0;
+
+    switch (size) {
+    case 8:
+        val |= (uint64_t)s->lcd_content[(addr + 7) & 0x7] << 56;
+        val |= (uint64_t)s->lcd_content[(addr + 6) & 0x7] << 48;
+        val |= (uint64_t)s->lcd_content[(addr + 5) & 0x7] << 40;
+        val |= (uint64_t)s->lcd_content[(addr + 4) & 0x7] << 32;
+        /* fall through */
+    case 4:
+        val |= (uint64_t)s->lcd_content[(addr + 3) & 0x7] << 24;
+        val |= (uint64_t)s->lcd_content[(addr + 2) & 0x7] << 16;
+        /* fall through */
+    case 2:
+        val |= (uint64_t)s->lcd_content[(addr + 1) & 0x7] << 8;
+        /* fall through */
+    case 1:
+        val |= (uint64_t)s->lcd_content[(addr + 0) & 0x7];
+        break;
+    }
+
+    return val;
+}
+
+static void boston_lcd_write(void *opaque, hwaddr addr,
+                             uint64_t val, unsigned size)
+{
+    BostonState *s = opaque;
+
+    switch (size) {
+    case 8:
+        s->lcd_content[(addr + 7) & 0x7] = val >> 56;
+        s->lcd_content[(addr + 6) & 0x7] = val >> 48;
+        s->lcd_content[(addr + 5) & 0x7] = val >> 40;
+        s->lcd_content[(addr + 4) & 0x7] = val >> 32;
+        /* fall through */
+    case 4:
+        s->lcd_content[(addr + 3) & 0x7] = val >> 24;
+        s->lcd_content[(addr + 2) & 0x7] = val >> 16;
+        /* fall through */
+    case 2:
+        s->lcd_content[(addr + 1) & 0x7] = val >> 8;
+        /* fall through */
+    case 1:
+        s->lcd_content[(addr + 0) & 0x7] = val;
+        break;
+    }
+
+    qemu_chr_fe_printf(&s->lcd_display,
+                       "\r%-8.8s", s->lcd_content);
+}
+
+static const MemoryRegionOps boston_lcd_ops = {
+    .read = boston_lcd_read,
+    .write = boston_lcd_write,
+    .endianness = DEVICE_NATIVE_ENDIAN,
+};
+
+static uint64_t boston_platreg_read(void *opaque, hwaddr addr,
+                                    unsigned size)
+{
+    BostonState *s = opaque;
+    uint32_t gic_freq, val;
+
+    if (size != 4) {
+        qemu_log_mask(LOG_UNIMP, "%uB platform register read", size);
+        return 0;
+    }
+
+    switch (addr & 0xffff) {
+    case PLAT_FPGA_BUILD:
+    case PLAT_CORE_CL:
+    case PLAT_WRAPPER_CL:
+        return 0;
+    case PLAT_DDR3_STATUS:
+        return PLAT_DDR3_STATUS_LOCKED | PLAT_DDR3_STATUS_CALIBRATED;
+    case PLAT_MMCM_DIV:
+        gic_freq = mips_gictimer_get_freq(s->cps->gic.gic_timer) / 1000000;
+        val = gic_freq << PLAT_MMCM_DIV_INPUT_SHIFT;
+        val |= 1 << PLAT_MMCM_DIV_MUL_SHIFT;
+        val |= 1 << PLAT_MMCM_DIV_CLK0DIV_SHIFT;
+        val |= 1 << PLAT_MMCM_DIV_CLK1DIV_SHIFT;
+        return val;
+    case PLAT_BUILD_CFG:
+        val = PLAT_BUILD_CFG_PCIE0_EN;
+        val |= PLAT_BUILD_CFG_PCIE1_EN;
+        val |= PLAT_BUILD_CFG_PCIE2_EN;
+        return val;
+    case PLAT_DDR_CFG:
+        val = s->mach->ram_size / G_BYTE;
+        assert(!(val & ~PLAT_DDR_CFG_SIZE));
+        val |= PLAT_DDR_CFG_MHZ;
+        return val;
+    default:
+        qemu_log_mask(LOG_UNIMP, "Read platform register 0x%" HWADDR_PRIx,
+                      addr & 0xffff);
+        return 0;
+    }
+}
+
+static void boston_platreg_write(void *opaque, hwaddr addr,
+                                 uint64_t val, unsigned size)
+{
+    if (size != 4) {
+        qemu_log_mask(LOG_UNIMP, "%uB platform register write", size);
+        return;
+    }
+
+    switch (addr & 0xffff) {
+    case PLAT_FPGA_BUILD:
+    case PLAT_CORE_CL:
+    case PLAT_WRAPPER_CL:
+    case PLAT_DDR3_STATUS:
+    case PLAT_PCIE_STATUS:
+    case PLAT_MMCM_DIV:
+    case PLAT_BUILD_CFG:
+    case PLAT_DDR_CFG:
+        /* read only */
+        break;
+    case PLAT_SOFTRST_CTL:
+        if (val & PLAT_SOFTRST_CTL_SYSRESET) {
+            qemu_system_reset_request();
+        }
+        break;
+    default:
+        qemu_log_mask(LOG_UNIMP, "Write platform register 0x%" HWADDR_PRIx
+                      " = 0x%" PRIx64, addr & 0xffff, val);
+        break;
+    }
+}
+
+static const MemoryRegionOps boston_platreg_ops = {
+    .read = boston_platreg_read,
+    .write = boston_platreg_write,
+    .endianness = DEVICE_NATIVE_ENDIAN,
+};
+
+static void boston_flash_write(void *opaque, hwaddr addr,
+                               uint64_t val, unsigned size)
+{
+}
+
+static const MemoryRegionOps boston_flash_ops = {
+    .write = boston_flash_write,
+    .endianness = DEVICE_NATIVE_ENDIAN,
+};
+
+static const TypeInfo boston_device = {
+    .name          = TYPE_MIPS_BOSTON,
+    .parent        = TYPE_SYS_BUS_DEVICE,
+    .instance_size = sizeof(BostonState),
+};
+
+static void boston_register_types(void)
+{
+    type_register_static(&boston_device);
+}
+type_init(boston_register_types)
+
+static void gen_firmware(uint32_t *p, hwaddr kernel_entry, hwaddr fdt_addr,
+                         bool is_64b)
+{
+    const uint32_t cm_base = 0x16100000;
+    const uint32_t gic_base = 0x16120000;
+    const uint32_t cpc_base = 0x16200000;
+
+    /* Move CM GCRs */
+    if (is_64b) {
+        stl_p(p++, 0x40287803);                 /* dmfc0 $8, CMGCRBase */
+        stl_p(p++, 0x00084138);                 /* dsll $8, $8, 4 */
+    } else {
+        stl_p(p++, 0x40087803);                 /* mfc0 $8, CMGCRBase */
+        stl_p(p++, 0x00084100);                 /* sll  $8, $8, 4 */
+    }
+    stl_p(p++, 0x3c09a000);                     /* lui  $9, 0xa000 */
+    stl_p(p++, 0x01094025);                     /* or   $8, $9 */
+    stl_p(p++, 0x3c0a0000 | (cm_base >> 16));   /* lui  $10, cm_base >> 16 */
+    if (is_64b) {
+        stl_p(p++, 0xfd0a0008);                 /* sd   $10, 0x8($8) */
+    } else {
+        stl_p(p++, 0xad0a0008);                 /* sw   $10, 0x8($8) */
+    }
+    stl_p(p++, 0x012a4025);                     /* or   $8, $10 */
+
+    /* Move & enable GIC GCRs */
+    stl_p(p++, 0x3c090000 | (gic_base >> 16));  /* lui  $9, gic_base >> 16 */
+    stl_p(p++, 0x35290001);                     /* ori  $9, 0x1 */
+    if (is_64b) {
+        stl_p(p++, 0xfd090080);                 /* sd   $9, 0x80($8) */
+    } else {
+        stl_p(p++, 0xad090080);                 /* sw   $9, 0x80($8) */
+    }
+
+    /* Move & enable CPC GCRs */
+    stl_p(p++, 0x3c090000 | (cpc_base >> 16));  /* lui  $9, cpc_base >> 16 */
+    stl_p(p++, 0x35290001);                     /* ori  $9, 0x1 */
+    if (is_64b) {
+        stl_p(p++, 0xfd090088);                 /* sd   $9, 0x88($8) */
+    } else {
+        stl_p(p++, 0xad090088);                 /* sw   $9, 0x88($8) */
+    }
+
+    /*
+     * Setup argument registers to follow the UHI boot protocol:
+     *
+     * a0/$4 = -2
+     * a1/$5 = virtual address of FDT
+     * a2/$6 = 0
+     * a3/$7 = 0
+     */
+    stl_p(p++, 0x2404fffe);                     /* li   $4, -2 */
+                                                /* lui  $5, hi(fdt_addr) */
+    stl_p(p++, 0x3c050000 | ((fdt_addr >> 16) & 0xffff));
+    if (fdt_addr & 0xffff) {                    /* ori  $5, lo(fdt_addr) */
+        stl_p(p++, 0x34a50000 | (fdt_addr & 0xffff));
+    }
+    stl_p(p++, 0x34060000);                     /* li   $6, 0 */
+    stl_p(p++, 0x34070000);                     /* li   $7, 0 */
+
+    /* Load kernel entry address & jump to it */
+                                                /* lui  $25, hi(kernel_entry) */
+    stl_p(p++, 0x3c190000 | ((kernel_entry >> 16) & 0xffff));
+                                                /* ori  $25, lo(kernel_entry) */
+    stl_p(p++, 0x37390000 | (kernel_entry & 0xffff));
+    stl_p(p++, 0x03200009);                     /* jr   $25 */
+}
+
+static const void *boston_fdt_filter(void *opaque, const void *fdt_orig,
+                                     const void *match_data, hwaddr *load_addr)
+{
+    BostonState *s = BOSTON(opaque);
+    MachineState *machine = s->mach;
+    const char *cmdline;
+    int err;
+    void *fdt;
+    size_t fdt_sz, ram_low_sz, ram_high_sz;
+
+    fdt_sz = fdt_totalsize(fdt_orig) * 2;
+    fdt = g_malloc0(fdt_sz);
+
+    err = fdt_open_into(fdt_orig, fdt, fdt_sz);
+    if (err) {
+        fprintf(stderr, "unable to open FDT\n");
+        return NULL;
+    }
+
+    cmdline = (machine->kernel_cmdline && machine->kernel_cmdline[0])
+            ? machine->kernel_cmdline : " ";
+    err = qemu_fdt_setprop_string(fdt, "/chosen", "bootargs", cmdline);
+    if (err < 0) {
+        fprintf(stderr, "couldn't set /chosen/bootargs\n");
+        return NULL;
+    }
+
+    ram_low_sz = MIN(256 * M_BYTE, machine->ram_size);
+    ram_high_sz = machine->ram_size - ram_low_sz;
+    qemu_fdt_setprop_sized_cells(fdt, "/memory@0", "reg",
+                                 1, 0x00000000, 1, ram_low_sz,
+                                 1, 0x90000000, 1, ram_high_sz);
+
+    fdt = g_realloc(fdt, fdt_totalsize(fdt));
+    qemu_fdt_dumpdtb(fdt, fdt_sz);
+
+    s->fdt_base = *load_addr;
+
+    return fdt;
+}
+
+static const void *boston_kernel_filter(void *opaque, const void *kernel,
+                                        hwaddr *load_addr, hwaddr *entry_addr)
+{
+    BostonState *s = BOSTON(opaque);
+
+    s->kernel_entry = *entry_addr;
+
+    return kernel;
+}
+
+static const struct fit_loader_match boston_matches[] = {
+    { "img,boston" },
+    { NULL },
+};
+
+static const struct fit_loader boston_fit_loader = {
+    .matches = boston_matches,
+    .addr_to_phys = cpu_mips_kseg0_to_phys,
+    .fdt_filter = boston_fdt_filter,
+    .kernel_filter = boston_kernel_filter,
+};
+
+static inline XilinxPCIEHost *
+xilinx_pcie_init(MemoryRegion *sys_mem, uint32_t bus_nr,
+                 hwaddr cfg_base, uint64_t cfg_size,
+                 hwaddr mmio_base, uint64_t mmio_size,
+                 qemu_irq irq, bool link_up)
+{
+    DeviceState *dev;
+    MemoryRegion *cfg, *mmio;
+
+    dev = qdev_create(NULL, TYPE_XILINX_PCIE_HOST);
+
+    qdev_prop_set_uint32(dev, "bus_nr", bus_nr);
+    qdev_prop_set_uint64(dev, "cfg_base", cfg_base);
+    qdev_prop_set_uint64(dev, "cfg_size", cfg_size);
+    qdev_prop_set_uint64(dev, "mmio_base", mmio_base);
+    qdev_prop_set_uint64(dev, "mmio_size", mmio_size);
+    qdev_prop_set_bit(dev, "link_up", link_up);
+
+    qdev_init_nofail(dev);
+
+    cfg = sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), 0);
+    memory_region_add_subregion_overlap(sys_mem, cfg_base, cfg, 0);
+
+    mmio = sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), 1);
+    memory_region_add_subregion_overlap(sys_mem, 0, mmio, 0);
+
+    qdev_connect_gpio_out_named(dev, "interrupt_out", 0, irq);
+
+    return XILINX_PCIE_HOST(dev);
+}
+
+static void boston_mach_init(MachineState *machine)
+{
+    DeviceState *dev;
+    BostonState *s;
+    Error *err = NULL;
+    const char *cpu_model;
+    MemoryRegion *flash, *ddr, *ddr_low_alias, *lcd, *platreg;
+    MemoryRegion *sys_mem = get_system_memory();
+    XilinxPCIEHost *pcie2;
+    PCIDevice *ahci;
+    DriveInfo *hd[6];
+    Chardev *chr;
+    int fw_size, fit_err;
+    bool is_64b;
+
+    if ((machine->ram_size % G_BYTE) ||
+        (machine->ram_size > (2 * G_BYTE))) {
+        error_report("Memory size must be 1GB or 2GB");
+        exit(1);
+    }
+
+    cpu_model = machine->cpu_model ?: "I6400";
+
+    dev = qdev_create(NULL, TYPE_MIPS_BOSTON);
+    qdev_init_nofail(dev);
+
+    s = BOSTON(dev);
+    s->mach = machine;
+    s->cps = g_new0(MIPSCPSState, 1);
+
+    if (!cpu_supports_cps_smp(cpu_model)) {
+        error_report("Boston requires CPUs which support CPS");
+        exit(1);
+    }
+
+    is_64b = cpu_supports_isa(cpu_model, ISA_MIPS64);
+
+    object_initialize(s->cps, sizeof(MIPSCPSState), TYPE_MIPS_CPS);
+    qdev_set_parent_bus(DEVICE(s->cps), sysbus_get_default());
+
+    object_property_set_str(OBJECT(s->cps), cpu_model, "cpu-model", &err);
+    object_property_set_int(OBJECT(s->cps), smp_cpus, "num-vp", &err);
+    object_property_set_bool(OBJECT(s->cps), true, "realized", &err);
+
+    if (err != NULL) {
+        error_report("%s", error_get_pretty(err));
+        exit(1);
+    }
+
+    sysbus_mmio_map_overlap(SYS_BUS_DEVICE(s->cps), 0, 0, 1);
+
+    flash =  g_new(MemoryRegion, 1);
+    memory_region_init_rom_device(flash, NULL, &boston_flash_ops, s,
+                                  "boston.flash", 128 * M_BYTE, &err);
+    memory_region_add_subregion_overlap(sys_mem, 0x18000000, flash, 0);
+
+    ddr = g_new(MemoryRegion, 1);
+    memory_region_allocate_system_memory(ddr, NULL, "boston.ddr",
+                                         machine->ram_size);
+    memory_region_add_subregion_overlap(sys_mem, 0x80000000, ddr, 0);
+
+    ddr_low_alias = g_new(MemoryRegion, 1);
+    memory_region_init_alias(ddr_low_alias, NULL, "boston_low.ddr",
+                             ddr, 0, MIN(machine->ram_size, (256 * M_BYTE)));
+    memory_region_add_subregion_overlap(sys_mem, 0, ddr_low_alias, 0);
+
+    xilinx_pcie_init(sys_mem, 0,
+                     0x10000000, 32 * M_BYTE,
+                     0x40000000, 1 * G_BYTE,
+                     get_cps_irq(s->cps, 2), false);
+
+    xilinx_pcie_init(sys_mem, 1,
+                     0x12000000, 32 * M_BYTE,
+                     0x20000000, 512 * M_BYTE,
+                     get_cps_irq(s->cps, 1), false);
+
+    pcie2 = xilinx_pcie_init(sys_mem, 2,
+                             0x14000000, 32 * M_BYTE,
+                             0x16000000, 1 * M_BYTE,
+                             get_cps_irq(s->cps, 0), true);
+
+    platreg = g_new(MemoryRegion, 1);
+    memory_region_init_io(platreg, NULL, &boston_platreg_ops, s,
+                          "boston-platregs", 0x1000);
+    memory_region_add_subregion_overlap(sys_mem, 0x17ffd000, platreg, 0);
+
+    if (!serial_hds[0]) {
+        serial_hds[0] = qemu_chr_new("serial0", "null");
+    }
+
+    s->uart = serial_mm_init(sys_mem, 0x17ffe000, 2,
+                             get_cps_irq(s->cps, 3), 10000000,
+                             serial_hds[0], DEVICE_NATIVE_ENDIAN);
+
+    lcd = g_new(MemoryRegion, 1);
+    memory_region_init_io(lcd, NULL, &boston_lcd_ops, s, "boston-lcd", 0x8);
+    memory_region_add_subregion_overlap(sys_mem, 0x17fff000, lcd, 0);
+
+    chr = qemu_chr_new("lcd", "vc:320x240");
+    qemu_chr_fe_init(&s->lcd_display, chr, NULL);
+    qemu_chr_fe_set_handlers(&s->lcd_display, NULL, NULL,
+                             boston_lcd_event, s, NULL, true);
+
+    ahci = pci_create_simple_multifunction(&PCI_BRIDGE(&pcie2->root)->sec_bus,
+                                           PCI_DEVFN(0, 0),
+                                           true, TYPE_ICH9_AHCI);
+    g_assert(ARRAY_SIZE(hd) == ICH_AHCI(ahci)->ahci.ports);
+    ide_drive_get(hd, ICH_AHCI(ahci)->ahci.ports);
+    ahci_ide_create_devs(ahci, hd);
+
+    if (machine->firmware) {
+        fw_size = load_image_targphys(machine->firmware,
+                                      0x1fc00000, 4 * M_BYTE);
+        if (fw_size == -1) {
+            error_printf("unable to load firmware image '%s'\n",
+                          machine->firmware);
+            exit(1);
+        }
+    } else if (machine->kernel_filename) {
+        fit_err = load_fit(&boston_fit_loader, machine->kernel_filename, s);
+        if (fit_err) {
+            error_printf("unable to load FIT image\n");
+            exit(1);
+        }
+
+        gen_firmware(memory_region_get_ram_ptr(flash) + 0x7c00000,
+                     s->kernel_entry, s->fdt_base, is_64b);
+    } else if (!qtest_enabled()) {
+        error_printf("Please provide either a -kernel or -bios argument\n");
+        exit(1);
+    }
+}
+
+static void boston_mach_class_init(MachineClass *mc)
+{
+    mc->desc = "MIPS Boston";
+    mc->init = boston_mach_init;
+    mc->block_default_type = IF_IDE;
+    mc->default_ram_size = 1 * G_BYTE;
+    mc->max_cpus = 16;
+}
+
+DEFINE_MACHINE("boston", boston_mach_class_init)
diff --git a/hw/misc/Makefile.objs b/hw/misc/Makefile.objs
index 898e4ccfb1..c8b489390f 100644
--- a/hw/misc/Makefile.objs
+++ b/hw/misc/Makefile.objs
@@ -26,7 +26,7 @@ obj-$(CONFIG_IVSHMEM) += ivshmem.o
 obj-$(CONFIG_REALVIEW) += arm_sysctl.o
 obj-$(CONFIG_NSERIES) += cbus.o
 obj-$(CONFIG_ECCMEMCTL) += eccmemctl.o
-obj-$(CONFIG_EXYNOS4) += exynos4210_pmu.o
+obj-$(CONFIG_EXYNOS4) += exynos4210_pmu.o exynos4210_clk.o
 obj-$(CONFIG_IMX) += imx_ccm.o
 obj-$(CONFIG_IMX) += imx31_ccm.o
 obj-$(CONFIG_IMX) += imx25_ccm.o
@@ -42,6 +42,7 @@ obj-$(CONFIG_OMAP) += omap_sdrc.o
 obj-$(CONFIG_OMAP) += omap_tap.o
 obj-$(CONFIG_RASPI) += bcm2835_mbox.o
 obj-$(CONFIG_RASPI) += bcm2835_property.o
+obj-$(CONFIG_RASPI) += bcm2835_rng.o
 obj-$(CONFIG_SLAVIO) += slavio_misc.o
 obj-$(CONFIG_ZYNQ) += zynq_slcr.o
 obj-$(CONFIG_ZYNQ) += zynq-xadc.o
diff --git a/hw/misc/bcm2835_rng.c b/hw/misc/bcm2835_rng.c
new file mode 100644
index 0000000000..4d62143b24
--- /dev/null
+++ b/hw/misc/bcm2835_rng.c
@@ -0,0 +1,149 @@
+/*
+ * BCM2835 Random Number Generator emulation
+ *
+ * Copyright (C) 2017 Marcin Chojnacki <marcinch7@gmail.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/log.h"
+#include "qapi/error.h"
+#include "crypto/random.h"
+#include "hw/misc/bcm2835_rng.h"
+
+static uint32_t get_random_bytes(void)
+{
+    uint32_t res;
+    Error *err = NULL;
+
+    if (qcrypto_random_bytes((uint8_t *)&res, sizeof(res), &err) < 0) {
+        /* On failure we don't want to return the guest a non-random
+         * value in case they're really using it for cryptographic
+         * purposes, so the best we can do is die here.
+         * This shouldn't happen unless something's broken.
+         * In theory we could implement this device's full FIFO
+         * and interrupt semantics and then just stop filling the
+         * FIFO. That's a lot of work, though, so we assume any
+         * errors are systematic problems and trust that if we didn't
+         * fail as the guest inited then we won't fail later on
+         * mid-run.
+         */
+        error_report_err(err);
+        exit(1);
+    }
+    return res;
+}
+
+static uint64_t bcm2835_rng_read(void *opaque, hwaddr offset,
+                                 unsigned size)
+{
+    BCM2835RngState *s = (BCM2835RngState *)opaque;
+    uint32_t res = 0;
+
+    assert(size == 4);
+
+    switch (offset) {
+    case 0x0:    /* rng_ctrl */
+        res = s->rng_ctrl;
+        break;
+    case 0x4:    /* rng_status */
+        res = s->rng_status | (1 << 24);
+        break;
+    case 0x8:    /* rng_data */
+        res = get_random_bytes();
+        break;
+
+    default:
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "bcm2835_rng_read: Bad offset %x\n",
+                      (int)offset);
+        res = 0;
+        break;
+    }
+
+    return res;
+}
+
+static void bcm2835_rng_write(void *opaque, hwaddr offset,
+                              uint64_t value, unsigned size)
+{
+    BCM2835RngState *s = (BCM2835RngState *)opaque;
+
+    assert(size == 4);
+
+    switch (offset) {
+    case 0x0:    /* rng_ctrl */
+        s->rng_ctrl = value;
+        break;
+    case 0x4:    /* rng_status */
+        /* we shouldn't let the guest write to bits [31..20] */
+        s->rng_status &= ~0xFFFFF;        /* clear 20 lower bits */
+        s->rng_status |= value & 0xFFFFF; /* set them to new value */
+        break;
+
+    default:
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "bcm2835_rng_write: Bad offset %x\n",
+                      (int)offset);
+        break;
+    }
+}
+
+static const MemoryRegionOps bcm2835_rng_ops = {
+    .read = bcm2835_rng_read,
+    .write = bcm2835_rng_write,
+    .endianness = DEVICE_NATIVE_ENDIAN,
+};
+
+static const VMStateDescription vmstate_bcm2835_rng = {
+    .name = TYPE_BCM2835_RNG,
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT32(rng_ctrl, BCM2835RngState),
+        VMSTATE_UINT32(rng_status, BCM2835RngState),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static void bcm2835_rng_init(Object *obj)
+{
+    BCM2835RngState *s = BCM2835_RNG(obj);
+
+    memory_region_init_io(&s->iomem, obj, &bcm2835_rng_ops, s,
+                          TYPE_BCM2835_RNG, 0x10);
+    sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->iomem);
+}
+
+static void bcm2835_rng_reset(DeviceState *dev)
+{
+    BCM2835RngState *s = BCM2835_RNG(dev);
+
+    s->rng_ctrl = 0;
+    s->rng_status = 0;
+}
+
+static void bcm2835_rng_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+
+    dc->reset = bcm2835_rng_reset;
+    dc->vmsd = &vmstate_bcm2835_rng;
+}
+
+static TypeInfo bcm2835_rng_info = {
+    .name          = TYPE_BCM2835_RNG,
+    .parent        = TYPE_SYS_BUS_DEVICE,
+    .instance_size = sizeof(BCM2835RngState),
+    .class_init    = bcm2835_rng_class_init,
+    .instance_init = bcm2835_rng_init,
+};
+
+static void bcm2835_rng_register_types(void)
+{
+    type_register_static(&bcm2835_rng_info);
+}
+
+type_init(bcm2835_rng_register_types)
diff --git a/hw/misc/exynos4210_clk.c b/hw/misc/exynos4210_clk.c
new file mode 100644
index 0000000000..81862c0ada
--- /dev/null
+++ b/hw/misc/exynos4210_clk.c
@@ -0,0 +1,164 @@
+/*
+ *  Exynos4210 Clock Controller Emulation
+ *
+ *  Copyright (c) 2017 Krzysztof Kozlowski <krzk@kernel.org>
+ *
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "hw/sysbus.h"
+#include "qemu/log.h"
+
+#define TYPE_EXYNOS4210_CLK             "exynos4210.clk"
+#define EXYNOS4210_CLK(obj) \
+    OBJECT_CHECK(Exynos4210ClkState, (obj), TYPE_EXYNOS4210_CLK)
+
+#define CLK_PLL_LOCKED                  BIT(29)
+
+#define EXYNOS4210_CLK_REGS_MEM_SIZE    0x15104
+
+typedef struct Exynos4210Reg {
+    const char   *name; /* for debug only */
+    uint32_t     offset;
+    uint32_t     reset_value;
+} Exynos4210Reg;
+
+/* Clock controller register base: 0x10030000 */
+static const Exynos4210Reg exynos4210_clk_regs[] = {
+    {"EPLL_LOCK",                     0xc010, 0x00000fff},
+    {"VPLL_LOCK",                     0xc020, 0x00000fff},
+    {"EPLL_CON0",                     0xc110, 0x00300301 | CLK_PLL_LOCKED},
+    {"EPLL_CON1",                     0xc114, 0x00000000},
+    {"VPLL_CON0",                     0xc120, 0x00240201 | CLK_PLL_LOCKED},
+    {"VPLL_CON1",                     0xc124, 0x66010464},
+    {"APLL_LOCK",                    0x14000, 0x00000fff},
+    {"MPLL_LOCK",                    0x14004, 0x00000fff},
+    {"APLL_CON0",                    0x14100, 0x00c80601 | CLK_PLL_LOCKED},
+    {"APLL_CON1",                    0x14104, 0x0000001c},
+    {"MPLL_CON0",                    0x14108, 0x00c80601 | CLK_PLL_LOCKED},
+    {"MPLL_CON1",                    0x1410c, 0x0000001c},
+};
+
+#define EXYNOS4210_REGS_NUM       ARRAY_SIZE(exynos4210_clk_regs)
+
+typedef struct Exynos4210ClkState {
+    SysBusDevice parent_obj;
+
+    MemoryRegion iomem;
+    uint32_t reg[EXYNOS4210_REGS_NUM];
+} Exynos4210ClkState;
+
+static uint64_t exynos4210_clk_read(void *opaque, hwaddr offset,
+                                    unsigned size)
+{
+    const Exynos4210ClkState *s = (Exynos4210ClkState *)opaque;
+    const Exynos4210Reg *regs = exynos4210_clk_regs;
+    unsigned int i;
+
+    for (i = 0; i < EXYNOS4210_REGS_NUM; i++) {
+        if (regs->offset == offset) {
+            return s->reg[i];
+        }
+        regs++;
+    }
+    qemu_log_mask(LOG_GUEST_ERROR, "%s: bad read offset 0x%04x\n",
+                  __func__, (uint32_t)offset);
+    return 0;
+}
+
+static void exynos4210_clk_write(void *opaque, hwaddr offset,
+                                 uint64_t val, unsigned size)
+{
+    Exynos4210ClkState *s = (Exynos4210ClkState *)opaque;
+    const Exynos4210Reg *regs = exynos4210_clk_regs;
+    unsigned int i;
+
+    for (i = 0; i < EXYNOS4210_REGS_NUM; i++) {
+        if (regs->offset == offset) {
+            s->reg[i] = val;
+            return;
+        }
+        regs++;
+    }
+    qemu_log_mask(LOG_GUEST_ERROR, "%s: bad write offset 0x%04x\n",
+                  __func__, (uint32_t)offset);
+}
+
+static const MemoryRegionOps exynos4210_clk_ops = {
+    .read = exynos4210_clk_read,
+    .write = exynos4210_clk_write,
+    .endianness = DEVICE_NATIVE_ENDIAN,
+    .valid = {
+        .min_access_size = 4,
+        .max_access_size = 4,
+        .unaligned = false
+    }
+};
+
+static void exynos4210_clk_reset(DeviceState *dev)
+{
+    Exynos4210ClkState *s = EXYNOS4210_CLK(dev);
+    unsigned int i;
+
+    /* Set default values for registers */
+    for (i = 0; i < EXYNOS4210_REGS_NUM; i++) {
+        s->reg[i] = exynos4210_clk_regs[i].reset_value;
+    }
+}
+
+static void exynos4210_clk_init(Object *obj)
+{
+    Exynos4210ClkState *s = EXYNOS4210_CLK(obj);
+    SysBusDevice *dev = SYS_BUS_DEVICE(obj);
+
+    /* memory mapping */
+    memory_region_init_io(&s->iomem, obj, &exynos4210_clk_ops, s,
+                          TYPE_EXYNOS4210_CLK, EXYNOS4210_CLK_REGS_MEM_SIZE);
+    sysbus_init_mmio(dev, &s->iomem);
+}
+
+static const VMStateDescription exynos4210_clk_vmstate = {
+    .name = TYPE_EXYNOS4210_CLK,
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT32_ARRAY(reg, Exynos4210ClkState, EXYNOS4210_REGS_NUM),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static void exynos4210_clk_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+
+    dc->reset = exynos4210_clk_reset;
+    dc->vmsd = &exynos4210_clk_vmstate;
+}
+
+static const TypeInfo exynos4210_clk_info = {
+    .name          = TYPE_EXYNOS4210_CLK,
+    .parent        = TYPE_SYS_BUS_DEVICE,
+    .instance_size = sizeof(Exynos4210ClkState),
+    .instance_init = exynos4210_clk_init,
+    .class_init    = exynos4210_clk_class_init,
+};
+
+static void exynos4210_clk_register(void)
+{
+    qemu_log_mask(LOG_GUEST_ERROR, "Clock init\n");
+    type_register_static(&exynos4210_clk_info);
+}
+
+type_init(exynos4210_clk_register)
diff --git a/hw/misc/imx6_src.c b/hw/misc/imx6_src.c
index 55b817b8d7..edbb756c36 100644
--- a/hw/misc/imx6_src.c
+++ b/hw/misc/imx6_src.c
@@ -14,6 +14,7 @@
 #include "qemu/bitops.h"
 #include "qemu/log.h"
 #include "arm-powerctl.h"
+#include "qom/cpu.h"
 
 #ifndef DEBUG_IMX6_SRC
 #define DEBUG_IMX6_SRC 0
@@ -113,6 +114,45 @@ static uint64_t imx6_src_read(void *opaque, hwaddr offset, unsigned size)
     return value;
 }
 
+
+/* The reset is asynchronous so we need to defer clearing the reset
+ * bit until the work is completed.
+ */
+
+struct SRCSCRResetInfo {
+    IMX6SRCState *s;
+    int reset_bit;
+};
+
+static void imx6_clear_reset_bit(CPUState *cpu, run_on_cpu_data data)
+{
+    struct SRCSCRResetInfo *ri = data.host_ptr;
+    IMX6SRCState *s = ri->s;
+
+    assert(qemu_mutex_iothread_locked());
+
+    s->regs[SRC_SCR] = deposit32(s->regs[SRC_SCR], ri->reset_bit, 1, 0);
+    DPRINTF("reg[%s] <= 0x%" PRIx32 "\n",
+            imx6_src_reg_name(SRC_SCR), s->regs[SRC_SCR]);
+
+    g_free(ri);
+}
+
+static void imx6_defer_clear_reset_bit(int cpuid,
+                                       IMX6SRCState *s,
+                                       unsigned long reset_shift)
+{
+    struct SRCSCRResetInfo *ri;
+
+    ri = g_malloc(sizeof(struct SRCSCRResetInfo));
+    ri->s = s;
+    ri->reset_bit = reset_shift;
+
+    async_run_on_cpu(arm_get_cpu_by_id(cpuid), imx6_clear_reset_bit,
+                     RUN_ON_CPU_HOST_PTR(ri));
+}
+
+
 static void imx6_src_write(void *opaque, hwaddr offset, uint64_t value,
                            unsigned size)
 {
@@ -153,7 +193,7 @@ static void imx6_src_write(void *opaque, hwaddr offset, uint64_t value,
                 arm_set_cpu_off(3);
             }
             /* We clear the reset bits as the processor changed state */
-            clear_bit(CORE3_RST_SHIFT, &current_value);
+            imx6_defer_clear_reset_bit(3, s, CORE3_RST_SHIFT);
             clear_bit(CORE3_RST_SHIFT, &change_mask);
         }
         if (EXTRACT(change_mask, CORE2_ENABLE)) {
@@ -162,11 +202,11 @@ static void imx6_src_write(void *opaque, hwaddr offset, uint64_t value,
                 arm_set_cpu_on(2, s->regs[SRC_GPR5], s->regs[SRC_GPR6],
                                3, false);
             } else {
-                /* CORE 3 is shut down */
+                /* CORE 2 is shut down */
                 arm_set_cpu_off(2);
             }
             /* We clear the reset bits as the processor changed state */
-            clear_bit(CORE2_RST_SHIFT, &current_value);
+            imx6_defer_clear_reset_bit(2, s, CORE2_RST_SHIFT);
             clear_bit(CORE2_RST_SHIFT, &change_mask);
         }
         if (EXTRACT(change_mask, CORE1_ENABLE)) {
@@ -175,28 +215,28 @@ static void imx6_src_write(void *opaque, hwaddr offset, uint64_t value,
                 arm_set_cpu_on(1, s->regs[SRC_GPR3], s->regs[SRC_GPR4],
                                3, false);
             } else {
-                /* CORE 3 is shut down */
+                /* CORE 1 is shut down */
                 arm_set_cpu_off(1);
             }
             /* We clear the reset bits as the processor changed state */
-            clear_bit(CORE1_RST_SHIFT, &current_value);
+            imx6_defer_clear_reset_bit(1, s, CORE1_RST_SHIFT);
             clear_bit(CORE1_RST_SHIFT, &change_mask);
         }
         if (EXTRACT(change_mask, CORE0_RST)) {
             arm_reset_cpu(0);
-            clear_bit(CORE0_RST_SHIFT, &current_value);
+            imx6_defer_clear_reset_bit(0, s, CORE0_RST_SHIFT);
         }
         if (EXTRACT(change_mask, CORE1_RST)) {
             arm_reset_cpu(1);
-            clear_bit(CORE1_RST_SHIFT, &current_value);
+            imx6_defer_clear_reset_bit(1, s, CORE1_RST_SHIFT);
         }
         if (EXTRACT(change_mask, CORE2_RST)) {
             arm_reset_cpu(2);
-            clear_bit(CORE2_RST_SHIFT, &current_value);
+            imx6_defer_clear_reset_bit(2, s, CORE2_RST_SHIFT);
         }
         if (EXTRACT(change_mask, CORE3_RST)) {
             arm_reset_cpu(3);
-            clear_bit(CORE3_RST_SHIFT, &current_value);
+            imx6_defer_clear_reset_bit(3, s, CORE3_RST_SHIFT);
         }
         if (EXTRACT(change_mask, SW_IPU2_RST)) {
             /* We pretend the IPU2 is reset */
diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c
index bf57e635d6..82ce8378bf 100644
--- a/hw/misc/ivshmem.c
+++ b/hw/misc/ivshmem.c
@@ -1267,10 +1267,11 @@ static void ivshmem_realize(PCIDevice *dev, Error **errp)
     if (s->sizearg == NULL) {
         s->legacy_size = 4 << 20; /* 4 MB default */
     } else {
-        char *end;
-        int64_t size = qemu_strtosz(s->sizearg, &end);
-        if (size < 0 || (size_t)size != size || *end != '\0'
-            || !is_power_of_2(size)) {
+        int ret;
+        uint64_t size;
+
+        ret = qemu_strtosz_MiB(s->sizearg, NULL, &size);
+        if (ret < 0 || (size_t)size != size || !is_power_of_2(size)) {
             error_setg(errp, "Invalid size %s", s->sizearg);
             return;
         }
diff --git a/hw/misc/mips_cmgcr.c b/hw/misc/mips_cmgcr.c
index b3ba16694e..a1edb53f95 100644
--- a/hw/misc/mips_cmgcr.c
+++ b/hw/misc/mips_cmgcr.c
@@ -29,6 +29,20 @@ static inline bool is_gic_connected(MIPSGCRState *s)
     return s->gic_mr != NULL;
 }
 
+static inline void update_gcr_base(MIPSGCRState *gcr, uint64_t val)
+{
+    CPUState *cpu;
+    MIPSCPU *mips_cpu;
+
+    gcr->gcr_base = val & GCR_BASE_GCRBASE_MSK;
+    memory_region_set_address(&gcr->iomem, gcr->gcr_base);
+
+    CPU_FOREACH(cpu) {
+        mips_cpu = MIPS_CPU(cpu);
+        mips_cpu->env.CP0_CMGCRBase = gcr->gcr_base >> 4;
+    }
+}
+
 static inline void update_cpc_base(MIPSGCRState *gcr, uint64_t val)
 {
     if (is_cpc_connected(gcr)) {
@@ -117,6 +131,9 @@ static void gcr_write(void *opaque, hwaddr addr, uint64_t data, unsigned size)
     MIPSGCRVPState *other_vps = &gcr->vps[current_vps->other];
 
     switch (addr) {
+    case GCR_BASE_OFS:
+        update_gcr_base(gcr, data);
+        break;
     case GCR_GIC_BASE_OFS:
         update_gic_base(gcr, data);
         break;
diff --git a/hw/net/cadence_gem.c b/hw/net/cadence_gem.c
index e99d4544a2..d4de8ad9f1 100644
--- a/hw/net/cadence_gem.c
+++ b/hw/net/cadence_gem.c
@@ -508,7 +508,7 @@ static void gem_update_int_status(CadenceGEMState *s)
 
     if ((s->num_priority_queues == 1) && s->regs[GEM_ISR]) {
         /* No priority queues, just trigger the interrupt */
-        DB_PRINT("asserting int.\n", i);
+        DB_PRINT("asserting int.\n");
         qemu_set_irq(s->irq[0], 1);
         return;
     }
diff --git a/hw/net/spapr_llan.c b/hw/net/spapr_llan.c
index 058908d8d7..d239e4bd7d 100644
--- a/hw/net/spapr_llan.c
+++ b/hw/net/spapr_llan.c
@@ -385,18 +385,24 @@ static int spapr_vlan_devnode(VIOsPAPRDevice *dev, void *fdt, int node_off)
     int ret;
 
     /* Some old phyp versions give the mac address in an 8-byte
-     * property.  The kernel driver has an insane workaround for this;
+     * property.  The kernel driver (before 3.10) has an insane workaround;
      * rather than doing the obvious thing and checking the property
      * length, it checks whether the first byte has 0b10 in the low
      * bits.  If a correct 6-byte property has a different first byte
      * the kernel will get the wrong mac address, overrunning its
      * buffer in the process (read only, thank goodness).
      *
-     * Here we workaround the kernel workaround by always supplying an
-     * 8-byte property, with the mac address in the last six bytes */
-    memcpy(&padded_mac[2], &vdev->nicconf.macaddr, ETH_ALEN);
-    ret = fdt_setprop(fdt, node_off, "local-mac-address",
-                      padded_mac, sizeof(padded_mac));
+     * Here we return a 6-byte address unless that would break a pre-3.10
+     * driver.  In that case we return a padded 8-byte address to allow the old
+     * workaround to succeed. */
+    if ((vdev->nicconf.macaddr.a[0] & 0x3) == 0x2) {
+        ret = fdt_setprop(fdt, node_off, "local-mac-address",
+                          &vdev->nicconf.macaddr, ETH_ALEN);
+    } else {
+        memcpy(&padded_mac[2], &vdev->nicconf.macaddr, ETH_ALEN);
+        ret = fdt_setprop(fdt, node_off, "local-mac-address",
+                          padded_mac, sizeof(padded_mac));
+    }
     if (ret < 0) {
         return ret;
     }
diff --git a/hw/nvram/spapr_nvram.c b/hw/nvram/spapr_nvram.c
index 65ba188555..aa5d2c1f5f 100644
--- a/hw/nvram/spapr_nvram.c
+++ b/hw/nvram/spapr_nvram.c
@@ -141,9 +141,17 @@ static void rtas_nvram_store(PowerPCCPU *cpu, sPAPRMachineState *spapr,
 static void spapr_nvram_realize(VIOsPAPRDevice *dev, Error **errp)
 {
     sPAPRNVRAM *nvram = VIO_SPAPR_NVRAM(dev);
+    int ret;
 
     if (nvram->blk) {
         nvram->size = blk_getlength(nvram->blk);
+
+        ret = blk_set_perm(nvram->blk,
+                           BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE,
+                           BLK_PERM_ALL, errp);
+        if (ret < 0) {
+            return;
+        }
     } else {
         nvram->size = DEFAULT_NVRAM_SIZE;
     }
diff --git a/hw/pci-host/Makefile.objs b/hw/pci-host/Makefile.objs
index 45f1f0ebab..9c7909cf44 100644
--- a/hw/pci-host/Makefile.objs
+++ b/hw/pci-host/Makefile.objs
@@ -16,3 +16,4 @@ common-obj-$(CONFIG_FULONG) += bonito.o
 common-obj-$(CONFIG_PCI_PIIX) += piix.o
 common-obj-$(CONFIG_PCI_Q35) += q35.o
 common-obj-$(CONFIG_PCI_GENERIC) += gpex.o
+common-obj-$(CONFIG_PCI_XILINX) += xilinx-pcie.o
diff --git a/hw/pci-host/prep.c b/hw/pci-host/prep.c
index 5580293f93..260a119a9e 100644
--- a/hw/pci-host/prep.c
+++ b/hw/pci-host/prep.c
@@ -309,7 +309,6 @@ static void raven_realize(PCIDevice *d, Error **errp)
     memory_region_set_readonly(&s->bios, true);
     memory_region_add_subregion(get_system_memory(), (uint32_t)(-BIOS_SIZE),
                                 &s->bios);
-    vmstate_register_ram_global(&s->bios);
     if (s->bios_name) {
         filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, s->bios_name);
         if (filename) {
@@ -328,12 +327,15 @@ static void raven_realize(PCIDevice *d, Error **errp)
                 }
             }
         }
+        g_free(filename);
         if (bios_size < 0 || bios_size > BIOS_SIZE) {
-            /* FIXME should error_setg() */
-            hw_error("qemu: could not load bios image '%s'\n", s->bios_name);
+            memory_region_del_subregion(get_system_memory(), &s->bios);
+            error_setg(errp, "Could not load bios image '%s'", s->bios_name);
+            return;
         }
-        g_free(filename);
     }
+
+    vmstate_register_ram_global(&s->bios);
 }
 
 static const VMStateDescription vmstate_raven = {
@@ -361,7 +363,6 @@ static void raven_class_init(ObjectClass *klass, void *data)
     /*
      * Reason: PCI-facing part of the host bridge, not usable without
      * the host-facing part, which can't be device_add'ed, yet.
-     * Reason: realize() method uses hw_error().
      */
     dc->cannot_instantiate_with_device_add_yet = true;
 }
diff --git a/hw/pci-host/xilinx-pcie.c b/hw/pci-host/xilinx-pcie.c
new file mode 100644
index 0000000000..8b71e2d950
--- /dev/null
+++ b/hw/pci-host/xilinx-pcie.c
@@ -0,0 +1,328 @@
+/*
+ * Xilinx PCIe host controller emulation.
+ *
+ * Copyright (c) 2016 Imagination Technologies
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "hw/pci/pci_bridge.h"
+#include "hw/pci-host/xilinx-pcie.h"
+
+enum root_cfg_reg {
+    /* Interrupt Decode Register */
+    ROOTCFG_INTDEC              = 0x138,
+
+    /* Interrupt Mask Register */
+    ROOTCFG_INTMASK             = 0x13c,
+    /* INTx Interrupt Received */
+#define ROOTCFG_INTMASK_INTX    (1 << 16)
+    /* MSI Interrupt Received */
+#define ROOTCFG_INTMASK_MSI     (1 << 17)
+
+    /* PHY Status/Control Register */
+    ROOTCFG_PSCR                = 0x144,
+    /* Link Up */
+#define ROOTCFG_PSCR_LINK_UP    (1 << 11)
+
+    /* Root Port Status/Control Register */
+    ROOTCFG_RPSCR               = 0x148,
+    /* Bridge Enable */
+#define ROOTCFG_RPSCR_BRIDGEEN  (1 << 0)
+    /* Interrupt FIFO Not Empty */
+#define ROOTCFG_RPSCR_INTNEMPTY (1 << 18)
+    /* Interrupt FIFO Overflow */
+#define ROOTCFG_RPSCR_INTOVF    (1 << 19)
+
+    /* Root Port Interrupt FIFO Read Register 1 */
+    ROOTCFG_RPIFR1              = 0x158,
+#define ROOTCFG_RPIFR1_INT_LANE_SHIFT   27
+#define ROOTCFG_RPIFR1_INT_ASSERT_SHIFT 29
+#define ROOTCFG_RPIFR1_INT_VALID_SHIFT  31
+    /* Root Port Interrupt FIFO Read Register 2 */
+    ROOTCFG_RPIFR2              = 0x15c,
+};
+
+static void xilinx_pcie_update_intr(XilinxPCIEHost *s,
+                                    uint32_t set, uint32_t clear)
+{
+    int level;
+
+    s->intr |= set;
+    s->intr &= ~clear;
+
+    if (s->intr_fifo_r != s->intr_fifo_w) {
+        s->intr |= ROOTCFG_INTMASK_INTX;
+    }
+
+    level = !!(s->intr & s->intr_mask);
+    qemu_set_irq(s->irq, level);
+}
+
+static void xilinx_pcie_queue_intr(XilinxPCIEHost *s,
+                                   uint32_t fifo_reg1, uint32_t fifo_reg2)
+{
+    XilinxPCIEInt *intr;
+    unsigned int new_w;
+
+    new_w = (s->intr_fifo_w + 1) % ARRAY_SIZE(s->intr_fifo);
+    if (new_w == s->intr_fifo_r) {
+        s->rpscr |= ROOTCFG_RPSCR_INTOVF;
+        return;
+    }
+
+    intr = &s->intr_fifo[s->intr_fifo_w];
+    s->intr_fifo_w = new_w;
+
+    intr->fifo_reg1 = fifo_reg1;
+    intr->fifo_reg2 = fifo_reg2;
+
+    xilinx_pcie_update_intr(s, ROOTCFG_INTMASK_INTX, 0);
+}
+
+static void xilinx_pcie_set_irq(void *opaque, int irq_num, int level)
+{
+    XilinxPCIEHost *s = XILINX_PCIE_HOST(opaque);
+
+    xilinx_pcie_queue_intr(s,
+       (irq_num << ROOTCFG_RPIFR1_INT_LANE_SHIFT) |
+           (level << ROOTCFG_RPIFR1_INT_ASSERT_SHIFT) |
+           (1 << ROOTCFG_RPIFR1_INT_VALID_SHIFT),
+       0);
+}
+
+static void xilinx_pcie_host_realize(DeviceState *dev, Error **errp)
+{
+    PCIHostState *pci = PCI_HOST_BRIDGE(dev);
+    XilinxPCIEHost *s = XILINX_PCIE_HOST(dev);
+    SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
+    PCIExpressHost *pex = PCIE_HOST_BRIDGE(dev);
+
+    snprintf(s->name, sizeof(s->name), "pcie%u", s->bus_nr);
+
+    /* PCI configuration space */
+    pcie_host_mmcfg_init(pex, s->cfg_size);
+
+    /* MMIO region */
+    memory_region_init(&s->mmio, OBJECT(s), "mmio", UINT64_MAX);
+    memory_region_set_enabled(&s->mmio, false);
+
+    /* dummy I/O region */
+    memory_region_init_ram(&s->io, OBJECT(s), "io", 16, NULL);
+    memory_region_set_enabled(&s->io, false);
+
+    /* interrupt out */
+    qdev_init_gpio_out_named(dev, &s->irq, "interrupt_out", 1);
+
+    sysbus_init_mmio(sbd, &pex->mmio);
+    sysbus_init_mmio(sbd, &s->mmio);
+
+    pci->bus = pci_register_bus(dev, s->name, xilinx_pcie_set_irq,
+                                pci_swizzle_map_irq_fn, s, &s->mmio,
+                                &s->io, 0, 4, TYPE_PCIE_BUS);
+
+    qdev_set_parent_bus(DEVICE(&s->root), BUS(pci->bus));
+    qdev_init_nofail(DEVICE(&s->root));
+}
+
+static const char *xilinx_pcie_host_root_bus_path(PCIHostState *host_bridge,
+                                                  PCIBus *rootbus)
+{
+    return "0000:00";
+}
+
+static void xilinx_pcie_host_init(Object *obj)
+{
+    XilinxPCIEHost *s = XILINX_PCIE_HOST(obj);
+    XilinxPCIERoot *root = &s->root;
+
+    object_initialize(root, sizeof(*root), TYPE_XILINX_PCIE_ROOT);
+    object_property_add_child(obj, "root", OBJECT(root), NULL);
+    qdev_prop_set_uint32(DEVICE(root), "addr", PCI_DEVFN(0, 0));
+    qdev_prop_set_bit(DEVICE(root), "multifunction", false);
+}
+
+static Property xilinx_pcie_host_props[] = {
+    DEFINE_PROP_UINT32("bus_nr", XilinxPCIEHost, bus_nr, 0),
+    DEFINE_PROP_SIZE("cfg_base", XilinxPCIEHost, cfg_base, 0),
+    DEFINE_PROP_SIZE("cfg_size", XilinxPCIEHost, cfg_size, 32 << 20),
+    DEFINE_PROP_SIZE("mmio_base", XilinxPCIEHost, mmio_base, 0),
+    DEFINE_PROP_SIZE("mmio_size", XilinxPCIEHost, mmio_size, 1 << 20),
+    DEFINE_PROP_BOOL("link_up", XilinxPCIEHost, link_up, true),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static void xilinx_pcie_host_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    PCIHostBridgeClass *hc = PCI_HOST_BRIDGE_CLASS(klass);
+
+    hc->root_bus_path = xilinx_pcie_host_root_bus_path;
+    dc->realize = xilinx_pcie_host_realize;
+    set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
+    dc->fw_name = "pci";
+    dc->props = xilinx_pcie_host_props;
+}
+
+static const TypeInfo xilinx_pcie_host_info = {
+    .name       = TYPE_XILINX_PCIE_HOST,
+    .parent     = TYPE_PCIE_HOST_BRIDGE,
+    .instance_size = sizeof(XilinxPCIEHost),
+    .instance_init = xilinx_pcie_host_init,
+    .class_init = xilinx_pcie_host_class_init,
+};
+
+static uint32_t xilinx_pcie_root_config_read(PCIDevice *d,
+                                             uint32_t address, int len)
+{
+    XilinxPCIEHost *s = XILINX_PCIE_HOST(OBJECT(d)->parent);
+    uint32_t val;
+
+    switch (address) {
+    case ROOTCFG_INTDEC:
+        val = s->intr;
+        break;
+    case ROOTCFG_INTMASK:
+        val = s->intr_mask;
+        break;
+    case ROOTCFG_PSCR:
+        val = s->link_up ? ROOTCFG_PSCR_LINK_UP : 0;
+        break;
+    case ROOTCFG_RPSCR:
+        if (s->intr_fifo_r != s->intr_fifo_w) {
+            s->rpscr &= ~ROOTCFG_RPSCR_INTNEMPTY;
+        } else {
+            s->rpscr |= ROOTCFG_RPSCR_INTNEMPTY;
+        }
+        val = s->rpscr;
+        break;
+    case ROOTCFG_RPIFR1:
+        if (s->intr_fifo_w == s->intr_fifo_r) {
+            /* FIFO empty */
+            val = 0;
+        } else {
+            val = s->intr_fifo[s->intr_fifo_r].fifo_reg1;
+        }
+        break;
+    case ROOTCFG_RPIFR2:
+        if (s->intr_fifo_w == s->intr_fifo_r) {
+            /* FIFO empty */
+            val = 0;
+        } else {
+            val = s->intr_fifo[s->intr_fifo_r].fifo_reg2;
+        }
+        break;
+    default:
+        val = pci_default_read_config(d, address, len);
+        break;
+    }
+    return val;
+}
+
+static void xilinx_pcie_root_config_write(PCIDevice *d, uint32_t address,
+                                          uint32_t val, int len)
+{
+    XilinxPCIEHost *s = XILINX_PCIE_HOST(OBJECT(d)->parent);
+    switch (address) {
+    case ROOTCFG_INTDEC:
+        xilinx_pcie_update_intr(s, 0, val);
+        break;
+    case ROOTCFG_INTMASK:
+        s->intr_mask = val;
+        xilinx_pcie_update_intr(s, 0, 0);
+        break;
+    case ROOTCFG_RPSCR:
+        s->rpscr &= ~ROOTCFG_RPSCR_BRIDGEEN;
+        s->rpscr |= val & ROOTCFG_RPSCR_BRIDGEEN;
+        memory_region_set_enabled(&s->mmio, val & ROOTCFG_RPSCR_BRIDGEEN);
+
+        if (val & ROOTCFG_INTMASK_INTX) {
+            s->rpscr &= ~ROOTCFG_INTMASK_INTX;
+        }
+        break;
+    case ROOTCFG_RPIFR1:
+    case ROOTCFG_RPIFR2:
+        if (s->intr_fifo_w == s->intr_fifo_r) {
+            /* FIFO empty */
+            return;
+        } else {
+            s->intr_fifo_r = (s->intr_fifo_r + 1) % ARRAY_SIZE(s->intr_fifo);
+        }
+        break;
+    default:
+        pci_default_write_config(d, address, val, len);
+        break;
+    }
+}
+
+static int xilinx_pcie_root_init(PCIDevice *dev)
+{
+    BusState *bus = qdev_get_parent_bus(DEVICE(dev));
+    XilinxPCIEHost *s = XILINX_PCIE_HOST(bus->parent);
+
+    pci_set_word(dev->config + PCI_COMMAND,
+                 PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER);
+    pci_set_word(dev->config + PCI_MEMORY_BASE, s->mmio_base >> 16);
+    pci_set_word(dev->config + PCI_MEMORY_LIMIT,
+                 ((s->mmio_base + s->mmio_size - 1) >> 16) & 0xfff0);
+
+    pci_bridge_initfn(dev, TYPE_PCI_BUS);
+
+    if (pcie_endpoint_cap_v1_init(dev, 0x80) < 0) {
+        hw_error("Failed to initialize PCIe capability");
+    }
+
+    return 0;
+}
+
+static void xilinx_pcie_root_class_init(ObjectClass *klass, void *data)
+{
+    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
+    DeviceClass *dc = DEVICE_CLASS(klass);
+
+    set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
+    dc->desc = "Xilinx AXI-PCIe Host Bridge";
+    k->vendor_id = PCI_VENDOR_ID_XILINX;
+    k->device_id = 0x7021;
+    k->revision = 0;
+    k->class_id = PCI_CLASS_BRIDGE_HOST;
+    k->is_express = true;
+    k->is_bridge = true;
+    k->init = xilinx_pcie_root_init;
+    k->exit = pci_bridge_exitfn;
+    dc->reset = pci_bridge_reset;
+    k->config_read = xilinx_pcie_root_config_read;
+    k->config_write = xilinx_pcie_root_config_write;
+    /*
+     * PCI-facing part of the host bridge, not usable without the
+     * host-facing part, which can't be device_add'ed, yet.
+     */
+    dc->cannot_instantiate_with_device_add_yet = true;
+}
+
+static const TypeInfo xilinx_pcie_root_info = {
+    .name = TYPE_XILINX_PCIE_ROOT,
+    .parent = TYPE_PCI_BRIDGE,
+    .instance_size = sizeof(XilinxPCIERoot),
+    .class_init = xilinx_pcie_root_class_init,
+};
+
+static void xilinx_pcie_register(void)
+{
+    type_register_static(&xilinx_pcie_root_info);
+    type_register_static(&xilinx_pcie_host_info);
+}
+
+type_init(xilinx_pcie_register)
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index a563555e7d..273f1e4602 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -1530,6 +1530,34 @@ static const pci_class_desc pci_class_descriptions[] =
     { 0, NULL}
 };
 
+static void pci_for_each_device_under_bus_reverse(PCIBus *bus,
+                                                  void (*fn)(PCIBus *b,
+                                                             PCIDevice *d,
+                                                             void *opaque),
+                                                  void *opaque)
+{
+    PCIDevice *d;
+    int devfn;
+
+    for (devfn = 0; devfn < ARRAY_SIZE(bus->devices); devfn++) {
+        d = bus->devices[ARRAY_SIZE(bus->devices) - 1 - devfn];
+        if (d) {
+            fn(bus, d, opaque);
+        }
+    }
+}
+
+void pci_for_each_device_reverse(PCIBus *bus, int bus_num,
+                         void (*fn)(PCIBus *b, PCIDevice *d, void *opaque),
+                         void *opaque)
+{
+    bus = pci_find_bus_nr(bus, bus_num);
+
+    if (bus) {
+        pci_for_each_device_under_bus_reverse(bus, fn, opaque);
+    }
+}
+
 static void pci_for_each_device_under_bus(PCIBus *bus,
                                           void (*fn)(PCIBus *b, PCIDevice *d,
                                                      void *opaque),
diff --git a/hw/pci/pcie_aer.c b/hw/pci/pcie_aer.c
index daf1f65427..a8c18203d6 100644
--- a/hw/pci/pcie_aer.c
+++ b/hw/pci/pcie_aer.c
@@ -1025,8 +1025,8 @@ void hmp_pcie_aer_inject_error(Monitor *mon, const QDict *qdict)
         return;
     }
 
-    assert(qobject_type(data) == QTYPE_QDICT);
     qdict = qobject_to_qdict(data);
+    assert(qdict);
 
     devfn = (int)qdict_get_int(qdict, "devfn");
     monitor_printf(mon, "OK id: %s root bus: %s, bus: %x devfn: %x.%x\n",
diff --git a/hw/ppc/mac_newworld.c b/hw/ppc/mac_newworld.c
index 716aea6852..68aaedc06d 100644
--- a/hw/ppc/mac_newworld.c
+++ b/hw/ppc/mac_newworld.c
@@ -72,6 +72,7 @@
 #include "exec/address-spaces.h"
 #include "hw/sysbus.h"
 #include "qemu/cutils.h"
+#include "trace.h"
 
 #define MAX_IDE_BUS 2
 #define CFG_ADDR 0xf0000510
@@ -79,21 +80,11 @@
 #define CLOCKFREQ (266UL * 1000UL * 1000UL)
 #define BUSFREQ (100UL * 1000UL * 1000UL)
 
-/* debug UniNorth */
-//#define DEBUG_UNIN
-
-#ifdef DEBUG_UNIN
-#define UNIN_DPRINTF(fmt, ...)                                  \
-    do { printf("UNIN: " fmt , ## __VA_ARGS__); } while (0)
-#else
-#define UNIN_DPRINTF(fmt, ...)
-#endif
-
 /* UniN device */
 static void unin_write(void *opaque, hwaddr addr, uint64_t value,
                        unsigned size)
 {
-    UNIN_DPRINTF("write addr " TARGET_FMT_plx " val %"PRIx64"\n", addr, value);
+    trace_mac99_uninorth_write(addr, value);
     if (addr == 0x0) {
         *(int*)opaque = value;
     }
@@ -109,7 +100,7 @@ static uint64_t unin_read(void *opaque, hwaddr addr, unsigned size)
         value = *(int*)opaque;
     }
 
-    UNIN_DPRINTF("readl addr " TARGET_FMT_plx " val %x\n", addr, value);
+    trace_mac99_uninorth_read(addr, value);
 
     return value;
 }
diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
index 4fab5c0ae7..09f0d22def 100644
--- a/hw/ppc/pnv.c
+++ b/hw/ppc/pnv.c
@@ -381,7 +381,7 @@ static void ppc_powernv_init(MachineState *machine)
 
     fw_size = load_image_targphys(fw_filename, FW_LOAD_ADDR, FW_MAX_SIZE);
     if (fw_size < 0) {
-        error_report("qemu: could not load OPAL '%s'", fw_filename);
+        error_report("Could not load OPAL '%s'", fw_filename);
         exit(1);
     }
     g_free(fw_filename);
@@ -393,7 +393,7 @@ static void ppc_powernv_init(MachineState *machine)
         kernel_size = load_image_targphys(machine->kernel_filename,
                                           KERNEL_LOAD_ADDR, 0x2000000);
         if (kernel_size < 0) {
-            error_report("qemu: could not load kernel'%s'",
+            error_report("Could not load kernel '%s'",
                          machine->kernel_filename);
             exit(1);
         }
@@ -405,7 +405,7 @@ static void ppc_powernv_init(MachineState *machine)
         pnv->initrd_size = load_image_targphys(machine->initrd_filename,
                                   pnv->initrd_base, 0x10000000); /* 128MB max */
         if (pnv->initrd_size < 0) {
-            error_report("qemu: could not load initial ram disk '%s'",
+            error_report("Could not load initial ram disk '%s'",
                          machine->initrd_filename);
             exit(1);
         }
diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c
index d171e60b5c..5f93083d4a 100644
--- a/hw/ppc/ppc.c
+++ b/hw/ppc/ppc.c
@@ -62,7 +62,16 @@ void ppc_set_irq(PowerPCCPU *cpu, int n_IRQ, int level)
 {
     CPUState *cs = CPU(cpu);
     CPUPPCState *env = &cpu->env;
-    unsigned int old_pending = env->pending_interrupts;
+    unsigned int old_pending;
+    bool locked = false;
+
+    /* We may already have the BQL if coming from the reset path */
+    if (!qemu_mutex_iothread_locked()) {
+        locked = true;
+        qemu_mutex_lock_iothread();
+    }
+
+    old_pending = env->pending_interrupts;
 
     if (level) {
         env->pending_interrupts |= 1 << n_IRQ;
@@ -80,9 +89,14 @@ void ppc_set_irq(PowerPCCPU *cpu, int n_IRQ, int level)
 #endif
     }
 
+
     LOG_IRQ("%s: %p n_IRQ %d level %d => pending %08" PRIx32
                 "req %08x\n", __func__, env, n_IRQ, level,
                 env->pending_interrupts, CPU(cpu)->interrupt_request);
+
+    if (locked) {
+        qemu_mutex_unlock_iothread();
+    }
 }
 
 /* PowerPC 6xx / 7xx internal IRQ controller */
diff --git a/hw/ppc/ppc405_uc.c b/hw/ppc/ppc405_uc.c
index d6d3fc2c4a..d5df94aa6e 100644
--- a/hw/ppc/ppc405_uc.c
+++ b/hw/ppc/ppc405_uc.c
@@ -1881,7 +1881,7 @@ static void ppc405cr_clk_setup (ppc405cr_cpc_t *cpc)
         D1 = (((cpc->pllmr >> 20) - 1) & 0xF) + 1; /* FBDV */
         D2 = 8 - ((cpc->pllmr >> 16) & 0x7); /* FWDVA */
         M = D0 * D1 * D2;
-        VCO_out = cpc->sysclk * M;
+        VCO_out = (uint64_t)cpc->sysclk * M;
         if (VCO_out < 400000000 || VCO_out > 800000000) {
             /* PLL cannot lock */
             cpc->pllmr &= ~0x80000000;
@@ -1892,7 +1892,7 @@ static void ppc405cr_clk_setup (ppc405cr_cpc_t *cpc)
         /* Bypass PLL */
     bypass_pll:
         M = D0;
-        PLL_out = cpc->sysclk * M;
+        PLL_out = (uint64_t)cpc->sysclk * M;
     }
     CPU_clk = PLL_out;
     if (cpc->cr1 & 0x00800000)
@@ -2242,7 +2242,7 @@ static void ppc405ep_compute_clocks (ppc405ep_cpc_t *cpc)
 #ifdef DEBUG_CLOCKS_LL
         printf("FWDA %01" PRIx32 " %d\n", (cpc->pllmr[1] >> 16) & 0x7, D);
 #endif
-        VCO_out = cpc->sysclk * M * D;
+        VCO_out = (uint64_t)cpc->sysclk * M * D;
         if (VCO_out < 500000000UL || VCO_out > 1000000000UL) {
             /* Error - unlock the PLL */
             printf("VCO out of range %" PRIu64 "\n", VCO_out);
diff --git a/hw/ppc/ppc4xx_pci.c b/hw/ppc/ppc4xx_pci.c
index 683218e5c5..dc19682970 100644
--- a/hw/ppc/ppc4xx_pci.c
+++ b/hw/ppc/ppc4xx_pci.c
@@ -26,13 +26,7 @@
 #include "hw/pci/pci.h"
 #include "hw/pci/pci_host.h"
 #include "exec/address-spaces.h"
-
-#undef DEBUG
-#ifdef DEBUG
-#define DPRINTF(fmt, ...) do { printf(fmt, ## __VA_ARGS__); } while (0)
-#else
-#define DPRINTF(fmt, ...)
-#endif /* DEBUG */
+#include "trace.h"
 
 struct PCIMasterMap {
     uint32_t la;
@@ -249,8 +243,7 @@ static int ppc4xx_pci_map_irq(PCIDevice *pci_dev, int irq_num)
 {
     int slot = pci_dev->devfn >> 3;
 
-    DPRINTF("%s: devfn %x irq %d -> %d\n", __func__,
-            pci_dev->devfn, irq_num, slot);
+    trace_ppc4xx_pci_map_irq(pci_dev->devfn, irq_num, slot);
 
     return slot - 1;
 }
@@ -259,7 +252,7 @@ static void ppc4xx_pci_set_irq(void *opaque, int irq_num, int level)
 {
     qemu_irq *pci_irqs = opaque;
 
-    DPRINTF("%s: PCI irq %d\n", __func__, irq_num);
+    trace_ppc4xx_pci_set_irq(irq_num);
     if (irq_num < 0) {
         fprintf(stderr, "%s: PCI irq %d\n", __func__, irq_num);
         return;
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index e465d7ac98..81c6c1c27c 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -63,6 +63,7 @@
 #include "qemu/error-report.h"
 #include "trace.h"
 #include "hw/nmi.h"
+#include "hw/intc/intc.h"
 
 #include "hw/compat.h"
 #include "qemu/cutils.h"
@@ -95,37 +96,68 @@
 
 #define HTAB_SIZE(spapr)        (1ULL << ((spapr)->htab_shift))
 
-static XICSState *try_create_xics(const char *type, int nr_servers,
-                                  int nr_irqs, Error **errp)
+static int try_create_xics(sPAPRMachineState *spapr, const char *type_ics,
+                           const char *type_icp, int nr_servers,
+                           int nr_irqs, Error **errp)
 {
-    Error *err = NULL;
-    DeviceState *dev;
+    XICSFabric *xi = XICS_FABRIC(spapr);
+    Error *err = NULL, *local_err = NULL;
+    ICSState *ics = NULL;
+    int i;
 
-    dev = qdev_create(NULL, type);
-    qdev_prop_set_uint32(dev, "nr_servers", nr_servers);
-    qdev_prop_set_uint32(dev, "nr_irqs", nr_irqs);
-    object_property_set_bool(OBJECT(dev), true, "realized", &err);
+    ics = ICS_SIMPLE(object_new(type_ics));
+    qdev_set_parent_bus(DEVICE(ics), sysbus_get_default());
+    object_property_add_child(OBJECT(spapr), "ics", OBJECT(ics), NULL);
+    object_property_set_int(OBJECT(ics), nr_irqs, "nr-irqs", &err);
+    object_property_add_const_link(OBJECT(ics), "xics", OBJECT(xi), NULL);
+    object_property_set_bool(OBJECT(ics), true, "realized", &local_err);
+    error_propagate(&err, local_err);
     if (err) {
-        error_propagate(errp, err);
-        object_unparent(OBJECT(dev));
-        return NULL;
+        goto error;
+    }
+
+    spapr->icps = g_malloc0(nr_servers * sizeof(ICPState));
+    spapr->nr_servers = nr_servers;
+
+    for (i = 0; i < nr_servers; i++) {
+        ICPState *icp = &spapr->icps[i];
+
+        object_initialize(icp, sizeof(*icp), type_icp);
+        qdev_set_parent_bus(DEVICE(icp), sysbus_get_default());
+        object_property_add_child(OBJECT(spapr), "icp[*]", OBJECT(icp), NULL);
+        object_property_add_const_link(OBJECT(icp), "xics", OBJECT(xi), NULL);
+        object_property_set_bool(OBJECT(icp), true, "realized", &err);
+        if (err) {
+            goto error;
+        }
+        object_unref(OBJECT(icp));
+    }
+
+    spapr->ics = ics;
+    return 0;
+
+error:
+    error_propagate(errp, err);
+    if (ics) {
+        object_unparent(OBJECT(ics));
     }
-    return XICS_COMMON(dev);
+    return -1;
 }
 
-static XICSState *xics_system_init(MachineState *machine,
-                                   int nr_servers, int nr_irqs, Error **errp)
+static int xics_system_init(MachineState *machine,
+                            int nr_servers, int nr_irqs, Error **errp)
 {
-    XICSState *xics = NULL;
+    int rc = -1;
 
     if (kvm_enabled()) {
         Error *err = NULL;
 
-        if (machine_kernel_irqchip_allowed(machine)) {
-            xics = try_create_xics(TYPE_XICS_SPAPR_KVM, nr_servers, nr_irqs,
-                                   &err);
+        if (machine_kernel_irqchip_allowed(machine) &&
+            !xics_kvm_init(SPAPR_MACHINE(machine), errp)) {
+            rc = try_create_xics(SPAPR_MACHINE(machine), TYPE_ICS_KVM,
+                                 TYPE_KVM_ICP, nr_servers, nr_irqs, &err);
         }
-        if (machine_kernel_irqchip_required(machine) && !xics) {
+        if (machine_kernel_irqchip_required(machine) && rc < 0) {
             error_reportf_err(err,
                               "kernel_irqchip requested but unavailable: ");
         } else {
@@ -133,11 +165,13 @@ static XICSState *xics_system_init(MachineState *machine,
         }
     }
 
-    if (!xics) {
-        xics = try_create_xics(TYPE_XICS_SPAPR, nr_servers, nr_irqs, errp);
+    if (rc < 0) {
+        xics_spapr_init(SPAPR_MACHINE(machine), errp);
+        rc = try_create_xics(SPAPR_MACHINE(machine), TYPE_ICS_SIMPLE,
+                               TYPE_ICP, nr_servers, nr_irqs, errp);
     }
 
-    return xics;
+    return rc;
 }
 
 static int spapr_fixup_cpu_smt_dt(void *fdt, int offset, PowerPCCPU *cpu,
@@ -924,7 +958,7 @@ static void *spapr_build_fdt(sPAPRMachineState *spapr,
     _FDT(fdt_setprop_cell(fdt, 0, "#size-cells", 2));
 
     /* /interrupt controller */
-    spapr_dt_xics(spapr->xics, fdt, PHANDLE_XICP);
+    spapr_dt_xics(spapr->nr_servers, fdt, PHANDLE_XICP);
 
     ret = spapr_populate_memory(spapr, fdt);
     if (ret < 0) {
@@ -958,7 +992,7 @@ static void *spapr_build_fdt(sPAPRMachineState *spapr,
         _FDT(spapr_drc_populate_dt(fdt, 0, NULL, SPAPR_DR_CONNECTOR_TYPE_LMB));
     }
 
-    if (mc->query_hotpluggable_cpus) {
+    if (mc->has_hotpluggable_cpus) {
         int offset = fdt_path_offset(fdt, "/cpus");
         ret = spapr_drc_populate_dt(fdt, offset, NULL,
                                     SPAPR_DR_CONNECTOR_TYPE_CPU);
@@ -1010,6 +1044,9 @@ static void emulate_spapr_hypercall(PPCVirtualHypervisor *vhyp,
 {
     CPUPPCState *env = &cpu->env;
 
+    /* The TCG path should also be holding the BQL at this point */
+    g_assert(qemu_mutex_iothread_locked());
+
     if (msr_pr) {
         hcall_dprintf("Hypercall made with MSR[PR]=1\n");
         env->gpr[3] = H_PRIVILEGE;
@@ -1050,6 +1087,62 @@ static void close_htab_fd(sPAPRMachineState *spapr)
     spapr->htab_fd = -1;
 }
 
+static hwaddr spapr_hpt_mask(PPCVirtualHypervisor *vhyp)
+{
+    sPAPRMachineState *spapr = SPAPR_MACHINE(vhyp);
+
+    return HTAB_SIZE(spapr) / HASH_PTEG_SIZE_64 - 1;
+}
+
+static const ppc_hash_pte64_t *spapr_map_hptes(PPCVirtualHypervisor *vhyp,
+                                                hwaddr ptex, int n)
+{
+    sPAPRMachineState *spapr = SPAPR_MACHINE(vhyp);
+    hwaddr pte_offset = ptex * HASH_PTE_SIZE_64;
+
+    if (!spapr->htab) {
+        /*
+         * HTAB is controlled by KVM. Fetch into temporary buffer
+         */
+        ppc_hash_pte64_t *hptes = g_malloc(n * HASH_PTE_SIZE_64);
+        kvmppc_read_hptes(hptes, ptex, n);
+        return hptes;
+    }
+
+    /*
+     * HTAB is controlled by QEMU. Just point to the internally
+     * accessible PTEG.
+     */
+    return (const ppc_hash_pte64_t *)(spapr->htab + pte_offset);
+}
+
+static void spapr_unmap_hptes(PPCVirtualHypervisor *vhyp,
+                              const ppc_hash_pte64_t *hptes,
+                              hwaddr ptex, int n)
+{
+    sPAPRMachineState *spapr = SPAPR_MACHINE(vhyp);
+
+    if (!spapr->htab) {
+        g_free((void *)hptes);
+    }
+
+    /* Nothing to do for qemu managed HPT */
+}
+
+static void spapr_store_hpte(PPCVirtualHypervisor *vhyp, hwaddr ptex,
+                             uint64_t pte0, uint64_t pte1)
+{
+    sPAPRMachineState *spapr = SPAPR_MACHINE(vhyp);
+    hwaddr offset = ptex * HASH_PTE_SIZE_64;
+
+    if (!spapr->htab) {
+        kvmppc_write_hpte(ptex, pte0, pte1);
+    } else {
+        stq_p(spapr->htab + offset, pte0);
+        stq_p(spapr->htab + offset + HASH_PTE_SIZE_64 / 2, pte1);
+    }
+}
+
 static int spapr_hpt_shift_for_ramsize(uint64_t ramsize)
 {
     int shift;
@@ -1249,6 +1342,13 @@ static int spapr_post_load(void *opaque, int version_id)
     sPAPRMachineState *spapr = (sPAPRMachineState *)opaque;
     int err = 0;
 
+    if (!object_dynamic_cast(OBJECT(spapr->ics), TYPE_ICS_KVM)) {
+        int i;
+        for (i = 0; i < spapr->nr_servers; i++) {
+            icp_resend(&spapr->icps[i]);
+        }
+    }
+
     /* In earlier versions, there was no separate qdev for the PAPR
      * RTC, so the RTC offset was stored directly in sPAPREnvironment.
      * So when migrating from those versions, poke the incoming offset
@@ -1751,13 +1851,28 @@ static void spapr_validate_node_memory(MachineState *machine, Error **errp)
     }
 }
 
+/* find cpu slot in machine->possible_cpus by core_id */
+static CPUArchId *spapr_find_cpu_slot(MachineState *ms, uint32_t id, int *idx)
+{
+    int index = id / smp_threads;
+
+    if (index >= ms->possible_cpus->len) {
+        return NULL;
+    }
+    if (idx) {
+        *idx = index;
+    }
+    return &ms->possible_cpus->cpus[index];
+}
+
 static void spapr_init_cpus(sPAPRMachineState *spapr)
 {
     MachineState *machine = MACHINE(spapr);
     MachineClass *mc = MACHINE_GET_CLASS(machine);
     char *type = spapr_get_cpu_core_type(machine->cpu_model);
     int smt = kvmppc_smt_threads();
-    int spapr_max_cores, spapr_cores;
+    const CPUArchIdList *possible_cpus;
+    int boot_cores_nr = smp_cpus / smp_threads;
     int i;
 
     if (!type) {
@@ -1765,7 +1880,8 @@ static void spapr_init_cpus(sPAPRMachineState *spapr)
         exit(1);
     }
 
-    if (mc->query_hotpluggable_cpus) {
+    possible_cpus = mc->possible_cpu_arch_ids(machine);
+    if (mc->has_hotpluggable_cpus) {
         if (smp_cpus % smp_threads) {
             error_report("smp_cpus (%u) must be multiple of threads (%u)",
                          smp_cpus, smp_threads);
@@ -1776,24 +1892,18 @@ static void spapr_init_cpus(sPAPRMachineState *spapr)
                          max_cpus, smp_threads);
             exit(1);
         }
-
-        spapr_max_cores = max_cpus / smp_threads;
-        spapr_cores = smp_cpus / smp_threads;
     } else {
         if (max_cpus != smp_cpus) {
             error_report("This machine version does not support CPU hotplug");
             exit(1);
         }
-
-        spapr_max_cores = QEMU_ALIGN_UP(smp_cpus, smp_threads) / smp_threads;
-        spapr_cores = spapr_max_cores;
+        boot_cores_nr = possible_cpus->len;
     }
 
-    spapr->cores = g_new0(Object *, spapr_max_cores);
-    for (i = 0; i < spapr_max_cores; i++) {
+    for (i = 0; i < possible_cpus->len; i++) {
         int core_id = i * smp_threads;
 
-        if (mc->query_hotpluggable_cpus) {
+        if (mc->has_hotpluggable_cpus) {
             sPAPRDRConnector *drc =
                 spapr_dr_connector_new(OBJECT(spapr),
                                        SPAPR_DR_CONNECTOR_TYPE_CPU,
@@ -1802,7 +1912,7 @@ static void spapr_init_cpus(sPAPRMachineState *spapr)
             qemu_register_reset(spapr_drc_reset, drc);
         }
 
-        if (i < spapr_cores) {
+        if (i < boot_cores_nr) {
             Object *core  = object_new(type);
             int nr_threads = smp_threads;
 
@@ -1889,9 +1999,8 @@ static void ppc_spapr_init(MachineState *machine)
     load_limit = MIN(spapr->rma_size, RTAS_MAX_ADDR) - FW_OVERHEAD;
 
     /* Set up Interrupt Controller before we create the VCPUs */
-    spapr->xics = xics_system_init(machine,
-                                   DIV_ROUND_UP(max_cpus * smt, smp_threads),
-                                   XICS_IRQS_SPAPR, &error_fatal);
+    xics_system_init(machine, DIV_ROUND_UP(max_cpus * smt, smp_threads),
+                     XICS_IRQS_SPAPR, &error_fatal);
 
     /* Set up containers for ibm,client-set-architecture negotiated options */
     spapr->ov5 = spapr_ovec_new();
@@ -2357,6 +2466,7 @@ static void spapr_memory_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
     uint64_t align = memory_region_get_alignment(mr);
     uint64_t size = memory_region_size(mr);
     uint64_t addr;
+    char *mem_dev;
 
     if (size % SPAPR_MEMORY_BLOCK_SIZE) {
         error_setg(&local_err, "Hotplugged memory size must be a multiple of "
@@ -2364,6 +2474,13 @@ static void spapr_memory_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
         goto out;
     }
 
+    mem_dev = object_property_get_str(OBJECT(dimm), PC_DIMM_MEMDEV_PROP, NULL);
+    if (mem_dev && !kvmppc_is_mem_backend_page_size_ok(mem_dev)) {
+        error_setg(&local_err, "Memory backend has bad page size. "
+                   "Use 'memory-backend-file' with correct mem-path.");
+        goto out;
+    }
+
     pc_dimm_memory_plug(dev, &ms->hotplug_memory, mr, align, &local_err);
     if (local_err) {
         goto out;
@@ -2488,6 +2605,165 @@ void *spapr_populate_hotplug_cpu_dt(CPUState *cs, int *fdt_offset,
     return fdt;
 }
 
+static void spapr_core_unplug(HotplugHandler *hotplug_dev, DeviceState *dev,
+                              Error **errp)
+{
+    MachineState *ms = MACHINE(qdev_get_machine());
+    CPUCore *cc = CPU_CORE(dev);
+    CPUArchId *core_slot = spapr_find_cpu_slot(ms, cc->core_id, NULL);
+
+    core_slot->cpu = NULL;
+    object_unparent(OBJECT(dev));
+}
+
+static void spapr_core_release(DeviceState *dev, void *opaque)
+{
+    HotplugHandler *hotplug_ctrl;
+
+    hotplug_ctrl = qdev_get_hotplug_handler(dev);
+    hotplug_handler_unplug(hotplug_ctrl, dev, &error_abort);
+}
+
+static
+void spapr_core_unplug_request(HotplugHandler *hotplug_dev, DeviceState *dev,
+                               Error **errp)
+{
+    int index;
+    sPAPRDRConnector *drc;
+    sPAPRDRConnectorClass *drck;
+    Error *local_err = NULL;
+    CPUCore *cc = CPU_CORE(dev);
+    int smt = kvmppc_smt_threads();
+
+    if (!spapr_find_cpu_slot(MACHINE(hotplug_dev), cc->core_id, &index)) {
+        error_setg(errp, "Unable to find CPU core with core-id: %d",
+                   cc->core_id);
+        return;
+    }
+    if (index == 0) {
+        error_setg(errp, "Boot CPU core may not be unplugged");
+        return;
+    }
+
+    drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_CPU, index * smt);
+    g_assert(drc);
+
+    drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
+    drck->detach(drc, dev, spapr_core_release, NULL, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
+
+    spapr_hotplug_req_remove_by_index(drc);
+}
+
+static void spapr_core_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
+                            Error **errp)
+{
+    sPAPRMachineState *spapr = SPAPR_MACHINE(OBJECT(hotplug_dev));
+    MachineClass *mc = MACHINE_GET_CLASS(spapr);
+    sPAPRCPUCore *core = SPAPR_CPU_CORE(OBJECT(dev));
+    CPUCore *cc = CPU_CORE(dev);
+    CPUState *cs = CPU(core->threads);
+    sPAPRDRConnector *drc;
+    Error *local_err = NULL;
+    void *fdt = NULL;
+    int fdt_offset = 0;
+    int smt = kvmppc_smt_threads();
+    CPUArchId *core_slot;
+    int index;
+
+    core_slot = spapr_find_cpu_slot(MACHINE(hotplug_dev), cc->core_id, &index);
+    if (!core_slot) {
+        error_setg(errp, "Unable to find CPU core with core-id: %d",
+                   cc->core_id);
+        return;
+    }
+    drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_CPU, index * smt);
+
+    g_assert(drc || !mc->has_hotpluggable_cpus);
+
+    /*
+     * Setup CPU DT entries only for hotplugged CPUs. For boot time or
+     * coldplugged CPUs DT entries are setup in spapr_build_fdt().
+     */
+    if (dev->hotplugged) {
+        fdt = spapr_populate_hotplug_cpu_dt(cs, &fdt_offset, spapr);
+    }
+
+    if (drc) {
+        sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
+        drck->attach(drc, dev, fdt, fdt_offset, !dev->hotplugged, &local_err);
+        if (local_err) {
+            g_free(fdt);
+            error_propagate(errp, local_err);
+            return;
+        }
+    }
+
+    if (dev->hotplugged) {
+        /*
+         * Send hotplug notification interrupt to the guest only in case
+         * of hotplugged CPUs.
+         */
+        spapr_hotplug_req_add_by_index(drc);
+    } else {
+        /*
+         * Set the right DRC states for cold plugged CPU.
+         */
+        if (drc) {
+            sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
+            drck->set_allocation_state(drc, SPAPR_DR_ALLOCATION_STATE_USABLE);
+            drck->set_isolation_state(drc, SPAPR_DR_ISOLATION_STATE_UNISOLATED);
+        }
+    }
+    core_slot->cpu = OBJECT(dev);
+}
+
+static void spapr_core_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
+                                Error **errp)
+{
+    MachineState *machine = MACHINE(OBJECT(hotplug_dev));
+    MachineClass *mc = MACHINE_GET_CLASS(hotplug_dev);
+    Error *local_err = NULL;
+    CPUCore *cc = CPU_CORE(dev);
+    char *base_core_type = spapr_get_cpu_core_type(machine->cpu_model);
+    const char *type = object_get_typename(OBJECT(dev));
+    CPUArchId *core_slot;
+    int index;
+
+    if (dev->hotplugged && !mc->has_hotpluggable_cpus) {
+        error_setg(&local_err, "CPU hotplug not supported for this machine");
+        goto out;
+    }
+
+    if (strcmp(base_core_type, type)) {
+        error_setg(&local_err, "CPU core type should be %s", base_core_type);
+        goto out;
+    }
+
+    if (cc->core_id % smp_threads) {
+        error_setg(&local_err, "invalid core id %d", cc->core_id);
+        goto out;
+    }
+
+    core_slot = spapr_find_cpu_slot(MACHINE(hotplug_dev), cc->core_id, &index);
+    if (!core_slot) {
+        error_setg(&local_err, "core id %d out of range", cc->core_id);
+        goto out;
+    }
+
+    if (core_slot->cpu) {
+        error_setg(&local_err, "core %d already populated", cc->core_id);
+        goto out;
+    }
+
+out:
+    g_free(base_core_type);
+    error_propagate(errp, local_err);
+}
+
 static void spapr_machine_device_plug(HotplugHandler *hotplug_dev,
                                       DeviceState *dev, Error **errp)
 {
@@ -2550,7 +2826,7 @@ static void spapr_machine_device_unplug(HotplugHandler *hotplug_dev,
             error_setg(errp, "Memory hot unplug not supported for this guest");
         }
     } else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_CPU_CORE)) {
-        if (!mc->query_hotpluggable_cpus) {
+        if (!mc->has_hotpluggable_cpus) {
             error_setg(errp, "CPU hot unplug not supported on this machine");
             return;
         }
@@ -2577,11 +2853,11 @@ static void spapr_machine_device_unplug_request(HotplugHandler *hotplug_dev,
             error_setg(errp, "Memory hot unplug not supported for this guest");
         }
     } else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_CPU_CORE)) {
-        if (!mc->query_hotpluggable_cpus) {
+        if (!mc->has_hotpluggable_cpus) {
             error_setg(errp, "CPU hot unplug not supported on this machine");
             return;
         }
-        spapr_core_unplug(hotplug_dev, dev, errp);
+        spapr_core_unplug_request(hotplug_dev, dev, errp);
     }
 }
 
@@ -2610,35 +2886,34 @@ static unsigned spapr_cpu_index_to_socket_id(unsigned cpu_index)
     return cpu_index / smp_threads / smp_cores;
 }
 
-static HotpluggableCPUList *spapr_query_hotpluggable_cpus(MachineState *machine)
+static const CPUArchIdList *spapr_possible_cpu_arch_ids(MachineState *machine)
 {
     int i;
-    HotpluggableCPUList *head = NULL;
-    sPAPRMachineState *spapr = SPAPR_MACHINE(machine);
     int spapr_max_cores = max_cpus / smp_threads;
+    MachineClass *mc = MACHINE_GET_CLASS(machine);
 
-    for (i = 0; i < spapr_max_cores; i++) {
-        HotpluggableCPUList *list_item = g_new0(typeof(*list_item), 1);
-        HotpluggableCPU *cpu_item = g_new0(typeof(*cpu_item), 1);
-        CpuInstanceProperties *cpu_props = g_new0(typeof(*cpu_props), 1);
+    if (!mc->has_hotpluggable_cpus) {
+        spapr_max_cores = QEMU_ALIGN_UP(smp_cpus, smp_threads) / smp_threads;
+    }
+    if (machine->possible_cpus) {
+        assert(machine->possible_cpus->len == spapr_max_cores);
+        return machine->possible_cpus;
+    }
 
-        cpu_item->type = spapr_get_cpu_core_type(machine->cpu_model);
-        cpu_item->vcpus_count = smp_threads;
-        cpu_props->has_core_id = true;
-        cpu_props->core_id = i * smp_threads;
+    machine->possible_cpus = g_malloc0(sizeof(CPUArchIdList) +
+                             sizeof(CPUArchId) * spapr_max_cores);
+    machine->possible_cpus->len = spapr_max_cores;
+    for (i = 0; i < machine->possible_cpus->len; i++) {
+        int core_id = i * smp_threads;
+
+        machine->possible_cpus->cpus[i].vcpus_count = smp_threads;
+        machine->possible_cpus->cpus[i].arch_id = core_id;
+        machine->possible_cpus->cpus[i].props.has_core_id = true;
+        machine->possible_cpus->cpus[i].props.core_id = core_id;
         /* TODO: add 'has_node/node' here to describe
            to which node core belongs */
-
-        cpu_item->props = cpu_props;
-        if (spapr->cores[i]) {
-            cpu_item->has_qom_path = true;
-            cpu_item->qom_path = object_get_canonical_path(spapr->cores[i]);
-        }
-        list_item->value = cpu_item;
-        list_item->next = head;
-        head = list_item;
     }
-    return head;
+    return machine->possible_cpus;
 }
 
 static void spapr_phb_placement(sPAPRMachineState *spapr, uint32_t index,
@@ -2693,6 +2968,40 @@ static void spapr_phb_placement(sPAPRMachineState *spapr, uint32_t index,
     *mmio64 = SPAPR_PCI_BASE + (index + 1) * SPAPR_PCI_MEM64_WIN_SIZE;
 }
 
+static ICSState *spapr_ics_get(XICSFabric *dev, int irq)
+{
+    sPAPRMachineState *spapr = SPAPR_MACHINE(dev);
+
+    return ics_valid_irq(spapr->ics, irq) ? spapr->ics : NULL;
+}
+
+static void spapr_ics_resend(XICSFabric *dev)
+{
+    sPAPRMachineState *spapr = SPAPR_MACHINE(dev);
+
+    ics_resend(spapr->ics);
+}
+
+static ICPState *spapr_icp_get(XICSFabric *xi, int server)
+{
+    sPAPRMachineState *spapr = SPAPR_MACHINE(xi);
+
+    return (server < spapr->nr_servers) ? &spapr->icps[server] : NULL;
+}
+
+static void spapr_pic_print_info(InterruptStatsProvider *obj,
+                                 Monitor *mon)
+{
+    sPAPRMachineState *spapr = SPAPR_MACHINE(obj);
+    int i;
+
+    for (i = 0; i < spapr->nr_servers; i++) {
+        icp_pic_print_info(&spapr->icps[i], mon);
+    }
+
+    ics_pic_print_info(spapr->ics, mon);
+}
+
 static void spapr_machine_class_init(ObjectClass *oc, void *data)
 {
     MachineClass *mc = MACHINE_CLASS(oc);
@@ -2701,6 +3010,8 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
     NMIClass *nc = NMI_CLASS(oc);
     HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc);
     PPCVirtualHypervisorClass *vhc = PPC_VIRTUAL_HYPERVISOR_CLASS(oc);
+    XICSFabricClass *xic = XICS_FABRIC_CLASS(oc);
+    InterruptStatsProviderClass *ispc = INTERRUPT_STATS_PROVIDER_CLASS(oc);
 
     mc->desc = "pSeries Logical Partition (PAPR compliant)";
 
@@ -2712,7 +3023,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
     mc->init = ppc_spapr_init;
     mc->reset = ppc_spapr_reset;
     mc->block_default_type = IF_SCSI;
-    mc->max_cpus = 255;
+    mc->max_cpus = 1024;
     mc->no_parallel = 1;
     mc->default_boot_order = "";
     mc->default_ram_size = 512 * M_BYTE;
@@ -2724,15 +3035,24 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
     hc->plug = spapr_machine_device_plug;
     hc->unplug = spapr_machine_device_unplug;
     mc->cpu_index_to_socket_id = spapr_cpu_index_to_socket_id;
+    mc->possible_cpu_arch_ids = spapr_possible_cpu_arch_ids;
     hc->unplug_request = spapr_machine_device_unplug_request;
 
     smc->dr_lmb_enabled = true;
     smc->tcg_default_cpu = "POWER8";
-    mc->query_hotpluggable_cpus = spapr_query_hotpluggable_cpus;
+    mc->has_hotpluggable_cpus = true;
     fwc->get_dev_path = spapr_get_fw_dev_path;
     nc->nmi_monitor_handler = spapr_nmi;
     smc->phb_placement = spapr_phb_placement;
     vhc->hypercall = emulate_spapr_hypercall;
+    vhc->hpt_mask = spapr_hpt_mask;
+    vhc->map_hptes = spapr_map_hptes;
+    vhc->unmap_hptes = spapr_unmap_hptes;
+    vhc->store_hpte = spapr_store_hpte;
+    xic->ics_get = spapr_ics_get;
+    xic->ics_resend = spapr_ics_resend;
+    xic->icp_get = spapr_icp_get;
+    ispc->print_info = spapr_pic_print_info;
 }
 
 static const TypeInfo spapr_machine_info = {
@@ -2749,6 +3069,8 @@ static const TypeInfo spapr_machine_info = {
         { TYPE_NMI },
         { TYPE_HOTPLUG_HANDLER },
         { TYPE_PPC_VIRTUAL_HYPERVISOR },
+        { TYPE_XICS_FABRIC },
+        { TYPE_INTERRUPT_STATS_PROVIDER },
         { }
     },
 };
@@ -2928,7 +3250,7 @@ static void spapr_machine_2_6_instance_options(MachineState *machine)
 static void spapr_machine_2_6_class_options(MachineClass *mc)
 {
     spapr_machine_2_7_class_options(mc);
-    mc->query_hotpluggable_cpus = NULL;
+    mc->has_hotpluggable_cpus = false;
     SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_2_6);
 }
 
diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c
index 9dddaeb3fa..90d682fe33 100644
--- a/hw/ppc/spapr_cpu_core.c
+++ b/hw/ppc/spapr_cpu_core.c
@@ -13,10 +13,12 @@
 #include "hw/boards.h"
 #include "qapi/error.h"
 #include "sysemu/cpus.h"
+#include "sysemu/kvm.h"
 #include "target/ppc/kvm_ppc.h"
 #include "hw/ppc/ppc.h"
 #include "target/ppc/mmu-hash64.h"
 #include "sysemu/numa.h"
+#include "qemu/error-report.h"
 
 static void spapr_cpu_reset(void *opaque)
 {
@@ -34,15 +36,26 @@ static void spapr_cpu_reset(void *opaque)
 
     env->spr[SPR_HIOR] = 0;
 
-    ppc_hash64_set_external_hpt(cpu, spapr->htab, spapr->htab_shift,
-                                &error_fatal);
+    /*
+     * This is a hack for the benefit of KVM PR - it abuses the SDR1
+     * slot in kvm_sregs to communicate the userspace address of the
+     * HPT
+     */
+    if (kvm_enabled()) {
+        env->spr[SPR_SDR1] = (target_ulong)(uintptr_t)spapr->htab
+            | (spapr->htab_shift - 18);
+        if (kvmppc_put_books_sregs(cpu) < 0) {
+            error_report("Unable to update SDR1 in KVM");
+            exit(1);
+        }
+    }
 }
 
 static void spapr_cpu_destroy(PowerPCCPU *cpu)
 {
     sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
 
-    xics_cpu_destroy(spapr->xics, cpu);
+    xics_cpu_destroy(XICS_FABRIC(spapr), cpu);
     qemu_unregister_reset(spapr_cpu_reset, cpu);
 }
 
@@ -57,8 +70,7 @@ static void spapr_cpu_init(sPAPRMachineState *spapr, PowerPCCPU *cpu,
     cpu_ppc_tb_init(env, SPAPR_TIMEBASE_FREQ);
 
     /* Enable PAPR mode in TCG or KVM */
-    cpu_ppc_set_vhyp(cpu, PPC_VIRTUAL_HYPERVISOR(spapr));
-    cpu_ppc_set_papr(cpu);
+    cpu_ppc_set_papr(cpu, PPC_VIRTUAL_HYPERVISOR(spapr));
 
     if (cpu->max_compat) {
         Error *local_err = NULL;
@@ -76,7 +88,7 @@ static void spapr_cpu_init(sPAPRMachineState *spapr, PowerPCCPU *cpu,
             cs->numa_node = i;
     }
 
-    xics_cpu_setup(spapr->xics, cpu);
+    xics_cpu_setup(XICS_FABRIC(spapr), cpu);
 
     qemu_register_reset(spapr_cpu_reset, cpu);
     spapr_cpu_reset(cpu);
@@ -109,13 +121,12 @@ char *spapr_get_cpu_core_type(const char *model)
     return core_type;
 }
 
-static void spapr_core_release(DeviceState *dev, void *opaque)
+static void spapr_cpu_core_unrealizefn(DeviceState *dev, Error **errp)
 {
     sPAPRCPUCore *sc = SPAPR_CPU_CORE(OBJECT(dev));
     sPAPRCPUCoreClass *scc = SPAPR_CPU_CORE_GET_CLASS(OBJECT(dev));
     const char *typename = object_class_get_name(scc->cpu_class);
     size_t size = object_type_get_instance_size(typename);
-    sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
     CPUCore *cc = CPU_CORE(dev);
     int i;
 
@@ -129,140 +140,7 @@ static void spapr_core_release(DeviceState *dev, void *opaque)
         cpu_remove_sync(cs);
         object_unparent(obj);
     }
-
-    spapr->cores[cc->core_id / smp_threads] = NULL;
-
     g_free(sc->threads);
-    object_unparent(OBJECT(dev));
-}
-
-void spapr_core_unplug(HotplugHandler *hotplug_dev, DeviceState *dev,
-                       Error **errp)
-{
-    CPUCore *cc = CPU_CORE(dev);
-    int smt = kvmppc_smt_threads();
-    int index = cc->core_id / smp_threads;
-    sPAPRDRConnector *drc =
-        spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_CPU, index * smt);
-    sPAPRDRConnectorClass *drck;
-    Error *local_err = NULL;
-
-    if (index == 0) {
-        error_setg(errp, "Boot CPU core may not be unplugged");
-        return;
-    }
-
-    g_assert(drc);
-
-    drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
-    drck->detach(drc, dev, spapr_core_release, NULL, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        return;
-    }
-
-    spapr_hotplug_req_remove_by_index(drc);
-}
-
-void spapr_core_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
-                     Error **errp)
-{
-    sPAPRMachineState *spapr = SPAPR_MACHINE(OBJECT(hotplug_dev));
-    MachineClass *mc = MACHINE_GET_CLASS(spapr);
-    sPAPRCPUCore *core = SPAPR_CPU_CORE(OBJECT(dev));
-    CPUCore *cc = CPU_CORE(dev);
-    CPUState *cs = CPU(core->threads);
-    sPAPRDRConnector *drc;
-    Error *local_err = NULL;
-    void *fdt = NULL;
-    int fdt_offset = 0;
-    int index = cc->core_id / smp_threads;
-    int smt = kvmppc_smt_threads();
-
-    drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_CPU, index * smt);
-    spapr->cores[index] = OBJECT(dev);
-
-    g_assert(drc || !mc->query_hotpluggable_cpus);
-
-    /*
-     * Setup CPU DT entries only for hotplugged CPUs. For boot time or
-     * coldplugged CPUs DT entries are setup in spapr_build_fdt().
-     */
-    if (dev->hotplugged) {
-        fdt = spapr_populate_hotplug_cpu_dt(cs, &fdt_offset, spapr);
-    }
-
-    if (drc) {
-        sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
-        drck->attach(drc, dev, fdt, fdt_offset, !dev->hotplugged, &local_err);
-        if (local_err) {
-            g_free(fdt);
-            spapr->cores[index] = NULL;
-            error_propagate(errp, local_err);
-            return;
-        }
-    }
-
-    if (dev->hotplugged) {
-        /*
-         * Send hotplug notification interrupt to the guest only in case
-         * of hotplugged CPUs.
-         */
-        spapr_hotplug_req_add_by_index(drc);
-    } else {
-        /*
-         * Set the right DRC states for cold plugged CPU.
-         */
-        if (drc) {
-            sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
-            drck->set_allocation_state(drc, SPAPR_DR_ALLOCATION_STATE_USABLE);
-            drck->set_isolation_state(drc, SPAPR_DR_ISOLATION_STATE_UNISOLATED);
-        }
-    }
-}
-
-void spapr_core_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
-                         Error **errp)
-{
-    MachineState *machine = MACHINE(OBJECT(hotplug_dev));
-    MachineClass *mc = MACHINE_GET_CLASS(hotplug_dev);
-    sPAPRMachineState *spapr = SPAPR_MACHINE(OBJECT(hotplug_dev));
-    int spapr_max_cores = max_cpus / smp_threads;
-    int index;
-    Error *local_err = NULL;
-    CPUCore *cc = CPU_CORE(dev);
-    char *base_core_type = spapr_get_cpu_core_type(machine->cpu_model);
-    const char *type = object_get_typename(OBJECT(dev));
-
-    if (dev->hotplugged && !mc->query_hotpluggable_cpus) {
-        error_setg(&local_err, "CPU hotplug not supported for this machine");
-        goto out;
-    }
-
-    if (strcmp(base_core_type, type)) {
-        error_setg(&local_err, "CPU core type should be %s", base_core_type);
-        goto out;
-    }
-
-    if (cc->core_id % smp_threads) {
-        error_setg(&local_err, "invalid core id %d", cc->core_id);
-        goto out;
-    }
-
-    index = cc->core_id / smp_threads;
-    if (index < 0 || index >= spapr_max_cores) {
-        error_setg(&local_err, "core id %d out of range", cc->core_id);
-        goto out;
-    }
-
-    if (spapr->cores[index]) {
-        error_setg(&local_err, "core %d already populated", cc->core_id);
-        goto out;
-    }
-
-out:
-    g_free(base_core_type);
-    error_propagate(errp, local_err);
 }
 
 static void spapr_cpu_core_realize_child(Object *child, Error **errp)
@@ -368,6 +246,7 @@ void spapr_cpu_core_class_init(ObjectClass *oc, void *data)
     sPAPRCPUCoreClass *scc = SPAPR_CPU_CORE_CLASS(oc);
 
     dc->realize = spapr_cpu_core_realize;
+    dc->unrealize = spapr_cpu_core_unrealizefn;
     scc->cpu_class = cpu_class_by_name(TYPE_POWERPC_CPU, data);
     g_assert(scc->cpu_class);
 }
diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
index f85a9c32a7..24a5758e62 100644
--- a/hw/ppc/spapr_events.c
+++ b/hw/ppc/spapr_events.c
@@ -481,7 +481,7 @@ static void spapr_powerdown_req(Notifier *n, void *opaque)
 
     rtas_event_log_queue(RTAS_LOG_TYPE_EPOW, new_epow, true);
 
-    qemu_irq_pulse(xics_get_qirq(spapr->xics,
+    qemu_irq_pulse(xics_get_qirq(XICS_FABRIC(spapr),
                                  rtas_event_log_to_irq(spapr,
                                                        RTAS_LOG_TYPE_EPOW)));
 }
@@ -574,7 +574,7 @@ static void spapr_hotplug_req_event(uint8_t hp_id, uint8_t hp_action,
 
     rtas_event_log_queue(RTAS_LOG_TYPE_HOTPLUG, new_hp, true);
 
-    qemu_irq_pulse(xics_get_qirq(spapr->xics,
+    qemu_irq_pulse(xics_get_qirq(XICS_FABRIC(spapr),
                                  rtas_event_log_to_irq(spapr,
                                                        RTAS_LOG_TYPE_HOTPLUG)));
 }
@@ -695,7 +695,7 @@ static void check_exception(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                 spapr_event_sources_get_source(spapr->event_sources, i);
 
             g_assert(source->enabled);
-            qemu_irq_pulse(xics_get_qirq(spapr->xics, source->irq));
+            qemu_irq_pulse(xics_get_qirq(XICS_FABRIC(spapr), source->irq));
         }
     }
 
@@ -752,7 +752,7 @@ void spapr_events_init(sPAPRMachineState *spapr)
     spapr->event_sources = spapr_event_sources_new();
 
     spapr_event_sources_register(spapr->event_sources, EVENT_CLASS_EPOW,
-                                 xics_spapr_alloc(spapr->xics, 0, false,
+                                 spapr_ics_alloc(spapr->ics, 0, false,
                                                   &error_fatal));
 
     /* NOTE: if machine supports modern/dedicated hotplug event source,
@@ -765,7 +765,7 @@ void spapr_events_init(sPAPRMachineState *spapr)
      */
     if (spapr->use_hotplug_event_source) {
         spapr_event_sources_register(spapr->event_sources, EVENT_CLASS_HOT_PLUG,
-                                     xics_spapr_alloc(spapr->xics, 0, false,
+                                     spapr_ics_alloc(spapr->ics, 0, false,
                                                       &error_fatal));
     }
 
diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
index 42d20e0b92..f05a90ed2c 100644
--- a/hw/ppc/spapr_hcall.c
+++ b/hw/ppc/spapr_hcall.c
@@ -47,12 +47,12 @@ static bool has_spr(PowerPCCPU *cpu, int spr)
     return cpu->env.spr_cb[spr].name != NULL;
 }
 
-static inline bool valid_pte_index(CPUPPCState *env, target_ulong pte_index)
+static inline bool valid_ptex(PowerPCCPU *cpu, target_ulong ptex)
 {
     /*
-     * hash value/pteg group index is normalized by htab_mask
+     * hash value/pteg group index is normalized by HPT mask
      */
-    if (((pte_index & ~7ULL) / HPTES_PER_GROUP) & ~env->htab_mask) {
+    if (((ptex & ~7ULL) / HPTES_PER_GROUP) & ~ppc_hash64_hpt_mask(cpu)) {
         return false;
     }
     return true;
@@ -77,15 +77,14 @@ static bool is_ram_address(sPAPRMachineState *spapr, hwaddr addr)
 static target_ulong h_enter(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                             target_ulong opcode, target_ulong *args)
 {
-    CPUPPCState *env = &cpu->env;
     target_ulong flags = args[0];
-    target_ulong pte_index = args[1];
+    target_ulong ptex = args[1];
     target_ulong pteh = args[2];
     target_ulong ptel = args[3];
     unsigned apshift;
     target_ulong raddr;
-    target_ulong index;
-    uint64_t token;
+    target_ulong slot;
+    const ppc_hash_pte64_t *hptes;
 
     apshift = ppc_hash64_hpte_page_shift_noslb(cpu, pteh, ptel);
     if (!apshift) {
@@ -116,36 +115,36 @@ static target_ulong h_enter(PowerPCCPU *cpu, sPAPRMachineState *spapr,
 
     pteh &= ~0x60ULL;
 
-    if (!valid_pte_index(env, pte_index)) {
+    if (!valid_ptex(cpu, ptex)) {
         return H_PARAMETER;
     }
 
-    index = 0;
+    slot = ptex & 7ULL;
+    ptex = ptex & ~7ULL;
+
     if (likely((flags & H_EXACT) == 0)) {
-        pte_index &= ~7ULL;
-        token = ppc_hash64_start_access(cpu, pte_index);
-        for (; index < 8; index++) {
-            if (!(ppc_hash64_load_hpte0(cpu, token, index) & HPTE64_V_VALID)) {
+        hptes = ppc_hash64_map_hptes(cpu, ptex, HPTES_PER_GROUP);
+        for (slot = 0; slot < 8; slot++) {
+            if (!(ppc_hash64_hpte0(cpu, hptes, slot) & HPTE64_V_VALID)) {
                 break;
             }
         }
-        ppc_hash64_stop_access(cpu, token);
-        if (index == 8) {
+        ppc_hash64_unmap_hptes(cpu, hptes, ptex, HPTES_PER_GROUP);
+        if (slot == 8) {
             return H_PTEG_FULL;
         }
     } else {
-        token = ppc_hash64_start_access(cpu, pte_index);
-        if (ppc_hash64_load_hpte0(cpu, token, 0) & HPTE64_V_VALID) {
-            ppc_hash64_stop_access(cpu, token);
+        hptes = ppc_hash64_map_hptes(cpu, ptex + slot, 1);
+        if (ppc_hash64_hpte0(cpu, hptes, 0) & HPTE64_V_VALID) {
+            ppc_hash64_unmap_hptes(cpu, hptes, ptex + slot, 1);
             return H_PTEG_FULL;
         }
-        ppc_hash64_stop_access(cpu, token);
+        ppc_hash64_unmap_hptes(cpu, hptes, ptex, 1);
     }
 
-    ppc_hash64_store_hpte(cpu, pte_index + index,
-                          pteh | HPTE64_V_HPTE_DIRTY, ptel);
+    ppc_hash64_store_hpte(cpu, ptex + slot, pteh | HPTE64_V_HPTE_DIRTY, ptel);
 
-    args[0] = pte_index + index;
+    args[0] = ptex + slot;
     return H_SUCCESS;
 }
 
@@ -161,18 +160,17 @@ static RemoveResult remove_hpte(PowerPCCPU *cpu, target_ulong ptex,
                                 target_ulong flags,
                                 target_ulong *vp, target_ulong *rp)
 {
-    CPUPPCState *env = &cpu->env;
-    uint64_t token;
+    const ppc_hash_pte64_t *hptes;
     target_ulong v, r;
 
-    if (!valid_pte_index(env, ptex)) {
+    if (!valid_ptex(cpu, ptex)) {
         return REMOVE_PARM;
     }
 
-    token = ppc_hash64_start_access(cpu, ptex);
-    v = ppc_hash64_load_hpte0(cpu, token, 0);
-    r = ppc_hash64_load_hpte1(cpu, token, 0);
-    ppc_hash64_stop_access(cpu, token);
+    hptes = ppc_hash64_map_hptes(cpu, ptex, 1);
+    v = ppc_hash64_hpte0(cpu, hptes, 0);
+    r = ppc_hash64_hpte1(cpu, hptes, 0);
+    ppc_hash64_unmap_hptes(cpu, hptes, ptex, 1);
 
     if ((v & HPTE64_V_VALID) == 0 ||
         ((flags & H_AVPN) && (v & ~0x7fULL) != avpn) ||
@@ -191,11 +189,11 @@ static target_ulong h_remove(PowerPCCPU *cpu, sPAPRMachineState *spapr,
 {
     CPUPPCState *env = &cpu->env;
     target_ulong flags = args[0];
-    target_ulong pte_index = args[1];
+    target_ulong ptex = args[1];
     target_ulong avpn = args[2];
     RemoveResult ret;
 
-    ret = remove_hpte(cpu, pte_index, avpn, flags,
+    ret = remove_hpte(cpu, ptex, avpn, flags,
                       &args[0], &args[1]);
 
     switch (ret) {
@@ -291,19 +289,19 @@ static target_ulong h_protect(PowerPCCPU *cpu, sPAPRMachineState *spapr,
 {
     CPUPPCState *env = &cpu->env;
     target_ulong flags = args[0];
-    target_ulong pte_index = args[1];
+    target_ulong ptex = args[1];
     target_ulong avpn = args[2];
-    uint64_t token;
+    const ppc_hash_pte64_t *hptes;
     target_ulong v, r;
 
-    if (!valid_pte_index(env, pte_index)) {
+    if (!valid_ptex(cpu, ptex)) {
         return H_PARAMETER;
     }
 
-    token = ppc_hash64_start_access(cpu, pte_index);
-    v = ppc_hash64_load_hpte0(cpu, token, 0);
-    r = ppc_hash64_load_hpte1(cpu, token, 0);
-    ppc_hash64_stop_access(cpu, token);
+    hptes = ppc_hash64_map_hptes(cpu, ptex, 1);
+    v = ppc_hash64_hpte0(cpu, hptes, 0);
+    r = ppc_hash64_hpte1(cpu, hptes, 0);
+    ppc_hash64_unmap_hptes(cpu, hptes, ptex, 1);
 
     if ((v & HPTE64_V_VALID) == 0 ||
         ((flags & H_AVPN) && (v & ~0x7fULL) != avpn)) {
@@ -315,36 +313,35 @@ static target_ulong h_protect(PowerPCCPU *cpu, sPAPRMachineState *spapr,
     r |= (flags << 55) & HPTE64_R_PP0;
     r |= (flags << 48) & HPTE64_R_KEY_HI;
     r |= flags & (HPTE64_R_PP | HPTE64_R_N | HPTE64_R_KEY_LO);
-    ppc_hash64_store_hpte(cpu, pte_index,
+    ppc_hash64_store_hpte(cpu, ptex,
                           (v & ~HPTE64_V_VALID) | HPTE64_V_HPTE_DIRTY, 0);
-    ppc_hash64_tlb_flush_hpte(cpu, pte_index, v, r);
+    ppc_hash64_tlb_flush_hpte(cpu, ptex, v, r);
     /* Flush the tlb */
     check_tlb_flush(env, true);
     /* Don't need a memory barrier, due to qemu's global lock */
-    ppc_hash64_store_hpte(cpu, pte_index, v | HPTE64_V_HPTE_DIRTY, r);
+    ppc_hash64_store_hpte(cpu, ptex, v | HPTE64_V_HPTE_DIRTY, r);
     return H_SUCCESS;
 }
 
 static target_ulong h_read(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                            target_ulong opcode, target_ulong *args)
 {
-    CPUPPCState *env = &cpu->env;
     target_ulong flags = args[0];
-    target_ulong pte_index = args[1];
+    target_ulong ptex = args[1];
     uint8_t *hpte;
     int i, ridx, n_entries = 1;
 
-    if (!valid_pte_index(env, pte_index)) {
+    if (!valid_ptex(cpu, ptex)) {
         return H_PARAMETER;
     }
 
     if (flags & H_READ_4) {
         /* Clear the two low order bits */
-        pte_index &= ~(3ULL);
+        ptex &= ~(3ULL);
         n_entries = 4;
     }
 
-    hpte = env->external_htab + (pte_index * HASH_PTE_SIZE_64);
+    hpte = spapr->htab + (ptex * HASH_PTE_SIZE_64);
 
     for (i = 0, ridx = 0; i < n_entries; i++) {
         args[ridx++] = ldq_p(hpte);
diff --git a/hw/ppc/spapr_ovec.c b/hw/ppc/spapr_ovec.c
index 3eb1d5976f..41df4c35ba 100644
--- a/hw/ppc/spapr_ovec.c
+++ b/hw/ppc/spapr_ovec.c
@@ -16,18 +16,9 @@
 #include "qemu/bitmap.h"
 #include "exec/address-spaces.h"
 #include "qemu/error-report.h"
+#include "trace.h"
 #include <libfdt.h>
 
-/* #define DEBUG_SPAPR_OVEC */
-
-#ifdef DEBUG_SPAPR_OVEC
-#define DPRINTFN(fmt, ...) \
-    do { fprintf(stderr, fmt "\n", ## __VA_ARGS__); } while (0)
-#else
-#define DPRINTFN(fmt, ...) \
-    do { } while (0)
-#endif
-
 #define OV_MAXBYTES 256 /* not including length byte */
 #define OV_MAXBITS (OV_MAXBYTES * BITS_PER_BYTE)
 
@@ -210,8 +201,7 @@ sPAPROptionVector *spapr_ovec_parse_vector(target_ulong table_addr, int vector)
     for (i = 0; i < vector_len; i++) {
         uint8_t entry = ldub_phys(&address_space_memory, addr + i);
         if (entry) {
-            DPRINTFN("read guest vector %2d, byte %3d / %3d: 0x%.2x",
-                     vector, i + 1, vector_len, entry);
+            trace_spapr_ovec_parse_vector(vector, i + 1, vector_len, entry);
             guest_byte_to_bitmap(entry, ov->bitmap, i * BITS_PER_BYTE);
         }
     }
@@ -245,10 +235,9 @@ int spapr_ovec_populate_dt(void *fdt, int fdt_offset,
     for (i = 1; i < vec_len + 1; i++) {
         vec[i] = guest_byte_from_bitmap(ov->bitmap, (i - 1) * BITS_PER_BYTE);
         if (vec[i]) {
-            DPRINTFN("encoding guest vector byte %3d / %3d: 0x%.2x",
-                     i, vec_len, vec[i]);
+            trace_spapr_ovec_populate_dt(i, vec_len, vec[i]);
         }
     }
 
-    return fdt_setprop(fdt, fdt_offset, name, vec, vec_len);
+    return fdt_setprop(fdt, fdt_offset, name, vec, vec_len + 1);
 }
diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index fd6fc1d953..2a3499eaf8 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -43,6 +43,7 @@
 
 #include "hw/pci/pci_bridge.h"
 #include "hw/pci/pci_bus.h"
+#include "hw/pci/pci_ids.h"
 #include "hw/ppc/spapr_drc.h"
 #include "sysemu/device_tree.h"
 #include "sysemu/kvm.h"
@@ -325,7 +326,7 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPRMachineState *spapr,
             return;
         }
 
-        xics_spapr_free(spapr->xics, msi->first_irq, msi->num);
+        spapr_ics_free(spapr->ics, msi->first_irq, msi->num);
         if (msi_present(pdev)) {
             spapr_msi_setmsg(pdev, 0, false, 0, 0);
         }
@@ -363,7 +364,7 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPRMachineState *spapr,
     }
 
     /* Allocate MSIs */
-    irq = xics_spapr_alloc_block(spapr->xics, req_num, false,
+    irq = spapr_ics_alloc_block(spapr->ics, req_num, false,
                            ret_intr_type == RTAS_TYPE_MSI, &err);
     if (err) {
         error_reportf_err(err, "Can't allocate MSIs for device %x: ",
@@ -374,7 +375,7 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPRMachineState *spapr,
 
     /* Release previous MSIs */
     if (msi) {
-        xics_spapr_free(spapr->xics, msi->first_irq, msi->num);
+        spapr_ics_free(spapr->ics, msi->first_irq, msi->num);
         g_hash_table_remove(phb->msi, &config_addr);
     }
 
@@ -736,7 +737,7 @@ static void spapr_msi_write(void *opaque, hwaddr addr,
 
     trace_spapr_pci_msi_write(addr, data, irq);
 
-    qemu_irq_pulse(xics_get_qirq(spapr->xics, irq));
+    qemu_irq_pulse(xics_get_qirq(XICS_FABRIC(spapr), irq));
 }
 
 static const MemoryRegionOps spapr_msi_ops = {
@@ -946,6 +947,274 @@ static void populate_resource_props(PCIDevice *d, ResourceProps *rp)
     rp->assigned_len = assigned_idx * sizeof(ResourceFields);
 }
 
+typedef struct PCIClass PCIClass;
+typedef struct PCISubClass PCISubClass;
+typedef struct PCIIFace PCIIFace;
+
+struct PCIIFace {
+    int iface;
+    const char *name;
+};
+
+struct PCISubClass {
+    int subclass;
+    const char *name;
+    const PCIIFace *iface;
+};
+
+struct PCIClass {
+    const char *name;
+    const PCISubClass *subc;
+};
+
+static const PCISubClass undef_subclass[] = {
+    { PCI_CLASS_NOT_DEFINED_VGA, "display", NULL },
+    { 0xFF, NULL, NULL },
+};
+
+static const PCISubClass mass_subclass[] = {
+    { PCI_CLASS_STORAGE_SCSI, "scsi", NULL },
+    { PCI_CLASS_STORAGE_IDE, "ide", NULL },
+    { PCI_CLASS_STORAGE_FLOPPY, "fdc", NULL },
+    { PCI_CLASS_STORAGE_IPI, "ipi", NULL },
+    { PCI_CLASS_STORAGE_RAID, "raid", NULL },
+    { PCI_CLASS_STORAGE_ATA, "ata", NULL },
+    { PCI_CLASS_STORAGE_SATA, "sata", NULL },
+    { PCI_CLASS_STORAGE_SAS, "sas", NULL },
+    { 0xFF, NULL, NULL },
+};
+
+static const PCISubClass net_subclass[] = {
+    { PCI_CLASS_NETWORK_ETHERNET, "ethernet", NULL },
+    { PCI_CLASS_NETWORK_TOKEN_RING, "token-ring", NULL },
+    { PCI_CLASS_NETWORK_FDDI, "fddi", NULL },
+    { PCI_CLASS_NETWORK_ATM, "atm", NULL },
+    { PCI_CLASS_NETWORK_ISDN, "isdn", NULL },
+    { PCI_CLASS_NETWORK_WORLDFIP, "worldfip", NULL },
+    { PCI_CLASS_NETWORK_PICMG214, "picmg", NULL },
+    { 0xFF, NULL, NULL },
+};
+
+static const PCISubClass displ_subclass[] = {
+    { PCI_CLASS_DISPLAY_VGA, "vga", NULL },
+    { PCI_CLASS_DISPLAY_XGA, "xga", NULL },
+    { PCI_CLASS_DISPLAY_3D, "3d-controller", NULL },
+    { 0xFF, NULL, NULL },
+};
+
+static const PCISubClass media_subclass[] = {
+    { PCI_CLASS_MULTIMEDIA_VIDEO, "video", NULL },
+    { PCI_CLASS_MULTIMEDIA_AUDIO, "sound", NULL },
+    { PCI_CLASS_MULTIMEDIA_PHONE, "telephony", NULL },
+    { 0xFF, NULL, NULL },
+};
+
+static const PCISubClass mem_subclass[] = {
+    { PCI_CLASS_MEMORY_RAM, "memory", NULL },
+    { PCI_CLASS_MEMORY_FLASH, "flash", NULL },
+    { 0xFF, NULL, NULL },
+};
+
+static const PCISubClass bridg_subclass[] = {
+    { PCI_CLASS_BRIDGE_HOST, "host", NULL },
+    { PCI_CLASS_BRIDGE_ISA, "isa", NULL },
+    { PCI_CLASS_BRIDGE_EISA, "eisa", NULL },
+    { PCI_CLASS_BRIDGE_MC, "mca", NULL },
+    { PCI_CLASS_BRIDGE_PCI, "pci", NULL },
+    { PCI_CLASS_BRIDGE_PCMCIA, "pcmcia", NULL },
+    { PCI_CLASS_BRIDGE_NUBUS, "nubus", NULL },
+    { PCI_CLASS_BRIDGE_CARDBUS, "cardbus", NULL },
+    { PCI_CLASS_BRIDGE_RACEWAY, "raceway", NULL },
+    { PCI_CLASS_BRIDGE_PCI_SEMITP, "semi-transparent-pci", NULL },
+    { PCI_CLASS_BRIDGE_IB_PCI, "infiniband", NULL },
+    { 0xFF, NULL, NULL },
+};
+
+static const PCISubClass comm_subclass[] = {
+    { PCI_CLASS_COMMUNICATION_SERIAL, "serial", NULL },
+    { PCI_CLASS_COMMUNICATION_PARALLEL, "parallel", NULL },
+    { PCI_CLASS_COMMUNICATION_MULTISERIAL, "multiport-serial", NULL },
+    { PCI_CLASS_COMMUNICATION_MODEM, "modem", NULL },
+    { PCI_CLASS_COMMUNICATION_GPIB, "gpib", NULL },
+    { PCI_CLASS_COMMUNICATION_SC, "smart-card", NULL },
+    { 0xFF, NULL, NULL, },
+};
+
+static const PCIIFace pic_iface[] = {
+    { PCI_CLASS_SYSTEM_PIC_IOAPIC, "io-apic" },
+    { PCI_CLASS_SYSTEM_PIC_IOXAPIC, "io-xapic" },
+    { 0xFF, NULL },
+};
+
+static const PCISubClass sys_subclass[] = {
+    { PCI_CLASS_SYSTEM_PIC, "interrupt-controller", pic_iface },
+    { PCI_CLASS_SYSTEM_DMA, "dma-controller", NULL },
+    { PCI_CLASS_SYSTEM_TIMER, "timer", NULL },
+    { PCI_CLASS_SYSTEM_RTC, "rtc", NULL },
+    { PCI_CLASS_SYSTEM_PCI_HOTPLUG, "hot-plug-controller", NULL },
+    { PCI_CLASS_SYSTEM_SDHCI, "sd-host-controller", NULL },
+    { 0xFF, NULL, NULL },
+};
+
+static const PCISubClass inp_subclass[] = {
+    { PCI_CLASS_INPUT_KEYBOARD, "keyboard", NULL },
+    { PCI_CLASS_INPUT_PEN, "pen", NULL },
+    { PCI_CLASS_INPUT_MOUSE, "mouse", NULL },
+    { PCI_CLASS_INPUT_SCANNER, "scanner", NULL },
+    { PCI_CLASS_INPUT_GAMEPORT, "gameport", NULL },
+    { 0xFF, NULL, NULL },
+};
+
+static const PCISubClass dock_subclass[] = {
+    { PCI_CLASS_DOCKING_GENERIC, "dock", NULL },
+    { 0xFF, NULL, NULL },
+};
+
+static const PCISubClass cpu_subclass[] = {
+    { PCI_CLASS_PROCESSOR_PENTIUM, "pentium", NULL },
+    { PCI_CLASS_PROCESSOR_POWERPC, "powerpc", NULL },
+    { PCI_CLASS_PROCESSOR_MIPS, "mips", NULL },
+    { PCI_CLASS_PROCESSOR_CO, "co-processor", NULL },
+    { 0xFF, NULL, NULL },
+};
+
+static const PCIIFace usb_iface[] = {
+    { PCI_CLASS_SERIAL_USB_UHCI, "usb-uhci" },
+    { PCI_CLASS_SERIAL_USB_OHCI, "usb-ohci", },
+    { PCI_CLASS_SERIAL_USB_EHCI, "usb-ehci" },
+    { PCI_CLASS_SERIAL_USB_XHCI, "usb-xhci" },
+    { PCI_CLASS_SERIAL_USB_UNKNOWN, "usb-unknown" },
+    { PCI_CLASS_SERIAL_USB_DEVICE, "usb-device" },
+    { 0xFF, NULL },
+};
+
+static const PCISubClass ser_subclass[] = {
+    { PCI_CLASS_SERIAL_FIREWIRE, "firewire", NULL },
+    { PCI_CLASS_SERIAL_ACCESS, "access-bus", NULL },
+    { PCI_CLASS_SERIAL_SSA, "ssa", NULL },
+    { PCI_CLASS_SERIAL_USB, "usb", usb_iface },
+    { PCI_CLASS_SERIAL_FIBER, "fibre-channel", NULL },
+    { PCI_CLASS_SERIAL_SMBUS, "smb", NULL },
+    { PCI_CLASS_SERIAL_IB, "infiniband", NULL },
+    { PCI_CLASS_SERIAL_IPMI, "ipmi", NULL },
+    { PCI_CLASS_SERIAL_SERCOS, "sercos", NULL },
+    { PCI_CLASS_SERIAL_CANBUS, "canbus", NULL },
+    { 0xFF, NULL, NULL },
+};
+
+static const PCISubClass wrl_subclass[] = {
+    { PCI_CLASS_WIRELESS_IRDA, "irda", NULL },
+    { PCI_CLASS_WIRELESS_CIR, "consumer-ir", NULL },
+    { PCI_CLASS_WIRELESS_RF_CONTROLLER, "rf-controller", NULL },
+    { PCI_CLASS_WIRELESS_BLUETOOTH, "bluetooth", NULL },
+    { PCI_CLASS_WIRELESS_BROADBAND, "broadband", NULL },
+    { 0xFF, NULL, NULL },
+};
+
+static const PCISubClass sat_subclass[] = {
+    { PCI_CLASS_SATELLITE_TV, "satellite-tv", NULL },
+    { PCI_CLASS_SATELLITE_AUDIO, "satellite-audio", NULL },
+    { PCI_CLASS_SATELLITE_VOICE, "satellite-voice", NULL },
+    { PCI_CLASS_SATELLITE_DATA, "satellite-data", NULL },
+    { 0xFF, NULL, NULL },
+};
+
+static const PCISubClass crypt_subclass[] = {
+    { PCI_CLASS_CRYPT_NETWORK, "network-encryption", NULL },
+    { PCI_CLASS_CRYPT_ENTERTAINMENT,
+      "entertainment-encryption", NULL },
+    { 0xFF, NULL, NULL },
+};
+
+static const PCISubClass spc_subclass[] = {
+    { PCI_CLASS_SP_DPIO, "dpio", NULL },
+    { PCI_CLASS_SP_PERF, "counter", NULL },
+    { PCI_CLASS_SP_SYNCH, "measurement", NULL },
+    { PCI_CLASS_SP_MANAGEMENT, "management-card", NULL },
+    { 0xFF, NULL, NULL },
+};
+
+static const PCIClass pci_classes[] = {
+    { "legacy-device", undef_subclass },
+    { "mass-storage",  mass_subclass },
+    { "network", net_subclass },
+    { "display", displ_subclass, },
+    { "multimedia-device", media_subclass },
+    { "memory-controller", mem_subclass },
+    { "unknown-bridge", bridg_subclass },
+    { "communication-controller", comm_subclass},
+    { "system-peripheral", sys_subclass },
+    { "input-controller", inp_subclass },
+    { "docking-station", dock_subclass },
+    { "cpu", cpu_subclass },
+    { "serial-bus", ser_subclass },
+    { "wireless-controller", wrl_subclass },
+    { "intelligent-io", NULL },
+    { "satellite-device", sat_subclass },
+    { "encryption", crypt_subclass },
+    { "data-processing-controller", spc_subclass },
+};
+
+static const char *pci_find_device_name(uint8_t class, uint8_t subclass,
+                                        uint8_t iface)
+{
+    const PCIClass *pclass;
+    const PCISubClass *psubclass;
+    const PCIIFace *piface;
+    const char *name;
+
+    if (class >= ARRAY_SIZE(pci_classes)) {
+        return "pci";
+    }
+
+    pclass = pci_classes + class;
+    name = pclass->name;
+
+    if (pclass->subc == NULL) {
+        return name;
+    }
+
+    psubclass = pclass->subc;
+    while ((psubclass->subclass & 0xff) != 0xff) {
+        if ((psubclass->subclass & 0xff) == subclass) {
+            name = psubclass->name;
+            break;
+        }
+        psubclass++;
+    }
+
+    piface = psubclass->iface;
+    if (piface == NULL) {
+        return name;
+    }
+    while ((piface->iface & 0xff) != 0xff) {
+        if ((piface->iface & 0xff) == iface) {
+            name = piface->name;
+            break;
+        }
+        piface++;
+    }
+
+    return name;
+}
+
+static void pci_get_node_name(char *nodename, int len, PCIDevice *dev)
+{
+    int slot = PCI_SLOT(dev->devfn);
+    int func = PCI_FUNC(dev->devfn);
+    uint32_t ccode = pci_default_read_config(dev, PCI_CLASS_PROG, 3);
+    const char *name;
+
+    name = pci_find_device_name((ccode >> 16) & 0xff, (ccode >> 8) & 0xff,
+                                ccode & 0xff);
+
+    if (func != 0) {
+        snprintf(nodename, len, "%s@%x,%x", name, slot, func);
+    } else {
+        snprintf(nodename, len, "%s@%x", name, slot);
+    }
+}
+
 static uint32_t spapr_phb_get_pci_drc_index(sPAPRPHBState *phb,
                                             PCIDevice *pdev);
 
@@ -957,6 +1226,7 @@ static int spapr_populate_pci_child_dt(PCIDevice *dev, void *fdt, int offset,
     int pci_status, err;
     char *buf = NULL;
     uint32_t drc_index = spapr_phb_get_pci_drc_index(sphb, dev);
+    uint32_t ccode = pci_default_read_config(dev, PCI_CLASS_PROG, 3);
     uint32_t max_msi, max_msix;
 
     if (pci_default_read_config(dev, PCI_HEADER_TYPE, 1) ==
@@ -971,8 +1241,7 @@ static int spapr_populate_pci_child_dt(PCIDevice *dev, void *fdt, int offset,
                           pci_default_read_config(dev, PCI_DEVICE_ID, 2)));
     _FDT(fdt_setprop_cell(fdt, offset, "revision-id",
                           pci_default_read_config(dev, PCI_REVISION_ID, 1)));
-    _FDT(fdt_setprop_cell(fdt, offset, "class-code",
-                          pci_default_read_config(dev, PCI_CLASS_PROG, 3)));
+    _FDT(fdt_setprop_cell(fdt, offset, "class-code", ccode));
     if (pci_default_read_config(dev, PCI_INTERRUPT_PIN, 1)) {
         _FDT(fdt_setprop_cell(fdt, offset, "interrupts",
                  pci_default_read_config(dev, PCI_INTERRUPT_PIN, 1)));
@@ -1013,11 +1282,10 @@ static int spapr_populate_pci_child_dt(PCIDevice *dev, void *fdt, int offset,
         _FDT(fdt_setprop(fdt, offset, "udf-supported", NULL, 0));
     }
 
-    /* NOTE: this is normally generated by firmware via path/unit name,
-     * but in our case we must set it manually since it does not get
-     * processed by OF beforehand
-     */
-    _FDT(fdt_setprop_string(fdt, offset, "name", "pci"));
+    _FDT(fdt_setprop_string(fdt, offset, "name",
+                            pci_find_device_name((ccode >> 16) & 0xff,
+                                                 (ccode >> 8) & 0xff,
+                                                 ccode & 0xff)));
     buf = spapr_phb_get_loc_code(sphb, dev);
     if (!buf) {
         error_report("Failed setting the ibm,loc-code");
@@ -1061,15 +1329,9 @@ static int spapr_create_pci_child_dt(sPAPRPHBState *phb, PCIDevice *dev,
                                      void *fdt, int node_offset)
 {
     int offset, ret;
-    int slot = PCI_SLOT(dev->devfn);
-    int func = PCI_FUNC(dev->devfn);
     char nodename[FDT_NAME_MAX];
 
-    if (func != 0) {
-        snprintf(nodename, FDT_NAME_MAX, "pci@%x,%x", slot, func);
-    } else {
-        snprintf(nodename, FDT_NAME_MAX, "pci@%x", slot);
-    }
+    pci_get_node_name(nodename, FDT_NAME_MAX, dev);
     offset = fdt_add_subnode(fdt, node_offset, nodename);
     ret = spapr_populate_pci_child_dt(dev, fdt, offset, phb);
 
@@ -1485,7 +1747,7 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp)
         uint32_t irq;
         Error *local_err = NULL;
 
-        irq = xics_spapr_alloc_block(spapr->xics, 1, true, false, &local_err);
+        irq = spapr_ics_alloc_block(spapr->ics, 1, true, false, &local_err);
         if (local_err) {
             error_propagate(errp, local_err);
             error_prepend(errp, "can't allocate LSIs: ");
@@ -1782,9 +2044,9 @@ static void spapr_populate_pci_devices_dt(PCIBus *bus, PCIDevice *pdev,
     s_fdt.fdt = p->fdt;
     s_fdt.node_off = offset;
     s_fdt.sphb = p->sphb;
-    pci_for_each_device(sec_bus, pci_bus_num(sec_bus),
-                        spapr_populate_pci_devices_dt,
-                        &s_fdt);
+    pci_for_each_device_reverse(sec_bus, pci_bus_num(sec_bus),
+                                spapr_populate_pci_devices_dt,
+                                &s_fdt);
 }
 
 static void spapr_phb_pci_enumerate_bridge(PCIBus *bus, PCIDevice *pdev,
@@ -1953,9 +2215,9 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb,
     s_fdt.fdt = fdt;
     s_fdt.node_off = bus_off;
     s_fdt.sphb = phb;
-    pci_for_each_device(bus, pci_bus_num(bus),
-                        spapr_populate_pci_devices_dt,
-                        &s_fdt);
+    pci_for_each_device_reverse(bus, pci_bus_num(bus),
+                                spapr_populate_pci_devices_dt,
+                                &s_fdt);
 
     ret = spapr_drc_populate_dt(fdt, bus_off, OBJECT(phb),
                                 SPAPR_DR_CONNECTOR_TYPE_PCI);
diff --git a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c
index 8bfc5f971f..a0ee4fd265 100644
--- a/hw/ppc/spapr_vio.c
+++ b/hw/ppc/spapr_vio.c
@@ -454,7 +454,7 @@ static void spapr_vio_busdev_realize(DeviceState *qdev, Error **errp)
         dev->qdev.id = id;
     }
 
-    dev->irq = xics_spapr_alloc(spapr->xics, dev->irq, false, &local_err);
+    dev->irq = spapr_ics_alloc(spapr->ics, dev->irq, false, &local_err);
     if (local_err) {
         error_propagate(errp, local_err);
         return;
diff --git a/hw/ppc/trace-events b/hw/ppc/trace-events
index f46995cdb2..43d265f351 100644
--- a/hw/ppc/trace-events
+++ b/hw/ppc/trace-events
@@ -56,6 +56,10 @@ spapr_drc_realize_child(uint32_t index, char *childname) "drc: 0x%"PRIx32", chil
 spapr_drc_realize_complete(uint32_t index) "drc: 0x%"PRIx32
 spapr_drc_unrealize(uint32_t index) "drc: 0x%"PRIx32
 
+# hw/ppc/spapr_ovec.c
+spapr_ovec_parse_vector(int vector, int byte, uint16_t vec_len, uint8_t entry) "read guest vector %2d, byte %3d / %3d: 0x%.2x"
+spapr_ovec_populate_dt(int byte, uint16_t vec_len, uint8_t entry) "encoding guest vector byte %3d / %3d: 0x%.2x"
+
 # hw/ppc/spapr_rtas.c
 spapr_rtas_set_indicator_invalid(uint32_t index) "sensor index: 0x%"PRIx32
 spapr_rtas_set_indicator_not_supported(uint32_t index, uint32_t type) "sensor index: 0x%"PRIx32", type: %"PRIu32
@@ -85,3 +89,11 @@ rs6000mc_presence_read(uint32_t addr, uint32_t val) "read addr=%x val=%x"
 rs6000mc_size_read(uint32_t addr, uint32_t val) "read addr=%x val=%x"
 rs6000mc_size_write(uint32_t addr, uint32_t val) "write addr=%x val=%x"
 rs6000mc_parity_read(uint32_t addr, uint32_t val) "read addr=%x val=%x"
+
+# hw/ppc/mac_newworld.c
+mac99_uninorth_write(uint64_t addr, uint64_t value) "addr=0x%" PRIx64 " val=0x%"PRIx64
+mac99_uninorth_read(uint64_t addr, uint64_t value) "addr=0x%" PRIx64 " val=0x%"PRIx64
+
+# hw/ppc/ppc4xx_pci.c
+ppc4xx_pci_map_irq(int32_t devfn, int irq_num, int slot) "devfn %x irq %d -> %d"
+ppc4xx_pci_set_irq(int irq_num) "PCI irq %d"
diff --git a/hw/s390x/css.c b/hw/s390x/css.c
index 0f2580d644..e32b2a4d42 100644
--- a/hw/s390x/css.c
+++ b/hw/s390x/css.c
@@ -368,13 +368,16 @@ static CCW1 copy_ccw_from_guest(hwaddr addr, bool fmt1)
         ret.cda = be32_to_cpu(tmp1.cda);
     } else {
         cpu_physical_memory_read(addr, &tmp0, sizeof(tmp0));
-        ret.cmd_code = tmp0.cmd_code;
-        ret.flags = tmp0.flags;
-        ret.count = be16_to_cpu(tmp0.count);
-        ret.cda = be16_to_cpu(tmp0.cda1) | (tmp0.cda0 << 16);
-        if ((ret.cmd_code & 0x0f) == CCW_CMD_TIC) {
-            ret.cmd_code &= 0x0f;
+        if ((tmp0.cmd_code & 0x0f) == CCW_CMD_TIC) {
+            ret.cmd_code = CCW_CMD_TIC;
+            ret.flags = 0;
+            ret.count = 0;
+        } else {
+            ret.cmd_code = tmp0.cmd_code;
+            ret.flags = tmp0.flags;
+            ret.count = be16_to_cpu(tmp0.count);
         }
+        ret.cda = be16_to_cpu(tmp0.cda1) | (tmp0.cda0 << 16);
     }
     return ret;
 }
diff --git a/hw/s390x/ipl.c b/hw/s390x/ipl.c
index 2e2664f22e..7978c7d52a 100644
--- a/hw/s390x/ipl.c
+++ b/hw/s390x/ipl.c
@@ -20,6 +20,7 @@
 #include "hw/s390x/virtio-ccw.h"
 #include "hw/s390x/css.h"
 #include "ipl.h"
+#include "qemu/error-report.h"
 
 #define KERN_IMAGE_START                0x010000UL
 #define KERN_PARM_AREA                  0x010480UL
@@ -209,6 +210,7 @@ static Property s390_ipl_properties[] = {
     DEFINE_PROP_STRING("initrd", S390IPLState, initrd),
     DEFINE_PROP_STRING("cmdline", S390IPLState, cmdline),
     DEFINE_PROP_STRING("firmware", S390IPLState, firmware),
+    DEFINE_PROP_STRING("netboot_fw", S390IPLState, netboot_fw),
     DEFINE_PROP_BOOL("enforce_bios", S390IPLState, enforce_bios, false),
     DEFINE_PROP_BOOL("iplbext_migration", S390IPLState, iplbext_migration,
                      true),
@@ -226,6 +228,12 @@ static bool s390_gen_initial_iplb(S390IPLState *ipl)
                 TYPE_VIRTIO_CCW_DEVICE);
         SCSIDevice *sd = (SCSIDevice *) object_dynamic_cast(OBJECT(dev_st),
                                                             TYPE_SCSI_DEVICE);
+        VirtIONet *vn = (VirtIONet *) object_dynamic_cast(OBJECT(dev_st),
+                                                          TYPE_VIRTIO_NET);
+
+        if (vn) {
+            ipl->netboot = true;
+        }
         if (virtio_ccw_dev) {
             CcwDevice *ccw_dev = CCW_DEVICE(virtio_ccw_dev);
 
@@ -258,12 +266,86 @@ static bool s390_gen_initial_iplb(S390IPLState *ipl)
     return false;
 }
 
+static int load_netboot_image(Error **errp)
+{
+    S390IPLState *ipl = get_ipl_device();
+    char *netboot_filename;
+    MemoryRegion *sysmem =  get_system_memory();
+    MemoryRegion *mr = NULL;
+    void *ram_ptr = NULL;
+    int img_size = -1;
+
+    mr = memory_region_find(sysmem, 0, 1).mr;
+    if (!mr) {
+        error_setg(errp, "Failed to find memory region at address 0");
+        return -1;
+    }
+
+    ram_ptr = memory_region_get_ram_ptr(mr);
+    if (!ram_ptr) {
+        error_setg(errp, "No RAM found");
+        goto unref_mr;
+    }
+
+    netboot_filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, ipl->netboot_fw);
+    if (netboot_filename == NULL) {
+        error_setg(errp, "Could not find network bootloader");
+        goto unref_mr;
+    }
+
+    img_size = load_elf_ram(netboot_filename, NULL, NULL, &ipl->start_addr,
+                            NULL, NULL, 1, EM_S390, 0, 0, NULL, false);
+
+    if (img_size < 0) {
+        img_size = load_image_size(netboot_filename, ram_ptr, ram_size);
+        ipl->start_addr = KERN_IMAGE_START;
+    }
+
+    if (img_size < 0) {
+        error_setg(errp, "Failed to load network bootloader");
+    }
+
+    g_free(netboot_filename);
+
+unref_mr:
+    memory_region_unref(mr);
+    return img_size;
+}
+
+static bool is_virtio_net_device(IplParameterBlock *iplb)
+{
+    uint8_t cssid;
+    uint8_t ssid;
+    uint16_t devno;
+    uint16_t schid;
+    SubchDev *sch = NULL;
+
+    if (iplb->pbt != S390_IPL_TYPE_CCW) {
+        return false;
+    }
+
+    devno = be16_to_cpu(iplb->ccw.devno);
+    ssid = iplb->ccw.ssid & 3;
+
+    for (schid = 0; schid < MAX_SCHID; schid++) {
+        for (cssid = 0; cssid < MAX_CSSID; cssid++) {
+            sch = css_find_subch(1, cssid, ssid, schid);
+
+            if (sch && sch->devno == devno) {
+                return sch->id.cu_model == VIRTIO_ID_NET;
+            }
+        }
+    }
+    return false;
+}
+
 void s390_ipl_update_diag308(IplParameterBlock *iplb)
 {
     S390IPLState *ipl = get_ipl_device();
 
     ipl->iplb = *iplb;
     ipl->iplb_valid = true;
+    ipl->netboot = is_virtio_net_device(iplb);
 }
 
 IplParameterBlock *s390_ipl_get_iplb(void)
@@ -287,6 +369,7 @@ void s390_reipl_request(void)
 void s390_ipl_prepare_cpu(S390CPU *cpu)
 {
     S390IPLState *ipl = get_ipl_device();
+    Error *err = NULL;
 
     cpu->env.psw.addr = ipl->start_addr;
     cpu->env.psw.mask = IPL_PSW_MASK;
@@ -297,6 +380,13 @@ void s390_ipl_prepare_cpu(S390CPU *cpu)
             ipl->iplb_valid = s390_gen_initial_iplb(ipl);
         }
     }
+    if (ipl->netboot) {
+        if (load_netboot_image(&err) < 0) {
+            error_report_err(err);
+            vm_stop(RUN_STATE_INTERNAL_ERROR);
+        }
+        ipl->iplb.ccw.netboot_start_addr = ipl->start_addr;
+    }
 }
 
 static void s390_ipl_reset(DeviceState *dev)
diff --git a/hw/s390x/ipl.h b/hw/s390x/ipl.h
index c89109585a..46930e4c64 100644
--- a/hw/s390x/ipl.h
+++ b/hw/s390x/ipl.h
@@ -16,7 +16,8 @@
 #include "cpu.h"
 
 struct IplBlockCcw {
-    uint8_t  reserved0[85];
+    uint64_t netboot_start_addr;
+    uint8_t  reserved0[77];
     uint8_t  ssid;
     uint16_t devno;
     uint8_t  vm_flags;
@@ -100,12 +101,14 @@ struct S390IPLState {
     IplParameterBlock iplb;
     bool iplb_valid;
     bool reipl_requested;
+    bool netboot;
 
     /*< public >*/
     char *kernel;
     char *initrd;
     char *cmdline;
     char *firmware;
+    char *netboot_fw;
     uint8_t cssid;
     uint8_t ssid;
     uint16_t devno;
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index e9a676797a..40914fde6f 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -63,7 +63,7 @@ static int virtio_ccw_hcall_notify(const uint64_t *args)
     if (!sch || !css_subch_visible(sch)) {
         return -EINVAL;
     }
-    if (queue >= VIRTIO_CCW_QUEUE_MAX) {
+    if (queue >= VIRTIO_QUEUE_MAX) {
         return -EINVAL;
     }
     virtio_queue_notify(virtio_ccw_get_vdev(sch), queue);
@@ -116,7 +116,8 @@ static void ccw_init(MachineState *machine)
     /* get a BUS */
     css_bus = virtual_css_bus_init();
     s390_init_ipl_dev(machine->kernel_filename, machine->kernel_cmdline,
-                      machine->initrd_filename, "s390-ccw.img", true);
+                      machine->initrd_filename, "s390-ccw.img",
+                      "s390-netboot.img", true);
     s390_flic_init();
 
     dev = qdev_create(NULL, TYPE_S390_PCI_HOST_BRIDGE);
@@ -336,7 +337,12 @@ static const TypeInfo ccw_machine_info = {
     type_init(ccw_machine_register_##suffix)
 
 #define CCW_COMPAT_2_8 \
-        HW_COMPAT_2_8
+        HW_COMPAT_2_8 \
+        {\
+            .driver   = TYPE_S390_FLIC_COMMON,\
+            .property = "adapter_routes_max_batch",\
+            .value    = "64",\
+        },
 
 #define CCW_COMPAT_2_7 \
         HW_COMPAT_2_7
diff --git a/hw/s390x/s390-virtio.c b/hw/s390x/s390-virtio.c
index 7a3a7fe5fd..afa4148e6b 100644
--- a/hw/s390x/s390-virtio.c
+++ b/hw/s390x/s390-virtio.c
@@ -44,16 +44,6 @@
 #include "hw/s390x/ipl.h"
 #include "cpu.h"
 
-//#define DEBUG_S390
-
-#ifdef DEBUG_S390
-#define DPRINTF(fmt, ...) \
-    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
-#else
-#define DPRINTF(fmt, ...) \
-    do { } while (0)
-#endif
-
 #define MAX_BLK_DEVS                    10
 
 #define S390_TOD_CLOCK_VALUE_MISSING    0x00
@@ -75,6 +65,7 @@ void s390_init_ipl_dev(const char *kernel_filename,
                        const char *kernel_cmdline,
                        const char *initrd_filename,
                        const char *firmware,
+                       const char *netboot_fw,
                        bool enforce_bios)
 {
     Object *new = object_new(TYPE_S390_IPL);
@@ -88,6 +79,7 @@ void s390_init_ipl_dev(const char *kernel_filename,
     }
     qdev_prop_set_string(dev, "cmdline", kernel_cmdline);
     qdev_prop_set_string(dev, "firmware", firmware);
+    qdev_prop_set_string(dev, "netboot_fw", netboot_fw);
     qdev_prop_set_bit(dev, "enforce_bios", enforce_bios);
     object_property_add_child(qdev_get_machine(), TYPE_S390_IPL,
                               new, NULL);
diff --git a/hw/s390x/s390-virtio.h b/hw/s390x/s390-virtio.h
index f588b80a6e..f2377a3e0e 100644
--- a/hw/s390x/s390-virtio.h
+++ b/hw/s390x/s390-virtio.h
@@ -24,6 +24,7 @@ void s390_init_ipl_dev(const char *kernel_filename,
                        const char *kernel_cmdline,
                        const char *initrd_filename,
                        const char *firmware,
+                       const char *netboot_fw,
                        bool enforce_bios);
 void s390_create_virtio_net(BusState *bus, const char *name);
 void s390_nmi(NMIState *n, int cpu_index, Error **errp);
diff --git a/hw/s390x/virtio-ccw.c b/hw/s390x/virtio-ccw.c
index 63c46373fb..00b3bde4e9 100644
--- a/hw/s390x/virtio-ccw.c
+++ b/hw/s390x/virtio-ccw.c
@@ -35,6 +35,8 @@
 #include "trace.h"
 #include "hw/s390x/css-bridge.h"
 
+#define NR_CLASSIC_INDICATOR_BITS 64
+
 static void virtio_ccw_bus_new(VirtioBusState *bus, size_t bus_size,
                                VirtioCcwDevice *dev);
 
@@ -126,7 +128,7 @@ static int virtio_ccw_set_vqs(SubchDev *sch, VqInfoBlock *info,
     uint16_t num = info ? info->num : linfo->num;
     uint64_t desc = info ? info->desc : linfo->queue;
 
-    if (index >= VIRTIO_CCW_QUEUE_MAX) {
+    if (index >= VIRTIO_QUEUE_MAX) {
         return -EINVAL;
     }
 
@@ -162,7 +164,7 @@ static int virtio_ccw_set_vqs(SubchDev *sch, VqInfoBlock *info,
         virtio_queue_set_vector(vdev, index, index);
     }
     /* tell notify handler in case of config change */
-    vdev->config_vector = VIRTIO_CCW_QUEUE_MAX;
+    vdev->config_vector = VIRTIO_QUEUE_MAX;
     return 0;
 }
 
@@ -280,6 +282,15 @@ static int virtio_ccw_cb(SubchDev *sch, CCW1 ccw)
                                    ccw.cmd_code);
     check_len = !((ccw.flags & CCW_FLAG_SLI) && !(ccw.flags & CCW_FLAG_DC));
 
+    if (dev->force_revision_1 && dev->revision < 0 &&
+        ccw.cmd_code != CCW_CMD_SET_VIRTIO_REV) {
+        /*
+         * virtio-1 drivers must start with negotiating to a revision >= 1,
+         * so post a command reject for all other commands
+         */
+        return -ENOSYS;
+    }
+
     /* Look at the command. */
     switch (ccw.cmd_code) {
     case CCW_CMD_SET_VQ:
@@ -500,6 +511,11 @@ static int virtio_ccw_cb(SubchDev *sch, CCW1 ccw)
             ret = -ENOSYS;
             break;
         }
+        if (virtio_get_num_queues(vdev) > NR_CLASSIC_INDICATOR_BITS) {
+            /* More queues than indicator bits --> trigger a reject */
+            ret = -ENOSYS;
+            break;
+        }
         if (!ccw.cda) {
             ret = -EFAULT;
         } else {
@@ -549,7 +565,7 @@ static int virtio_ccw_cb(SubchDev *sch, CCW1 ccw)
                                                     ccw.cda,
                                                     MEMTXATTRS_UNSPECIFIED,
                                                     NULL);
-            if (vq_config.index >= VIRTIO_CCW_QUEUE_MAX) {
+            if (vq_config.index >= VIRTIO_QUEUE_MAX) {
                 ret = -EINVAL;
                 break;
             }
@@ -638,7 +654,8 @@ static int virtio_ccw_cb(SubchDev *sch, CCW1 ccw)
          * need to fetch it here. Nothing to do for now, though.
          */
         if (dev->revision >= 0 ||
-            revinfo.revision > virtio_ccw_rev_max(dev)) {
+            revinfo.revision > virtio_ccw_rev_max(dev) ||
+            (dev->force_revision_1 && !revinfo.revision)) {
             ret = -ENOSYS;
             break;
         }
@@ -669,6 +686,12 @@ static void virtio_ccw_device_realize(VirtioCcwDevice *dev, Error **errp)
     if (!sch) {
         return;
     }
+    if (!virtio_ccw_rev_max(dev) && dev->force_revision_1) {
+        error_setg(&err, "Invalid value of property max_rev "
+                   "(is %d expected >= 1)", virtio_ccw_rev_max(dev));
+        error_propagate(errp, err);
+        return;
+    }
 
     sch->driver_data = dev;
     sch->ccw_cb = virtio_ccw_cb;
@@ -878,6 +901,24 @@ static void virtio_ccw_rng_realize(VirtioCcwDevice *ccw_dev, Error **errp)
                              NULL);
 }
 
+static void virtio_ccw_crypto_realize(VirtioCcwDevice *ccw_dev, Error **errp)
+{
+    VirtIOCryptoCcw *dev = VIRTIO_CRYPTO_CCW(ccw_dev);
+    DeviceState *vdev = DEVICE(&dev->vdev);
+    Error *err = NULL;
+
+    qdev_set_parent_bus(vdev, BUS(&ccw_dev->bus));
+    object_property_set_bool(OBJECT(vdev), true, "realized", &err);
+    if (err) {
+        error_propagate(errp, err);
+        return;
+    }
+
+    object_property_set_link(OBJECT(vdev),
+                             OBJECT(dev->vdev.conf.cryptodev), "cryptodev",
+                             NULL);
+}
+
 /* DeviceState to VirtioCcwDevice. Note: used on datapath,
  * be careful and test performance if you change this.
  */
@@ -919,11 +960,11 @@ static void virtio_ccw_notify(DeviceState *d, uint16_t vector)
     uint64_t indicators;
 
     /* queue indicators + secondary indicators */
-    if (vector >= VIRTIO_CCW_QUEUE_MAX + 64) {
+    if (vector >= VIRTIO_QUEUE_MAX + 64) {
         return;
     }
 
-    if (vector < VIRTIO_CCW_QUEUE_MAX) {
+    if (vector < VIRTIO_QUEUE_MAX) {
         if (!dev->indicators) {
             return;
         }
@@ -1278,15 +1319,22 @@ static void virtio_ccw_device_plugged(DeviceState *d, Error **errp)
     CcwDevice *ccw_dev = CCW_DEVICE(d);
     SubchDev *sch = ccw_dev->sch;
     int n = virtio_get_num_queues(vdev);
+    S390FLICState *flic = s390_get_flic();
 
     if (!virtio_has_feature(vdev->host_features, VIRTIO_F_VERSION_1)) {
         dev->max_rev = 0;
     }
 
-    if (virtio_get_num_queues(vdev) > VIRTIO_CCW_QUEUE_MAX) {
+    if (virtio_get_num_queues(vdev) > VIRTIO_QUEUE_MAX) {
+        error_setg(errp, "The number of virtqueues %d "
+                   "exceeds virtio limit %d", n,
+                   VIRTIO_QUEUE_MAX);
+        return;
+    }
+    if (virtio_get_num_queues(vdev) > flic->adapter_routes_max_batch) {
         error_setg(errp, "The number of virtqueues %d "
-                   "exceeds ccw limit %d", n,
-                   VIRTIO_CCW_QUEUE_MAX);
+                   "exceeds flic adapter route limit %d", n,
+                   flic->adapter_routes_max_batch);
         return;
     }
 
@@ -1518,6 +1566,48 @@ static const TypeInfo virtio_ccw_rng = {
     .class_init    = virtio_ccw_rng_class_init,
 };
 
+static Property virtio_ccw_crypto_properties[] = {
+    DEFINE_PROP_CSS_DEV_ID("devno", VirtioCcwDevice, parent_obj.bus_id),
+    DEFINE_PROP_BIT("ioeventfd", VirtioCcwDevice, flags,
+                    VIRTIO_CCW_FLAG_USE_IOEVENTFD_BIT, true),
+    DEFINE_PROP_UINT32("max_revision", VirtioCcwDevice, max_rev,
+                       VIRTIO_CCW_MAX_REV),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static void virtio_ccw_crypto_instance_init(Object *obj)
+{
+    VirtIOCryptoCcw *dev = VIRTIO_CRYPTO_CCW(obj);
+    VirtioCcwDevice *ccw_dev = VIRTIO_CCW_DEVICE(obj);
+
+    ccw_dev->force_revision_1 = true;
+    virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev),
+                                TYPE_VIRTIO_CRYPTO);
+
+    object_property_add_alias(obj, "cryptodev", OBJECT(&dev->vdev),
+                              "cryptodev", &error_abort);
+}
+
+static void virtio_ccw_crypto_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    VirtIOCCWDeviceClass *k = VIRTIO_CCW_DEVICE_CLASS(klass);
+
+    k->realize = virtio_ccw_crypto_realize;
+    k->exit = virtio_ccw_exit;
+    dc->reset = virtio_ccw_reset;
+    dc->props = virtio_ccw_crypto_properties;
+    set_bit(DEVICE_CATEGORY_MISC, dc->categories);
+}
+
+static const TypeInfo virtio_ccw_crypto = {
+    .name          = TYPE_VIRTIO_CRYPTO_CCW,
+    .parent        = TYPE_VIRTIO_CCW_DEVICE,
+    .instance_size = sizeof(VirtIOCryptoCcw),
+    .instance_init = virtio_ccw_crypto_instance_init,
+    .class_init    = virtio_ccw_crypto_class_init,
+};
+
 static void virtio_ccw_busdev_realize(DeviceState *dev, Error **errp)
 {
     VirtioCcwDevice *_dev = (VirtioCcwDevice *)dev;
@@ -1720,6 +1810,7 @@ static void virtio_ccw_register(void)
 #ifdef CONFIG_VHOST_VSOCK
     type_register_static(&vhost_vsock_ccw_info);
 #endif
+    type_register_static(&virtio_ccw_crypto);
 }
 
 type_init(virtio_ccw_register)
diff --git a/hw/s390x/virtio-ccw.h b/hw/s390x/virtio-ccw.h
index 77d10f1671..41d4010378 100644
--- a/hw/s390x/virtio-ccw.h
+++ b/hw/s390x/virtio-ccw.h
@@ -22,6 +22,7 @@
 #endif
 #include "hw/virtio/virtio-balloon.h"
 #include "hw/virtio/virtio-rng.h"
+#include "hw/virtio/virtio-crypto.h"
 #include "hw/virtio/virtio-bus.h"
 #ifdef CONFIG_VHOST_VSOCK
 #include "hw/virtio/vhost-vsock.h"
@@ -94,6 +95,7 @@ struct VirtioCcwDevice {
     IndAddr *indicators2;
     IndAddr *summary_indicator;
     uint64_t ind_bit;
+    bool force_revision_1;
 };
 
 /* The maximum virtio revision we support. */
@@ -182,6 +184,17 @@ typedef struct VirtIORNGCcw {
     VirtIORNG vdev;
 } VirtIORNGCcw;
 
+/* virtio-crypto-ccw */
+
+#define TYPE_VIRTIO_CRYPTO_CCW "virtio-crypto-ccw"
+#define VIRTIO_CRYPTO_CCW(obj) \
+        OBJECT_CHECK(VirtIOCryptoCcw, (obj), TYPE_VIRTIO_CRYPTO_CCW)
+
+typedef struct VirtIOCryptoCcw {
+    VirtioCcwDevice parent_obj;
+    VirtIOCrypto vdev;
+} VirtIOCryptoCcw;
+
 VirtIODevice *virtio_ccw_get_vdev(SubchDev *sch);
 
 #ifdef CONFIG_VIRTFS
diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
index bbfb5dc289..a53f058621 100644
--- a/hw/scsi/scsi-disk.c
+++ b/hw/scsi/scsi-disk.c
@@ -2240,7 +2240,7 @@ static void scsi_disk_resize_cb(void *opaque)
     }
 }
 
-static void scsi_cd_change_media_cb(void *opaque, bool load)
+static void scsi_cd_change_media_cb(void *opaque, bool load, Error **errp)
 {
     SCSIDiskState *s = opaque;
 
@@ -2328,7 +2328,13 @@ static void scsi_realize(SCSIDevice *dev, Error **errp)
             return;
         }
     }
-    blkconf_apply_backend_options(&dev->conf);
+    blkconf_apply_backend_options(&dev->conf,
+                                  blk_is_read_only(s->qdev.conf.blk),
+                                  dev->type == TYPE_DISK, &err);
+    if (err) {
+        error_propagate(errp, err);
+        return;
+    }
 
     if (s->qdev.conf.discard_granularity == -1) {
         s->qdev.conf.discard_granularity =
@@ -2380,7 +2386,7 @@ static void scsi_cd_realize(SCSIDevice *dev, Error **errp)
     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev);
 
     if (!dev->conf.blk) {
-        dev->conf.blk = blk_new();
+        dev->conf.blk = blk_new(0, BLK_PERM_ALL);
     }
 
     s->qdev.blocksize = 2048;
diff --git a/hw/sd/Makefile.objs b/hw/sd/Makefile.objs
index 31c83308f2..c2b7664264 100644
--- a/hw/sd/Makefile.objs
+++ b/hw/sd/Makefile.objs
@@ -6,3 +6,4 @@ common-obj-$(CONFIG_SDHCI) += sdhci.o
 obj-$(CONFIG_MILKYMIST) += milkymist-memcard.o
 obj-$(CONFIG_OMAP) += omap_mmc.o
 obj-$(CONFIG_PXA2XX) += pxa2xx_mmci.o
+obj-$(CONFIG_RASPI) += bcm2835_sdhost.o
diff --git a/hw/sd/bcm2835_sdhost.c b/hw/sd/bcm2835_sdhost.c
new file mode 100644
index 0000000000..f7f4e656df
--- /dev/null
+++ b/hw/sd/bcm2835_sdhost.c
@@ -0,0 +1,429 @@
+/*
+ * Raspberry Pi (BCM2835) SD Host Controller
+ *
+ * Copyright (c) 2017 Antfield SAS
+ *
+ * Authors:
+ *  Clement Deschamps <clement.deschamps@antfield.fr>
+ *  Luc Michel <luc.michel@antfield.fr>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/log.h"
+#include "sysemu/blockdev.h"
+#include "hw/sd/bcm2835_sdhost.h"
+
+#define TYPE_BCM2835_SDHOST_BUS "bcm2835-sdhost-bus"
+#define BCM2835_SDHOST_BUS(obj) \
+    OBJECT_CHECK(SDBus, (obj), TYPE_BCM2835_SDHOST_BUS)
+
+#define SDCMD  0x00 /* Command to SD card              - 16 R/W */
+#define SDARG  0x04 /* Argument to SD card             - 32 R/W */
+#define SDTOUT 0x08 /* Start value for timeout counter - 32 R/W */
+#define SDCDIV 0x0c /* Start value for clock divider   - 11 R/W */
+#define SDRSP0 0x10 /* SD card rsp (31:0)         - 32 R   */
+#define SDRSP1 0x14 /* SD card rsp (63:32)        - 32 R   */
+#define SDRSP2 0x18 /* SD card rsp (95:64)        - 32 R   */
+#define SDRSP3 0x1c /* SD card rsp (127:96)       - 32 R   */
+#define SDHSTS 0x20 /* SD host status                  - 11 R   */
+#define SDVDD  0x30 /* SD card power control           -  1 R/W */
+#define SDEDM  0x34 /* Emergency Debug Mode            - 13 R/W */
+#define SDHCFG 0x38 /* Host configuration              -  2 R/W */
+#define SDHBCT 0x3c /* Host byte count (debug)         - 32 R/W */
+#define SDDATA 0x40 /* Data to/from SD card            - 32 R/W */
+#define SDHBLC 0x50 /* Host block count (SDIO/SDHC)    -  9 R/W */
+
+#define SDCMD_NEW_FLAG                  0x8000
+#define SDCMD_FAIL_FLAG                 0x4000
+#define SDCMD_BUSYWAIT                  0x800
+#define SDCMD_NO_RESPONSE               0x400
+#define SDCMD_LONG_RESPONSE             0x200
+#define SDCMD_WRITE_CMD                 0x80
+#define SDCMD_READ_CMD                  0x40
+#define SDCMD_CMD_MASK                  0x3f
+
+#define SDCDIV_MAX_CDIV                 0x7ff
+
+#define SDHSTS_BUSY_IRPT                0x400
+#define SDHSTS_BLOCK_IRPT               0x200
+#define SDHSTS_SDIO_IRPT                0x100
+#define SDHSTS_REW_TIME_OUT             0x80
+#define SDHSTS_CMD_TIME_OUT             0x40
+#define SDHSTS_CRC16_ERROR              0x20
+#define SDHSTS_CRC7_ERROR               0x10
+#define SDHSTS_FIFO_ERROR               0x08
+/* Reserved */
+/* Reserved */
+#define SDHSTS_DATA_FLAG                0x01
+
+#define SDHCFG_BUSY_IRPT_EN     (1 << 10)
+#define SDHCFG_BLOCK_IRPT_EN    (1 << 8)
+#define SDHCFG_SDIO_IRPT_EN     (1 << 5)
+#define SDHCFG_DATA_IRPT_EN     (1 << 4)
+#define SDHCFG_SLOW_CARD        (1 << 3)
+#define SDHCFG_WIDE_EXT_BUS     (1 << 2)
+#define SDHCFG_WIDE_INT_BUS     (1 << 1)
+#define SDHCFG_REL_CMD_LINE     (1 << 0)
+
+#define SDEDM_FORCE_DATA_MODE   (1 << 19)
+#define SDEDM_CLOCK_PULSE       (1 << 20)
+#define SDEDM_BYPASS            (1 << 21)
+
+#define SDEDM_WRITE_THRESHOLD_SHIFT 9
+#define SDEDM_READ_THRESHOLD_SHIFT 14
+#define SDEDM_THRESHOLD_MASK     0x1f
+
+#define SDEDM_FSM_MASK           0xf
+#define SDEDM_FSM_IDENTMODE      0x0
+#define SDEDM_FSM_DATAMODE       0x1
+#define SDEDM_FSM_READDATA       0x2
+#define SDEDM_FSM_WRITEDATA      0x3
+#define SDEDM_FSM_READWAIT       0x4
+#define SDEDM_FSM_READCRC        0x5
+#define SDEDM_FSM_WRITECRC       0x6
+#define SDEDM_FSM_WRITEWAIT1     0x7
+#define SDEDM_FSM_POWERDOWN      0x8
+#define SDEDM_FSM_POWERUP        0x9
+#define SDEDM_FSM_WRITESTART1    0xa
+#define SDEDM_FSM_WRITESTART2    0xb
+#define SDEDM_FSM_GENPULSES      0xc
+#define SDEDM_FSM_WRITEWAIT2     0xd
+#define SDEDM_FSM_STARTPOWDOWN   0xf
+
+#define SDDATA_FIFO_WORDS        16
+
+static void bcm2835_sdhost_update_irq(BCM2835SDHostState *s)
+{
+    uint32_t irq = s->status &
+        (SDHSTS_BUSY_IRPT | SDHSTS_BLOCK_IRPT | SDHSTS_SDIO_IRPT);
+    qemu_set_irq(s->irq, !!irq);
+}
+
+static void bcm2835_sdhost_send_command(BCM2835SDHostState *s)
+{
+    SDRequest request;
+    uint8_t rsp[16];
+    int rlen;
+
+    request.cmd = s->cmd & SDCMD_CMD_MASK;
+    request.arg = s->cmdarg;
+
+    rlen = sdbus_do_command(&s->sdbus, &request, rsp);
+    if (rlen < 0) {
+        goto error;
+    }
+    if (!(s->cmd & SDCMD_NO_RESPONSE)) {
+#define RWORD(n) (((uint32_t)rsp[n] << 24) | (rsp[n + 1] << 16) \
+                  | (rsp[n + 2] << 8) | rsp[n + 3])
+        if (rlen == 0 || (rlen == 4 && (s->cmd & SDCMD_LONG_RESPONSE))) {
+            goto error;
+        }
+        if (rlen != 4 && rlen != 16) {
+            goto error;
+        }
+        if (rlen == 4) {
+            s->rsp[0] = RWORD(0);
+            s->rsp[1] = s->rsp[2] = s->rsp[3] = 0;
+        } else {
+            s->rsp[0] = RWORD(12);
+            s->rsp[1] = RWORD(8);
+            s->rsp[2] = RWORD(4);
+            s->rsp[3] = RWORD(0);
+        }
+#undef RWORD
+    }
+    return;
+
+error:
+    s->cmd |= SDCMD_FAIL_FLAG;
+    s->status |= SDHSTS_CMD_TIME_OUT;
+}
+
+static void bcm2835_sdhost_fifo_push(BCM2835SDHostState *s, uint32_t value)
+{
+    int n;
+
+    if (s->fifo_len == BCM2835_SDHOST_FIFO_LEN) {
+        /* FIFO overflow */
+        return;
+    }
+    n = (s->fifo_pos + s->fifo_len) & (BCM2835_SDHOST_FIFO_LEN - 1);
+    s->fifo_len++;
+    s->fifo[n] = value;
+}
+
+static uint32_t bcm2835_sdhost_fifo_pop(BCM2835SDHostState *s)
+{
+    uint32_t value;
+
+    if (s->fifo_len == 0) {
+        /* FIFO underflow */
+        return 0;
+    }
+    value = s->fifo[s->fifo_pos];
+    s->fifo_len--;
+    s->fifo_pos = (s->fifo_pos + 1) & (BCM2835_SDHOST_FIFO_LEN - 1);
+    return value;
+}
+
+static void bcm2835_sdhost_fifo_run(BCM2835SDHostState *s)
+{
+    uint32_t value = 0;
+    int n;
+    int is_read;
+
+    is_read = (s->cmd & SDCMD_READ_CMD) != 0;
+    if (s->datacnt != 0 && (!is_read || sdbus_data_ready(&s->sdbus))) {
+        if (is_read) {
+            n = 0;
+            while (s->datacnt && s->fifo_len < BCM2835_SDHOST_FIFO_LEN) {
+                value |= (uint32_t)sdbus_read_data(&s->sdbus) << (n * 8);
+                s->datacnt--;
+                n++;
+                if (n == 4) {
+                    bcm2835_sdhost_fifo_push(s, value);
+                    n = 0;
+                    value = 0;
+                }
+            }
+            if (n != 0) {
+                bcm2835_sdhost_fifo_push(s, value);
+            }
+        } else { /* write */
+            n = 0;
+            while (s->datacnt > 0 && (s->fifo_len > 0 || n > 0)) {
+                if (n == 0) {
+                    value = bcm2835_sdhost_fifo_pop(s);
+                    n = 4;
+                }
+                n--;
+                s->datacnt--;
+                sdbus_write_data(&s->sdbus, value & 0xff);
+                value >>= 8;
+            }
+        }
+    }
+    if (s->datacnt == 0) {
+        s->status |= SDHSTS_DATA_FLAG;
+
+        s->edm &= ~0xf;
+        s->edm |= SDEDM_FSM_DATAMODE;
+
+        if (s->config & SDHCFG_DATA_IRPT_EN) {
+            s->status |= SDHSTS_SDIO_IRPT;
+        }
+
+        if ((s->cmd & SDCMD_BUSYWAIT) && (s->config & SDHCFG_BUSY_IRPT_EN)) {
+            s->status |= SDHSTS_BUSY_IRPT;
+        }
+
+        if ((s->cmd & SDCMD_WRITE_CMD) && (s->config & SDHCFG_BLOCK_IRPT_EN)) {
+            s->status |= SDHSTS_BLOCK_IRPT;
+        }
+
+        bcm2835_sdhost_update_irq(s);
+    }
+
+    s->edm &= ~(0x1f << 4);
+    s->edm |= ((s->fifo_len & 0x1f) << 4);
+}
+
+static uint64_t bcm2835_sdhost_read(void *opaque, hwaddr offset,
+    unsigned size)
+{
+    BCM2835SDHostState *s = (BCM2835SDHostState *)opaque;
+    uint32_t res = 0;
+
+    switch (offset) {
+    case SDCMD:
+        res = s->cmd;
+        break;
+    case SDHSTS:
+        res = s->status;
+        break;
+    case SDRSP0:
+        res = s->rsp[0];
+        break;
+    case SDRSP1:
+        res = s->rsp[1];
+        break;
+    case SDRSP2:
+        res = s->rsp[2];
+        break;
+    case SDRSP3:
+        res = s->rsp[3];
+        break;
+    case SDEDM:
+        res = s->edm;
+        break;
+    case SDVDD:
+        res = s->vdd;
+        break;
+    case SDDATA:
+        res = bcm2835_sdhost_fifo_pop(s);
+        bcm2835_sdhost_fifo_run(s);
+        break;
+    case SDHBCT:
+        res = s->hbct;
+        break;
+    case SDHBLC:
+        res = s->hblc;
+        break;
+
+    default:
+        qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad offset %"HWADDR_PRIx"\n",
+                      __func__, offset);
+        res = 0;
+        break;
+    }
+
+    return res;
+}
+
+static void bcm2835_sdhost_write(void *opaque, hwaddr offset,
+    uint64_t value, unsigned size)
+{
+    BCM2835SDHostState *s = (BCM2835SDHostState *)opaque;
+
+    switch (offset) {
+    case SDCMD:
+        s->cmd = value;
+        if (value & SDCMD_NEW_FLAG) {
+            bcm2835_sdhost_send_command(s);
+            bcm2835_sdhost_fifo_run(s);
+            s->cmd &= ~SDCMD_NEW_FLAG;
+        }
+        break;
+    case SDTOUT:
+        break;
+    case SDCDIV:
+        break;
+    case SDHSTS:
+        s->status &= ~value;
+        bcm2835_sdhost_update_irq(s);
+        break;
+    case SDARG:
+        s->cmdarg = value;
+        break;
+    case SDEDM:
+        if ((value & 0xf) == 0xf) {
+            /* power down */
+            value &= ~0xf;
+        }
+        s->edm = value;
+        break;
+    case SDHCFG:
+        s->config = value;
+        bcm2835_sdhost_fifo_run(s);
+        break;
+    case SDVDD:
+        s->vdd = value;
+        break;
+    case SDDATA:
+        bcm2835_sdhost_fifo_push(s, value);
+        bcm2835_sdhost_fifo_run(s);
+        break;
+    case SDHBCT:
+        s->hbct = value;
+        break;
+    case SDHBLC:
+        s->hblc = value;
+        s->datacnt = s->hblc * s->hbct;
+        bcm2835_sdhost_fifo_run(s);
+        break;
+
+    default:
+        qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad offset %"HWADDR_PRIx"\n",
+                      __func__, offset);
+        break;
+    }
+}
+
+static const MemoryRegionOps bcm2835_sdhost_ops = {
+    .read = bcm2835_sdhost_read,
+    .write = bcm2835_sdhost_write,
+    .endianness = DEVICE_NATIVE_ENDIAN,
+};
+
+static const VMStateDescription vmstate_bcm2835_sdhost = {
+    .name = TYPE_BCM2835_SDHOST,
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT32(cmd, BCM2835SDHostState),
+        VMSTATE_UINT32(cmdarg, BCM2835SDHostState),
+        VMSTATE_UINT32(status, BCM2835SDHostState),
+        VMSTATE_UINT32_ARRAY(rsp, BCM2835SDHostState, 4),
+        VMSTATE_UINT32(config, BCM2835SDHostState),
+        VMSTATE_UINT32(edm, BCM2835SDHostState),
+        VMSTATE_UINT32(vdd, BCM2835SDHostState),
+        VMSTATE_UINT32(hbct, BCM2835SDHostState),
+        VMSTATE_UINT32(hblc, BCM2835SDHostState),
+        VMSTATE_INT32(fifo_pos, BCM2835SDHostState),
+        VMSTATE_INT32(fifo_len, BCM2835SDHostState),
+        VMSTATE_UINT32_ARRAY(fifo, BCM2835SDHostState, BCM2835_SDHOST_FIFO_LEN),
+        VMSTATE_UINT32(datacnt, BCM2835SDHostState),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static void bcm2835_sdhost_init(Object *obj)
+{
+    BCM2835SDHostState *s = BCM2835_SDHOST(obj);
+
+    qbus_create_inplace(&s->sdbus, sizeof(s->sdbus),
+                        TYPE_BCM2835_SDHOST_BUS, DEVICE(s), "sd-bus");
+
+    memory_region_init_io(&s->iomem, obj, &bcm2835_sdhost_ops, s,
+                          TYPE_BCM2835_SDHOST, 0x1000);
+    sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->iomem);
+    sysbus_init_irq(SYS_BUS_DEVICE(s), &s->irq);
+}
+
+static void bcm2835_sdhost_reset(DeviceState *dev)
+{
+    BCM2835SDHostState *s = BCM2835_SDHOST(dev);
+
+    s->cmd = 0;
+    s->cmdarg = 0;
+    s->edm = 0x0000c60f;
+    s->config = 0;
+    s->hbct = 0;
+    s->hblc = 0;
+    s->datacnt = 0;
+    s->fifo_pos = 0;
+    s->fifo_len = 0;
+}
+
+static void bcm2835_sdhost_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+
+    dc->reset = bcm2835_sdhost_reset;
+    dc->vmsd = &vmstate_bcm2835_sdhost;
+}
+
+static TypeInfo bcm2835_sdhost_info = {
+    .name          = TYPE_BCM2835_SDHOST,
+    .parent        = TYPE_SYS_BUS_DEVICE,
+    .instance_size = sizeof(BCM2835SDHostState),
+    .class_init    = bcm2835_sdhost_class_init,
+    .instance_init = bcm2835_sdhost_init,
+};
+
+static const TypeInfo bcm2835_sdhost_bus_info = {
+    .name = TYPE_BCM2835_SDHOST_BUS,
+    .parent = TYPE_SD_BUS,
+    .instance_size = sizeof(SDBus),
+};
+
+static void bcm2835_sdhost_register_types(void)
+{
+    type_register_static(&bcm2835_sdhost_info);
+    type_register_static(&bcm2835_sdhost_bus_info);
+}
+
+type_init(bcm2835_sdhost_register_types)
diff --git a/hw/sd/core.c b/hw/sd/core.c
index 14c2bdf27b..295dc44ab7 100644
--- a/hw/sd/core.c
+++ b/hw/sd/core.c
@@ -131,6 +131,33 @@ void sdbus_set_readonly(SDBus *sdbus, bool readonly)
     }
 }
 
+void sdbus_reparent_card(SDBus *from, SDBus *to)
+{
+    SDState *card = get_card(from);
+    SDCardClass *sc;
+    bool readonly;
+
+    /* We directly reparent the card object rather than implementing this
+     * as a hotpluggable connection because we don't want to expose SD cards
+     * to users as being hotpluggable, and we can get away with it in this
+     * limited use case. This could perhaps be implemented more cleanly in
+     * future by adding support to the hotplug infrastructure for "device
+     * can be hotplugged only via code, not by user".
+     */
+
+    if (!card) {
+        return;
+    }
+
+    sc = SD_CARD_GET_CLASS(card);
+    readonly = sc->get_readonly(card);
+
+    sdbus_set_inserted(from, false);
+    qdev_set_parent_bus(DEVICE(card), &to->qbus);
+    sdbus_set_inserted(to, true);
+    sdbus_set_readonly(to, readonly);
+}
+
 static const TypeInfo sd_bus_info = {
     .name = TYPE_SD_BUS,
     .parent = TYPE_BUS,
diff --git a/hw/sd/sd.c b/hw/sd/sd.c
index 8e88e8311a..ba47bff4db 100644
--- a/hw/sd/sd.c
+++ b/hw/sd/sd.c
@@ -458,7 +458,7 @@ static bool sd_get_readonly(SDState *sd)
     return sd->wp_switch;
 }
 
-static void sd_cardchange(void *opaque, bool load)
+static void sd_cardchange(void *opaque, bool load, Error **errp)
 {
     SDState *sd = opaque;
     DeviceState *dev = DEVICE(sd);
@@ -1887,6 +1887,7 @@ static void sd_instance_finalize(Object *obj)
 static void sd_realize(DeviceState *dev, Error **errp)
 {
     SDState *sd = SD_CARD(dev);
+    int ret;
 
     if (sd->blk && blk_is_read_only(sd->blk)) {
         error_setg(errp, "Cannot use read-only drive as SD card");
@@ -1894,6 +1895,11 @@ static void sd_realize(DeviceState *dev, Error **errp)
     }
 
     if (sd->blk) {
+        ret = blk_set_perm(sd->blk, BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE,
+                           BLK_PERM_ALL, errp);
+        if (ret < 0) {
+            return;
+        }
         blk_set_dev_ops(sd->blk, &sd_block_ops, sd);
     }
 }
diff --git a/hw/sd/sdhci.c b/hw/sd/sdhci.c
index da32b5f709..6d6a791ee9 100644
--- a/hw/sd/sdhci.c
+++ b/hw/sd/sdhci.c
@@ -119,6 +119,7 @@
     (SDHC_CAPAB_BASECLKFREQ << 8) | (SDHC_CAPAB_TOUNIT << 7) | \
     (SDHC_CAPAB_TOCLKFREQ))
 
+#define MASK_TRNMOD     0x0037
 #define MASKED_WRITE(reg, mask, val)  (reg = (reg & (mask)) | (val))
 
 static uint8_t sdhci_slotint(SDHCIState *s)
@@ -486,6 +487,11 @@ static void sdhci_sdma_transfer_multi_blocks(SDHCIState *s)
     uint32_t boundary_chk = 1 << (((s->blksize & 0xf000) >> 12) + 12);
     uint32_t boundary_count = boundary_chk - (s->sdmasysad % boundary_chk);
 
+    if (!(s->trnmod & SDHC_TRNS_BLK_CNT_EN) || !s->blkcnt) {
+        qemu_log_mask(LOG_UNIMP, "infinite transfer is not supported\n");
+        return;
+    }
+
     /* XXX: Some sd/mmc drivers (for example, u-boot-slp) do not account for
      * possible stop at page boundary if initial address is not page aligned,
      * allow them to work properly */
@@ -564,7 +570,6 @@ static void sdhci_sdma_transfer_multi_blocks(SDHCIState *s)
 }
 
 /* single block SDMA transfer */
-
 static void sdhci_sdma_transfer_single_block(SDHCIState *s)
 {
     int n;
@@ -583,10 +588,7 @@ static void sdhci_sdma_transfer_single_block(SDHCIState *s)
             sdbus_write_data(&s->sdbus, s->fifo_buffer[n]);
         }
     }
-
-    if (s->trnmod & SDHC_TRNS_BLK_CNT_EN) {
-        s->blkcnt--;
-    }
+    s->blkcnt--;
 
     sdhci_end_transfer(s);
 }
@@ -797,11 +799,6 @@ static void sdhci_data_transfer(void *opaque)
     if (s->trnmod & SDHC_TRNS_DMA) {
         switch (SDHC_DMA_TYPE(s->hostctl)) {
         case SDHC_CTRL_SDMA:
-            if ((s->trnmod & SDHC_TRNS_MULTI) &&
-                    (!(s->trnmod & SDHC_TRNS_BLK_CNT_EN) || s->blkcnt == 0)) {
-                break;
-            }
-
             if ((s->blkcnt == 1) || !(s->trnmod & SDHC_TRNS_MULTI)) {
                 sdhci_sdma_transfer_single_block(s);
             } else {
@@ -1022,7 +1019,11 @@ sdhci_write(void *opaque, hwaddr offset, uint64_t val, unsigned size)
         /* Writing to last byte of sdmasysad might trigger transfer */
         if (!(mask & 0xFF000000) && TRANSFERRING_DATA(s->prnsts) && s->blkcnt &&
                 s->blksize && SDHC_DMA_TYPE(s->hostctl) == SDHC_CTRL_SDMA) {
-            sdhci_sdma_transfer_multi_blocks(s);
+            if (s->trnmod & SDHC_TRNS_MULTI) {
+                sdhci_sdma_transfer_multi_blocks(s);
+            } else {
+                sdhci_sdma_transfer_single_block(s);
+            }
         }
         break;
     case SDHC_BLKSIZE:
@@ -1050,7 +1051,7 @@ sdhci_write(void *opaque, hwaddr offset, uint64_t val, unsigned size)
         if (!(s->capareg & SDHC_CAN_DO_DMA)) {
             value &= ~SDHC_TRNS_DMA;
         }
-        MASKED_WRITE(s->trnmod, mask, value);
+        MASKED_WRITE(s->trnmod, mask, value & MASK_TRNMOD);
         MASKED_WRITE(s->cmdreg, mask >> 16, value >> 16);
 
         /* Writing to the upper byte of CMDREG triggers SD command generation */
diff --git a/hw/sparc64/niagara.c b/hw/sparc64/niagara.c
index b55d4bb8d3..9a8d6109d4 100644
--- a/hw/sparc64/niagara.c
+++ b/hw/sparc64/niagara.c
@@ -35,6 +35,8 @@
 #include "hw/timer/sun4v-rtc.h"
 #include "exec/address-spaces.h"
 #include "sysemu/block-backend.h"
+#include "qemu/error-report.h"
+#include "sysemu/qtest.h"
 
 
 typedef struct NiagaraBoardState {
@@ -85,6 +87,17 @@ typedef struct NiagaraBoardState {
 #define NIAGARA_OBP_OFFSET  0x80000ULL
 #define PROM_SIZE_MAX       (4 * 1024 * 1024)
 
+static void add_rom_or_fail(const char *file, const hwaddr addr)
+{
+    /* XXX remove qtest_enabled() check once firmware files are
+     * in the qemu tree
+     */
+    if (!qtest_enabled() && rom_add_file_fixed(file, addr, -1)) {
+        error_report("Unable to load a firmware for -M niagara");
+        exit(1);
+    }
+
+}
 /* Niagara hardware initialisation */
 static void niagara_init(MachineState *machine)
 {
@@ -119,14 +132,13 @@ static void niagara_init(MachineState *machine)
                                          "sun4v.prom", PROM_SIZE_MAX);
     memory_region_add_subregion(sysmem, NIAGARA_PROM_BASE, &s->prom);
 
-    rom_add_file_fixed("nvram1", NIAGARA_NVRAM_BASE, -1);
-    rom_add_file_fixed("1up-md.bin", NIAGARA_MD_ROM_BASE, -1);
-    rom_add_file_fixed("1up-hv.bin", NIAGARA_HV_ROM_BASE, -1);
+    add_rom_or_fail("nvram1", NIAGARA_NVRAM_BASE);
+    add_rom_or_fail("1up-md.bin", NIAGARA_MD_ROM_BASE);
+    add_rom_or_fail("1up-hv.bin", NIAGARA_HV_ROM_BASE);
 
-    rom_add_file_fixed("reset.bin", NIAGARA_PROM_BASE, -1);
-    rom_add_file_fixed("q.bin", NIAGARA_PROM_BASE + NIAGARA_Q_OFFSET, -1);
-    rom_add_file_fixed("openboot.bin", NIAGARA_PROM_BASE + NIAGARA_OBP_OFFSET,
-                       -1);
+    add_rom_or_fail("reset.bin", NIAGARA_PROM_BASE);
+    add_rom_or_fail("q.bin", NIAGARA_PROM_BASE + NIAGARA_Q_OFFSET);
+    add_rom_or_fail("openboot.bin", NIAGARA_PROM_BASE + NIAGARA_OBP_OFFSET);
 
     /* the virtual ramdisk is kind of initrd, but it resides
        outside of the partition RAM */
@@ -146,9 +158,10 @@ static void niagara_init(MachineState *machine)
             exit(1);
         }
     }
-    serial_mm_init(sysmem, NIAGARA_UART_BASE, 0, NULL, 115200,
-                   serial_hds[0], DEVICE_BIG_ENDIAN);
-
+    if (serial_hds[0]) {
+        serial_mm_init(sysmem, NIAGARA_UART_BASE, 0, NULL, 115200,
+                       serial_hds[0], DEVICE_BIG_ENDIAN);
+    }
     empty_slot_init(NIAGARA_IOBBASE, NIAGARA_IOBSIZE);
     sun4v_rtc_init(NIAGARA_RTC_BASE);
 }
diff --git a/hw/timer/Makefile.objs b/hw/timer/Makefile.objs
index fc9966880f..dd6f27e2a3 100644
--- a/hw/timer/Makefile.objs
+++ b/hw/timer/Makefile.objs
@@ -1,5 +1,6 @@
 common-obj-$(CONFIG_ARM_TIMER) += arm_timer.o
 common-obj-$(CONFIG_ARM_MPTIMER) += arm_mptimer.o
+common-obj-$(CONFIG_ARM_V7M) += armv7m_systick.o
 common-obj-$(CONFIG_A9_GTIMER) += a9gtimer.o
 common-obj-$(CONFIG_CADENCE) += cadence_ttc.o
 common-obj-$(CONFIG_DS1338) += ds1338.o
diff --git a/hw/timer/armv7m_systick.c b/hw/timer/armv7m_systick.c
new file mode 100644
index 0000000000..df8d2804b3
--- /dev/null
+++ b/hw/timer/armv7m_systick.c
@@ -0,0 +1,240 @@
+/*
+ * ARMv7M SysTick timer
+ *
+ * Copyright (c) 2006-2007 CodeSourcery.
+ * Written by Paul Brook
+ * Copyright (c) 2017 Linaro Ltd
+ * Written by Peter Maydell
+ *
+ * This code is licensed under the GPL (version 2 or later).
+ */
+
+#include "qemu/osdep.h"
+#include "hw/timer/armv7m_systick.h"
+#include "qemu-common.h"
+#include "hw/sysbus.h"
+#include "qemu/timer.h"
+#include "qemu/log.h"
+#include "trace.h"
+
+/* qemu timers run at 1GHz.   We want something closer to 1MHz.  */
+#define SYSTICK_SCALE 1000ULL
+
+#define SYSTICK_ENABLE    (1 << 0)
+#define SYSTICK_TICKINT   (1 << 1)
+#define SYSTICK_CLKSOURCE (1 << 2)
+#define SYSTICK_COUNTFLAG (1 << 16)
+
+int system_clock_scale;
+
+/* Conversion factor from qemu timer to SysTick frequencies.  */
+static inline int64_t systick_scale(SysTickState *s)
+{
+    if (s->control & SYSTICK_CLKSOURCE) {
+        return system_clock_scale;
+    } else {
+        return 1000;
+    }
+}
+
+static void systick_reload(SysTickState *s, int reset)
+{
+    /* The Cortex-M3 Devices Generic User Guide says that "When the
+     * ENABLE bit is set to 1, the counter loads the RELOAD value from the
+     * SYST RVR register and then counts down". So, we need to check the
+     * ENABLE bit before reloading the value.
+     */
+    trace_systick_reload();
+
+    if ((s->control & SYSTICK_ENABLE) == 0) {
+        return;
+    }
+
+    if (reset) {
+        s->tick = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+    }
+    s->tick += (s->reload + 1) * systick_scale(s);
+    timer_mod(s->timer, s->tick);
+}
+
+static void systick_timer_tick(void *opaque)
+{
+    SysTickState *s = (SysTickState *)opaque;
+
+    trace_systick_timer_tick();
+
+    s->control |= SYSTICK_COUNTFLAG;
+    if (s->control & SYSTICK_TICKINT) {
+        /* Tell the NVIC to pend the SysTick exception */
+        qemu_irq_pulse(s->irq);
+    }
+    if (s->reload == 0) {
+        s->control &= ~SYSTICK_ENABLE;
+    } else {
+        systick_reload(s, 0);
+    }
+}
+
+static uint64_t systick_read(void *opaque, hwaddr addr, unsigned size)
+{
+    SysTickState *s = opaque;
+    uint32_t val;
+
+    switch (addr) {
+    case 0x0: /* SysTick Control and Status.  */
+        val = s->control;
+        s->control &= ~SYSTICK_COUNTFLAG;
+        break;
+    case 0x4: /* SysTick Reload Value.  */
+        val = s->reload;
+        break;
+    case 0x8: /* SysTick Current Value.  */
+    {
+        int64_t t;
+
+        if ((s->control & SYSTICK_ENABLE) == 0) {
+            val = 0;
+            break;
+        }
+        t = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+        if (t >= s->tick) {
+            val = 0;
+            break;
+        }
+        val = ((s->tick - (t + 1)) / systick_scale(s)) + 1;
+        /* The interrupt in triggered when the timer reaches zero.
+           However the counter is not reloaded until the next clock
+           tick.  This is a hack to return zero during the first tick.  */
+        if (val > s->reload) {
+            val = 0;
+        }
+        break;
+    }
+    case 0xc: /* SysTick Calibration Value.  */
+        val = 10000;
+        break;
+    default:
+        val = 0;
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "SysTick: Bad read offset 0x%" HWADDR_PRIx "\n", addr);
+        break;
+    }
+
+    trace_systick_read(addr, val, size);
+    return val;
+}
+
+static void systick_write(void *opaque, hwaddr addr,
+                          uint64_t value, unsigned size)
+{
+    SysTickState *s = opaque;
+
+    trace_systick_write(addr, value, size);
+
+    switch (addr) {
+    case 0x0: /* SysTick Control and Status.  */
+    {
+        uint32_t oldval = s->control;
+
+        s->control &= 0xfffffff8;
+        s->control |= value & 7;
+        if ((oldval ^ value) & SYSTICK_ENABLE) {
+            int64_t now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+            if (value & SYSTICK_ENABLE) {
+                if (s->tick) {
+                    s->tick += now;
+                    timer_mod(s->timer, s->tick);
+                } else {
+                    systick_reload(s, 1);
+                }
+            } else {
+                timer_del(s->timer);
+                s->tick -= now;
+                if (s->tick < 0) {
+                    s->tick = 0;
+                }
+            }
+        } else if ((oldval ^ value) & SYSTICK_CLKSOURCE) {
+            /* This is a hack. Force the timer to be reloaded
+               when the reference clock is changed.  */
+            systick_reload(s, 1);
+        }
+        break;
+    }
+    case 0x4: /* SysTick Reload Value.  */
+        s->reload = value;
+        break;
+    case 0x8: /* SysTick Current Value.  Writes reload the timer.  */
+        systick_reload(s, 1);
+        s->control &= ~SYSTICK_COUNTFLAG;
+        break;
+    default:
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "SysTick: Bad write offset 0x%" HWADDR_PRIx "\n", addr);
+    }
+}
+
+static const MemoryRegionOps systick_ops = {
+    .read = systick_read,
+    .write = systick_write,
+    .endianness = DEVICE_NATIVE_ENDIAN,
+    .valid.min_access_size = 4,
+    .valid.max_access_size = 4,
+};
+
+static void systick_reset(DeviceState *dev)
+{
+    SysTickState *s = SYSTICK(dev);
+
+    s->control = 0;
+    s->reload = 0;
+    s->tick = 0;
+    timer_del(s->timer);
+}
+
+static void systick_instance_init(Object *obj)
+{
+    SysBusDevice *sbd = SYS_BUS_DEVICE(obj);
+    SysTickState *s = SYSTICK(obj);
+
+    memory_region_init_io(&s->iomem, obj, &systick_ops, s, "systick", 0xe0);
+    sysbus_init_mmio(sbd, &s->iomem);
+    sysbus_init_irq(sbd, &s->irq);
+    s->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, systick_timer_tick, s);
+}
+
+static const VMStateDescription vmstate_systick = {
+    .name = "armv7m_systick",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT32(control, SysTickState),
+        VMSTATE_UINT32(reload, SysTickState),
+        VMSTATE_INT64(tick, SysTickState),
+        VMSTATE_TIMER_PTR(timer, SysTickState),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static void systick_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+
+    dc->vmsd = &vmstate_systick;
+    dc->reset = systick_reset;
+}
+
+static const TypeInfo armv7m_systick_info = {
+    .name = TYPE_SYSTICK,
+    .parent = TYPE_SYS_BUS_DEVICE,
+    .instance_init = systick_instance_init,
+    .instance_size = sizeof(SysTickState),
+    .class_init = systick_class_init,
+};
+
+static void armv7m_systick_register_types(void)
+{
+    type_register_static(&armv7m_systick_info);
+}
+
+type_init(armv7m_systick_register_types)
diff --git a/hw/timer/imx_gpt.c b/hw/timer/imx_gpt.c
index 010ccbf207..4b9b54bf2e 100644
--- a/hw/timer/imx_gpt.c
+++ b/hw/timer/imx_gpt.c
@@ -296,18 +296,23 @@ static uint64_t imx_gpt_read(void *opaque, hwaddr offset, unsigned size)
     return reg_value;
 }
 
-static void imx_gpt_reset(DeviceState *dev)
-{
-    IMXGPTState *s = IMX_GPT(dev);
 
+static void imx_gpt_reset_common(IMXGPTState *s, bool is_soft_reset)
+{
     /* stop timer */
     ptimer_stop(s->timer);
 
-    /*
-     * Soft reset doesn't touch some bits; hard reset clears them
+    /* Soft reset and hard reset differ only in their handling of the CR
+     * register -- soft reset preserves the values of some bits there.
      */
-    s->cr &= ~(GPT_CR_EN|GPT_CR_ENMOD|GPT_CR_STOPEN|GPT_CR_DOZEN|
-               GPT_CR_WAITEN|GPT_CR_DBGEN);
+    if (is_soft_reset) {
+        /* Clear all CR bits except those that are preserved by soft reset. */
+        s->cr &= GPT_CR_EN | GPT_CR_ENMOD | GPT_CR_STOPEN | GPT_CR_DOZEN |
+            GPT_CR_WAITEN | GPT_CR_DBGEN |
+            (GPT_CR_CLKSRC_MASK << GPT_CR_CLKSRC_SHIFT);
+    } else {
+        s->cr = 0;
+    }
     s->sr = 0;
     s->pr = 0;
     s->ir = 0;
@@ -333,6 +338,18 @@ static void imx_gpt_reset(DeviceState *dev)
     }
 }
 
+static void imx_gpt_soft_reset(DeviceState *dev)
+{
+    IMXGPTState *s = IMX_GPT(dev);
+    imx_gpt_reset_common(s, true);
+}
+
+static void imx_gpt_reset(DeviceState *dev)
+{
+    IMXGPTState *s = IMX_GPT(dev);
+    imx_gpt_reset_common(s, false);
+}
+
 static void imx_gpt_write(void *opaque, hwaddr offset, uint64_t value,
                           unsigned size)
 {
@@ -348,7 +365,7 @@ static void imx_gpt_write(void *opaque, hwaddr offset, uint64_t value,
         s->cr = value & ~0x7c14;
         if (s->cr & GPT_CR_SWR) { /* force reset */
             /* handle the reset */
-            imx_gpt_reset(DEVICE(s));
+            imx_gpt_soft_reset(DEVICE(s));
         } else {
             /* set our freq, as the source might have changed */
             imx_gpt_set_freq(s);
diff --git a/hw/timer/mips_gictimer.c b/hw/timer/mips_gictimer.c
index 3698889475..f5c5806724 100644
--- a/hw/timer/mips_gictimer.c
+++ b/hw/timer/mips_gictimer.c
@@ -14,6 +14,11 @@
 
 #define TIMER_PERIOD 10 /* 10 ns period for 100 Mhz frequency */
 
+uint32_t mips_gictimer_get_freq(MIPSGICTimerState *gic)
+{
+    return NANOSECONDS_PER_SECOND / TIMER_PERIOD;
+}
+
 static void gic_vptimer_update(MIPSGICTimerState *gictimer,
                                    uint32_t vp_index, uint64_t now)
 {
diff --git a/hw/timer/trace-events b/hw/timer/trace-events
index 3495c41c18..d17cfe6b39 100644
--- a/hw/timer/trace-events
+++ b/hw/timer/trace-events
@@ -49,3 +49,9 @@ aspeed_timer_ctrl_pulse_enable(uint8_t i, bool enable) "Timer %" PRIu8 ": %d"
 aspeed_timer_set_ctrl2(uint32_t value) "Value: 0x%" PRIx32
 aspeed_timer_set_value(int timer, int reg, uint32_t value) "Timer %d register %d: 0x%" PRIx32
 aspeed_timer_read(uint64_t offset, unsigned size, uint64_t value) "From 0x%" PRIx64 ": of size %u: 0x%" PRIx64
+
+# hw/timer/armv7m_systick.c
+systick_reload(void) "systick reload"
+systick_timer_tick(void) "systick reload"
+systick_read(uint64_t addr, uint32_t value, unsigned size) "systick read addr 0x%" PRIx64 " data 0x%" PRIx32 " size %u"
+systick_write(uint64_t addr, uint32_t value, unsigned size) "systick write addr 0x%" PRIx64 " data 0x%" PRIx32 " size %u"
diff --git a/hw/usb/bus.c b/hw/usb/bus.c
index 1dcc35c8f8..24f1608b4b 100644
--- a/hw/usb/bus.c
+++ b/hw/usb/bus.c
@@ -8,7 +8,6 @@
 #include "monitor/monitor.h"
 #include "trace.h"
 #include "qemu/cutils.h"
-#include "migration/migration.h"
 
 static void usb_bus_dev_print(Monitor *mon, DeviceState *qdev, int indent);
 
@@ -136,11 +135,12 @@ USBDevice *usb_device_find_device(USBDevice *dev, uint8_t addr)
     return NULL;
 }
 
-static void usb_device_handle_destroy(USBDevice *dev)
+static void usb_device_unrealize(USBDevice *dev, Error **errp)
 {
     USBDeviceClass *klass = USB_DEVICE_GET_CLASS(dev);
-    if (klass->handle_destroy) {
-        klass->handle_destroy(dev);
+
+    if (klass->unrealize) {
+        klass->unrealize(dev, errp);
     }
 }
 
@@ -291,7 +291,7 @@ static void usb_qdev_unrealize(DeviceState *qdev, Error **errp)
     if (dev->attached) {
         usb_device_detach(dev);
     }
-    usb_device_handle_destroy(dev);
+    usb_device_unrealize(dev, errp);
     if (dev->port) {
         usb_release_port(dev);
     }
@@ -687,8 +687,6 @@ USBDevice *usbdevice_create(const char *cmdline)
     const char *params;
     int len;
     USBDevice *dev;
-    ObjectClass *klass;
-    DeviceClass *dc;
 
     params = strchr(cmdline,':');
     if (params) {
@@ -723,22 +721,6 @@ USBDevice *usbdevice_create(const char *cmdline)
         return NULL;
     }
 
-    klass = object_class_by_name(f->name);
-    if (klass == NULL) {
-        error_report("Device '%s' not found", f->name);
-        return NULL;
-    }
-
-    dc = DEVICE_CLASS(klass);
-
-    if (only_migratable) {
-        if (dc->vmsd->unmigratable) {
-            error_report("Device %s is not migratable, but --only-migratable "
-                         "was specified", f->name);
-            return NULL;
-        }
-    }
-
     if (f->usbdevice_init) {
         dev = f->usbdevice_init(bus, params);
     } else {
diff --git a/hw/usb/dev-audio.c b/hw/usb/dev-audio.c
index 87cab0a3d1..343345235c 100644
--- a/hw/usb/dev-audio.c
+++ b/hw/usb/dev-audio.c
@@ -617,7 +617,7 @@ static void usb_audio_handle_data(USBDevice *dev, USBPacket *p)
     }
 }
 
-static void usb_audio_handle_destroy(USBDevice *dev)
+static void usb_audio_unrealize(USBDevice *dev, Error **errp)
 {
     USBAudioState *s = USB_AUDIO(dev);
 
@@ -683,7 +683,7 @@ static void usb_audio_class_init(ObjectClass *klass, void *data)
     k->handle_reset   = usb_audio_handle_reset;
     k->handle_control = usb_audio_handle_control;
     k->handle_data    = usb_audio_handle_data;
-    k->handle_destroy = usb_audio_handle_destroy;
+    k->unrealize      = usb_audio_unrealize;
     k->set_interface  = usb_audio_set_interface;
 }
 
diff --git a/hw/usb/dev-bluetooth.c b/hw/usb/dev-bluetooth.c
index 91a4a0b8b9..443e3c301d 100644
--- a/hw/usb/dev-bluetooth.c
+++ b/hw/usb/dev-bluetooth.c
@@ -496,7 +496,7 @@ static void usb_bt_out_hci_packet_acl(void *opaque,
     usb_bt_fifo_enqueue(&s->acl, data, len);
 }
 
-static void usb_bt_handle_destroy(USBDevice *dev)
+static void usb_bt_unrealize(USBDevice *dev, Error **errp)
 {
     struct USBBtState *s = (struct USBBtState *) dev->opaque;
 
@@ -559,7 +559,7 @@ static void usb_bt_class_initfn(ObjectClass *klass, void *data)
     uc->handle_reset   = usb_bt_handle_reset;
     uc->handle_control = usb_bt_handle_control;
     uc->handle_data    = usb_bt_handle_data;
-    uc->handle_destroy = usb_bt_handle_destroy;
+    uc->unrealize      = usb_bt_unrealize;
     dc->vmsd = &vmstate_usb_bt;
     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
 }
diff --git a/hw/usb/dev-hid.c b/hw/usb/dev-hid.c
index dda0bf0df0..c40019df96 100644
--- a/hw/usb/dev-hid.c
+++ b/hw/usb/dev-hid.c
@@ -690,7 +690,7 @@ static void usb_hid_handle_data(USBDevice *dev, USBPacket *p)
     }
 }
 
-static void usb_hid_handle_destroy(USBDevice *dev)
+static void usb_hid_unrealize(USBDevice *dev, Error **errp)
 {
     USBHIDState *us = USB_HID(dev);
 
@@ -785,7 +785,7 @@ static void usb_hid_class_initfn(ObjectClass *klass, void *data)
     uc->handle_reset   = usb_hid_handle_reset;
     uc->handle_control = usb_hid_handle_control;
     uc->handle_data    = usb_hid_handle_data;
-    uc->handle_destroy = usb_hid_handle_destroy;
+    uc->unrealize      = usb_hid_unrealize;
     uc->handle_attach  = usb_desc_attach;
 }
 
diff --git a/hw/usb/dev-hub.c b/hw/usb/dev-hub.c
index a33f21cb38..9fe7333946 100644
--- a/hw/usb/dev-hub.c
+++ b/hw/usb/dev-hub.c
@@ -497,7 +497,7 @@ static void usb_hub_handle_data(USBDevice *dev, USBPacket *p)
     }
 }
 
-static void usb_hub_handle_destroy(USBDevice *dev)
+static void usb_hub_unrealize(USBDevice *dev, Error **errp)
 {
     USBHubState *s = (USBHubState *)dev;
     int i;
@@ -575,7 +575,7 @@ static void usb_hub_class_initfn(ObjectClass *klass, void *data)
     uc->handle_reset   = usb_hub_handle_reset;
     uc->handle_control = usb_hub_handle_control;
     uc->handle_data    = usb_hub_handle_data;
-    uc->handle_destroy = usb_hub_handle_destroy;
+    uc->unrealize      = usb_hub_unrealize;
     set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
     dc->fw_name = "hub";
     dc->vmsd = &vmstate_usb_hub;
diff --git a/hw/usb/dev-network.c b/hw/usb/dev-network.c
index c0f1193ba9..85fc81bf43 100644
--- a/hw/usb/dev-network.c
+++ b/hw/usb/dev-network.c
@@ -1324,7 +1324,7 @@ static void usbnet_cleanup(NetClientState *nc)
     s->nic = NULL;
 }
 
-static void usb_net_handle_destroy(USBDevice *dev)
+static void usb_net_unrealize(USBDevice *dev, Error **errp)
 {
     USBNetState *s = (USBNetState *) dev;
 
@@ -1428,7 +1428,7 @@ static void usb_net_class_initfn(ObjectClass *klass, void *data)
     uc->handle_reset   = usb_net_handle_reset;
     uc->handle_control = usb_net_handle_control;
     uc->handle_data    = usb_net_handle_data;
-    uc->handle_destroy = usb_net_handle_destroy;
+    uc->unrealize      = usb_net_unrealize;
     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
     dc->fw_name = "network";
     dc->vmsd = &vmstate_usb_net;
diff --git a/hw/usb/dev-smartcard-reader.c b/hw/usb/dev-smartcard-reader.c
index 7cd4ed0d17..757b8b3f5a 100644
--- a/hw/usb/dev-smartcard-reader.c
+++ b/hw/usb/dev-smartcard-reader.c
@@ -1163,7 +1163,7 @@ static void ccid_handle_data(USBDevice *dev, USBPacket *p)
     }
 }
 
-static void ccid_handle_destroy(USBDevice *dev)
+static void ccid_unrealize(USBDevice *dev, Error **errp)
 {
     USBCCIDState *s = USB_CCID_DEV(dev);
 
@@ -1470,7 +1470,7 @@ static void ccid_class_initfn(ObjectClass *klass, void *data)
     uc->handle_reset   = ccid_handle_reset;
     uc->handle_control = ccid_handle_control;
     uc->handle_data    = ccid_handle_data;
-    uc->handle_destroy = ccid_handle_destroy;
+    uc->unrealize      = ccid_unrealize;
     dc->desc = "CCID Rev 1.1 smartcard reader";
     dc->vmsd = &ccid_vmstate;
     dc->props = ccid_properties;
diff --git a/hw/usb/dev-storage.c b/hw/usb/dev-storage.c
index c607f7606d..8a61ec94c8 100644
--- a/hw/usb/dev-storage.c
+++ b/hw/usb/dev-storage.c
@@ -589,6 +589,13 @@ static const struct SCSIBusInfo usb_msd_scsi_info_bot = {
     .load_request = usb_msd_load_request,
 };
 
+static void usb_msd_unrealize_storage(USBDevice *dev, Error **errp)
+{
+    MSDState *s = USB_STORAGE_DEV(dev);
+
+    object_unref(OBJECT(&s->bus));
+}
+
 static void usb_msd_realize_storage(USBDevice *dev, Error **errp)
 {
     MSDState *s = USB_STORAGE_DEV(dev);
@@ -603,7 +610,11 @@ static void usb_msd_realize_storage(USBDevice *dev, Error **errp)
 
     blkconf_serial(&s->conf, &dev->serial);
     blkconf_blocksizes(&s->conf);
-    blkconf_apply_backend_options(&s->conf);
+    blkconf_apply_backend_options(&s->conf, blk_is_read_only(blk), true, &err);
+    if (err) {
+        error_propagate(errp, err);
+        return;
+    }
 
     /*
      * Hack alert: this pretends to be a block device, but it's really
@@ -635,6 +646,13 @@ static void usb_msd_realize_storage(USBDevice *dev, Error **errp)
     s->scsi_dev = scsi_dev;
 }
 
+static void usb_msd_unrealize_bot(USBDevice *dev, Error **errp)
+{
+    MSDState *s = USB_STORAGE_DEV(dev);
+
+    object_unref(OBJECT(&s->bus));
+}
+
 static void usb_msd_realize_bot(USBDevice *dev, Error **errp)
 {
     MSDState *s = USB_STORAGE_DEV(dev);
@@ -755,6 +773,7 @@ static void usb_msd_class_initfn_storage(ObjectClass *klass, void *data)
     USBDeviceClass *uc = USB_DEVICE_CLASS(klass);
 
     uc->realize = usb_msd_realize_storage;
+    uc->unrealize = usb_msd_unrealize_storage;
     dc->props = msd_properties;
 }
 
@@ -817,6 +836,7 @@ static void usb_msd_class_initfn_bot(ObjectClass *klass, void *data)
     USBDeviceClass *uc = USB_DEVICE_CLASS(klass);
 
     uc->realize = usb_msd_realize_bot;
+    uc->unrealize = usb_msd_unrealize_bot;
     uc->attached_settable = true;
 }
 
diff --git a/hw/usb/dev-uas.c b/hw/usb/dev-uas.c
index da2fb7017e..fffc424396 100644
--- a/hw/usb/dev-uas.c
+++ b/hw/usb/dev-uas.c
@@ -891,11 +891,13 @@ static void usb_uas_handle_data(USBDevice *dev, USBPacket *p)
     }
 }
 
-static void usb_uas_handle_destroy(USBDevice *dev)
+static void usb_uas_unrealize(USBDevice *dev, Error **errp)
 {
     UASDevice *uas = USB_UAS(dev);
 
     qemu_bh_delete(uas->status_bh);
+
+    object_unref(OBJECT(&uas->bus));
 }
 
 static void usb_uas_realize(USBDevice *dev, Error **errp)
@@ -944,7 +946,7 @@ static void usb_uas_class_initfn(ObjectClass *klass, void *data)
     uc->handle_reset   = usb_uas_handle_reset;
     uc->handle_control = usb_uas_handle_control;
     uc->handle_data    = usb_uas_handle_data;
-    uc->handle_destroy = usb_uas_handle_destroy;
+    uc->unrealize      = usb_uas_unrealize;
     uc->attached_settable = true;
     set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
     dc->fw_name = "storage";
diff --git a/hw/usb/dev-wacom.c b/hw/usb/dev-wacom.c
index c4702dbba0..bf70013059 100644
--- a/hw/usb/dev-wacom.c
+++ b/hw/usb/dev-wacom.c
@@ -329,7 +329,7 @@ static void usb_wacom_handle_data(USBDevice *dev, USBPacket *p)
     }
 }
 
-static void usb_wacom_handle_destroy(USBDevice *dev)
+static void usb_wacom_unrealize(USBDevice *dev, Error **errp)
 {
     USBWacomState *s = (USBWacomState *) dev;
 
@@ -364,7 +364,7 @@ static void usb_wacom_class_init(ObjectClass *klass, void *data)
     uc->handle_reset   = usb_wacom_handle_reset;
     uc->handle_control = usb_wacom_handle_control;
     uc->handle_data    = usb_wacom_handle_data;
-    uc->handle_destroy = usb_wacom_handle_destroy;
+    uc->unrealize      = usb_wacom_unrealize;
     set_bit(DEVICE_CATEGORY_INPUT, dc->categories);
     dc->desc = "QEMU PenPartner Tablet";
     dc->vmsd = &vmstate_usb_wacom;
diff --git a/hw/usb/hcd-ohci.c b/hw/usb/hcd-ohci.c
index 21c93e0372..fe8406ac64 100644
--- a/hw/usb/hcd-ohci.c
+++ b/hw/usb/hcd-ohci.c
@@ -1001,7 +1001,7 @@ static int ohci_service_td(OHCIState *ohci, struct ohci_ed *ed)
     if (ohci_read_td(ohci, addr, &td)) {
         trace_usb_ohci_td_read_error(addr);
         ohci_die(ohci);
-        return 0;
+        return 1;
     }
 
     dir = OHCI_BM(ed->flags, ED_D);
diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c
index 28dd2f2c9a..f0af852709 100644
--- a/hw/usb/hcd-xhci.c
+++ b/hw/usb/hcd-xhci.c
@@ -635,6 +635,11 @@ static bool xhci_get_flag(XHCIState *xhci, enum xhci_flags bit)
     return xhci->flags & (1 << bit);
 }
 
+static void xhci_set_flag(XHCIState *xhci, enum xhci_flags bit)
+{
+    xhci->flags |= (1 << bit);
+}
+
 static uint64_t xhci_mfindex_get(XHCIState *xhci)
 {
     int64_t now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
@@ -3839,17 +3844,21 @@ static const VMStateDescription vmstate_xhci = {
     }
 };
 
-static Property xhci_properties[] = {
+static Property nec_xhci_properties[] = {
     DEFINE_PROP_ON_OFF_AUTO("msi", XHCIState, msi, ON_OFF_AUTO_AUTO),
     DEFINE_PROP_ON_OFF_AUTO("msix", XHCIState, msix, ON_OFF_AUTO_AUTO),
     DEFINE_PROP_BIT("superspeed-ports-first",
                     XHCIState, flags, XHCI_FLAG_SS_FIRST, true),
     DEFINE_PROP_BIT("force-pcie-endcap", XHCIState, flags,
                     XHCI_FLAG_FORCE_PCIE_ENDCAP, false),
-    DEFINE_PROP_BIT("streams", XHCIState, flags,
-                    XHCI_FLAG_ENABLE_STREAMS, true),
     DEFINE_PROP_UINT32("intrs", XHCIState, numintrs, MAXINTRS),
     DEFINE_PROP_UINT32("slots", XHCIState, numslots, MAXSLOTS),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static Property xhci_properties[] = {
+    DEFINE_PROP_BIT("streams", XHCIState, flags,
+                    XHCI_FLAG_ENABLE_STREAMS, true),
     DEFINE_PROP_UINT32("p2",    XHCIState, numports_2, 4),
     DEFINE_PROP_UINT32("p3",    XHCIState, numports_3, 4),
     DEFINE_PROP_END_OF_LIST(),
@@ -3881,7 +3890,9 @@ static const TypeInfo xhci_info = {
 static void nec_xhci_class_init(ObjectClass *klass, void *data)
 {
     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
+    DeviceClass *dc = DEVICE_CLASS(klass);
 
+    dc->props       = nec_xhci_properties;
     k->vendor_id    = PCI_VENDOR_ID_NEC;
     k->device_id    = PCI_DEVICE_ID_NEC_UPD720200;
     k->revision     = 0x03;
@@ -3902,10 +3913,22 @@ static void qemu_xhci_class_init(ObjectClass *klass, void *data)
     k->revision     = 0x01;
 }
 
+static void qemu_xhci_instance_init(Object *obj)
+{
+    XHCIState *xhci = XHCI(obj);
+
+    xhci->msi      = ON_OFF_AUTO_OFF;
+    xhci->msix     = ON_OFF_AUTO_AUTO;
+    xhci->numintrs = MAXINTRS;
+    xhci->numslots = MAXSLOTS;
+    xhci_set_flag(xhci, XHCI_FLAG_SS_FIRST);
+}
+
 static const TypeInfo qemu_xhci_info = {
     .name          = TYPE_QEMU_XHCI,
     .parent        = TYPE_XHCI,
     .class_init    = qemu_xhci_class_init,
+    .instance_init = qemu_xhci_instance_init,
 };
 
 static void xhci_register_types(void)
diff --git a/hw/usb/host-libusb.c b/hw/usb/host-libusb.c
index 7791c6d520..c9876a5b0f 100644
--- a/hw/usb/host-libusb.c
+++ b/hw/usb/host-libusb.c
@@ -1065,7 +1065,7 @@ static void usb_host_instance_init(Object *obj)
                                   &udev->qdev, NULL);
 }
 
-static void usb_host_handle_destroy(USBDevice *udev)
+static void usb_host_unrealize(USBDevice *udev, Error **errp)
 {
     USBHostDevice *s = USB_HOST_DEVICE(udev);
 
@@ -1568,7 +1568,7 @@ static void usb_host_class_initfn(ObjectClass *klass, void *data)
     uc->handle_data    = usb_host_handle_data;
     uc->handle_control = usb_host_handle_control;
     uc->handle_reset   = usb_host_handle_reset;
-    uc->handle_destroy = usb_host_handle_destroy;
+    uc->unrealize      = usb_host_unrealize;
     uc->flush_ep_queue = usb_host_flush_ep_queue;
     uc->alloc_streams  = usb_host_alloc_streams;
     uc->free_streams   = usb_host_free_streams;
diff --git a/hw/usb/redirect.c b/hw/usb/redirect.c
index 860f5c35eb..0efe62f725 100644
--- a/hw/usb/redirect.c
+++ b/hw/usb/redirect.c
@@ -1427,7 +1427,7 @@ static void usbredir_cleanup_device_queues(USBRedirDevice *dev)
     }
 }
 
-static void usbredir_handle_destroy(USBDevice *udev)
+static void usbredir_unrealize(USBDevice *udev, Error **errp)
 {
     USBRedirDevice *dev = USB_REDIRECT(udev);
     Chardev *chr = qemu_chr_fe_get_driver(&dev->cs);
@@ -2513,7 +2513,7 @@ static void usbredir_class_initfn(ObjectClass *klass, void *data)
 
     uc->realize        = usbredir_realize;
     uc->product_desc   = "USB Redirection Device";
-    uc->handle_destroy = usbredir_handle_destroy;
+    uc->unrealize      = usbredir_unrealize;
     uc->cancel_packet  = usbredir_cancel_packet;
     uc->handle_reset   = usbredir_handle_reset;
     uc->handle_data    = usbredir_handle_data;
diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c
index e9b493b939..e995e32dee 100644
--- a/hw/vfio/pci-quirks.c
+++ b/hw/vfio/pci-quirks.c
@@ -1367,14 +1367,45 @@ static void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr)
     uint16_t cmd_orig, cmd;
     Error *err = NULL;
 
+    /* This must be an Intel VGA device. */
+    if (!vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, PCI_ANY_ID) ||
+        !vfio_is_vga(vdev) || nr != 4) {
+        return;
+    }
+
     /*
-     * This must be an Intel VGA device at address 00:02.0 for us to even
-     * consider enabling legacy mode.  The vBIOS has dependencies on the
-     * PCI bus address.
+     * IGD is not a standard, they like to change their specs often.  We
+     * only attempt to support back to SandBridge and we hope that newer
+     * devices maintain compatibility with generation 8.
      */
-    if (!vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, PCI_ANY_ID) ||
-        !vfio_is_vga(vdev) || nr != 4 ||
-        &vdev->pdev != pci_find_device(pci_device_root_bus(&vdev->pdev),
+    gen = igd_gen(vdev);
+    if (gen != 6 && gen != 8) {
+        error_report("IGD device %s is unsupported by IGD quirks, "
+                     "try SandyBridge or newer", vdev->vbasedev.name);
+        return;
+    }
+
+    /*
+     * Regardless of running in UPT or legacy mode, the guest graphics
+     * driver may attempt to use stolen memory, however only legacy mode
+     * has BIOS support for reserving stolen memory in the guest VM.
+     * Emulate the GMCH register in all cases and zero out the stolen
+     * memory size here. Legacy mode may request allocation and re-write
+     * this below.
+     */
+    gmch = vfio_pci_read_config(&vdev->pdev, IGD_GMCH, 4);
+    gmch &= ~((gen < 8 ? 0x1f : 0xff) << (gen < 8 ? 3 : 8));
+
+    /* GMCH is read-only, emulated */
+    pci_set_long(vdev->pdev.config + IGD_GMCH, gmch);
+    pci_set_long(vdev->pdev.wmask + IGD_GMCH, 0);
+    pci_set_long(vdev->emulated_config_bits + IGD_GMCH, ~0);
+
+    /*
+     * This must be at address 00:02.0 for us to even onsider enabling
+     * legacy mode.  The vBIOS has dependencies on the PCI bus address.
+     */
+    if (&vdev->pdev != pci_find_device(pci_device_root_bus(&vdev->pdev),
                                        0, PCI_DEVFN(0x2, 0))) {
         return;
     }
@@ -1394,18 +1425,6 @@ static void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr)
     }
 
     /*
-     * IGD is not a standard, they like to change their specs often.  We
-     * only attempt to support back to SandBridge and we hope that newer
-     * devices maintain compatibility with generation 8.
-     */
-    gen = igd_gen(vdev);
-    if (gen != 6 && gen != 8) {
-        error_report("IGD device %s is unsupported in legacy mode, "
-                     "try SandyBridge or newer", vdev->vbasedev.name);
-        return;
-    }
-
-    /*
      * Most of what we're doing here is to enable the ROM to run, so if
      * there's no ROM, there's no point in setting up this quirk.
      * NB. We only seem to get BIOS ROMs, so a UEFI VM would need CSM support.
@@ -1460,8 +1479,6 @@ static void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr)
         goto out;
     }
 
-    gmch = vfio_pci_read_config(&vdev->pdev, IGD_GMCH, 4);
-
     /*
      * If IGD VGA Disable is clear (expected) and VGA is not already enabled,
      * try to enable it.  Probably shouldn't be using legacy mode without VGA,
@@ -1532,12 +1549,11 @@ static void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr)
      * when IVD (IGD VGA Disable) is clear, but the claim is that it's unused,
      * so let's not waste VM memory for it.
      */
-    gmch &= ~((gen < 8 ? 0x1f : 0xff) << (gen < 8 ? 3 : 8));
-
     if (vdev->igd_gms) {
         if (vdev->igd_gms <= 0x10) {
             gms_mb = vdev->igd_gms * 32;
             gmch |= vdev->igd_gms << (gen < 8 ? 3 : 8);
+            pci_set_long(vdev->pdev.config + IGD_GMCH, gmch);
         } else {
             error_report("Unsupported IGD GMS value 0x%x", vdev->igd_gms);
             vdev->igd_gms = 0;
@@ -1557,11 +1573,6 @@ static void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr)
     fw_cfg_add_file(fw_cfg_find(), "etc/igd-bdsm-size",
                     bdsm_size, sizeof(*bdsm_size));
 
-    /* GMCH is read-only, emulated */
-    pci_set_long(vdev->pdev.config + IGD_GMCH, gmch);
-    pci_set_long(vdev->pdev.wmask + IGD_GMCH, 0);
-    pci_set_long(vdev->emulated_config_bits + IGD_GMCH, ~0);
-
     /* BDSM is read-write, emulated.  The BIOS needs to be able to write it */
     pci_set_long(vdev->pdev.config + IGD_BDSM, 0);
     pci_set_long(vdev->pdev.wmask + IGD_BDSM, ~0);
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 332f41d662..03a3d01549 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -1880,16 +1880,26 @@ static void vfio_add_ext_cap(VFIOPCIDevice *vdev)
     /*
      * Extended capabilities are chained with each pointing to the next, so we
      * can drop anything other than the head of the chain simply by modifying
-     * the previous next pointer.  For the head of the chain, we can modify the
-     * capability ID to something that cannot match a valid capability.  ID
-     * 0 is reserved for this since absence of capabilities is indicated by
-     * 0 for the ID, version, AND next pointer.  However, pcie_add_capability()
-     * uses ID 0 as reserved for list management and will incorrectly match and
-     * assert if we attempt to pre-load the head of the chain with this ID.
-     * Use ID 0xFFFF temporarily since it is also seems to be reserved in
-     * part for identifying absence of capabilities in a root complex register
-     * block.  If the ID still exists after adding capabilities, switch back to
-     * zero.  We'll mark this entire first dword as emulated for this purpose.
+     * the previous next pointer.  Seed the head of the chain here such that
+     * we can simply skip any capabilities we want to drop below, regardless
+     * of their position in the chain.  If this stub capability still exists
+     * after we add the capabilities we want to expose, update the capability
+     * ID to zero.  Note that we cannot seed with the capability header being
+     * zero as this conflicts with definition of an absent capability chain
+     * and prevents capabilities beyond the head of the list from being added.
+     * By replacing the dummy capability ID with zero after walking the device
+     * chain, we also transparently mark extended capabilities as absent if
+     * no capabilities were added.  Note that the PCIe spec defines an absence
+     * of extended capabilities to be determined by a value of zero for the
+     * capability ID, version, AND next pointer.  A non-zero next pointer
+     * should be sufficient to indicate additional capabilities are present,
+     * which will occur if we call pcie_add_capability() below.  The entire
+     * first dword is emulated to support this.
+     *
+     * NB. The kernel side does similar masking, so be prepared that our
+     * view of the device may also contain a capability ID zero in the head
+     * of the chain.  Skip it for the same reason that we cannot seed the
+     * chain with a zero capability.
      */
     pci_set_long(pdev->config + PCI_CONFIG_SPACE_SIZE,
                  PCI_EXT_CAP(0xFFFF, 0, 0));
@@ -1915,6 +1925,7 @@ static void vfio_add_ext_cap(VFIOPCIDevice *vdev)
                                    PCI_EXT_CAP_NEXT_MASK);
 
         switch (cap_id) {
+        case 0: /* kernel masked capability */
         case PCI_EXT_CAP_ID_SRIOV: /* Read-only VF BARs confuse OVMF */
         case PCI_EXT_CAP_ID_ARI: /* XXX Needs next function virtualization */
             trace_vfio_add_ext_cap_dropped(vdev->vbasedev.name, cap_id, next);
@@ -2506,12 +2517,16 @@ static void vfio_unregister_err_notifier(VFIOPCIDevice *vdev)
 static void vfio_req_notifier_handler(void *opaque)
 {
     VFIOPCIDevice *vdev = opaque;
+    Error *err = NULL;
 
     if (!event_notifier_test_and_clear(&vdev->req_notifier)) {
         return;
     }
 
-    qdev_unplug(&vdev->pdev.qdev, NULL);
+    qdev_unplug(&vdev->pdev.qdev, &err);
+    if (err) {
+        error_reportf_err(err, WARN_PREFIX, vdev->vbasedev.name);
+    }
 }
 
 static void vfio_register_req_notifier(VFIOPCIDevice *vdev)
diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index 5ce42af9d4..b76f3f62a0 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -1153,7 +1153,7 @@ static AddressSpace *virtio_pci_get_dma_as(DeviceState *d)
     VirtIOPCIProxy *proxy = VIRTIO_PCI(d);
     PCIDevice *dev = &proxy->pci_dev;
 
-    return pci_get_address_space(dev);
+    return pci_device_iommu_address_space(dev);
 }
 
 static int virtio_pci_add_mem_cap(VirtIOPCIProxy *proxy,
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 23483c752f..efce4b343a 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -282,12 +282,17 @@ static inline void vring_set_avail_event(VirtQueue *vq, uint16_t val)
     caches = atomic_rcu_read(&vq->vring.caches);
     pa = offsetof(VRingUsed, ring[vq->vring.num]);
     virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val);
+    address_space_cache_invalidate(&caches->used, pa, sizeof(val));
 }
 
 void virtio_queue_set_notification(VirtQueue *vq, int enable)
 {
     vq->notification = enable;
 
+    if (!vq->vring.desc) {
+        return;
+    }
+
     rcu_read_lock();
     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
         vring_set_avail_event(vq, vring_avail_idx(vq));
@@ -1852,7 +1857,10 @@ void virtio_save(VirtIODevice *vdev, QEMUFile *f)
         if (k->has_variable_vring_alignment) {
             qemu_put_be32(f, vdev->vq[i].vring.align);
         }
-        /* XXX virtio-1 devices */
+        /*
+         * Save desc now, the rest of the ring addresses are saved in
+         * subsections for VIRTIO-1 devices.
+         */
         qemu_put_be64(f, vdev->vq[i].vring.desc);
         qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
         if (k->save_queue) {
@@ -1993,14 +2001,11 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
         vdev->vq[i].signalled_used_valid = false;
         vdev->vq[i].notification = true;
 
-        if (vdev->vq[i].vring.desc) {
-            /* XXX virtio-1 devices */
-            virtio_queue_update_rings(vdev, i);
-        } else if (vdev->vq[i].last_avail_idx) {
+        if (!vdev->vq[i].vring.desc && vdev->vq[i].last_avail_idx) {
             error_report("VQ %d address 0x0 "
                          "inconsistent with Host index 0x%x",
                          i, vdev->vq[i].last_avail_idx);
-                return -1;
+            return -1;
         }
         if (k->load_queue) {
             ret = k->load_queue(qbus->parent, i, f);
@@ -2061,6 +2066,19 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
     for (i = 0; i < num; i++) {
         if (vdev->vq[i].vring.desc) {
             uint16_t nheads;
+
+            /*
+             * VIRTIO-1 devices migrate desc, used, and avail ring addresses so
+             * only the region cache needs to be set up.  Legacy devices need
+             * to calculate used and avail ring addresses based on the desc
+             * address.
+             */
+            if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
+                virtio_init_region_cache(vdev, i);
+            } else {
+                virtio_queue_update_rings(vdev, i);
+            }
+
             nheads = vring_avail_idx(&vdev->vq[i]) - vdev->vq[i].last_avail_idx;
             /* Check it isn't doing strange things with descriptor numbers. */
             if (nheads > vdev->vq[i].vring.num) {
@@ -2291,7 +2309,7 @@ static bool virtio_queue_host_notifier_aio_poll(void *opaque)
     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
     bool progress;
 
-    if (virtio_queue_empty(vq)) {
+    if (!vq->vring.desc || virtio_queue_empty(vq)) {
         return false;
     }