summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--.shippable.yml2
-rw-r--r--CODING_STYLE7
-rw-r--r--MAINTAINERS11
-rw-r--r--audio/audio.c11
-rw-r--r--audio/audio.h5
-rw-r--r--audio/mixeng.c32
-rw-r--r--audio/sdlaudio.c48
-rw-r--r--block.c583
-rw-r--r--block/backup.c22
-rw-r--r--block/blkdebug.c2
-rwxr-xr-xblock/blkreplay.c1
-rw-r--r--block/blkverify.c1
-rw-r--r--block/block-backend.c116
-rw-r--r--block/bochs.c1
-rw-r--r--block/cloop.c1
-rw-r--r--block/commit.c176
-rw-r--r--block/crypto.c1
-rw-r--r--block/curl.c24
-rw-r--r--block/dmg.c1
-rw-r--r--block/io.c41
-rw-r--r--block/iscsi.c83
-rw-r--r--block/mirror.c239
-rw-r--r--block/nfs.c23
-rw-r--r--block/parallels.c4
-rw-r--r--block/qcow.c4
-rw-r--r--block/qcow2.c19
-rw-r--r--block/qed.c4
-rw-r--r--block/quorum.c11
-rw-r--r--block/raw-format.c1
-rw-r--r--block/rbd.c553
-rw-r--r--block/replication.c3
-rw-r--r--block/sheepdog.c2
-rw-r--r--block/stream.c47
-rw-r--r--block/vdi.c4
-rw-r--r--block/vhdx.c4
-rw-r--r--block/vmdk.c7
-rw-r--r--block/vpc.c4
-rw-r--r--block/vvfat.c24
-rw-r--r--blockdev.c157
-rw-r--r--blockjob.c62
-rwxr-xr-xconfigure1
-rw-r--r--cputlb.c15
-rw-r--r--default-configs/arm-softmmu.mak2
-rw-r--r--default-configs/i386-softmmu.mak1
-rw-r--r--default-configs/x86_64-softmmu.mak1
-rw-r--r--docs/mach-virt-graphical.cfg281
-rw-r--r--docs/mach-virt-serial.cfg243
-rw-r--r--docs/migration.txt71
-rw-r--r--docs/q35-chipset.cfg152
-rw-r--r--docs/q35-emulated.cfg288
-rw-r--r--docs/q35-virtio-graphical.cfg248
-rw-r--r--docs/q35-virtio-serial.cfg193
-rw-r--r--docs/replay.txt7
-rw-r--r--docs/specs/vmgenid.txt245
-rw-r--r--exec.c83
-rw-r--r--fpu/softfloat.c2
-rw-r--r--fsdev/Makefile.objs2
-rw-r--r--fsdev/file-op-9p.h3
-rw-r--r--fsdev/qemu-fsdev-opts.c3
-rw-r--r--fsdev/qemu-fsdev-throttle.c118
-rw-r--r--fsdev/qemu-fsdev-throttle.h39
-rw-r--r--hmp-commands-info.hx14
-rw-r--r--hmp.c42
-rw-r--r--hmp.h1
-rw-r--r--hw/9pfs/9p-local.c1031
-rw-r--r--hw/9pfs/9p-local.h20
-rw-r--r--hw/9pfs/9p-posix-acl.c44
-rw-r--r--hw/9pfs/9p-util.c69
-rw-r--r--hw/9pfs/9p-util.h54
-rw-r--r--hw/9pfs/9p-xattr-user.c24
-rw-r--r--hw/9pfs/9p-xattr.c166
-rw-r--r--hw/9pfs/9p-xattr.h87
-rw-r--r--hw/9pfs/9p.c19
-rw-r--r--hw/9pfs/Makefile.objs2
-rw-r--r--hw/9pfs/cofile.c2
-rw-r--r--hw/acpi/Makefile.objs1
-rw-r--r--hw/acpi/aml-build.c2
-rw-r--r--hw/acpi/bios-linker-loader.c66
-rw-r--r--hw/acpi/pcihp.c11
-rw-r--r--hw/acpi/piix4.c2
-rw-r--r--hw/acpi/tco.c1
-rw-r--r--hw/acpi/vmgenid.c258
-rw-r--r--hw/arm/armv7m.c379
-rw-r--r--hw/arm/bcm2835_peripherals.c58
-rw-r--r--hw/arm/exynos4210.c18
-rw-r--r--hw/arm/netduino2.c7
-rw-r--r--hw/arm/stm32f205_soc.c28
-rw-r--r--hw/arm/virt.c32
-rw-r--r--hw/block/block.c24
-rw-r--r--hw/block/fdc.c28
-rw-r--r--hw/block/m25p80.c8
-rw-r--r--hw/block/nand.c7
-rw-r--r--hw/block/nvme.c8
-rw-r--r--hw/block/onenand.c7
-rw-r--r--hw/block/pflash_cfi01.c18
-rw-r--r--hw/block/pflash_cfi02.c19
-rw-r--r--hw/block/virtio-blk.c8
-rw-r--r--hw/core/bus.c2
-rw-r--r--hw/core/loader.c17
-rw-r--r--hw/core/or-irq.c3
-rw-r--r--hw/core/ptimer.c8
-rw-r--r--hw/core/qdev-properties-system.c9
-rw-r--r--hw/core/qdev.c21
-rw-r--r--hw/core/register.c30
-rw-r--r--hw/display/milkymist-tmu2.c2
-rw-r--r--hw/display/virtio-gpu-3d.c13
-rw-r--r--hw/gpio/Makefile.objs1
-rw-r--r--hw/gpio/bcm2835_gpio.c353
-rw-r--r--hw/i386/acpi-build.c28
-rw-r--r--hw/ide/core.c2
-rw-r--r--hw/ide/qdev.c9
-rw-r--r--hw/intc/Makefile.objs2
-rw-r--r--hw/intc/arm_gic.c31
-rw-r--r--hw/intc/arm_gic_common.c23
-rw-r--r--hw/intc/arm_gicv3_common.c38
-rw-r--r--hw/intc/arm_gicv3_cpuif.c8
-rw-r--r--hw/intc/arm_gicv3_kvm.c629
-rw-r--r--hw/intc/armv7m_nvic.c965
-rw-r--r--hw/intc/gic_internal.h7
-rw-r--r--hw/intc/gicv3_internal.h3
-rw-r--r--hw/intc/trace-events15
-rw-r--r--hw/intc/xics.c461
-rw-r--r--hw/intc/xics_kvm.c184
-rw-r--r--hw/intc/xics_spapr.c128
-rw-r--r--hw/misc/Makefile.objs3
-rw-r--r--hw/misc/bcm2835_rng.c149
-rw-r--r--hw/misc/exynos4210_clk.c164
-rw-r--r--hw/net/cadence_gem.c2
-rw-r--r--hw/nvram/spapr_nvram.c8
-rw-r--r--hw/pci/pci.c28
-rw-r--r--hw/ppc/spapr.c196
-rw-r--r--hw/ppc/spapr_cpu_core.c24
-rw-r--r--hw/ppc/spapr_events.c10
-rw-r--r--hw/ppc/spapr_hcall.c89
-rw-r--r--hw/ppc/spapr_pci.c312
-rw-r--r--hw/ppc/spapr_vio.c2
-rw-r--r--hw/s390x/ipl.c90
-rw-r--r--hw/s390x/ipl.h5
-rw-r--r--hw/s390x/s390-virtio-ccw.c3
-rw-r--r--hw/s390x/s390-virtio.c2
-rw-r--r--hw/s390x/s390-virtio.h1
-rw-r--r--hw/scsi/scsi-disk.c12
-rw-r--r--hw/sd/Makefile.objs1
-rw-r--r--hw/sd/bcm2835_sdhost.c429
-rw-r--r--hw/sd/core.c27
-rw-r--r--hw/sd/sd.c8
-rw-r--r--hw/sd/sdhci.c25
-rw-r--r--hw/timer/Makefile.objs1
-rw-r--r--hw/timer/armv7m_systick.c240
-rw-r--r--hw/timer/imx_gpt.c33
-rw-r--r--hw/timer/trace-events6
-rw-r--r--hw/usb/bus.c19
-rw-r--r--hw/usb/dev-storage.c22
-rw-r--r--hw/usb/dev-uas.c2
-rw-r--r--hw/virtio/virtio-pci.c2
-rw-r--r--hw/virtio/virtio.c32
-rw-r--r--include/block/block.h46
-rw-r--r--include/block/block_int.h126
-rw-r--r--include/block/blockjob.h14
-rw-r--r--include/block/blockjob_int.h4
-rw-r--r--include/exec/cpu-common.h2
-rw-r--r--include/glib-compat.h21
-rw-r--r--include/hw/acpi/acpi_dev_interface.h1
-rw-r--r--include/hw/acpi/aml-build.h1
-rw-r--r--include/hw/acpi/bios-linker-loader.h7
-rw-r--r--include/hw/acpi/vmgenid.h35
-rw-r--r--include/hw/arm/arm.h12
-rw-r--r--include/hw/arm/armv7m.h63
-rw-r--r--include/hw/arm/armv7m_nvic.h62
-rw-r--r--include/hw/arm/bcm2835_peripherals.h6
-rw-r--r--include/hw/arm/stm32f205_soc.h4
-rw-r--r--include/hw/arm/virt.h1
-rw-r--r--include/hw/block/block.h8
-rw-r--r--include/hw/elf_ops.h13
-rw-r--r--include/hw/gpio/bcm2835_gpio.h39
-rw-r--r--include/hw/intc/arm_gicv3_common.h1
-rw-r--r--include/hw/loader.h13
-rw-r--r--include/hw/misc/bcm2835_rng.h27
-rw-r--r--include/hw/pci-host/spapr.h2
-rw-r--r--include/hw/pci/pci.h4
-rw-r--r--include/hw/pci/pci_ids.h112
-rw-r--r--include/hw/ppc/spapr.h5
-rw-r--r--include/hw/ppc/spapr_vio.h2
-rw-r--r--include/hw/ppc/xics.h97
-rw-r--r--include/hw/ptimer.h1
-rw-r--r--include/hw/sd/bcm2835_sdhost.h48
-rw-r--r--include/hw/sd/sd.h11
-rw-r--r--include/hw/timer/armv7m_systick.h34
-rw-r--r--include/migration/migration.h6
-rw-r--r--include/migration/postcopy-ram.h13
-rw-r--r--include/migration/vmstate.h4
-rw-r--r--include/qemu-common.h2
-rw-r--r--include/qemu-io.h1
-rw-r--r--include/qemu/throttle-options.h92
-rw-r--r--include/qemu/timer.h5
-rw-r--r--include/standard-headers/asm-x86/hyperv.h8
-rw-r--r--include/standard-headers/linux/input-event-codes.h2
-rw-r--r--include/standard-headers/linux/pci_regs.h25
-rw-r--r--include/standard-headers/linux/virtio_ids.h1
-rw-r--r--include/sysemu/block-backend.h9
-rw-r--r--include/sysemu/replay.h7
-rw-r--r--include/ui/console.h21
-rw-r--r--include/ui/gtk.h25
-rw-r--r--include/ui/sdl2.h13
-rw-r--r--linux-headers/asm-arm/kvm.h15
-rw-r--r--linux-headers/asm-arm/unistd-common.h357
-rw-r--r--linux-headers/asm-arm/unistd-eabi.h5
-rw-r--r--linux-headers/asm-arm/unistd-oabi.h17
-rw-r--r--linux-headers/asm-arm/unistd.h419
-rw-r--r--linux-headers/asm-arm64/kvm.h13
-rw-r--r--linux-headers/asm-powerpc/kvm.h27
-rw-r--r--linux-headers/asm-powerpc/unistd.h1
-rw-r--r--linux-headers/asm-x86/kvm_para.h13
-rw-r--r--linux-headers/linux/kvm.h24
-rw-r--r--linux-headers/linux/kvm_para.h2
-rw-r--r--linux-headers/linux/userfaultfd.h67
-rw-r--r--linux-headers/linux/vfio.h10
-rw-r--r--linux-user/main.c1
-rw-r--r--linux-user/signal.c286
-rw-r--r--linux-user/syscall.c148
-rw-r--r--linux-user/syscall_defs.h8
-rw-r--r--migration/block.c21
-rw-r--r--migration/colo.c49
-rw-r--r--migration/migration.c36
-rw-r--r--migration/postcopy-ram.c142
-rw-r--r--migration/ram.c109
-rw-r--r--migration/savevm.c38
-rw-r--r--migration/trace-events2
-rw-r--r--migration/vmstate.c97
-rw-r--r--monitor.c4
-rw-r--r--nbd/server.c16
-rw-r--r--net/vhost-user.c53
-rw-r--r--pc-bios/bios-256k.binbin262144 -> 262144 bytes
-rw-r--r--pc-bios/bios.binbin131072 -> 131072 bytes
-rw-r--r--pc-bios/openbios-ppcbin750840 -> 750840 bytes
-rw-r--r--pc-bios/openbios-sparc32bin382048 -> 382048 bytes
-rw-r--r--pc-bios/openbios-sparc64bin1593408 -> 1593408 bytes
-rw-r--r--pc-bios/s390-ccw.imgbin26392 -> 26456 bytes
-rw-r--r--pc-bios/s390-ccw/bootmap.c8
-rw-r--r--pc-bios/s390-ccw/iplb.h3
-rw-r--r--pc-bios/s390-ccw/main.c20
-rw-r--r--pc-bios/s390-ccw/virtio.c1
-rw-r--r--pc-bios/s390-ccw/virtio.h1
-rw-r--r--pc-bios/vgabios-cirrus.binbin38400 -> 38400 bytes
-rw-r--r--pc-bios/vgabios-qxl.binbin38912 -> 38912 bytes
-rw-r--r--pc-bios/vgabios-stdvga.binbin38912 -> 38912 bytes
-rw-r--r--pc-bios/vgabios-virtio.binbin38912 -> 38912 bytes
-rw-r--r--pc-bios/vgabios-vmware.binbin38912 -> 38912 bytes
-rw-r--r--pc-bios/vgabios.binbin38400 -> 38400 bytes
-rw-r--r--qapi-schema.json102
-rw-r--r--qapi/block-core.json96
-rw-r--r--qdev-monitor.c9
-rw-r--r--qemu-img-cmds.hx4
-rw-r--r--qemu-img.c334
-rw-r--r--qemu-img.texi16
-rw-r--r--qemu-io-cmds.c28
-rw-r--r--qemu-options.hx9
-rw-r--r--qtest.c1
-rw-r--r--replay/Makefile.objs1
-rw-r--r--replay/replay-audio.c79
-rw-r--r--replay/replay-internal.h4
m---------roms/openbios0
m---------roms/seabios0
-rwxr-xr-xscripts/update-linux-headers.sh13
-rwxr-xr-xscripts/vmstate-static-checker.py5
-rw-r--r--stubs/Makefile.objs1
-rw-r--r--stubs/vmgenid.c9
-rw-r--r--stubs/vmstate.c6
-rw-r--r--target/alpha/cpu.h3
-rw-r--r--target/arm/cpu.c16
-rw-r--r--target/arm/cpu.h25
-rw-r--r--target/arm/helper.c245
-rw-r--r--target/arm/translate-a64.c12
-rw-r--r--target/arm/translate.c8
-rw-r--r--target/i386/cpu-qom.h8
-rw-r--r--target/i386/cpu.c455
-rw-r--r--target/i386/cpu.h4
-rw-r--r--target/i386/fpu_helper.c12
-rw-r--r--target/ppc/Makefile.objs5
-rw-r--r--target/ppc/arch_dump.c154
-rw-r--r--target/ppc/cpu.c47
-rw-r--r--target/ppc/cpu.h50
-rw-r--r--target/ppc/int_helper.c34
-rw-r--r--target/ppc/kvm.c128
-rw-r--r--target/ppc/kvm_ppc.h20
-rw-r--r--target/ppc/machine.c5
-rw-r--r--target/ppc/misc_helper.c8
-rw-r--r--target/ppc/mmu-hash32.c14
-rw-r--r--target/ppc/mmu-hash32.h34
-rw-r--r--target/ppc/mmu-hash64.c193
-rw-r--r--target/ppc/mmu-hash64.h65
-rw-r--r--target/ppc/mmu_helper.c51
-rw-r--r--target/ppc/translate.c115
-rw-r--r--target/ppc/translate_init.c39
-rw-r--r--target/s390x/cpu_models.c2
-rw-r--r--target/sparc/translate.c27
-rw-r--r--tcg/aarch64/tcg-target.inc.c4
-rw-r--r--tests/Makefile.include5
-rw-r--r--tests/acpi-test-data/q35/DSDTbin7860 -> 7824 bytes
-rw-r--r--tests/acpi-test-data/q35/DSDT.bridgebin7877 -> 7841 bytes
-rw-r--r--tests/acpi-test-data/q35/DSDT.cphpbin8323 -> 8287 bytes
-rw-r--r--tests/acpi-test-data/q35/DSDT.ipmibtbin7935 -> 7899 bytes
-rw-r--r--tests/acpi-test-data/q35/DSDT.memhpbin9225 -> 9189 bytes
-rw-r--r--tests/acpi-utils.c65
-rw-r--r--tests/acpi-utils.h94
-rw-r--r--tests/bios-tables-test.c134
-rw-r--r--tests/docker/dockerfiles/debian-s390x-cross.docker22
-rw-r--r--tests/e1000-test.c1
-rw-r--r--tests/e1000e-test.c6
-rw-r--r--tests/eepro100-test.c1
-rw-r--r--tests/endianness-test.c3
-rw-r--r--tests/hd-geo-test.c53
-rw-r--r--tests/i440fx-test.c5
-rw-r--r--tests/ide-test.c13
-rw-r--r--tests/ipmi-bt-test.c1
-rw-r--r--tests/ipmi-kcs-test.c1
-rw-r--r--tests/libqos/usb.c6
-rw-r--r--tests/libqos/usb.h1
-rw-r--r--tests/libqos/virtio-pci.c38
-rw-r--r--tests/libqos/virtio-pci.h6
-rw-r--r--tests/libqtest.c10
-rw-r--r--tests/postcopy-test.c2
-rw-r--r--tests/prom-env-test.c2
-rw-r--r--tests/ptimer-test-stubs.c5
-rw-r--r--tests/ptimer-test.c122
-rw-r--r--tests/pvpanic-test.c1
-rw-r--r--tests/q35-test.c3
-rw-r--r--tests/qemu-iotests/049.out14
-rw-r--r--tests/qemu-iotests/051.pc.out6
-rwxr-xr-xtests/qemu-iotests/05511
-rw-r--r--tests/qemu-iotests/085.out2
-rwxr-xr-xtests/qemu-iotests/1412
-rw-r--r--tests/qemu-iotests/141.out4
-rw-r--r--tests/qemu-iotests/172.out53
-rw-r--r--tests/tco-test.c35
-rw-r--r--tests/test-blockjob-txn.c6
-rw-r--r--tests/test-blockjob.c10
-rw-r--r--tests/test-filter-mirror.c2
-rw-r--r--tests/test-filter-redirector.c4
-rw-r--r--tests/test-throttle.c7
-rw-r--r--tests/test-vmstate.c122
-rw-r--r--tests/usb-hcd-ehci-test.c19
-rw-r--r--tests/usb-hcd-uhci-test.c1
-rw-r--r--tests/vhost-user-test.c11
-rw-r--r--tests/virtio-9p-test.c2
-rw-r--r--tests/virtio-blk-test.c29
-rw-r--r--tests/virtio-scsi-test.c2
-rw-r--r--ui/console.c30
-rw-r--r--ui/gtk-egl.c25
-rw-r--r--ui/gtk-gl-area.c12
-rw-r--r--ui/gtk.c12
-rw-r--r--ui/sdl.c10
-rw-r--r--ui/sdl2-gl.c28
-rw-r--r--ui/sdl2.c3
-rw-r--r--ui/spice-core.c6
-rw-r--r--ui/spice-display.c74
-rw-r--r--ui/vnc.c3
-rw-r--r--util/qemu-option.c2
-rw-r--r--util/qemu-timer.c5
359 files changed, 14168 insertions, 4396 deletions
diff --git a/.shippable.yml b/.shippable.yml
index 1a1fd7a91d..653bd750fe 100644
--- a/.shippable.yml
+++ b/.shippable.yml
@@ -5,6 +5,8 @@ env:
       TARGET_LIST=arm-softmmu,arm-linux-user
     - IMAGE=debian-arm64-cross
       TARGET_LIST=aarch64-softmmu,aarch64-linux-user
+    - IMAGE=debian-s390x-cross
+      TARGET_LIST=s390x-softmmu,s390x-linux-user
 build:
   pre_ci:
     - make docker-image-${IMAGE}
diff --git a/CODING_STYLE b/CODING_STYLE
index f53180bf3f..2fa0c0b65b 100644
--- a/CODING_STYLE
+++ b/CODING_STYLE
@@ -116,3 +116,10 @@ if (a == 1) {
 Rationale: Yoda conditions (as in 'if (1 == a)') are awkward to read.
 Besides, good compilers already warn users when '==' is mis-typed as '=',
 even when the constant is on the right.
+
+7. Comment style
+
+We use traditional C-style /* */ comments and avoid // comments.
+
+Rationale: The // form is valid in C99, so this is purely a matter of
+consistency of style. The checkpatch script will warn you about this.
diff --git a/MAINTAINERS b/MAINTAINERS
index be79f68f46..b9a2171e9b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -908,6 +908,8 @@ F: hw/acpi/*
 F: hw/smbios/*
 F: hw/i386/acpi-build.[hc]
 F: hw/arm/virt-acpi-build.c
+F: tests/bios-tables-test.c
+F: tests/acpi-utils.[hc]
 
 ppc4xx
 M: Alexander Graf <agraf@suse.de>
@@ -1122,6 +1124,15 @@ F: hw/nvram/chrp_nvram.c
 F: include/hw/nvram/chrp_nvram.h
 F: tests/prom-env-test.c
 
+VM Generation ID
+M: Ben Warren <ben@skyportsystems.com>
+S: Maintained
+F: hw/acpi/vmgenid.c
+F: include/hw/acpi/vmgenid.h
+F: docs/specs/vmgenid.txt
+F: tests/vmgenid-test.c
+F: stubs/vmgenid.c
+
 Subsystems
 ----------
 Audio
diff --git a/audio/audio.c b/audio/audio.c
index c845a44f0a..c8898d8422 100644
--- a/audio/audio.c
+++ b/audio/audio.c
@@ -28,6 +28,7 @@
 #include "qemu/timer.h"
 #include "sysemu/sysemu.h"
 #include "qemu/cutils.h"
+#include "sysemu/replay.h"
 
 #define AUDIO_CAP "audio"
 #include "audio_int.h"
@@ -1112,7 +1113,7 @@ static int audio_is_timer_needed (void)
 static void audio_reset_timer (AudioState *s)
 {
     if (audio_is_timer_needed ()) {
-        timer_mod (s->ts,
+        timer_mod_anticipate_ns(s->ts,
             qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + conf.period.ticks);
     }
     else {
@@ -1387,6 +1388,7 @@ static void audio_run_out (AudioState *s)
 
         prev_rpos = hw->rpos;
         played = hw->pcm_ops->run_out (hw, live);
+        replay_audio_out(&played);
         if (audio_bug (AUDIO_FUNC, hw->rpos >= hw->samples)) {
             dolog ("hw->rpos=%d hw->samples=%d played=%d\n",
                    hw->rpos, hw->samples, played);
@@ -1450,9 +1452,12 @@ static void audio_run_in (AudioState *s)
 
     while ((hw = audio_pcm_hw_find_any_enabled_in (hw))) {
         SWVoiceIn *sw;
-        int captured, min;
+        int captured = 0, min;
 
-        captured = hw->pcm_ops->run_in (hw);
+        if (replay_mode != REPLAY_MODE_PLAY) {
+            captured = hw->pcm_ops->run_in(hw);
+        }
+        replay_audio_in(&captured, hw->conv_buf, &hw->wpos, hw->samples);
 
         min = audio_pcm_hw_find_min_in (hw);
         hw->total_samples_captured += captured - min;
diff --git a/audio/audio.h b/audio/audio.h
index c3c51988f5..f4339a185e 100644
--- a/audio/audio.h
+++ b/audio/audio.h
@@ -166,4 +166,9 @@ int wav_start_capture (CaptureState *s, const char *path, int freq,
 bool audio_is_cleaning_up(void);
 void audio_cleanup(void);
 
+void audio_sample_to_uint64(void *samples, int pos,
+                            uint64_t *left, uint64_t *right);
+void audio_sample_from_uint64(void *samples, int pos,
+                            uint64_t left, uint64_t right);
+
 #endif /* QEMU_AUDIO_H */
diff --git a/audio/mixeng.c b/audio/mixeng.c
index 66c0328d42..0bf9b5360f 100644
--- a/audio/mixeng.c
+++ b/audio/mixeng.c
@@ -25,6 +25,7 @@
 #include "qemu/osdep.h"
 #include "qemu-common.h"
 #include "qemu/bswap.h"
+#include "qemu/error-report.h"
 #include "audio.h"
 
 #define AUDIO_CAP "mixeng"
@@ -267,6 +268,37 @@ f_sample *mixeng_clip[2][2][2][3] = {
     }
 };
 
+
+void audio_sample_to_uint64(void *samples, int pos,
+                            uint64_t *left, uint64_t *right)
+{
+    struct st_sample *sample = samples;
+    sample += pos;
+#ifdef FLOAT_MIXENG
+    error_report(
+        "Coreaudio and floating point samples are not supported by replay yet");
+    abort();
+#else
+    *left = sample->l;
+    *right = sample->r;
+#endif
+}
+
+void audio_sample_from_uint64(void *samples, int pos,
+                            uint64_t left, uint64_t right)
+{
+    struct st_sample *sample = samples;
+    sample += pos;
+#ifdef FLOAT_MIXENG
+    error_report(
+        "Coreaudio and floating point samples are not supported by replay yet");
+    abort();
+#else
+    sample->l = left;
+    sample->r = right;
+#endif
+}
+
 /*
  * August 21, 1998
  * Copyright 1998 Fabrice Bellard.
diff --git a/audio/sdlaudio.c b/audio/sdlaudio.c
index db69fe1416..e8d91d22af 100644
--- a/audio/sdlaudio.c
+++ b/audio/sdlaudio.c
@@ -38,10 +38,14 @@
 #define AUDIO_CAP "sdl"
 #include "audio_int.h"
 
+#define USE_SEMAPHORE (SDL_MAJOR_VERSION < 2)
+
 typedef struct SDLVoiceOut {
     HWVoiceOut hw;
     int live;
+#if USE_SEMAPHORE
     int rpos;
+#endif
     int decr;
 } SDLVoiceOut;
 
@@ -53,8 +57,10 @@ static struct {
 
 static struct SDLAudioState {
     int exit;
+#if USE_SEMAPHORE
     SDL_mutex *mutex;
     SDL_sem *sem;
+#endif
     int initialized;
     bool driver_created;
 } glob_sdl;
@@ -73,31 +79,45 @@ static void GCC_FMT_ATTR (1, 2) sdl_logerr (const char *fmt, ...)
 
 static int sdl_lock (SDLAudioState *s, const char *forfn)
 {
+#if USE_SEMAPHORE
     if (SDL_LockMutex (s->mutex)) {
         sdl_logerr ("SDL_LockMutex for %s failed\n", forfn);
         return -1;
     }
+#else
+    SDL_LockAudio();
+#endif
+
     return 0;
 }
 
 static int sdl_unlock (SDLAudioState *s, const char *forfn)
 {
+#if USE_SEMAPHORE
     if (SDL_UnlockMutex (s->mutex)) {
         sdl_logerr ("SDL_UnlockMutex for %s failed\n", forfn);
         return -1;
     }
+#else
+    SDL_UnlockAudio();
+#endif
+
     return 0;
 }
 
 static int sdl_post (SDLAudioState *s, const char *forfn)
 {
+#if USE_SEMAPHORE
     if (SDL_SemPost (s->sem)) {
         sdl_logerr ("SDL_SemPost for %s failed\n", forfn);
         return -1;
     }
+#endif
+
     return 0;
 }
 
+#if USE_SEMAPHORE
 static int sdl_wait (SDLAudioState *s, const char *forfn)
 {
     if (SDL_SemWait (s->sem)) {
@@ -106,6 +126,7 @@ static int sdl_wait (SDLAudioState *s, const char *forfn)
     }
     return 0;
 }
+#endif
 
 static int sdl_unlock_and_post (SDLAudioState *s, const char *forfn)
 {
@@ -246,6 +267,7 @@ static void sdl_callback (void *opaque, Uint8 *buf, int len)
         int to_mix, decr;
 
         /* dolog ("in callback samples=%d\n", samples); */
+#if USE_SEMAPHORE
         sdl_wait (s, "sdl_callback");
         if (s->exit) {
             return;
@@ -264,6 +286,11 @@ static void sdl_callback (void *opaque, Uint8 *buf, int len)
         if (!sdl->live) {
             goto again;
         }
+#else
+        if (s->exit || !sdl->live) {
+            break;
+        }
+#endif
 
         /* dolog ("in callback live=%d\n", live); */
         to_mix = audio_MIN (samples, sdl->live);
@@ -274,7 +301,11 @@ static void sdl_callback (void *opaque, Uint8 *buf, int len)
 
             /* dolog ("in callback to_mix %d, chunk %d\n", to_mix, chunk); */
             hw->clip (buf, src, chunk);
+#if USE_SEMAPHORE
             sdl->rpos = (sdl->rpos + chunk) % hw->samples;
+#else
+            hw->rpos = (hw->rpos + chunk) % hw->samples;
+#endif
             to_mix -= chunk;
             buf += chunk << hw->info.shift;
         }
@@ -282,12 +313,21 @@ static void sdl_callback (void *opaque, Uint8 *buf, int len)
         sdl->live -= decr;
         sdl->decr += decr;
 
+#if USE_SEMAPHORE
     again:
         if (sdl_unlock (s, "sdl_callback")) {
             return;
         }
+#endif
     }
     /* dolog ("done len=%d\n", len); */
+
+#if (SDL_MAJOR_VERSION >= 2)
+    /* SDL2 does not clear the remaining buffer for us, so do it on our own */
+    if (samples) {
+        memset(buf, 0, samples << hw->info.shift);
+    }
+#endif
 }
 
 static int sdl_write_out (SWVoiceOut *sw, void *buf, int len)
@@ -315,8 +355,12 @@ static int sdl_run_out (HWVoiceOut *hw, int live)
     decr = audio_MIN (sdl->decr, live);
     sdl->decr -= decr;
 
+#if USE_SEMAPHORE
     sdl->live = live - decr;
     hw->rpos = sdl->rpos;
+#else
+    sdl->live = live;
+#endif
 
     if (sdl->live > 0) {
         sdl_unlock_and_post (s, "sdl_run_out");
@@ -405,6 +449,7 @@ static void *sdl_audio_init (void)
         return NULL;
     }
 
+#if USE_SEMAPHORE
     s->mutex = SDL_CreateMutex ();
     if (!s->mutex) {
         sdl_logerr ("Failed to create SDL mutex\n");
@@ -419,6 +464,7 @@ static void *sdl_audio_init (void)
         SDL_QuitSubSystem (SDL_INIT_AUDIO);
         return NULL;
     }
+#endif
 
     s->driver_created = true;
     return s;
@@ -428,8 +474,10 @@ static void sdl_audio_fini (void *opaque)
 {
     SDLAudioState *s = opaque;
     sdl_close (s);
+#if USE_SEMAPHORE
     SDL_DestroySemaphore (s->sem);
     SDL_DestroyMutex (s->mutex);
+#endif
     SDL_QuitSubSystem (SDL_INIT_AUDIO);
     s->driver_created = false;
 }
diff --git a/block.c b/block.c
index b663204f3f..f293ccb5af 100644
--- a/block.c
+++ b/block.c
@@ -707,6 +707,12 @@ int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough)
     return 0;
 }
 
+static char *bdrv_child_get_parent_desc(BdrvChild *c)
+{
+    BlockDriverState *parent = c->opaque;
+    return g_strdup(bdrv_get_device_or_node_name(parent));
+}
+
 static void bdrv_child_cb_drained_begin(BdrvChild *child)
 {
     BlockDriverState *bs = child->opaque;
@@ -774,6 +780,7 @@ static void bdrv_inherited_options(int *child_flags, QDict *child_options,
 }
 
 const BdrvChildRole child_file = {
+    .get_parent_desc = bdrv_child_get_parent_desc,
     .inherit_options = bdrv_inherited_options,
     .drained_begin   = bdrv_child_cb_drained_begin,
     .drained_end     = bdrv_child_cb_drained_end,
@@ -794,11 +801,63 @@ static void bdrv_inherited_fmt_options(int *child_flags, QDict *child_options,
 }
 
 const BdrvChildRole child_format = {
+    .get_parent_desc = bdrv_child_get_parent_desc,
     .inherit_options = bdrv_inherited_fmt_options,
     .drained_begin   = bdrv_child_cb_drained_begin,
     .drained_end     = bdrv_child_cb_drained_end,
 };
 
+static void bdrv_backing_attach(BdrvChild *c)
+{
+    BlockDriverState *parent = c->opaque;
+    BlockDriverState *backing_hd = c->bs;
+
+    assert(!parent->backing_blocker);
+    error_setg(&parent->backing_blocker,
+               "node is used as backing hd of '%s'",
+               bdrv_get_device_or_node_name(parent));
+
+    parent->open_flags &= ~BDRV_O_NO_BACKING;
+    pstrcpy(parent->backing_file, sizeof(parent->backing_file),
+            backing_hd->filename);
+    pstrcpy(parent->backing_format, sizeof(parent->backing_format),
+            backing_hd->drv ? backing_hd->drv->format_name : "");
+
+    bdrv_op_block_all(backing_hd, parent->backing_blocker);
+    /* Otherwise we won't be able to commit or stream */
+    bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
+                    parent->backing_blocker);
+    bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_STREAM,
+                    parent->backing_blocker);
+    /*
+     * We do backup in 3 ways:
+     * 1. drive backup
+     *    The target bs is new opened, and the source is top BDS
+     * 2. blockdev backup
+     *    Both the source and the target are top BDSes.
+     * 3. internal backup(used for block replication)
+     *    Both the source and the target are backing file
+     *
+     * In case 1 and 2, neither the source nor the target is the backing file.
+     * In case 3, we will block the top BDS, so there is only one block job
+     * for the top BDS and its backing chain.
+     */
+    bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_SOURCE,
+                    parent->backing_blocker);
+    bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_TARGET,
+                    parent->backing_blocker);
+}
+
+static void bdrv_backing_detach(BdrvChild *c)
+{
+    BlockDriverState *parent = c->opaque;
+
+    assert(parent->backing_blocker);
+    bdrv_op_unblock_all(c->bs, parent->backing_blocker);
+    error_free(parent->backing_blocker);
+    parent->backing_blocker = NULL;
+}
+
 /*
  * Returns the options and flags that bs->backing should get, based on the
  * given options and flags for the parent BDS
@@ -823,7 +882,10 @@ static void bdrv_backing_options(int *child_flags, QDict *child_options,
     *child_flags = flags;
 }
 
-static const BdrvChildRole child_backing = {
+const BdrvChildRole child_backing = {
+    .get_parent_desc = bdrv_child_get_parent_desc,
+    .attach          = bdrv_backing_attach,
+    .detach          = bdrv_backing_detach,
     .inherit_options = bdrv_backing_options,
     .drained_begin   = bdrv_child_cb_drained_begin,
     .drained_end     = bdrv_child_cb_drained_end,
@@ -1326,15 +1388,352 @@ static int bdrv_fill_options(QDict **options, const char *filename,
     return 0;
 }
 
-static void bdrv_replace_child(BdrvChild *child, BlockDriverState *new_bs)
+/*
+ * Check whether permissions on this node can be changed in a way that
+ * @cumulative_perms and @cumulative_shared_perms are the new cumulative
+ * permissions of all its parents. This involves checking whether all necessary
+ * permission changes to child nodes can be performed.
+ *
+ * A call to this function must always be followed by a call to bdrv_set_perm()
+ * or bdrv_abort_perm_update().
+ */
+static int bdrv_check_perm(BlockDriverState *bs, uint64_t cumulative_perms,
+                           uint64_t cumulative_shared_perms, Error **errp)
+{
+    BlockDriver *drv = bs->drv;
+    BdrvChild *c;
+    int ret;
+
+    /* Write permissions never work with read-only images */
+    if ((cumulative_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) &&
+        bdrv_is_read_only(bs))
+    {
+        error_setg(errp, "Block node is read-only");
+        return -EPERM;
+    }
+
+    /* Check this node */
+    if (!drv) {
+        return 0;
+    }
+
+    if (drv->bdrv_check_perm) {
+        return drv->bdrv_check_perm(bs, cumulative_perms,
+                                    cumulative_shared_perms, errp);
+    }
+
+    /* Drivers that never have children can omit .bdrv_child_perm() */
+    if (!drv->bdrv_child_perm) {
+        assert(QLIST_EMPTY(&bs->children));
+        return 0;
+    }
+
+    /* Check all children */
+    QLIST_FOREACH(c, &bs->children, next) {
+        uint64_t cur_perm, cur_shared;
+        drv->bdrv_child_perm(bs, c, c->role,
+                             cumulative_perms, cumulative_shared_perms,
+                             &cur_perm, &cur_shared);
+        ret = bdrv_child_check_perm(c, cur_perm, cur_shared, errp);
+        if (ret < 0) {
+            return ret;
+        }
+    }
+
+    return 0;
+}
+
+/*
+ * Notifies drivers that after a previous bdrv_check_perm() call, the
+ * permission update is not performed and any preparations made for it (e.g.
+ * taken file locks) need to be undone.
+ *
+ * This function recursively notifies all child nodes.
+ */
+static void bdrv_abort_perm_update(BlockDriverState *bs)
+{
+    BlockDriver *drv = bs->drv;
+    BdrvChild *c;
+
+    if (!drv) {
+        return;
+    }
+
+    if (drv->bdrv_abort_perm_update) {
+        drv->bdrv_abort_perm_update(bs);
+    }
+
+    QLIST_FOREACH(c, &bs->children, next) {
+        bdrv_child_abort_perm_update(c);
+    }
+}
+
+static void bdrv_set_perm(BlockDriverState *bs, uint64_t cumulative_perms,
+                          uint64_t cumulative_shared_perms)
+{
+    BlockDriver *drv = bs->drv;
+    BdrvChild *c;
+
+    if (!drv) {
+        return;
+    }
+
+    /* Update this node */
+    if (drv->bdrv_set_perm) {
+        drv->bdrv_set_perm(bs, cumulative_perms, cumulative_shared_perms);
+    }
+
+    /* Drivers that never have children can omit .bdrv_child_perm() */
+    if (!drv->bdrv_child_perm) {
+        assert(QLIST_EMPTY(&bs->children));
+        return;
+    }
+
+    /* Update all children */
+    QLIST_FOREACH(c, &bs->children, next) {
+        uint64_t cur_perm, cur_shared;
+        drv->bdrv_child_perm(bs, c, c->role,
+                             cumulative_perms, cumulative_shared_perms,
+                             &cur_perm, &cur_shared);
+        bdrv_child_set_perm(c, cur_perm, cur_shared);
+    }
+}
+
+static void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm,
+                                     uint64_t *shared_perm)
+{
+    BdrvChild *c;
+    uint64_t cumulative_perms = 0;
+    uint64_t cumulative_shared_perms = BLK_PERM_ALL;
+
+    QLIST_FOREACH(c, &bs->parents, next_parent) {
+        cumulative_perms |= c->perm;
+        cumulative_shared_perms &= c->shared_perm;
+    }
+
+    *perm = cumulative_perms;
+    *shared_perm = cumulative_shared_perms;
+}
+
+static char *bdrv_child_user_desc(BdrvChild *c)
+{
+    if (c->role->get_parent_desc) {
+        return c->role->get_parent_desc(c);
+    }
+
+    return g_strdup("another user");
+}
+
+static char *bdrv_perm_names(uint64_t perm)
+{
+    struct perm_name {
+        uint64_t perm;
+        const char *name;
+    } permissions[] = {
+        { BLK_PERM_CONSISTENT_READ, "consistent read" },
+        { BLK_PERM_WRITE,           "write" },
+        { BLK_PERM_WRITE_UNCHANGED, "write unchanged" },
+        { BLK_PERM_RESIZE,          "resize" },
+        { BLK_PERM_GRAPH_MOD,       "change children" },
+        { 0, NULL }
+    };
+
+    char *result = g_strdup("");
+    struct perm_name *p;
+
+    for (p = permissions; p->name; p++) {
+        if (perm & p->perm) {
+            char *old = result;
+            result = g_strdup_printf("%s%s%s", old, *old ? ", " : "", p->name);
+            g_free(old);
+        }
+    }
+
+    return result;
+}
+
+/*
+ * Checks whether a new reference to @bs can be added if the new user requires
+ * @new_used_perm/@new_shared_perm as its permissions. If @ignore_child is set,
+ * this old reference is ignored in the calculations; this allows checking
+ * permission updates for an existing reference.
+ *
+ * Needs to be followed by a call to either bdrv_set_perm() or
+ * bdrv_abort_perm_update(). */
+static int bdrv_check_update_perm(BlockDriverState *bs, uint64_t new_used_perm,
+                                  uint64_t new_shared_perm,
+                                  BdrvChild *ignore_child, Error **errp)
+{
+    BdrvChild *c;
+    uint64_t cumulative_perms = new_used_perm;
+    uint64_t cumulative_shared_perms = new_shared_perm;
+
+    /* There is no reason why anyone couldn't tolerate write_unchanged */
+    assert(new_shared_perm & BLK_PERM_WRITE_UNCHANGED);
+
+    QLIST_FOREACH(c, &bs->parents, next_parent) {
+        if (c == ignore_child) {
+            continue;
+        }
+
+        if ((new_used_perm & c->shared_perm) != new_used_perm) {
+            char *user = bdrv_child_user_desc(c);
+            char *perm_names = bdrv_perm_names(new_used_perm & ~c->shared_perm);
+            error_setg(errp, "Conflicts with use by %s as '%s', which does not "
+                             "allow '%s' on %s",
+                       user, c->name, perm_names, bdrv_get_node_name(c->bs));
+            g_free(user);
+            g_free(perm_names);
+            return -EPERM;
+        }
+
+        if ((c->perm & new_shared_perm) != c->perm) {
+            char *user = bdrv_child_user_desc(c);
+            char *perm_names = bdrv_perm_names(c->perm & ~new_shared_perm);
+            error_setg(errp, "Conflicts with use by %s as '%s', which uses "
+                             "'%s' on %s",
+                       user, c->name, perm_names, bdrv_get_node_name(c->bs));
+            g_free(user);
+            g_free(perm_names);
+            return -EPERM;
+        }
+
+        cumulative_perms |= c->perm;
+        cumulative_shared_perms &= c->shared_perm;
+    }
+
+    return bdrv_check_perm(bs, cumulative_perms, cumulative_shared_perms, errp);
+}
+
+/* Needs to be followed by a call to either bdrv_child_set_perm() or
+ * bdrv_child_abort_perm_update(). */
+int bdrv_child_check_perm(BdrvChild *c, uint64_t perm, uint64_t shared,
+                          Error **errp)
+{
+    return bdrv_check_update_perm(c->bs, perm, shared, c, errp);
+}
+
+void bdrv_child_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared)
+{
+    uint64_t cumulative_perms, cumulative_shared_perms;
+
+    c->perm = perm;
+    c->shared_perm = shared;
+
+    bdrv_get_cumulative_perm(c->bs, &cumulative_perms,
+                             &cumulative_shared_perms);
+    bdrv_set_perm(c->bs, cumulative_perms, cumulative_shared_perms);
+}
+
+void bdrv_child_abort_perm_update(BdrvChild *c)
+{
+    bdrv_abort_perm_update(c->bs);
+}
+
+int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared,
+                            Error **errp)
+{
+    int ret;
+
+    ret = bdrv_child_check_perm(c, perm, shared, errp);
+    if (ret < 0) {
+        bdrv_child_abort_perm_update(c);
+        return ret;
+    }
+
+    bdrv_child_set_perm(c, perm, shared);
+
+    return 0;
+}
+
+#define DEFAULT_PERM_PASSTHROUGH (BLK_PERM_CONSISTENT_READ \
+                                 | BLK_PERM_WRITE \
+                                 | BLK_PERM_WRITE_UNCHANGED \
+                                 | BLK_PERM_RESIZE)
+#define DEFAULT_PERM_UNCHANGED (BLK_PERM_ALL & ~DEFAULT_PERM_PASSTHROUGH)
+
+void bdrv_filter_default_perms(BlockDriverState *bs, BdrvChild *c,
+                               const BdrvChildRole *role,
+                               uint64_t perm, uint64_t shared,
+                               uint64_t *nperm, uint64_t *nshared)
+{
+    if (c == NULL) {
+        *nperm = perm & DEFAULT_PERM_PASSTHROUGH;
+        *nshared = (shared & DEFAULT_PERM_PASSTHROUGH) | DEFAULT_PERM_UNCHANGED;
+        return;
+    }
+
+    *nperm = (perm & DEFAULT_PERM_PASSTHROUGH) |
+             (c->perm & DEFAULT_PERM_UNCHANGED);
+    *nshared = (shared & DEFAULT_PERM_PASSTHROUGH) |
+               (c->shared_perm & DEFAULT_PERM_UNCHANGED);
+}
+
+void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c,
+                               const BdrvChildRole *role,
+                               uint64_t perm, uint64_t shared,
+                               uint64_t *nperm, uint64_t *nshared)
+{
+    bool backing = (role == &child_backing);
+    assert(role == &child_backing || role == &child_file);
+
+    if (!backing) {
+        /* Apart from the modifications below, the same permissions are
+         * forwarded and left alone as for filters */
+        bdrv_filter_default_perms(bs, c, role, perm, shared, &perm, &shared);
+
+        /* Format drivers may touch metadata even if the guest doesn't write */
+        if (!bdrv_is_read_only(bs)) {
+            perm |= BLK_PERM_WRITE | BLK_PERM_RESIZE;
+        }
+
+        /* bs->file always needs to be consistent because of the metadata. We
+         * can never allow other users to resize or write to it. */
+        perm |= BLK_PERM_CONSISTENT_READ;
+        shared &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE);
+    } else {
+        /* We want consistent read from backing files if the parent needs it.
+         * No other operations are performed on backing files. */
+        perm &= BLK_PERM_CONSISTENT_READ;
+
+        /* If the parent can deal with changing data, we're okay with a
+         * writable and resizable backing file. */
+        /* TODO Require !(perm & BLK_PERM_CONSISTENT_READ), too? */
+        if (shared & BLK_PERM_WRITE) {
+            shared = BLK_PERM_WRITE | BLK_PERM_RESIZE;
+        } else {
+            shared = 0;
+        }
+
+        shared |= BLK_PERM_CONSISTENT_READ | BLK_PERM_GRAPH_MOD |
+                  BLK_PERM_WRITE_UNCHANGED;
+    }
+
+    *nperm = perm;
+    *nshared = shared;
+}
+
+static void bdrv_replace_child(BdrvChild *child, BlockDriverState *new_bs,
+                               bool check_new_perm)
 {
     BlockDriverState *old_bs = child->bs;
+    uint64_t perm, shared_perm;
 
     if (old_bs) {
         if (old_bs->quiesce_counter && child->role->drained_end) {
             child->role->drained_end(child);
         }
+        if (child->role->detach) {
+            child->role->detach(child);
+        }
         QLIST_REMOVE(child, next_parent);
+
+        /* Update permissions for old node. This is guaranteed to succeed
+         * because we're just taking a parent away, so we're loosening
+         * restrictions. */
+        bdrv_get_cumulative_perm(old_bs, &perm, &shared_perm);
+        bdrv_check_perm(old_bs, perm, shared_perm, &error_abort);
+        bdrv_set_perm(old_bs, perm, shared_perm);
     }
 
     child->bs = new_bs;
@@ -1344,23 +1743,46 @@ static void bdrv_replace_child(BdrvChild *child, BlockDriverState *new_bs)
         if (new_bs->quiesce_counter && child->role->drained_begin) {
             child->role->drained_begin(child);
         }
+
+        bdrv_get_cumulative_perm(new_bs, &perm, &shared_perm);
+        if (check_new_perm) {
+            bdrv_check_perm(new_bs, perm, shared_perm, &error_abort);
+        }
+        bdrv_set_perm(new_bs, perm, shared_perm);
+
+        if (child->role->attach) {
+            child->role->attach(child);
+        }
     }
 }
 
 BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
                                   const char *child_name,
                                   const BdrvChildRole *child_role,
-                                  void *opaque)
+                                  uint64_t perm, uint64_t shared_perm,
+                                  void *opaque, Error **errp)
 {
-    BdrvChild *child = g_new(BdrvChild, 1);
+    BdrvChild *child;
+    int ret;
+
+    ret = bdrv_check_update_perm(child_bs, perm, shared_perm, NULL, errp);
+    if (ret < 0) {
+        bdrv_abort_perm_update(child_bs);
+        return NULL;
+    }
+
+    child = g_new(BdrvChild, 1);
     *child = (BdrvChild) {
-        .bs     = NULL,
-        .name   = g_strdup(child_name),
-        .role   = child_role,
-        .opaque = opaque,
+        .bs             = NULL,
+        .name           = g_strdup(child_name),
+        .role           = child_role,
+        .perm           = perm,
+        .shared_perm    = shared_perm,
+        .opaque         = opaque,
     };
 
-    bdrv_replace_child(child, child_bs);
+    /* This performs the matching bdrv_set_perm() for the above check. */
+    bdrv_replace_child(child, child_bs, false);
 
     return child;
 }
@@ -1368,10 +1790,24 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
 BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
                              BlockDriverState *child_bs,
                              const char *child_name,
-                             const BdrvChildRole *child_role)
+                             const BdrvChildRole *child_role,
+                             Error **errp)
 {
-    BdrvChild *child = bdrv_root_attach_child(child_bs, child_name, child_role,
-                                              parent_bs);
+    BdrvChild *child;
+    uint64_t perm, shared_perm;
+
+    bdrv_get_cumulative_perm(parent_bs, &perm, &shared_perm);
+
+    assert(parent_bs->drv);
+    parent_bs->drv->bdrv_child_perm(parent_bs, NULL, child_role,
+                                    perm, shared_perm, &perm, &shared_perm);
+
+    child = bdrv_root_attach_child(child_bs, child_name, child_role,
+                                   perm, shared_perm, parent_bs, errp);
+    if (child == NULL) {
+        return NULL;
+    }
+
     QLIST_INSERT_HEAD(&parent_bs->children, child, next);
     return child;
 }
@@ -1383,7 +1819,7 @@ static void bdrv_detach_child(BdrvChild *child)
         child->next.le_prev = NULL;
     }
 
-    bdrv_replace_child(child, NULL);
+    bdrv_replace_child(child, NULL, false);
 
     g_free(child->name);
     g_free(child);
@@ -1447,57 +1883,28 @@ static void bdrv_parent_cb_resize(BlockDriverState *bs)
  * Sets the backing file link of a BDS. A new reference is created; callers
  * which don't need their own reference any more must call bdrv_unref().
  */
-void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
+void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
+                         Error **errp)
 {
     if (backing_hd) {
         bdrv_ref(backing_hd);
     }
 
     if (bs->backing) {
-        assert(bs->backing_blocker);
-        bdrv_op_unblock_all(bs->backing->bs, bs->backing_blocker);
         bdrv_unref_child(bs, bs->backing);
-    } else if (backing_hd) {
-        error_setg(&bs->backing_blocker,
-                   "node is used as backing hd of '%s'",
-                   bdrv_get_device_or_node_name(bs));
     }
 
     if (!backing_hd) {
-        error_free(bs->backing_blocker);
-        bs->backing_blocker = NULL;
         bs->backing = NULL;
         goto out;
     }
-    bs->backing = bdrv_attach_child(bs, backing_hd, "backing", &child_backing);
-    bs->open_flags &= ~BDRV_O_NO_BACKING;
-    pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename);
-    pstrcpy(bs->backing_format, sizeof(bs->backing_format),
-            backing_hd->drv ? backing_hd->drv->format_name : "");
 
-    bdrv_op_block_all(backing_hd, bs->backing_blocker);
-    /* Otherwise we won't be able to commit or stream */
-    bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
-                    bs->backing_blocker);
-    bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_STREAM,
-                    bs->backing_blocker);
-    /*
-     * We do backup in 3 ways:
-     * 1. drive backup
-     *    The target bs is new opened, and the source is top BDS
-     * 2. blockdev backup
-     *    Both the source and the target are top BDSes.
-     * 3. internal backup(used for block replication)
-     *    Both the source and the target are backing file
-     *
-     * In case 1 and 2, neither the source nor the target is the backing file.
-     * In case 3, we will block the top BDS, so there is only one block job
-     * for the top BDS and its backing chain.
-     */
-    bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_SOURCE,
-                    bs->backing_blocker);
-    bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_TARGET,
-                    bs->backing_blocker);
+    bs->backing = bdrv_attach_child(bs, backing_hd, "backing", &child_backing,
+                                    errp);
+    if (!bs->backing) {
+        bdrv_unref(backing_hd);
+    }
+
 out:
     bdrv_refresh_limits(bs, NULL);
 }
@@ -1580,8 +1987,12 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
 
     /* Hook up the backing file link; drop our reference, bs owns the
      * backing_hd reference now */
-    bdrv_set_backing_hd(bs, backing_hd);
+    bdrv_set_backing_hd(bs, backing_hd, &local_err);
     bdrv_unref(backing_hd);
+    if (local_err) {
+        ret = -EINVAL;
+        goto free_exit;
+    }
 
     qdict_del(parent_options, bdref_key);
 
@@ -1648,6 +2059,7 @@ BdrvChild *bdrv_open_child(const char *filename,
                            const BdrvChildRole *child_role,
                            bool allow_none, Error **errp)
 {
+    BdrvChild *c;
     BlockDriverState *bs;
 
     bs = bdrv_open_child_bs(filename, options, bdref_key, parent, child_role,
@@ -1656,7 +2068,13 @@ BdrvChild *bdrv_open_child(const char *filename,
         return NULL;
     }
 
-    return bdrv_attach_child(parent, bs, bdref_key, child_role);
+    c = bdrv_attach_child(parent, bs, bdref_key, child_role, errp);
+    if (!c) {
+        bdrv_unref(bs);
+        return NULL;
+    }
+
+    return c;
 }
 
 static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs,
@@ -1669,6 +2087,7 @@ static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs,
     int64_t total_size;
     QemuOpts *opts = NULL;
     BlockDriverState *bs_snapshot;
+    Error *local_err = NULL;
     int ret;
 
     /* if snapshot, we create a temporary backing file and open it
@@ -1718,7 +2137,12 @@ static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs,
      * call bdrv_unref() on it), so in order to be able to return one, we have
      * to increase bs_snapshot's refcount here */
     bdrv_ref(bs_snapshot);
-    bdrv_append(bs_snapshot, bs);
+    bdrv_append(bs_snapshot, bs, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        ret = -EINVAL;
+        goto out;
+    }
 
     g_free(tmp_filename);
     return bs_snapshot;
@@ -1862,9 +2286,12 @@ static BlockDriverState *bdrv_open_inherit(const char *filename,
             goto fail;
         }
         if (file_bs != NULL) {
-            file = blk_new();
-            blk_insert_bs(file, file_bs);
+            file = blk_new(BLK_PERM_CONSISTENT_READ, BLK_PERM_ALL);
+            blk_insert_bs(file, file_bs, &local_err);
             bdrv_unref(file_bs);
+            if (local_err) {
+                goto fail;
+            }
 
             qdict_put(options, "file",
                       qstring_from_str(bdrv_get_node_name(file_bs)));
@@ -2405,7 +2832,7 @@ static void bdrv_close(BlockDriverState *bs)
         bs->drv->bdrv_close(bs);
         bs->drv = NULL;
 
-        bdrv_set_backing_hd(bs, NULL);
+        bdrv_set_backing_hd(bs, NULL, &error_abort);
 
         if (bs->file != NULL) {
             bdrv_unref_child(bs, bs->file);
@@ -2465,10 +2892,13 @@ static void change_parent_backing_link(BlockDriverState *from,
     BdrvChild *c, *next, *to_c;
 
     QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) {
+        if (c->role->stay_at_node) {
+            continue;
+        }
         if (c->role == &child_backing) {
-            /* @from is generally not allowed to be a backing file, except for
-             * when @to is the overlay. In that case, @from may not be replaced
-             * by @to as @to's backing node. */
+            /* If @from is a backing file of @to, ignore the child to avoid
+             * creating a loop. We only want to change the pointer of other
+             * parents. */
             QLIST_FOREACH(to_c, &to->children, next) {
                 if (to_c == c) {
                     break;
@@ -2479,9 +2909,10 @@ static void change_parent_backing_link(BlockDriverState *from,
             }
         }
 
-        assert(c->role != &child_backing);
         bdrv_ref(to);
-        bdrv_replace_child(c, to);
+        /* FIXME Are we sure that bdrv_replace_child() can't run into
+         * &error_abort because of permissions? */
+        bdrv_replace_child(c, to, true);
         bdrv_unref(from);
     }
 }
@@ -2502,19 +2933,25 @@ static void change_parent_backing_link(BlockDriverState *from,
  * parents of bs_top after bdrv_append() returns. If the caller needs to keep a
  * reference of its own, it must call bdrv_ref().
  */
-void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
+void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top,
+                 Error **errp)
 {
-    assert(!bdrv_requests_pending(bs_top));
-    assert(!bdrv_requests_pending(bs_new));
+    Error *local_err = NULL;
 
-    bdrv_ref(bs_top);
+    assert(!atomic_read(&bs_top->in_flight));
+    assert(!atomic_read(&bs_new->in_flight));
+
+    bdrv_set_backing_hd(bs_new, bs_top, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        goto out;
+    }
 
     change_parent_backing_link(bs_top, bs_new);
-    bdrv_set_backing_hd(bs_new, bs_top);
-    bdrv_unref(bs_top);
 
     /* bs_new is now referenced by its new parents, we don't need the
      * additional reference any more. */
+out:
     bdrv_unref(bs_new);
 }
 
@@ -2658,6 +3095,7 @@ int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
                            BlockDriverState *base, const char *backing_file_str)
 {
     BlockDriverState *new_top_bs = NULL;
+    Error *local_err = NULL;
     int ret = -EIO;
 
     if (!top->drv || !base->drv) {
@@ -2690,7 +3128,13 @@ int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
     if (ret) {
         goto exit;
     }
-    bdrv_set_backing_hd(new_top_bs, base);
+
+    bdrv_set_backing_hd(new_top_bs, base, &local_err);
+    if (local_err) {
+        ret = -EPERM;
+        error_report_err(local_err);
+        goto exit;
+    }
 
     ret = 0;
 exit:
@@ -2705,6 +3149,9 @@ int bdrv_truncate(BdrvChild *child, int64_t offset)
     BlockDriverState *bs = child->bs;
     BlockDriver *drv = bs->drv;
     int ret;
+
+    assert(child->perm & BLK_PERM_RESIZE);
+
     if (!drv)
         return -ENOMEDIUM;
     if (!drv->bdrv_truncate)
diff --git a/block/backup.c b/block/backup.c
index fe010e78e3..d1ab617c7e 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -618,14 +618,24 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
         goto error;
     }
 
-    job = block_job_create(job_id, &backup_job_driver, bs, speed,
-                           creation_flags, cb, opaque, errp);
+    /* job->common.len is fixed, so we can't allow resize */
+    job = block_job_create(job_id, &backup_job_driver, bs,
+                           BLK_PERM_CONSISTENT_READ,
+                           BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE |
+                           BLK_PERM_WRITE_UNCHANGED | BLK_PERM_GRAPH_MOD,
+                           speed, creation_flags, cb, opaque, errp);
     if (!job) {
         goto error;
     }
 
-    job->target = blk_new();
-    blk_insert_bs(job->target, target);
+    /* The target must match the source in size, so no resize here either */
+    job->target = blk_new(BLK_PERM_WRITE,
+                          BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE |
+                          BLK_PERM_WRITE_UNCHANGED | BLK_PERM_GRAPH_MOD);
+    ret = blk_insert_bs(job->target, target, errp);
+    if (ret < 0) {
+        goto error;
+    }
 
     job->on_source_error = on_source_error;
     job->on_target_error = on_target_error;
@@ -652,7 +662,9 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
         job->cluster_size = MAX(BACKUP_CLUSTER_SIZE_DEFAULT, bdi.cluster_size);
     }
 
-    block_job_add_bdrv(&job->common, target);
+    /* Required permissions are already taken with target's blk_new() */
+    block_job_add_bdrv(&job->common, "target", target, 0, BLK_PERM_ALL,
+                       &error_abort);
     job->common.len = len;
     block_job_txn_add_job(txn, &job->common);
 
diff --git a/block/blkdebug.c b/block/blkdebug.c
index 6117ce5fca..67e8024e36 100644
--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@@ -734,6 +734,8 @@ static BlockDriver bdrv_blkdebug = {
     .bdrv_file_open         = blkdebug_open,
     .bdrv_close             = blkdebug_close,
     .bdrv_reopen_prepare    = blkdebug_reopen_prepare,
+    .bdrv_child_perm        = bdrv_filter_default_perms,
+
     .bdrv_getlength         = blkdebug_getlength,
     .bdrv_truncate          = blkdebug_truncate,
     .bdrv_refresh_filename  = blkdebug_refresh_filename,
diff --git a/block/blkreplay.c b/block/blkreplay.c
index cfc8c5be02..e1102119fb 100755
--- a/block/blkreplay.c
+++ b/block/blkreplay.c
@@ -137,6 +137,7 @@ static BlockDriver bdrv_blkreplay = {
 
     .bdrv_file_open         = blkreplay_open,
     .bdrv_close             = blkreplay_close,
+    .bdrv_child_perm        = bdrv_filter_default_perms,
     .bdrv_getlength         = blkreplay_getlength,
 
     .bdrv_co_preadv         = blkreplay_co_preadv,
diff --git a/block/blkverify.c b/block/blkverify.c
index 43a940c2f5..9a1e21c6ad 100644
--- a/block/blkverify.c
+++ b/block/blkverify.c
@@ -320,6 +320,7 @@ static BlockDriver bdrv_blkverify = {
     .bdrv_parse_filename              = blkverify_parse_filename,
     .bdrv_file_open                   = blkverify_open,
     .bdrv_close                       = blkverify_close,
+    .bdrv_child_perm                  = bdrv_filter_default_perms,
     .bdrv_getlength                   = blkverify_getlength,
     .bdrv_refresh_filename            = blkverify_refresh_filename,
 
diff --git a/block/block-backend.c b/block/block-backend.c
index 492e71e41f..daa7908d01 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -59,6 +59,9 @@ struct BlockBackend {
     bool iostatus_enabled;
     BlockDeviceIoStatus iostatus;
 
+    uint64_t perm;
+    uint64_t shared_perm;
+
     bool allow_write_beyond_eof;
 
     NotifierList remove_bs_notifiers, insert_bs_notifiers;
@@ -77,6 +80,7 @@ static const AIOCBInfo block_backend_aiocb_info = {
 
 static void drive_info_del(DriveInfo *dinfo);
 static BlockBackend *bdrv_first_blk(BlockDriverState *bs);
+static char *blk_get_attached_dev_id(BlockBackend *blk);
 
 /* All BlockBackends */
 static QTAILQ_HEAD(, BlockBackend) block_backends =
@@ -99,6 +103,25 @@ static void blk_root_drained_end(BdrvChild *child);
 static void blk_root_change_media(BdrvChild *child, bool load);
 static void blk_root_resize(BdrvChild *child);
 
+static char *blk_root_get_parent_desc(BdrvChild *child)
+{
+    BlockBackend *blk = child->opaque;
+    char *dev_id;
+
+    if (blk->name) {
+        return g_strdup(blk->name);
+    }
+
+    dev_id = blk_get_attached_dev_id(blk);
+    if (*dev_id) {
+        return dev_id;
+    } else {
+        /* TODO Callback into the BB owner for something more detailed */
+        g_free(dev_id);
+        return g_strdup("a block device");
+    }
+}
+
 static const char *blk_root_get_name(BdrvChild *child)
 {
     return blk_name(child->opaque);
@@ -110,6 +133,7 @@ static const BdrvChildRole child_root = {
     .change_media       = blk_root_change_media,
     .resize             = blk_root_resize,
     .get_name           = blk_root_get_name,
+    .get_parent_desc    = blk_root_get_parent_desc,
 
     .drained_begin      = blk_root_drained_begin,
     .drained_end        = blk_root_drained_end,
@@ -117,15 +141,23 @@ static const BdrvChildRole child_root = {
 
 /*
  * Create a new BlockBackend with a reference count of one.
- * Store an error through @errp on failure, unless it's null.
+ *
+ * @perm is a bitmasks of BLK_PERM_* constants which describes the permissions
+ * to request for a block driver node that is attached to this BlockBackend.
+ * @shared_perm is a bitmask which describes which permissions may be granted
+ * to other users of the attached node.
+ * Both sets of permissions can be changed later using blk_set_perm().
+ *
  * Return the new BlockBackend on success, null on failure.
  */
-BlockBackend *blk_new(void)
+BlockBackend *blk_new(uint64_t perm, uint64_t shared_perm)
 {
     BlockBackend *blk;
 
     blk = g_new0(BlockBackend, 1);
     blk->refcnt = 1;
+    blk->perm = perm;
+    blk->shared_perm = shared_perm;
     blk_set_enable_write_cache(blk, true);
 
     qemu_co_queue_init(&blk->public.throttled_reqs[0]);
@@ -155,15 +187,33 @@ BlockBackend *blk_new_open(const char *filename, const char *reference,
 {
     BlockBackend *blk;
     BlockDriverState *bs;
+    uint64_t perm;
+
+    /* blk_new_open() is mainly used in .bdrv_create implementations and the
+     * tools where sharing isn't a concern because the BDS stays private, so we
+     * just request permission according to the flags.
+     *
+     * The exceptions are xen_disk and blockdev_init(); in these cases, the
+     * caller of blk_new_open() doesn't make use of the permissions, but they
+     * shouldn't hurt either. We can still share everything here because the
+     * guest devices will add their own blockers if they can't share. */
+    perm = BLK_PERM_CONSISTENT_READ;
+    if (flags & BDRV_O_RDWR) {
+        perm |= BLK_PERM_WRITE;
+    }
+    if (flags & BDRV_O_RESIZE) {
+        perm |= BLK_PERM_RESIZE;
+    }
 
-    blk = blk_new();
+    blk = blk_new(perm, BLK_PERM_ALL);
     bs = bdrv_open(filename, reference, options, flags, errp);
     if (!bs) {
         blk_unref(blk);
         return NULL;
     }
 
-    blk->root = bdrv_root_attach_child(bs, "root", &child_root, blk);
+    blk->root = bdrv_root_attach_child(bs, "root", &child_root,
+                                       perm, BLK_PERM_ALL, blk, &error_abort);
 
     return blk;
 }
@@ -495,16 +545,49 @@ void blk_remove_bs(BlockBackend *blk)
 /*
  * Associates a new BlockDriverState with @blk.
  */
-void blk_insert_bs(BlockBackend *blk, BlockDriverState *bs)
+int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp)
 {
+    blk->root = bdrv_root_attach_child(bs, "root", &child_root,
+                                       blk->perm, blk->shared_perm, blk, errp);
+    if (blk->root == NULL) {
+        return -EPERM;
+    }
     bdrv_ref(bs);
-    blk->root = bdrv_root_attach_child(bs, "root", &child_root, blk);
 
     notifier_list_notify(&blk->insert_bs_notifiers, blk);
     if (blk->public.throttle_state) {
         throttle_timers_attach_aio_context(
             &blk->public.throttle_timers, bdrv_get_aio_context(bs));
     }
+
+    return 0;
+}
+
+/*
+ * Sets the permission bitmasks that the user of the BlockBackend needs.
+ */
+int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm,
+                 Error **errp)
+{
+    int ret;
+
+    if (blk->root) {
+        ret = bdrv_child_try_set_perm(blk->root, perm, shared_perm, errp);
+        if (ret < 0) {
+            return ret;
+        }
+    }
+
+    blk->perm = perm;
+    blk->shared_perm = shared_perm;
+
+    return 0;
+}
+
+void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm)
+{
+    *perm = blk->perm;
+    *shared_perm = blk->shared_perm;
 }
 
 static int blk_do_attach_dev(BlockBackend *blk, void *dev)
@@ -553,6 +636,7 @@ void blk_detach_dev(BlockBackend *blk, void *dev)
     blk->dev_ops = NULL;
     blk->dev_opaque = NULL;
     blk->guest_block_size = 512;
+    blk_set_perm(blk, 0, BLK_PERM_ALL, &error_abort);
     blk_unref(blk);
 }
 
@@ -620,19 +704,29 @@ void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops,
 
 /*
  * Notify @blk's attached device model of media change.
- * If @load is true, notify of media load.
- * Else, notify of media eject.
+ *
+ * If @load is true, notify of media load. This action can fail, meaning that
+ * the medium cannot be loaded. @errp is set then.
+ *
+ * If @load is false, notify of media eject. This can never fail.
+ *
  * Also send DEVICE_TRAY_MOVED events as appropriate.
  */
-void blk_dev_change_media_cb(BlockBackend *blk, bool load)
+void blk_dev_change_media_cb(BlockBackend *blk, bool load, Error **errp)
 {
     if (blk->dev_ops && blk->dev_ops->change_media_cb) {
         bool tray_was_open, tray_is_open;
+        Error *local_err = NULL;
 
         assert(!blk->legacy_dev);
 
         tray_was_open = blk_dev_is_tray_open(blk);
-        blk->dev_ops->change_media_cb(blk->dev_opaque, load);
+        blk->dev_ops->change_media_cb(blk->dev_opaque, load, &local_err);
+        if (local_err) {
+            assert(load == true);
+            error_propagate(errp, local_err);
+            return;
+        }
         tray_is_open = blk_dev_is_tray_open(blk);
 
         if (tray_was_open != tray_is_open) {
@@ -646,7 +740,7 @@ void blk_dev_change_media_cb(BlockBackend *blk, bool load)
 
 static void blk_root_change_media(BdrvChild *child, bool load)
 {
-    blk_dev_change_media_cb(child->opaque, load);
+    blk_dev_change_media_cb(child->opaque, load, NULL);
 }
 
 /*
diff --git a/block/bochs.c b/block/bochs.c
index 7dd2ac4f51..516da56c3b 100644
--- a/block/bochs.c
+++ b/block/bochs.c
@@ -293,6 +293,7 @@ static BlockDriver bdrv_bochs = {
     .instance_size	= sizeof(BDRVBochsState),
     .bdrv_probe		= bochs_probe,
     .bdrv_open		= bochs_open,
+    .bdrv_child_perm     = bdrv_format_default_perms,
     .bdrv_refresh_limits = bochs_refresh_limits,
     .bdrv_co_preadv = bochs_co_preadv,
     .bdrv_close		= bochs_close,
diff --git a/block/cloop.c b/block/cloop.c
index 877c9b0d1b..a6c7b9dbe6 100644
--- a/block/cloop.c
+++ b/block/cloop.c
@@ -290,6 +290,7 @@ static BlockDriver bdrv_cloop = {
     .instance_size  = sizeof(BDRVCloopState),
     .bdrv_probe     = cloop_probe,
     .bdrv_open      = cloop_open,
+    .bdrv_child_perm     = bdrv_format_default_perms,
     .bdrv_refresh_limits = cloop_refresh_limits,
     .bdrv_co_preadv = cloop_co_preadv,
     .bdrv_close     = cloop_close,
diff --git a/block/commit.c b/block/commit.c
index c284e8535d..22a0a4db98 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -36,6 +36,7 @@ typedef struct CommitBlockJob {
     BlockJob common;
     RateLimit limit;
     BlockDriverState *active;
+    BlockDriverState *commit_top_bs;
     BlockBackend *top;
     BlockBackend *base;
     BlockdevOnError on_error;
@@ -83,12 +84,23 @@ static void commit_complete(BlockJob *job, void *opaque)
     BlockDriverState *active = s->active;
     BlockDriverState *top = blk_bs(s->top);
     BlockDriverState *base = blk_bs(s->base);
-    BlockDriverState *overlay_bs = bdrv_find_overlay(active, top);
+    BlockDriverState *overlay_bs = bdrv_find_overlay(active, s->commit_top_bs);
     int ret = data->ret;
+    bool remove_commit_top_bs = false;
+
+    /* Remove base node parent that still uses BLK_PERM_WRITE/RESIZE before
+     * the normal backing chain can be restored. */
+    blk_unref(s->base);
 
     if (!block_job_is_cancelled(&s->common) && ret == 0) {
         /* success */
-        ret = bdrv_drop_intermediate(active, top, base, s->backing_file_str);
+        ret = bdrv_drop_intermediate(active, s->commit_top_bs, base,
+                                     s->backing_file_str);
+    } else if (overlay_bs) {
+        /* XXX Can (or should) we somehow keep 'consistent read' blocked even
+         * after the failed/cancelled commit job is gone? If we already wrote
+         * something to base, the intermediate images aren't valid any more. */
+        remove_commit_top_bs = true;
     }
 
     /* restore base open flags here if appropriate (e.g., change the base back
@@ -102,9 +114,15 @@ static void commit_complete(BlockJob *job, void *opaque)
     }
     g_free(s->backing_file_str);
     blk_unref(s->top);
-    blk_unref(s->base);
     block_job_completed(&s->common, ret);
     g_free(data);
+
+    /* If bdrv_drop_intermediate() didn't already do that, remove the commit
+     * filter driver from the backing chain. Do this as the final step so that
+     * the 'consistent read' permission can be granted.  */
+    if (remove_commit_top_bs) {
+        bdrv_set_backing_hd(overlay_bs, top, &error_abort);
+    }
 }
 
 static void coroutine_fn commit_run(void *opaque)
@@ -208,10 +226,38 @@ static const BlockJobDriver commit_job_driver = {
     .start         = commit_run,
 };
 
+static int coroutine_fn bdrv_commit_top_preadv(BlockDriverState *bs,
+    uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags)
+{
+    return bdrv_co_preadv(bs->backing, offset, bytes, qiov, flags);
+}
+
+static void bdrv_commit_top_close(BlockDriverState *bs)
+{
+}
+
+static void bdrv_commit_top_child_perm(BlockDriverState *bs, BdrvChild *c,
+                                       const BdrvChildRole *role,
+                                       uint64_t perm, uint64_t shared,
+                                       uint64_t *nperm, uint64_t *nshared)
+{
+    *nperm = 0;
+    *nshared = BLK_PERM_ALL;
+}
+
+/* Dummy node that provides consistent read to its users without requiring it
+ * from its backing file and that allows writes on the backing file chain. */
+static BlockDriver bdrv_commit_top = {
+    .format_name        = "commit_top",
+    .bdrv_co_preadv     = bdrv_commit_top_preadv,
+    .bdrv_close         = bdrv_commit_top_close,
+    .bdrv_child_perm    = bdrv_commit_top_child_perm,
+};
+
 void commit_start(const char *job_id, BlockDriverState *bs,
                   BlockDriverState *base, BlockDriverState *top, int64_t speed,
                   BlockdevOnError on_error, const char *backing_file_str,
-                  Error **errp)
+                  const char *filter_node_name, Error **errp)
 {
     CommitBlockJob *s;
     BlockReopenQueue *reopen_queue = NULL;
@@ -219,7 +265,9 @@ void commit_start(const char *job_id, BlockDriverState *bs,
     int orig_base_flags;
     BlockDriverState *iter;
     BlockDriverState *overlay_bs;
+    BlockDriverState *commit_top_bs = NULL;
     Error *local_err = NULL;
+    int ret;
 
     assert(top != bs);
     if (top == base) {
@@ -234,8 +282,8 @@ void commit_start(const char *job_id, BlockDriverState *bs,
         return;
     }
 
-    s = block_job_create(job_id, &commit_job_driver, bs, speed,
-                         BLOCK_JOB_DEFAULT, NULL, NULL, errp);
+    s = block_job_create(job_id, &commit_job_driver, bs, 0, BLK_PERM_ALL,
+                         speed, BLOCK_JOB_DEFAULT, NULL, NULL, errp);
     if (!s) {
         return;
     }
@@ -256,30 +304,70 @@ void commit_start(const char *job_id, BlockDriverState *bs,
         bdrv_reopen_multiple(bdrv_get_aio_context(bs), reopen_queue, &local_err);
         if (local_err != NULL) {
             error_propagate(errp, local_err);
-            block_job_unref(&s->common);
-            return;
+            goto fail;
         }
     }
 
+    /* Insert commit_top block node above top, so we can block consistent read
+     * on the backing chain below it */
+    commit_top_bs = bdrv_new_open_driver(&bdrv_commit_top, filter_node_name, 0,
+                                         errp);
+    if (commit_top_bs == NULL) {
+        goto fail;
+    }
+
+    bdrv_set_backing_hd(commit_top_bs, top, &error_abort);
+    bdrv_set_backing_hd(overlay_bs, commit_top_bs, &error_abort);
+
+    s->commit_top_bs = commit_top_bs;
+    bdrv_unref(commit_top_bs);
 
     /* Block all nodes between top and base, because they will
      * disappear from the chain after this operation. */
     assert(bdrv_chain_contains(top, base));
-    for (iter = top; iter != backing_bs(base); iter = backing_bs(iter)) {
-        block_job_add_bdrv(&s->common, iter);
+    for (iter = top; iter != base; iter = backing_bs(iter)) {
+        /* XXX BLK_PERM_WRITE needs to be allowed so we don't block ourselves
+         * at s->base (if writes are blocked for a node, they are also blocked
+         * for its backing file). The other options would be a second filter
+         * driver above s->base. */
+        ret = block_job_add_bdrv(&s->common, "intermediate node", iter, 0,
+                                 BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE,
+                                 errp);
+        if (ret < 0) {
+            goto fail;
+        }
+    }
+
+    ret = block_job_add_bdrv(&s->common, "base", base, 0, BLK_PERM_ALL, errp);
+    if (ret < 0) {
+        goto fail;
     }
+
     /* overlay_bs must be blocked because it needs to be modified to
-     * update the backing image string, but if it's the root node then
-     * don't block it again */
-    if (bs != overlay_bs) {
-        block_job_add_bdrv(&s->common, overlay_bs);
+     * update the backing image string. */
+    ret = block_job_add_bdrv(&s->common, "overlay of top", overlay_bs,
+                             BLK_PERM_GRAPH_MOD, BLK_PERM_ALL, errp);
+    if (ret < 0) {
+        goto fail;
     }
 
-    s->base = blk_new();
-    blk_insert_bs(s->base, base);
+    s->base = blk_new(BLK_PERM_CONSISTENT_READ
+                      | BLK_PERM_WRITE
+                      | BLK_PERM_RESIZE,
+                      BLK_PERM_CONSISTENT_READ
+                      | BLK_PERM_GRAPH_MOD
+                      | BLK_PERM_WRITE_UNCHANGED);
+    ret = blk_insert_bs(s->base, base, errp);
+    if (ret < 0) {
+        goto fail;
+    }
 
-    s->top = blk_new();
-    blk_insert_bs(s->top, top);
+    /* Required permissions are already taken with block_job_add_bdrv() */
+    s->top = blk_new(0, BLK_PERM_ALL);
+    blk_insert_bs(s->top, top, errp);
+    if (ret < 0) {
+        goto fail;
+    }
 
     s->active = bs;
 
@@ -292,6 +380,19 @@ void commit_start(const char *job_id, BlockDriverState *bs,
 
     trace_commit_start(bs, base, top, s);
     block_job_start(&s->common);
+    return;
+
+fail:
+    if (s->base) {
+        blk_unref(s->base);
+    }
+    if (s->top) {
+        blk_unref(s->top);
+    }
+    if (commit_top_bs) {
+        bdrv_set_backing_hd(overlay_bs, top, &error_abort);
+    }
+    block_job_unref(&s->common);
 }
 
 
@@ -301,11 +402,14 @@ void commit_start(const char *job_id, BlockDriverState *bs,
 int bdrv_commit(BlockDriverState *bs)
 {
     BlockBackend *src, *backing;
+    BlockDriverState *backing_file_bs = NULL;
+    BlockDriverState *commit_top_bs = NULL;
     BlockDriver *drv = bs->drv;
     int64_t sector, total_sectors, length, backing_length;
     int n, ro, open_flags;
     int ret = 0;
     uint8_t *buf = NULL;
+    Error *local_err = NULL;
 
     if (!drv)
         return -ENOMEDIUM;
@@ -328,11 +432,33 @@ int bdrv_commit(BlockDriverState *bs)
         }
     }
 
-    src = blk_new();
-    blk_insert_bs(src, bs);
+    src = blk_new(BLK_PERM_CONSISTENT_READ, BLK_PERM_ALL);
+    backing = blk_new(BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL);
 
-    backing = blk_new();
-    blk_insert_bs(backing, bs->backing->bs);
+    ret = blk_insert_bs(src, bs, &local_err);
+    if (ret < 0) {
+        error_report_err(local_err);
+        goto ro_cleanup;
+    }
+
+    /* Insert commit_top block node above backing, so we can write to it */
+    backing_file_bs = backing_bs(bs);
+
+    commit_top_bs = bdrv_new_open_driver(&bdrv_commit_top, NULL, BDRV_O_RDWR,
+                                         &local_err);
+    if (commit_top_bs == NULL) {
+        error_report_err(local_err);
+        goto ro_cleanup;
+    }
+
+    bdrv_set_backing_hd(commit_top_bs, backing_file_bs, &error_abort);
+    bdrv_set_backing_hd(bs, commit_top_bs, &error_abort);
+
+    ret = blk_insert_bs(backing, backing_file_bs, &local_err);
+    if (ret < 0) {
+        error_report_err(local_err);
+        goto ro_cleanup;
+    }
 
     length = blk_getlength(src);
     if (length < 0) {
@@ -404,8 +530,12 @@ int bdrv_commit(BlockDriverState *bs)
 ro_cleanup:
     qemu_vfree(buf);
 
-    blk_unref(src);
     blk_unref(backing);
+    if (backing_file_bs) {
+        bdrv_set_backing_hd(bs, backing_file_bs, &error_abort);
+    }
+    bdrv_unref(commit_top_bs);
+    blk_unref(src);
 
     if (ro) {
         /* ignoring error return here */
diff --git a/block/crypto.c b/block/crypto.c
index 7cb2ff2946..4a2038888d 100644
--- a/block/crypto.c
+++ b/block/crypto.c
@@ -628,6 +628,7 @@ BlockDriver bdrv_crypto_luks = {
     .bdrv_probe         = block_crypto_probe_luks,
     .bdrv_open          = block_crypto_open_luks,
     .bdrv_close         = block_crypto_close,
+    .bdrv_child_perm    = bdrv_format_default_perms,
     .bdrv_create        = block_crypto_create_luks,
     .bdrv_truncate      = block_crypto_truncate,
     .create_opts        = &block_crypto_create_opts_luks,
diff --git a/block/curl.c b/block/curl.c
index 2939cc77e9..e83dcd8f50 100644
--- a/block/curl.c
+++ b/block/curl.c
@@ -135,6 +135,7 @@ typedef struct BDRVCURLState {
     char *cookie;
     bool accept_range;
     AioContext *aio_context;
+    QemuMutex mutex;
     char *username;
     char *password;
     char *proxyusername;
@@ -333,6 +334,7 @@ static int curl_find_buf(BDRVCURLState *s, size_t start, size_t len,
     return FIND_RET_NONE;
 }
 
+/* Called with s->mutex held.  */
 static void curl_multi_check_completion(BDRVCURLState *s)
 {
     int msgs_in_queue;
@@ -374,7 +376,9 @@ static void curl_multi_check_completion(BDRVCURLState *s)
                         continue;
                     }
 
+                    qemu_mutex_unlock(&s->mutex);
                     acb->common.cb(acb->common.opaque, -EPROTO);
+                    qemu_mutex_lock(&s->mutex);
                     qemu_aio_unref(acb);
                     state->acb[i] = NULL;
                 }
@@ -386,6 +390,7 @@ static void curl_multi_check_completion(BDRVCURLState *s)
     }
 }
 
+/* Called with s->mutex held.  */
 static void curl_multi_do_locked(CURLState *s)
 {
     CURLSocket *socket, *next_socket;
@@ -409,19 +414,19 @@ static void curl_multi_do(void *arg)
 {
     CURLState *s = (CURLState *)arg;
 
-    aio_context_acquire(s->s->aio_context);
+    qemu_mutex_lock(&s->s->mutex);
     curl_multi_do_locked(s);
-    aio_context_release(s->s->aio_context);
+    qemu_mutex_unlock(&s->s->mutex);
 }
 
 static void curl_multi_read(void *arg)
 {
     CURLState *s = (CURLState *)arg;
 
-    aio_context_acquire(s->s->aio_context);
+    qemu_mutex_lock(&s->s->mutex);
     curl_multi_do_locked(s);
     curl_multi_check_completion(s->s);
-    aio_context_release(s->s->aio_context);
+    qemu_mutex_unlock(&s->s->mutex);
 }
 
 static void curl_multi_timeout_do(void *arg)
@@ -434,11 +439,11 @@ static void curl_multi_timeout_do(void *arg)
         return;
     }
 
-    aio_context_acquire(s->aio_context);
+    qemu_mutex_lock(&s->mutex);
     curl_multi_socket_action(s->multi, CURL_SOCKET_TIMEOUT, 0, &running);
 
     curl_multi_check_completion(s);
-    aio_context_release(s->aio_context);
+    qemu_mutex_unlock(&s->mutex);
 #else
     abort();
 #endif
@@ -771,6 +776,7 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags,
     curl_easy_cleanup(state->curl);
     state->curl = NULL;
 
+    qemu_mutex_init(&s->mutex);
     curl_attach_aio_context(bs, bdrv_get_aio_context(bs));
 
     qemu_opts_del(opts);
@@ -801,12 +807,11 @@ static void curl_readv_bh_cb(void *p)
     CURLAIOCB *acb = p;
     BlockDriverState *bs = acb->common.bs;
     BDRVCURLState *s = bs->opaque;
-    AioContext *ctx = bdrv_get_aio_context(bs);
 
     size_t start = acb->sector_num * BDRV_SECTOR_SIZE;
     size_t end;
 
-    aio_context_acquire(ctx);
+    qemu_mutex_lock(&s->mutex);
 
     // In case we have the requested data already (e.g. read-ahead),
     // we can just call the callback and be done.
@@ -854,7 +859,7 @@ static void curl_readv_bh_cb(void *p)
     curl_multi_socket_action(s->multi, CURL_SOCKET_TIMEOUT, 0, &running);
 
 out:
-    aio_context_release(ctx);
+    qemu_mutex_unlock(&s->mutex);
     if (ret != -EINPROGRESS) {
         acb->common.cb(acb->common.opaque, ret);
         qemu_aio_unref(acb);
@@ -883,6 +888,7 @@ static void curl_close(BlockDriverState *bs)
 
     DPRINTF("CURL: Close\n");
     curl_detach_aio_context(bs);
+    qemu_mutex_destroy(&s->mutex);
 
     g_free(s->cookie);
     g_free(s->url);
diff --git a/block/dmg.c b/block/dmg.c
index 8e387cdfe5..a7d25fc47b 100644
--- a/block/dmg.c
+++ b/block/dmg.c
@@ -697,6 +697,7 @@ static BlockDriver bdrv_dmg = {
     .bdrv_probe     = dmg_probe,
     .bdrv_open      = dmg_open,
     .bdrv_refresh_limits = dmg_refresh_limits,
+    .bdrv_child_perm     = bdrv_format_default_perms,
     .bdrv_co_preadv = dmg_co_preadv,
     .bdrv_close     = dmg_close,
 };
diff --git a/block/io.c b/block/io.c
index d5c45447fd..8f38d46de0 100644
--- a/block/io.c
+++ b/block/io.c
@@ -925,9 +925,11 @@ bdrv_driver_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
     return drv->bdrv_co_pwritev_compressed(bs, offset, bytes, qiov);
 }
 
-static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
+static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child,
         int64_t offset, unsigned int bytes, QEMUIOVector *qiov)
 {
+    BlockDriverState *bs = child->bs;
+
     /* Perform I/O through a temporary buffer so that users who scribble over
      * their read buffer while the operation is in progress do not end up
      * modifying the image file.  This is critical for zero-copy guest I/O
@@ -943,6 +945,8 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
     size_t skip_bytes;
     int ret;
 
+    assert(child->perm & (BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE));
+
     /* Cover entire cluster so no additional backing file I/O is required when
      * allocating cluster in the image file.
      */
@@ -1001,10 +1005,11 @@ err:
  * handles copy on read, zeroing after EOF, and fragmentation of large
  * reads; any other features must be implemented by the caller.
  */
-static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
+static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child,
     BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
     int64_t align, QEMUIOVector *qiov, int flags)
 {
+    BlockDriverState *bs = child->bs;
     int64_t total_bytes, max_bytes;
     int ret = 0;
     uint64_t bytes_remaining = bytes;
@@ -1050,7 +1055,7 @@ static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
         }
 
         if (!ret || pnum != nb_sectors) {
-            ret = bdrv_co_do_copy_on_readv(bs, offset, bytes, qiov);
+            ret = bdrv_co_do_copy_on_readv(child, offset, bytes, qiov);
             goto out;
         }
     }
@@ -1158,7 +1163,7 @@ int coroutine_fn bdrv_co_preadv(BdrvChild *child,
     }
 
     tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_READ);
-    ret = bdrv_aligned_preadv(bs, &req, offset, bytes, align,
+    ret = bdrv_aligned_preadv(child, &req, offset, bytes, align,
                               use_local_qiov ? &local_qiov : qiov,
                               flags);
     tracked_request_end(&req);
@@ -1306,10 +1311,11 @@ fail:
  * Forwards an already correctly aligned write request to the BlockDriver,
  * after possibly fragmenting it.
  */
-static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
+static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child,
     BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
     int64_t align, QEMUIOVector *qiov, int flags)
 {
+    BlockDriverState *bs = child->bs;
     BlockDriver *drv = bs->drv;
     bool waited;
     int ret;
@@ -1332,6 +1338,8 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
     assert(!waited || !req->serialising);
     assert(req->overlap_offset <= offset);
     assert(offset + bytes <= req->overlap_offset + req->overlap_bytes);
+    assert(child->perm & BLK_PERM_WRITE);
+    assert(end_sector <= bs->total_sectors || child->perm & BLK_PERM_RESIZE);
 
     ret = notifier_with_return_list_notify(&bs->before_write_notifiers, req);
 
@@ -1397,12 +1405,13 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
     return ret;
 }
 
-static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs,
+static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child,
                                                 int64_t offset,
                                                 unsigned int bytes,
                                                 BdrvRequestFlags flags,
                                                 BdrvTrackedRequest *req)
 {
+    BlockDriverState *bs = child->bs;
     uint8_t *buf = NULL;
     QEMUIOVector local_qiov;
     struct iovec iov;
@@ -1430,7 +1439,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs,
         mark_request_serialising(req, align);
         wait_serialising_requests(req);
         bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD);
-        ret = bdrv_aligned_preadv(bs, req, offset & ~(align - 1), align,
+        ret = bdrv_aligned_preadv(child, req, offset & ~(align - 1), align,
                                   align, &local_qiov, 0);
         if (ret < 0) {
             goto fail;
@@ -1438,7 +1447,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs,
         bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);
 
         memset(buf + head_padding_bytes, 0, zero_bytes);
-        ret = bdrv_aligned_pwritev(bs, req, offset & ~(align - 1), align,
+        ret = bdrv_aligned_pwritev(child, req, offset & ~(align - 1), align,
                                    align, &local_qiov,
                                    flags & ~BDRV_REQ_ZERO_WRITE);
         if (ret < 0) {
@@ -1452,7 +1461,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs,
     if (bytes >= align) {
         /* Write the aligned part in the middle. */
         uint64_t aligned_bytes = bytes & ~(align - 1);
-        ret = bdrv_aligned_pwritev(bs, req, offset, aligned_bytes, align,
+        ret = bdrv_aligned_pwritev(child, req, offset, aligned_bytes, align,
                                    NULL, flags);
         if (ret < 0) {
             goto fail;
@@ -1468,7 +1477,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs,
         mark_request_serialising(req, align);
         wait_serialising_requests(req);
         bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
-        ret = bdrv_aligned_preadv(bs, req, offset, align,
+        ret = bdrv_aligned_preadv(child, req, offset, align,
                                   align, &local_qiov, 0);
         if (ret < 0) {
             goto fail;
@@ -1476,7 +1485,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs,
         bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
 
         memset(buf, 0, bytes);
-        ret = bdrv_aligned_pwritev(bs, req, offset, align, align,
+        ret = bdrv_aligned_pwritev(child, req, offset, align, align,
                                    &local_qiov, flags & ~BDRV_REQ_ZERO_WRITE);
     }
 fail:
@@ -1523,7 +1532,7 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
     tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_WRITE);
 
     if (!qiov) {
-        ret = bdrv_co_do_zero_pwritev(bs, offset, bytes, flags, &req);
+        ret = bdrv_co_do_zero_pwritev(child, offset, bytes, flags, &req);
         goto out;
     }
 
@@ -1542,7 +1551,7 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
         qemu_iovec_init_external(&head_qiov, &head_iov, 1);
 
         bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD);
-        ret = bdrv_aligned_preadv(bs, &req, offset & ~(align - 1), align,
+        ret = bdrv_aligned_preadv(child, &req, offset & ~(align - 1), align,
                                   align, &head_qiov, 0);
         if (ret < 0) {
             goto fail;
@@ -1584,8 +1593,8 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
         qemu_iovec_init_external(&tail_qiov, &tail_iov, 1);
 
         bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
-        ret = bdrv_aligned_preadv(bs, &req, (offset + bytes) & ~(align - 1), align,
-                                  align, &tail_qiov, 0);
+        ret = bdrv_aligned_preadv(child, &req, (offset + bytes) & ~(align - 1),
+                                  align, align, &tail_qiov, 0);
         if (ret < 0) {
             goto fail;
         }
@@ -1603,7 +1612,7 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
         bytes = ROUND_UP(bytes, align);
     }
 
-    ret = bdrv_aligned_pwritev(bs, &req, offset, bytes, align,
+    ret = bdrv_aligned_pwritev(child, &req, offset, bytes, align,
                                use_local_qiov ? &local_qiov : qiov,
                                flags);
 
diff --git a/block/iscsi.c b/block/iscsi.c
index c4f813bfd2..76319a1a6e 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -58,6 +58,7 @@ typedef struct IscsiLun {
     int events;
     QEMUTimer *nop_timer;
     QEMUTimer *event_timer;
+    QemuMutex mutex;
     struct scsi_inquiry_logical_block_provisioning lbp;
     struct scsi_inquiry_block_limits bl;
     unsigned char *zeroblock;
@@ -252,6 +253,7 @@ static int iscsi_translate_sense(struct scsi_sense *sense)
     return ret;
 }
 
+/* Called (via iscsi_service) with QemuMutex held.  */
 static void
 iscsi_co_generic_cb(struct iscsi_context *iscsi, int status,
                         void *command_data, void *opaque)
@@ -352,6 +354,7 @@ static const AIOCBInfo iscsi_aiocb_info = {
 static void iscsi_process_read(void *arg);
 static void iscsi_process_write(void *arg);
 
+/* Called with QemuMutex held.  */
 static void
 iscsi_set_events(IscsiLun *iscsilun)
 {
@@ -395,10 +398,10 @@ iscsi_process_read(void *arg)
     IscsiLun *iscsilun = arg;
     struct iscsi_context *iscsi = iscsilun->iscsi;
 
-    aio_context_acquire(iscsilun->aio_context);
+    qemu_mutex_lock(&iscsilun->mutex);
     iscsi_service(iscsi, POLLIN);
     iscsi_set_events(iscsilun);
-    aio_context_release(iscsilun->aio_context);
+    qemu_mutex_unlock(&iscsilun->mutex);
 }
 
 static void
@@ -407,10 +410,10 @@ iscsi_process_write(void *arg)
     IscsiLun *iscsilun = arg;
     struct iscsi_context *iscsi = iscsilun->iscsi;
 
-    aio_context_acquire(iscsilun->aio_context);
+    qemu_mutex_lock(&iscsilun->mutex);
     iscsi_service(iscsi, POLLOUT);
     iscsi_set_events(iscsilun);
-    aio_context_release(iscsilun->aio_context);
+    qemu_mutex_unlock(&iscsilun->mutex);
 }
 
 static int64_t sector_lun2qemu(int64_t sector, IscsiLun *iscsilun)
@@ -589,6 +592,7 @@ iscsi_co_writev_flags(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
     uint64_t lba;
     uint32_t num_sectors;
     bool fua = flags & BDRV_REQ_FUA;
+    int r = 0;
 
     if (fua) {
         assert(iscsilun->dpofua);
@@ -604,6 +608,7 @@ iscsi_co_writev_flags(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
     lba = sector_qemu2lun(sector_num, iscsilun);
     num_sectors = sector_qemu2lun(nb_sectors, iscsilun);
     iscsi_co_init_iscsitask(iscsilun, &iTask);
+    qemu_mutex_lock(&iscsilun->mutex);
 retry:
     if (iscsilun->use_16_for_rw) {
 #if LIBISCSI_API_VERSION >= (20160603)
@@ -640,7 +645,9 @@ retry:
 #endif
     while (!iTask.complete) {
         iscsi_set_events(iscsilun);
+        qemu_mutex_unlock(&iscsilun->mutex);
         qemu_coroutine_yield();
+        qemu_mutex_lock(&iscsilun->mutex);
     }
 
     if (iTask.task != NULL) {
@@ -655,12 +662,15 @@ retry:
 
     if (iTask.status != SCSI_STATUS_GOOD) {
         iscsi_allocmap_set_invalid(iscsilun, sector_num, nb_sectors);
-        return iTask.err_code;
+        r = iTask.err_code;
+        goto out_unlock;
     }
 
     iscsi_allocmap_set_allocated(iscsilun, sector_num, nb_sectors);
 
-    return 0;
+out_unlock:
+    qemu_mutex_unlock(&iscsilun->mutex);
+    return r;
 }
 
 
@@ -693,18 +703,21 @@ static int64_t coroutine_fn iscsi_co_get_block_status(BlockDriverState *bs,
         goto out;
     }
 
+    qemu_mutex_lock(&iscsilun->mutex);
 retry:
     if (iscsi_get_lba_status_task(iscsilun->iscsi, iscsilun->lun,
                                   sector_qemu2lun(sector_num, iscsilun),
                                   8 + 16, iscsi_co_generic_cb,
                                   &iTask) == NULL) {
         ret = -ENOMEM;
-        goto out;
+        goto out_unlock;
     }
 
     while (!iTask.complete) {
         iscsi_set_events(iscsilun);
+        qemu_mutex_unlock(&iscsilun->mutex);
         qemu_coroutine_yield();
+        qemu_mutex_lock(&iscsilun->mutex);
     }
 
     if (iTask.do_retry) {
@@ -721,20 +734,20 @@ retry:
          * because the device is busy or the cmd is not
          * supported) we pretend all blocks are allocated
          * for backwards compatibility */
-        goto out;
+        goto out_unlock;
     }
 
     lbas = scsi_datain_unmarshall(iTask.task);
     if (lbas == NULL) {
         ret = -EIO;
-        goto out;
+        goto out_unlock;
     }
 
     lbasd = &lbas->descriptors[0];
 
     if (sector_qemu2lun(sector_num, iscsilun) != lbasd->lba) {
         ret = -EIO;
-        goto out;
+        goto out_unlock;
     }
 
     *pnum = sector_lun2qemu(lbasd->num_blocks, iscsilun);
@@ -756,6 +769,8 @@ retry:
     if (*pnum > nb_sectors) {
         *pnum = nb_sectors;
     }
+out_unlock:
+    qemu_mutex_unlock(&iscsilun->mutex);
 out:
     if (iTask.task != NULL) {
         scsi_free_scsi_task(iTask.task);
@@ -818,6 +833,7 @@ static int coroutine_fn iscsi_co_readv(BlockDriverState *bs,
     num_sectors = sector_qemu2lun(nb_sectors, iscsilun);
 
     iscsi_co_init_iscsitask(iscsilun, &iTask);
+    qemu_mutex_lock(&iscsilun->mutex);
 retry:
     if (iscsilun->use_16_for_rw) {
 #if LIBISCSI_API_VERSION >= (20160603)
@@ -855,7 +871,9 @@ retry:
 #endif
     while (!iTask.complete) {
         iscsi_set_events(iscsilun);
+        qemu_mutex_unlock(&iscsilun->mutex);
         qemu_coroutine_yield();
+        qemu_mutex_lock(&iscsilun->mutex);
     }
 
     if (iTask.task != NULL) {
@@ -867,6 +885,7 @@ retry:
         iTask.complete = 0;
         goto retry;
     }
+    qemu_mutex_unlock(&iscsilun->mutex);
 
     if (iTask.status != SCSI_STATUS_GOOD) {
         return iTask.err_code;
@@ -881,6 +900,7 @@ static int coroutine_fn iscsi_co_flush(BlockDriverState *bs)
     struct IscsiTask iTask;
 
     iscsi_co_init_iscsitask(iscsilun, &iTask);
+    qemu_mutex_lock(&iscsilun->mutex);
 retry:
     if (iscsi_synchronizecache10_task(iscsilun->iscsi, iscsilun->lun, 0, 0, 0,
                                       0, iscsi_co_generic_cb, &iTask) == NULL) {
@@ -889,7 +909,9 @@ retry:
 
     while (!iTask.complete) {
         iscsi_set_events(iscsilun);
+        qemu_mutex_unlock(&iscsilun->mutex);
         qemu_coroutine_yield();
+        qemu_mutex_lock(&iscsilun->mutex);
     }
 
     if (iTask.task != NULL) {
@@ -901,6 +923,7 @@ retry:
         iTask.complete = 0;
         goto retry;
     }
+    qemu_mutex_unlock(&iscsilun->mutex);
 
     if (iTask.status != SCSI_STATUS_GOOD) {
         return iTask.err_code;
@@ -910,6 +933,7 @@ retry:
 }
 
 #ifdef __linux__
+/* Called (via iscsi_service) with QemuMutex held.  */
 static void
 iscsi_aio_ioctl_cb(struct iscsi_context *iscsi, int status,
                      void *command_data, void *opaque)
@@ -1034,6 +1058,7 @@ static BlockAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,
     acb->task->expxferlen = acb->ioh->dxfer_len;
 
     data.size = 0;
+    qemu_mutex_lock(&iscsilun->mutex);
     if (acb->task->xfer_dir == SCSI_XFER_WRITE) {
         if (acb->ioh->iovec_count == 0) {
             data.data = acb->ioh->dxferp;
@@ -1049,6 +1074,7 @@ static BlockAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,
                                  iscsi_aio_ioctl_cb,
                                  (data.size > 0) ? &data : NULL,
                                  acb) != 0) {
+        qemu_mutex_unlock(&iscsilun->mutex);
         scsi_free_scsi_task(acb->task);
         qemu_aio_unref(acb);
         return NULL;
@@ -1068,6 +1094,7 @@ static BlockAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,
     }
 
     iscsi_set_events(iscsilun);
+    qemu_mutex_unlock(&iscsilun->mutex);
 
     return &acb->common;
 }
@@ -1092,6 +1119,7 @@ coroutine_fn iscsi_co_pdiscard(BlockDriverState *bs, int64_t offset, int count)
     IscsiLun *iscsilun = bs->opaque;
     struct IscsiTask iTask;
     struct unmap_list list;
+    int r = 0;
 
     if (!is_byte_request_lun_aligned(offset, count, iscsilun)) {
         return -ENOTSUP;
@@ -1106,15 +1134,19 @@ coroutine_fn iscsi_co_pdiscard(BlockDriverState *bs, int64_t offset, int count)
     list.num = count / iscsilun->block_size;
 
     iscsi_co_init_iscsitask(iscsilun, &iTask);
+    qemu_mutex_lock(&iscsilun->mutex);
 retry:
     if (iscsi_unmap_task(iscsilun->iscsi, iscsilun->lun, 0, 0, &list, 1,
                          iscsi_co_generic_cb, &iTask) == NULL) {
-        return -ENOMEM;
+        r = -ENOMEM;
+        goto out_unlock;
     }
 
     while (!iTask.complete) {
         iscsi_set_events(iscsilun);
+        qemu_mutex_unlock(&iscsilun->mutex);
         qemu_coroutine_yield();
+        qemu_mutex_lock(&iscsilun->mutex);
     }
 
     if (iTask.task != NULL) {
@@ -1131,17 +1163,20 @@ retry:
         /* the target might fail with a check condition if it
            is not happy with the alignment of the UNMAP request
            we silently fail in this case */
-        return 0;
+        goto out_unlock;
     }
 
     if (iTask.status != SCSI_STATUS_GOOD) {
-        return iTask.err_code;
+        r = iTask.err_code;
+        goto out_unlock;
     }
 
     iscsi_allocmap_set_invalid(iscsilun, offset >> BDRV_SECTOR_BITS,
                                count >> BDRV_SECTOR_BITS);
 
-    return 0;
+out_unlock:
+    qemu_mutex_unlock(&iscsilun->mutex);
+    return r;
 }
 
 static int
@@ -1153,6 +1188,7 @@ coroutine_fn iscsi_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
     uint64_t lba;
     uint32_t nb_blocks;
     bool use_16_for_ws = iscsilun->use_16_for_rw;
+    int r = 0;
 
     if (!is_byte_request_lun_aligned(offset, count, iscsilun)) {
         return -ENOTSUP;
@@ -1186,6 +1222,7 @@ coroutine_fn iscsi_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
         }
     }
 
+    qemu_mutex_lock(&iscsilun->mutex);
     iscsi_co_init_iscsitask(iscsilun, &iTask);
 retry:
     if (use_16_for_ws) {
@@ -1205,7 +1242,9 @@ retry:
 
     while (!iTask.complete) {
         iscsi_set_events(iscsilun);
+        qemu_mutex_unlock(&iscsilun->mutex);
         qemu_coroutine_yield();
+        qemu_mutex_lock(&iscsilun->mutex);
     }
 
     if (iTask.status == SCSI_STATUS_CHECK_CONDITION &&
@@ -1215,7 +1254,8 @@ retry:
         /* WRITE SAME is not supported by the target */
         iscsilun->has_write_same = false;
         scsi_free_scsi_task(iTask.task);
-        return -ENOTSUP;
+        r = -ENOTSUP;
+        goto out_unlock;
     }
 
     if (iTask.task != NULL) {
@@ -1231,7 +1271,8 @@ retry:
     if (iTask.status != SCSI_STATUS_GOOD) {
         iscsi_allocmap_set_invalid(iscsilun, offset >> BDRV_SECTOR_BITS,
                                    count >> BDRV_SECTOR_BITS);
-        return iTask.err_code;
+        r = iTask.err_code;
+        goto out_unlock;
     }
 
     if (flags & BDRV_REQ_MAY_UNMAP) {
@@ -1242,7 +1283,9 @@ retry:
                                      count >> BDRV_SECTOR_BITS);
     }
 
-    return 0;
+out_unlock:
+    qemu_mutex_unlock(&iscsilun->mutex);
+    return r;
 }
 
 static void apply_chap(struct iscsi_context *iscsi, QemuOpts *opts,
@@ -1331,7 +1374,7 @@ static void iscsi_nop_timed_event(void *opaque)
 {
     IscsiLun *iscsilun = opaque;
 
-    aio_context_acquire(iscsilun->aio_context);
+    qemu_mutex_lock(&iscsilun->mutex);
     if (iscsi_get_nops_in_flight(iscsilun->iscsi) >= MAX_NOP_FAILURES) {
         error_report("iSCSI: NOP timeout. Reconnecting...");
         iscsilun->request_timed_out = true;
@@ -1344,7 +1387,7 @@ static void iscsi_nop_timed_event(void *opaque)
     iscsi_set_events(iscsilun);
 
 out:
-    aio_context_release(iscsilun->aio_context);
+    qemu_mutex_unlock(&iscsilun->mutex);
 }
 
 static void iscsi_readcapacity_sync(IscsiLun *iscsilun, Error **errp)
@@ -1890,6 +1933,7 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
     scsi_free_scsi_task(task);
     task = NULL;
 
+    qemu_mutex_init(&iscsilun->mutex);
     iscsi_attach_aio_context(bs, iscsilun->aio_context);
 
     /* Guess the internal cluster (page) size of the iscsi target by the means
@@ -1935,6 +1979,7 @@ static void iscsi_close(BlockDriverState *bs)
     iscsi_destroy_context(iscsi);
     g_free(iscsilun->zeroblock);
     iscsi_allocmap_free(iscsilun);
+    qemu_mutex_destroy(&iscsilun->mutex);
     memset(iscsilun, 0, sizeof(IscsiLun));
 }
 
diff --git a/block/mirror.c b/block/mirror.c
index 3d50857300..57f26c33a4 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -38,7 +38,10 @@ typedef struct MirrorBlockJob {
     BlockJob common;
     RateLimit limit;
     BlockBackend *target;
+    BlockDriverState *mirror_top_bs;
+    BlockDriverState *source;
     BlockDriverState *base;
+
     /* The name of the graph node to replace */
     char *replaces;
     /* The BDS to replace */
@@ -327,7 +330,7 @@ static void mirror_do_zero_or_discard(MirrorBlockJob *s,
 
 static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
 {
-    BlockDriverState *source = blk_bs(s->common.blk);
+    BlockDriverState *source = s->source;
     int64_t sector_num, first_chunk;
     uint64_t delay_ns = 0;
     /* At least the first dirty chunk is mirrored in one iteration. */
@@ -386,7 +389,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
                             nb_chunks * sectors_per_chunk);
     bitmap_set(s->in_flight_bitmap, sector_num / sectors_per_chunk, nb_chunks);
     while (nb_chunks > 0 && sector_num < end) {
-        int ret;
+        int64_t ret;
         int io_sectors, io_sectors_acct;
         BlockDriverState *file;
         enum MirrorMethod {
@@ -497,12 +500,30 @@ static void mirror_exit(BlockJob *job, void *opaque)
     MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
     MirrorExitData *data = opaque;
     AioContext *replace_aio_context = NULL;
-    BlockDriverState *src = blk_bs(s->common.blk);
+    BlockDriverState *src = s->source;
     BlockDriverState *target_bs = blk_bs(s->target);
+    BlockDriverState *mirror_top_bs = s->mirror_top_bs;
+    Error *local_err = NULL;
 
     /* Make sure that the source BDS doesn't go away before we called
      * block_job_completed(). */
     bdrv_ref(src);
+    bdrv_ref(mirror_top_bs);
+
+    /* We don't access the source any more. Dropping any WRITE/RESIZE is
+     * required before it could become a backing file of target_bs. */
+    bdrv_child_try_set_perm(mirror_top_bs->backing, 0, BLK_PERM_ALL,
+                            &error_abort);
+    if (s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) {
+        BlockDriverState *backing = s->is_none_mode ? src : s->base;
+        if (backing_bs(target_bs) != backing) {
+            bdrv_set_backing_hd(target_bs, backing, &local_err);
+            if (local_err) {
+                error_report_err(local_err);
+                data->ret = -EPERM;
+            }
+        }
+    }
 
     if (s->to_replace) {
         replace_aio_context = bdrv_get_aio_context(s->to_replace);
@@ -524,10 +545,6 @@ static void mirror_exit(BlockJob *job, void *opaque)
         bdrv_drained_begin(target_bs);
         bdrv_replace_in_backing_chain(to_replace, target_bs);
         bdrv_drained_end(target_bs);
-
-        /* We just changed the BDS the job BB refers to */
-        blk_remove_bs(job->blk);
-        blk_insert_bs(job->blk, src);
     }
     if (s->to_replace) {
         bdrv_op_unblock_all(s->to_replace, s->replace_blocker);
@@ -540,9 +557,26 @@ static void mirror_exit(BlockJob *job, void *opaque)
     g_free(s->replaces);
     blk_unref(s->target);
     s->target = NULL;
+
+    /* Remove the mirror filter driver from the graph. Before this, get rid of
+     * the blockers on the intermediate nodes so that the resulting state is
+     * valid. */
+    block_job_remove_all_bdrv(job);
+    bdrv_replace_in_backing_chain(mirror_top_bs, backing_bs(mirror_top_bs));
+
+    /* We just changed the BDS the job BB refers to (with either or both of the
+     * bdrv_replace_in_backing_chain() calls), so switch the BB back so the
+     * cleanup does the right thing. We don't need any permissions any more
+     * now. */
+    blk_remove_bs(job->blk);
+    blk_set_perm(job->blk, 0, BLK_PERM_ALL, &error_abort);
+    blk_insert_bs(job->blk, mirror_top_bs, &error_abort);
+
     block_job_completed(&s->common, data->ret);
+
     g_free(data);
     bdrv_drained_end(src);
+    bdrv_unref(mirror_top_bs);
     bdrv_unref(src);
 }
 
@@ -562,7 +596,7 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
 {
     int64_t sector_num, end;
     BlockDriverState *base = s->base;
-    BlockDriverState *bs = blk_bs(s->common.blk);
+    BlockDriverState *bs = s->source;
     BlockDriverState *target_bs = blk_bs(s->target);
     int ret, n;
 
@@ -644,7 +678,7 @@ static void coroutine_fn mirror_run(void *opaque)
 {
     MirrorBlockJob *s = opaque;
     MirrorExitData *data;
-    BlockDriverState *bs = blk_bs(s->common.blk);
+    BlockDriverState *bs = s->source;
     BlockDriverState *target_bs = blk_bs(s->target);
     bool need_drain = true;
     int64_t length;
@@ -876,9 +910,8 @@ static void mirror_set_speed(BlockJob *job, int64_t speed, Error **errp)
 static void mirror_complete(BlockJob *job, Error **errp)
 {
     MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
-    BlockDriverState *src, *target;
+    BlockDriverState *target;
 
-    src = blk_bs(job->blk);
     target = blk_bs(s->target);
 
     if (!s->synced) {
@@ -910,6 +943,10 @@ static void mirror_complete(BlockJob *job, Error **errp)
         replace_aio_context = bdrv_get_aio_context(s->to_replace);
         aio_context_acquire(replace_aio_context);
 
+        /* TODO Translate this into permission system. Current definition of
+         * GRAPH_MOD would require to request it for the parents; they might
+         * not even be BlockDriverStates, however, so a BdrvChild can't address
+         * them. May need redefinition of GRAPH_MOD. */
         error_setg(&s->replace_blocker,
                    "block device is in use by block-job-complete");
         bdrv_op_block_all(s->to_replace, s->replace_blocker);
@@ -918,13 +955,6 @@ static void mirror_complete(BlockJob *job, Error **errp)
         aio_context_release(replace_aio_context);
     }
 
-    if (s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) {
-        BlockDriverState *backing = s->is_none_mode ? src : s->base;
-        if (backing_bs(target) != backing) {
-            bdrv_set_backing_hd(target, backing);
-        }
-    }
-
     s->should_complete = true;
     block_job_enter(&s->common);
 }
@@ -980,6 +1010,77 @@ static const BlockJobDriver commit_active_job_driver = {
     .drain                  = mirror_drain,
 };
 
+static int coroutine_fn bdrv_mirror_top_preadv(BlockDriverState *bs,
+    uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags)
+{
+    return bdrv_co_preadv(bs->backing, offset, bytes, qiov, flags);
+}
+
+static int coroutine_fn bdrv_mirror_top_pwritev(BlockDriverState *bs,
+    uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags)
+{
+    return bdrv_co_pwritev(bs->backing, offset, bytes, qiov, flags);
+}
+
+static int coroutine_fn bdrv_mirror_top_flush(BlockDriverState *bs)
+{
+    return bdrv_co_flush(bs->backing->bs);
+}
+
+static int64_t coroutine_fn bdrv_mirror_top_get_block_status(
+    BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum,
+    BlockDriverState **file)
+{
+    *pnum = nb_sectors;
+    *file = bs->backing->bs;
+    return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID | BDRV_BLOCK_DATA |
+           (sector_num << BDRV_SECTOR_BITS);
+}
+
+static int coroutine_fn bdrv_mirror_top_pwrite_zeroes(BlockDriverState *bs,
+    int64_t offset, int count, BdrvRequestFlags flags)
+{
+    return bdrv_co_pwrite_zeroes(bs->backing, offset, count, flags);
+}
+
+static int coroutine_fn bdrv_mirror_top_pdiscard(BlockDriverState *bs,
+    int64_t offset, int count)
+{
+    return bdrv_co_pdiscard(bs->backing->bs, offset, count);
+}
+
+static void bdrv_mirror_top_close(BlockDriverState *bs)
+{
+}
+
+static void bdrv_mirror_top_child_perm(BlockDriverState *bs, BdrvChild *c,
+                                       const BdrvChildRole *role,
+                                       uint64_t perm, uint64_t shared,
+                                       uint64_t *nperm, uint64_t *nshared)
+{
+    /* Must be able to forward guest writes to the real image */
+    *nperm = 0;
+    if (perm & BLK_PERM_WRITE) {
+        *nperm |= BLK_PERM_WRITE;
+    }
+
+    *nshared = BLK_PERM_ALL;
+}
+
+/* Dummy node that provides consistent read to its users without requiring it
+ * from its backing file and that allows writes on the backing file chain. */
+static BlockDriver bdrv_mirror_top = {
+    .format_name                = "mirror_top",
+    .bdrv_co_preadv             = bdrv_mirror_top_preadv,
+    .bdrv_co_pwritev            = bdrv_mirror_top_pwritev,
+    .bdrv_co_pwrite_zeroes      = bdrv_mirror_top_pwrite_zeroes,
+    .bdrv_co_pdiscard           = bdrv_mirror_top_pdiscard,
+    .bdrv_co_flush              = bdrv_mirror_top_flush,
+    .bdrv_co_get_block_status   = bdrv_mirror_top_get_block_status,
+    .bdrv_close                 = bdrv_mirror_top_close,
+    .bdrv_child_perm            = bdrv_mirror_top_child_perm,
+};
+
 static void mirror_start_job(const char *job_id, BlockDriverState *bs,
                              int creation_flags, BlockDriverState *target,
                              const char *replaces, int64_t speed,
@@ -992,9 +1093,14 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs,
                              void *opaque, Error **errp,
                              const BlockJobDriver *driver,
                              bool is_none_mode, BlockDriverState *base,
-                             bool auto_complete)
+                             bool auto_complete, const char *filter_node_name)
 {
     MirrorBlockJob *s;
+    BlockDriverState *mirror_top_bs;
+    bool target_graph_mod;
+    bool target_is_backing;
+    Error *local_err = NULL;
+    int ret;
 
     if (granularity == 0) {
         granularity = bdrv_get_default_bitmap_granularity(target);
@@ -1011,14 +1117,62 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs,
         buf_size = DEFAULT_MIRROR_BUF_SIZE;
     }
 
-    s = block_job_create(job_id, driver, bs, speed, creation_flags,
-                         cb, opaque, errp);
-    if (!s) {
+    /* In the case of active commit, add dummy driver to provide consistent
+     * reads on the top, while disabling it in the intermediate nodes, and make
+     * the backing chain writable. */
+    mirror_top_bs = bdrv_new_open_driver(&bdrv_mirror_top, filter_node_name,
+                                         BDRV_O_RDWR, errp);
+    if (mirror_top_bs == NULL) {
         return;
     }
+    mirror_top_bs->total_sectors = bs->total_sectors;
 
-    s->target = blk_new();
-    blk_insert_bs(s->target, target);
+    /* bdrv_append takes ownership of the mirror_top_bs reference, need to keep
+     * it alive until block_job_create() even if bs has no parent. */
+    bdrv_ref(mirror_top_bs);
+    bdrv_drained_begin(bs);
+    bdrv_append(mirror_top_bs, bs, &local_err);
+    bdrv_drained_end(bs);
+
+    if (local_err) {
+        bdrv_unref(mirror_top_bs);
+        error_propagate(errp, local_err);
+        return;
+    }
+
+    /* Make sure that the source is not resized while the job is running */
+    s = block_job_create(job_id, driver, mirror_top_bs,
+                         BLK_PERM_CONSISTENT_READ,
+                         BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED |
+                         BLK_PERM_WRITE | BLK_PERM_GRAPH_MOD, speed,
+                         creation_flags, cb, opaque, errp);
+    bdrv_unref(mirror_top_bs);
+    if (!s) {
+        goto fail;
+    }
+    s->source = bs;
+    s->mirror_top_bs = mirror_top_bs;
+
+    /* No resize for the target either; while the mirror is still running, a
+     * consistent read isn't necessarily possible. We could possibly allow
+     * writes and graph modifications, though it would likely defeat the
+     * purpose of a mirror, so leave them blocked for now.
+     *
+     * In the case of active commit, things look a bit different, though,
+     * because the target is an already populated backing file in active use.
+     * We can allow anything except resize there.*/
+    target_is_backing = bdrv_chain_contains(bs, target);
+    target_graph_mod = (backing_mode != MIRROR_LEAVE_BACKING_CHAIN);
+    s->target = blk_new(BLK_PERM_WRITE | BLK_PERM_RESIZE |
+                        (target_graph_mod ? BLK_PERM_GRAPH_MOD : 0),
+                        BLK_PERM_WRITE_UNCHANGED |
+                        (target_is_backing ? BLK_PERM_CONSISTENT_READ |
+                                             BLK_PERM_WRITE |
+                                             BLK_PERM_GRAPH_MOD : 0));
+    ret = blk_insert_bs(s->target, target, errp);
+    if (ret < 0) {
+        goto fail;
+    }
 
     s->replaces = g_strdup(replaces);
     s->on_source_error = on_source_error;
@@ -1041,18 +1195,40 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs,
         return;
     }
 
-    block_job_add_bdrv(&s->common, target);
+    /* Required permissions are already taken with blk_new() */
+    block_job_add_bdrv(&s->common, "target", target, 0, BLK_PERM_ALL,
+                       &error_abort);
+
     /* In commit_active_start() all intermediate nodes disappear, so
      * any jobs in them must be blocked */
-    if (bdrv_chain_contains(bs, target)) {
+    if (target_is_backing) {
         BlockDriverState *iter;
         for (iter = backing_bs(bs); iter != target; iter = backing_bs(iter)) {
-            block_job_add_bdrv(&s->common, iter);
+            /* XXX BLK_PERM_WRITE needs to be allowed so we don't block
+             * ourselves at s->base (if writes are blocked for a node, they are
+             * also blocked for its backing file). The other options would be a
+             * second filter driver above s->base (== target). */
+            ret = block_job_add_bdrv(&s->common, "intermediate node", iter, 0,
+                                     BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE,
+                                     errp);
+            if (ret < 0) {
+                goto fail;
+            }
         }
     }
 
     trace_mirror_start(bs, s, opaque);
     block_job_start(&s->common);
+    return;
+
+fail:
+    if (s) {
+        g_free(s->replaces);
+        blk_unref(s->target);
+        block_job_unref(&s->common);
+    }
+
+    bdrv_replace_in_backing_chain(mirror_top_bs, backing_bs(mirror_top_bs));
 }
 
 void mirror_start(const char *job_id, BlockDriverState *bs,
@@ -1061,7 +1237,7 @@ void mirror_start(const char *job_id, BlockDriverState *bs,
                   MirrorSyncMode mode, BlockMirrorBackingMode backing_mode,
                   BlockdevOnError on_source_error,
                   BlockdevOnError on_target_error,
-                  bool unmap, Error **errp)
+                  bool unmap, const char *filter_node_name, Error **errp)
 {
     bool is_none_mode;
     BlockDriverState *base;
@@ -1075,12 +1251,14 @@ void mirror_start(const char *job_id, BlockDriverState *bs,
     mirror_start_job(job_id, bs, BLOCK_JOB_DEFAULT, target, replaces,
                      speed, granularity, buf_size, backing_mode,
                      on_source_error, on_target_error, unmap, NULL, NULL, errp,
-                     &mirror_job_driver, is_none_mode, base, false);
+                     &mirror_job_driver, is_none_mode, base, false,
+                     filter_node_name);
 }
 
 void commit_active_start(const char *job_id, BlockDriverState *bs,
                          BlockDriverState *base, int creation_flags,
                          int64_t speed, BlockdevOnError on_error,
+                         const char *filter_node_name,
                          BlockCompletionFunc *cb, void *opaque, Error **errp,
                          bool auto_complete)
 {
@@ -1096,7 +1274,8 @@ void commit_active_start(const char *job_id, BlockDriverState *bs,
     mirror_start_job(job_id, bs, creation_flags, base, NULL, speed, 0, 0,
                      MIRROR_LEAVE_BACKING_CHAIN,
                      on_error, on_error, true, cb, opaque, &local_err,
-                     &commit_active_job_driver, false, base, auto_complete);
+                     &commit_active_job_driver, false, base, auto_complete,
+                     filter_node_name);
     if (local_err) {
         error_propagate(errp, local_err);
         goto error_restore_flags;
diff --git a/block/nfs.c b/block/nfs.c
index ffb54be065..890d5d4aff 100644
--- a/block/nfs.c
+++ b/block/nfs.c
@@ -54,6 +54,7 @@ typedef struct NFSClient {
     int events;
     bool has_zero_init;
     AioContext *aio_context;
+    QemuMutex mutex;
     blkcnt_t st_blocks;
     bool cache_used;
     NFSServer *server;
@@ -191,6 +192,7 @@ static void nfs_parse_filename(const char *filename, QDict *options,
 static void nfs_process_read(void *arg);
 static void nfs_process_write(void *arg);
 
+/* Called with QemuMutex held.  */
 static void nfs_set_events(NFSClient *client)
 {
     int ev = nfs_which_events(client->context);
@@ -209,20 +211,20 @@ static void nfs_process_read(void *arg)
 {
     NFSClient *client = arg;
 
-    aio_context_acquire(client->aio_context);
+    qemu_mutex_lock(&client->mutex);
     nfs_service(client->context, POLLIN);
     nfs_set_events(client);
-    aio_context_release(client->aio_context);
+    qemu_mutex_unlock(&client->mutex);
 }
 
 static void nfs_process_write(void *arg)
 {
     NFSClient *client = arg;
 
-    aio_context_acquire(client->aio_context);
+    qemu_mutex_lock(&client->mutex);
     nfs_service(client->context, POLLOUT);
     nfs_set_events(client);
-    aio_context_release(client->aio_context);
+    qemu_mutex_unlock(&client->mutex);
 }
 
 static void nfs_co_init_task(BlockDriverState *bs, NFSRPC *task)
@@ -242,6 +244,7 @@ static void nfs_co_generic_bh_cb(void *opaque)
     aio_co_wake(task->co);
 }
 
+/* Called (via nfs_service) with QemuMutex held.  */
 static void
 nfs_co_generic_cb(int ret, struct nfs_context *nfs, void *data,
                   void *private_data)
@@ -273,12 +276,15 @@ static int coroutine_fn nfs_co_preadv(BlockDriverState *bs, uint64_t offset,
     nfs_co_init_task(bs, &task);
     task.iov = iov;
 
+    qemu_mutex_lock(&client->mutex);
     if (nfs_pread_async(client->context, client->fh,
                         offset, bytes, nfs_co_generic_cb, &task) != 0) {
+        qemu_mutex_unlock(&client->mutex);
         return -ENOMEM;
     }
 
     nfs_set_events(client);
+    qemu_mutex_unlock(&client->mutex);
     while (!task.complete) {
         qemu_coroutine_yield();
     }
@@ -317,9 +323,11 @@ static int coroutine_fn nfs_co_pwritev(BlockDriverState *bs, uint64_t offset,
         buf = iov->iov[0].iov_base;
     }
 
+    qemu_mutex_lock(&client->mutex);
     if (nfs_pwrite_async(client->context, client->fh,
                          offset, bytes, buf,
                          nfs_co_generic_cb, &task) != 0) {
+        qemu_mutex_unlock(&client->mutex);
         if (my_buffer) {
             g_free(buf);
         }
@@ -327,6 +335,7 @@ static int coroutine_fn nfs_co_pwritev(BlockDriverState *bs, uint64_t offset,
     }
 
     nfs_set_events(client);
+    qemu_mutex_unlock(&client->mutex);
     while (!task.complete) {
         qemu_coroutine_yield();
     }
@@ -349,12 +358,15 @@ static int coroutine_fn nfs_co_flush(BlockDriverState *bs)
 
     nfs_co_init_task(bs, &task);
 
+    qemu_mutex_lock(&client->mutex);
     if (nfs_fsync_async(client->context, client->fh, nfs_co_generic_cb,
                         &task) != 0) {
+        qemu_mutex_unlock(&client->mutex);
         return -ENOMEM;
     }
 
     nfs_set_events(client);
+    qemu_mutex_unlock(&client->mutex);
     while (!task.complete) {
         qemu_coroutine_yield();
     }
@@ -440,6 +452,7 @@ static void nfs_file_close(BlockDriverState *bs)
 {
     NFSClient *client = bs->opaque;
     nfs_client_close(client);
+    qemu_mutex_destroy(&client->mutex);
 }
 
 static NFSServer *nfs_config(QDict *options, Error **errp)
@@ -647,6 +660,7 @@ static int nfs_file_open(BlockDriverState *bs, QDict *options, int flags,
     if (ret < 0) {
         return ret;
     }
+    qemu_mutex_init(&client->mutex);
     bs->total_sectors = ret;
     ret = 0;
     return ret;
@@ -702,6 +716,7 @@ static int nfs_has_zero_init(BlockDriverState *bs)
     return client->has_zero_init;
 }
 
+/* Called (via nfs_service) with QemuMutex held.  */
 static void
 nfs_get_allocated_file_size_cb(int ret, struct nfs_context *nfs, void *data,
                                void *private_data)
diff --git a/block/parallels.c b/block/parallels.c
index b2ec09f7e6..19935e29a9 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -488,7 +488,8 @@ static int parallels_create(const char *filename, QemuOpts *opts, Error **errp)
     }
 
     file = blk_new_open(filename, NULL, NULL,
-                        BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
+                        BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
+                        &local_err);
     if (file == NULL) {
         error_propagate(errp, local_err);
         return -EIO;
@@ -762,6 +763,7 @@ static BlockDriver bdrv_parallels = {
     .bdrv_probe		= parallels_probe,
     .bdrv_open		= parallels_open,
     .bdrv_close		= parallels_close,
+    .bdrv_child_perm          = bdrv_format_default_perms,
     .bdrv_co_get_block_status = parallels_co_get_block_status,
     .bdrv_has_zero_init       = bdrv_has_zero_init_1,
     .bdrv_co_flush_to_os      = parallels_co_flush_to_os,
diff --git a/block/qcow.c b/block/qcow.c
index 038b05ab1b..9d6ac83959 100644
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -823,7 +823,8 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp)
     }
 
     qcow_blk = blk_new_open(filename, NULL, NULL,
-                            BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
+                            BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
+                            &local_err);
     if (qcow_blk == NULL) {
         error_propagate(errp, local_err);
         ret = -EIO;
@@ -1052,6 +1053,7 @@ static BlockDriver bdrv_qcow = {
     .bdrv_probe		= qcow_probe,
     .bdrv_open		= qcow_open,
     .bdrv_close		= qcow_close,
+    .bdrv_child_perm        = bdrv_format_default_perms,
     .bdrv_reopen_prepare    = qcow_reopen_prepare,
     .bdrv_create            = qcow_create,
     .bdrv_has_zero_init     = bdrv_has_zero_init_1,
diff --git a/block/qcow2.c b/block/qcow2.c
index 21e61427eb..6a92d2ef3f 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -2202,7 +2202,8 @@ static int qcow2_create2(const char *filename, int64_t total_size,
     }
 
     blk = blk_new_open(filename, NULL, NULL,
-                       BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
+                       BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
+                       &local_err);
     if (blk == NULL) {
         error_propagate(errp, local_err);
         return -EIO;
@@ -2266,7 +2267,8 @@ static int qcow2_create2(const char *filename, int64_t total_size,
     options = qdict_new();
     qdict_put(options, "driver", qstring_from_str("qcow2"));
     blk = blk_new_open(filename, NULL, options,
-                       BDRV_O_RDWR | BDRV_O_NO_FLUSH, &local_err);
+                       BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_NO_FLUSH,
+                       &local_err);
     if (blk == NULL) {
         error_propagate(errp, local_err);
         ret = -EIO;
@@ -3113,6 +3115,7 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts,
     uint64_t cluster_size = s->cluster_size;
     bool encrypt;
     int refcount_bits = s->refcount_bits;
+    Error *local_err = NULL;
     int ret;
     QemuOptDesc *desc = opts->list->desc;
     Qcow2AmendHelperCBInfo helper_cb_info;
@@ -3262,11 +3265,16 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts,
     }
 
     if (new_size) {
-        BlockBackend *blk = blk_new();
-        blk_insert_bs(blk, bs);
+        BlockBackend *blk = blk_new(BLK_PERM_RESIZE, BLK_PERM_ALL);
+        ret = blk_insert_bs(blk, bs, &local_err);
+        if (ret < 0) {
+            error_report_err(local_err);
+            blk_unref(blk);
+            return ret;
+        }
+
         ret = blk_truncate(blk, new_size);
         blk_unref(blk);
-
         if (ret < 0) {
             return ret;
         }
@@ -3403,6 +3411,7 @@ BlockDriver bdrv_qcow2 = {
     .bdrv_reopen_commit   = qcow2_reopen_commit,
     .bdrv_reopen_abort    = qcow2_reopen_abort,
     .bdrv_join_options    = qcow2_join_options,
+    .bdrv_child_perm      = bdrv_format_default_perms,
     .bdrv_create        = qcow2_create,
     .bdrv_has_zero_init = bdrv_has_zero_init_1,
     .bdrv_co_get_block_status = qcow2_co_get_block_status,
diff --git a/block/qed.c b/block/qed.c
index 62a0a09326..5ec7fd83f2 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -625,7 +625,8 @@ static int qed_create(const char *filename, uint32_t cluster_size,
     }
 
     blk = blk_new_open(filename, NULL, NULL,
-                       BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
+                       BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
+                       &local_err);
     if (blk == NULL) {
         error_propagate(errp, local_err);
         return -EIO;
@@ -1704,6 +1705,7 @@ static BlockDriver bdrv_qed = {
     .bdrv_open                = bdrv_qed_open,
     .bdrv_close               = bdrv_qed_close,
     .bdrv_reopen_prepare      = bdrv_qed_reopen_prepare,
+    .bdrv_child_perm          = bdrv_format_default_perms,
     .bdrv_create              = bdrv_qed_create,
     .bdrv_has_zero_init       = bdrv_has_zero_init_1,
     .bdrv_co_get_block_status = bdrv_qed_co_get_block_status,
diff --git a/block/quorum.c b/block/quorum.c
index 86e2072dce..40205fb1b3 100644
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -1032,10 +1032,17 @@ static void quorum_add_child(BlockDriverState *bs, BlockDriverState *child_bs,
 
     /* We can safely add the child now */
     bdrv_ref(child_bs);
-    child = bdrv_attach_child(bs, child_bs, indexstr, &child_format);
+
+    child = bdrv_attach_child(bs, child_bs, indexstr, &child_format, errp);
+    if (child == NULL) {
+        s->next_child_index--;
+        bdrv_unref(child_bs);
+        goto out;
+    }
     s->children = g_renew(BdrvChild *, s->children, s->num_children + 1);
     s->children[s->num_children++] = child;
 
+out:
     bdrv_drained_end(bs);
 }
 
@@ -1126,6 +1133,8 @@ static BlockDriver bdrv_quorum = {
     .bdrv_add_child                     = quorum_add_child,
     .bdrv_del_child                     = quorum_del_child,
 
+    .bdrv_child_perm                    = bdrv_filter_default_perms,
+
     .is_filter                          = true,
     .bdrv_recurse_is_first_non_filter   = quorum_recurse_is_first_non_filter,
 };
diff --git a/block/raw-format.c b/block/raw-format.c
index ce34d1b1cd..86fbc657eb 100644
--- a/block/raw-format.c
+++ b/block/raw-format.c
@@ -467,6 +467,7 @@ BlockDriver bdrv_raw = {
     .bdrv_reopen_abort    = &raw_reopen_abort,
     .bdrv_open            = &raw_open,
     .bdrv_close           = &raw_close,
+    .bdrv_child_perm      = bdrv_filter_default_perms,
     .bdrv_create          = &raw_create,
     .bdrv_co_preadv       = &raw_co_preadv,
     .bdrv_co_pwritev      = &raw_co_pwritev,
diff --git a/block/rbd.c b/block/rbd.c
index 22e8e69cbd..ee13f3d9d3 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -18,6 +18,7 @@
 #include "block/block_int.h"
 #include "crypto/secret.h"
 #include "qemu/cutils.h"
+#include "qapi/qmp/qstring.h"
 
 #include <rbd/librbd.h>
 
@@ -102,10 +103,10 @@ typedef struct BDRVRBDState {
     char *snap;
 } BDRVRBDState;
 
-static int qemu_rbd_next_tok(char *dst, int dst_len,
-                             char *src, char delim,
-                             const char *name,
-                             char **p, Error **errp)
+static char *qemu_rbd_next_tok(int max_len,
+                               char *src, char delim,
+                               const char *name,
+                               char **p, Error **errp)
 {
     int l;
     char *end;
@@ -127,17 +128,15 @@ static int qemu_rbd_next_tok(char *dst, int dst_len,
         }
     }
     l = strlen(src);
-    if (l >= dst_len) {
+    if (l >= max_len) {
         error_setg(errp, "%s too long", name);
-        return -EINVAL;
+        return NULL;
     } else if (l == 0) {
         error_setg(errp, "%s too short", name);
-        return -EINVAL;
+        return NULL;
     }
 
-    pstrcpy(dst, dst_len, src);
-
-    return 0;
+    return src;
 }
 
 static void qemu_rbd_unescape(char *src)
@@ -153,87 +152,134 @@ static void qemu_rbd_unescape(char *src)
     *p = '\0';
 }
 
-static int qemu_rbd_parsename(const char *filename,
-                              char *pool, int pool_len,
-                              char *snap, int snap_len,
-                              char *name, int name_len,
-                              char *conf, int conf_len,
-                              Error **errp)
+static void qemu_rbd_parse_filename(const char *filename, QDict *options,
+                                    Error **errp)
 {
     const char *start;
-    char *p, *buf;
-    int ret;
+    char *p, *buf, *keypairs;
+    char *found_str;
+    size_t max_keypair_size;
+    Error *local_err = NULL;
 
     if (!strstart(filename, "rbd:", &start)) {
         error_setg(errp, "File name must start with 'rbd:'");
-        return -EINVAL;
+        return;
     }
 
+    max_keypair_size = strlen(start) + 1;
     buf = g_strdup(start);
+    keypairs = g_malloc0(max_keypair_size);
     p = buf;
-    *snap = '\0';
-    *conf = '\0';
 
-    ret = qemu_rbd_next_tok(pool, pool_len, p,
-                            '/', "pool name", &p, errp);
-    if (ret < 0 || !p) {
-        ret = -EINVAL;
+    found_str = qemu_rbd_next_tok(RBD_MAX_POOL_NAME_SIZE, p,
+                                  '/', "pool name", &p, &local_err);
+    if (local_err) {
+        goto done;
+    }
+    if (!p) {
+        error_setg(errp, "Pool name is required");
         goto done;
     }
-    qemu_rbd_unescape(pool);
+    qemu_rbd_unescape(found_str);
+    qdict_put(options, "pool", qstring_from_str(found_str));
 
     if (strchr(p, '@')) {
-        ret = qemu_rbd_next_tok(name, name_len, p,
-                                '@', "object name", &p, errp);
-        if (ret < 0) {
+        found_str = qemu_rbd_next_tok(RBD_MAX_IMAGE_NAME_SIZE, p,
+                                      '@', "object name", &p, &local_err);
+        if (local_err) {
             goto done;
         }
-        ret = qemu_rbd_next_tok(snap, snap_len, p,
-                                ':', "snap name", &p, errp);
-        qemu_rbd_unescape(snap);
+        qemu_rbd_unescape(found_str);
+        qdict_put(options, "image", qstring_from_str(found_str));
+
+        found_str = qemu_rbd_next_tok(RBD_MAX_SNAP_NAME_SIZE, p,
+                                      ':', "snap name", &p, &local_err);
+        if (local_err) {
+            goto done;
+        }
+        qemu_rbd_unescape(found_str);
+        qdict_put(options, "snapshot", qstring_from_str(found_str));
     } else {
-        ret = qemu_rbd_next_tok(name, name_len, p,
-                                ':', "object name", &p, errp);
+        found_str = qemu_rbd_next_tok(RBD_MAX_IMAGE_NAME_SIZE, p,
+                                      ':', "object name", &p, &local_err);
+        if (local_err) {
+            goto done;
+        }
+        qemu_rbd_unescape(found_str);
+        qdict_put(options, "image", qstring_from_str(found_str));
     }
-    qemu_rbd_unescape(name);
-    if (ret < 0 || !p) {
+    if (!p) {
         goto done;
     }
 
-    ret = qemu_rbd_next_tok(conf, conf_len, p,
-                            '\0', "configuration", &p, errp);
-
-done:
-    g_free(buf);
-    return ret;
-}
-
-static char *qemu_rbd_parse_clientname(const char *conf, char *clientname)
-{
-    const char *p = conf;
+    found_str = qemu_rbd_next_tok(RBD_MAX_CONF_NAME_SIZE, p,
+                                  '\0', "configuration", &p, &local_err);
+    if (local_err) {
+        goto done;
+    }
 
-    while (*p) {
-        int len;
-        const char *end = strchr(p, ':');
+    p = found_str;
 
-        if (end) {
-            len = end - p;
-        } else {
-            len = strlen(p);
+    /* The following are essentially all key/value pairs, and we treat
+     * 'id' and 'conf' a bit special.  Key/value pairs may be in any order. */
+    while (p) {
+        char *name, *value;
+        name = qemu_rbd_next_tok(RBD_MAX_CONF_NAME_SIZE, p,
+                                 '=', "conf option name", &p, &local_err);
+        if (local_err) {
+            break;
         }
 
-        if (strncmp(p, "id=", 3) == 0) {
-            len -= 3;
-            strncpy(clientname, p + 3, len);
-            clientname[len] = '\0';
-            return clientname;
+        if (!p) {
+            error_setg(errp, "conf option %s has no value", name);
+            break;
         }
-        if (end == NULL) {
+
+        qemu_rbd_unescape(name);
+
+        value = qemu_rbd_next_tok(RBD_MAX_CONF_VAL_SIZE, p,
+                                  ':', "conf option value", &p, &local_err);
+        if (local_err) {
             break;
         }
-        p = end + 1;
+        qemu_rbd_unescape(value);
+
+        if (!strcmp(name, "conf")) {
+            qdict_put(options, "conf", qstring_from_str(value));
+        } else if (!strcmp(name, "id")) {
+            qdict_put(options, "user" , qstring_from_str(value));
+        } else {
+            /* FIXME: This is pretty ugly, and not the right way to do this.
+             *        These should be contained in a structure, and then
+             *        passed explicitly as individual key/value pairs to
+             *        rados.  Consider this legacy code that needs to be
+             *        updated. */
+            char *tmp = g_malloc0(max_keypair_size);
+            /* only use a delimiter if it is not the first keypair found */
+            /* These are sets of unknown key/value pairs we'll pass along
+             * to ceph */
+            if (keypairs[0]) {
+                snprintf(tmp, max_keypair_size, ":%s=%s", name, value);
+                pstrcat(keypairs, max_keypair_size, tmp);
+            } else {
+                snprintf(keypairs, max_keypair_size, "%s=%s", name, value);
+            }
+            g_free(tmp);
+        }
     }
-    return NULL;
+
+    if (keypairs[0]) {
+        qdict_put(options, "keyvalue-pairs", qstring_from_str(keypairs));
+    }
+
+
+done:
+    if (local_err) {
+        error_propagate(errp, local_err);
+    }
+    g_free(buf);
+    g_free(keypairs);
+    return;
 }
 
 
@@ -256,26 +302,24 @@ static int qemu_rbd_set_auth(rados_t cluster, const char *secretid,
     return 0;
 }
 
-
-static int qemu_rbd_set_conf(rados_t cluster, const char *conf,
-                             bool only_read_conf_file,
-                             Error **errp)
+static int qemu_rbd_set_keypairs(rados_t cluster, const char *keypairs,
+                                 Error **errp)
 {
     char *p, *buf;
-    char name[RBD_MAX_CONF_NAME_SIZE];
-    char value[RBD_MAX_CONF_VAL_SIZE];
+    char *name;
+    char *value;
+    Error *local_err = NULL;
     int ret = 0;
 
-    buf = g_strdup(conf);
+    buf = g_strdup(keypairs);
     p = buf;
 
     while (p) {
-        ret = qemu_rbd_next_tok(name, sizeof(name), p,
-                                '=', "conf option name", &p, errp);
-        if (ret < 0) {
+        name = qemu_rbd_next_tok(RBD_MAX_CONF_NAME_SIZE, p,
+                                 '=', "conf option name", &p, &local_err);
+        if (local_err) {
             break;
         }
-        qemu_rbd_unescape(name);
 
         if (!p) {
             error_setg(errp, "conf option %s has no value", name);
@@ -283,36 +327,24 @@ static int qemu_rbd_set_conf(rados_t cluster, const char *conf,
             break;
         }
 
-        ret = qemu_rbd_next_tok(value, sizeof(value), p,
-                                ':', "conf option value", &p, errp);
-        if (ret < 0) {
+        value = qemu_rbd_next_tok(RBD_MAX_CONF_VAL_SIZE, p,
+                                  ':', "conf option value", &p, &local_err);
+        if (local_err) {
             break;
         }
-        qemu_rbd_unescape(value);
 
-        if (strcmp(name, "conf") == 0) {
-            /* read the conf file alone, so it doesn't override more
-               specific settings for a particular device */
-            if (only_read_conf_file) {
-                ret = rados_conf_read_file(cluster, value);
-                if (ret < 0) {
-                    error_setg_errno(errp, -ret, "error reading conf file %s",
-                                     value);
-                    break;
-                }
-            }
-        } else if (strcmp(name, "id") == 0) {
-            /* ignore, this is parsed by qemu_rbd_parse_clientname() */
-        } else if (!only_read_conf_file) {
-            ret = rados_conf_set(cluster, name, value);
-            if (ret < 0) {
-                error_setg_errno(errp, -ret, "invalid conf option %s", name);
-                ret = -EINVAL;
-                break;
-            }
+        ret = rados_conf_set(cluster, name, value);
+        if (ret < 0) {
+            error_setg_errno(errp, -ret, "invalid conf option %s", name);
+            ret = -EINVAL;
+            break;
         }
     }
 
+    if (local_err) {
+        error_propagate(errp, local_err);
+        ret = -EINVAL;
+    }
     g_free(buf);
     return ret;
 }
@@ -328,33 +360,84 @@ static void qemu_rbd_memset(RADOSCB *rcb, int64_t offs)
     }
 }
 
+static QemuOptsList runtime_opts = {
+    .name = "rbd",
+    .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
+    .desc = {
+        {
+            .name = "filename",
+            .type = QEMU_OPT_STRING,
+            .help = "Specification of the rbd image",
+        },
+        {
+            .name = "password-secret",
+            .type = QEMU_OPT_STRING,
+            .help = "ID of secret providing the password",
+        },
+        {
+            .name = "conf",
+            .type = QEMU_OPT_STRING,
+            .help = "Rados config file location",
+        },
+        {
+            .name = "pool",
+            .type = QEMU_OPT_STRING,
+            .help = "Rados pool name",
+        },
+        {
+            .name = "image",
+            .type = QEMU_OPT_STRING,
+            .help = "Image name in the pool",
+        },
+        {
+            .name = "snapshot",
+            .type = QEMU_OPT_STRING,
+            .help = "Ceph snapshot name",
+        },
+        {
+            /* maps to 'id' in rados_create() */
+            .name = "user",
+            .type = QEMU_OPT_STRING,
+            .help = "Rados id name",
+        },
+        {
+            .name = "keyvalue-pairs",
+            .type = QEMU_OPT_STRING,
+            .help = "Legacy rados key/value option parameters",
+        },
+        {
+            .name = "host",
+            .type = QEMU_OPT_STRING,
+        },
+        {
+            .name = "port",
+            .type = QEMU_OPT_STRING,
+        },
+        {
+            .name = "auth",
+            .type = QEMU_OPT_STRING,
+            .help = "Supported authentication method, either cephx or none",
+        },
+        { /* end of list */ }
+    },
+};
+
 static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp)
 {
     Error *local_err = NULL;
     int64_t bytes = 0;
     int64_t objsize;
     int obj_order = 0;
-    char pool[RBD_MAX_POOL_NAME_SIZE];
-    char name[RBD_MAX_IMAGE_NAME_SIZE];
-    char snap_buf[RBD_MAX_SNAP_NAME_SIZE];
-    char conf[RBD_MAX_CONF_SIZE];
-    char clientname_buf[RBD_MAX_CONF_SIZE];
-    char *clientname;
+    const char *pool, *name, *conf, *clientname, *keypairs;
     const char *secretid;
     rados_t cluster;
     rados_ioctx_t io_ctx;
-    int ret;
+    QDict *options = NULL;
+    QemuOpts *rbd_opts = NULL;
+    int ret = 0;
 
     secretid = qemu_opt_get(opts, "password-secret");
 
-    if (qemu_rbd_parsename(filename, pool, sizeof(pool),
-                           snap_buf, sizeof(snap_buf),
-                           name, sizeof(name),
-                           conf, sizeof(conf), &local_err) < 0) {
-        error_propagate(errp, local_err);
-        return -EINVAL;
-    }
-
     /* Read out options */
     bytes = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
                      BDRV_SECTOR_SIZE);
@@ -362,35 +445,55 @@ static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp)
     if (objsize) {
         if ((objsize - 1) & objsize) {    /* not a power of 2? */
             error_setg(errp, "obj size needs to be power of 2");
-            return -EINVAL;
+            ret = -EINVAL;
+            goto exit;
         }
         if (objsize < 4096) {
             error_setg(errp, "obj size too small");
-            return -EINVAL;
+            ret = -EINVAL;
+            goto exit;
         }
         obj_order = ctz32(objsize);
     }
 
-    clientname = qemu_rbd_parse_clientname(conf, clientname_buf);
+    options = qdict_new();
+    qemu_rbd_parse_filename(filename, options, &local_err);
+    if (local_err) {
+        ret = -EINVAL;
+        error_propagate(errp, local_err);
+        goto exit;
+    }
+
+    rbd_opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
+    qemu_opts_absorb_qdict(rbd_opts, options, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        ret = -EINVAL;
+        goto exit;
+    }
+
+    pool       = qemu_opt_get(rbd_opts, "pool");
+    conf       = qemu_opt_get(rbd_opts, "conf");
+    clientname = qemu_opt_get(rbd_opts, "user");
+    name       = qemu_opt_get(rbd_opts, "image");
+    keypairs   = qemu_opt_get(rbd_opts, "keyvalue-pairs");
+
     ret = rados_create(&cluster, clientname);
     if (ret < 0) {
         error_setg_errno(errp, -ret, "error initializing");
-        return ret;
+        goto exit;
     }
 
-    if (strstr(conf, "conf=") == NULL) {
-        /* try default location, but ignore failure */
-        rados_conf_read_file(cluster, NULL);
-    } else if (conf[0] != '\0' &&
-               qemu_rbd_set_conf(cluster, conf, true, &local_err) < 0) {
-        error_propagate(errp, local_err);
+    /* try default location when conf=NULL, but ignore failure */
+    ret = rados_conf_read_file(cluster, conf);
+    if (conf && ret < 0) {
+        error_setg_errno(errp, -ret, "error reading conf file %s", conf);
         ret = -EIO;
         goto shutdown;
     }
 
-    if (conf[0] != '\0' &&
-        qemu_rbd_set_conf(cluster, conf, false, &local_err) < 0) {
-        error_propagate(errp, local_err);
+    ret = qemu_rbd_set_keypairs(cluster, keypairs, errp);
+    if (ret < 0) {
         ret = -EIO;
         goto shutdown;
     }
@@ -421,6 +524,10 @@ static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp)
 
 shutdown:
     rados_shutdown(cluster);
+
+exit:
+    QDECREF(options);
+    qemu_opts_del(rbd_opts);
     return ret;
 }
 
@@ -471,38 +578,104 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb)
     qemu_aio_unref(acb);
 }
 
-/* TODO Convert to fine grained options */
-static QemuOptsList runtime_opts = {
-    .name = "rbd",
-    .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
-    .desc = {
-        {
-            .name = "filename",
-            .type = QEMU_OPT_STRING,
-            .help = "Specification of the rbd image",
-        },
-        {
-            .name = "password-secret",
-            .type = QEMU_OPT_STRING,
-            .help = "ID of secret providing the password",
-        },
-        { /* end of list */ }
-    },
-};
+#define RBD_MON_HOST          0
+#define RBD_AUTH_SUPPORTED    1
+
+static char *qemu_rbd_array_opts(QDict *options, const char *prefix, int type,
+                                 Error **errp)
+{
+    int num_entries;
+    QemuOpts *opts = NULL;
+    QDict *sub_options;
+    const char *host;
+    const char *port;
+    char *str;
+    char *rados_str = NULL;
+    Error *local_err = NULL;
+    int i;
+
+    assert(type == RBD_MON_HOST || type == RBD_AUTH_SUPPORTED);
+
+    num_entries = qdict_array_entries(options, prefix);
+
+    if (num_entries < 0) {
+        error_setg(errp, "Parse error on RBD QDict array");
+        return NULL;
+    }
+
+    for (i = 0; i < num_entries; i++) {
+        char *strbuf = NULL;
+        const char *value;
+        char *rados_str_tmp;
+
+        str = g_strdup_printf("%s%d.", prefix, i);
+        qdict_extract_subqdict(options, &sub_options, str);
+        g_free(str);
+
+        opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
+        qemu_opts_absorb_qdict(opts, sub_options, &local_err);
+        QDECREF(sub_options);
+        if (local_err) {
+            error_propagate(errp, local_err);
+            g_free(rados_str);
+            rados_str = NULL;
+            goto exit;
+        }
+
+        if (type == RBD_MON_HOST) {
+            host = qemu_opt_get(opts, "host");
+            port = qemu_opt_get(opts, "port");
+
+            value = host;
+            if (port) {
+                /* check for ipv6 */
+                if (strchr(host, ':')) {
+                    strbuf = g_strdup_printf("[%s]:%s", host, port);
+                } else {
+                    strbuf = g_strdup_printf("%s:%s", host, port);
+                }
+                value = strbuf;
+            } else if (strchr(host, ':')) {
+                strbuf = g_strdup_printf("[%s]", host);
+                value = strbuf;
+            }
+        } else {
+            value = qemu_opt_get(opts, "auth");
+        }
+
+
+        /* each iteration in the for loop will build upon the string, and if
+         * rados_str is NULL then it is our first pass */
+        if (rados_str) {
+            /* separate options with ';', as that  is what rados_conf_set()
+             * requires */
+            rados_str_tmp = rados_str;
+            rados_str = g_strdup_printf("%s;%s", rados_str_tmp, value);
+            g_free(rados_str_tmp);
+        } else {
+            rados_str = g_strdup(value);
+        }
+
+        g_free(strbuf);
+        qemu_opts_del(opts);
+        opts = NULL;
+    }
+
+exit:
+    qemu_opts_del(opts);
+    return rados_str;
+}
 
 static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
                          Error **errp)
 {
     BDRVRBDState *s = bs->opaque;
-    char pool[RBD_MAX_POOL_NAME_SIZE];
-    char snap_buf[RBD_MAX_SNAP_NAME_SIZE];
-    char conf[RBD_MAX_CONF_SIZE];
-    char clientname_buf[RBD_MAX_CONF_SIZE];
-    char *clientname;
+    const char *pool, *snap, *conf, *clientname, *name, *keypairs;
     const char *secretid;
     QemuOpts *opts;
     Error *local_err = NULL;
-    const char *filename;
+    char *mon_host = NULL;
+    char *auth_supported = NULL;
     int r;
 
     opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
@@ -513,41 +686,63 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
         return -EINVAL;
     }
 
-    filename = qemu_opt_get(opts, "filename");
-    secretid = qemu_opt_get(opts, "password-secret");
+    auth_supported = qemu_rbd_array_opts(options, "auth-supported.",
+                                         RBD_AUTH_SUPPORTED, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        r = -EINVAL;
+        goto failed_opts;
+    }
 
-    if (qemu_rbd_parsename(filename, pool, sizeof(pool),
-                           snap_buf, sizeof(snap_buf),
-                           s->name, sizeof(s->name),
-                           conf, sizeof(conf), errp) < 0) {
+    mon_host = qemu_rbd_array_opts(options, "server.",
+                                   RBD_MON_HOST, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
         r = -EINVAL;
         goto failed_opts;
     }
 
-    clientname = qemu_rbd_parse_clientname(conf, clientname_buf);
+    secretid = qemu_opt_get(opts, "password-secret");
+
+    pool           = qemu_opt_get(opts, "pool");
+    conf           = qemu_opt_get(opts, "conf");
+    snap           = qemu_opt_get(opts, "snapshot");
+    clientname     = qemu_opt_get(opts, "user");
+    name           = qemu_opt_get(opts, "image");
+    keypairs       = qemu_opt_get(opts, "keyvalue-pairs");
+
     r = rados_create(&s->cluster, clientname);
     if (r < 0) {
         error_setg_errno(errp, -r, "error initializing");
         goto failed_opts;
     }
 
-    s->snap = NULL;
-    if (snap_buf[0] != '\0') {
-        s->snap = g_strdup(snap_buf);
+    s->snap = g_strdup(snap);
+    if (name) {
+        pstrcpy(s->name, RBD_MAX_IMAGE_NAME_SIZE, name);
+    }
+
+    /* try default location when conf=NULL, but ignore failure */
+    r = rados_conf_read_file(s->cluster, conf);
+    if (conf && r < 0) {
+        error_setg_errno(errp, -r, "error reading conf file %s", conf);
+        goto failed_shutdown;
+    }
+
+    r = qemu_rbd_set_keypairs(s->cluster, keypairs, errp);
+    if (r < 0) {
+        goto failed_shutdown;
     }
 
-    if (strstr(conf, "conf=") == NULL) {
-        /* try default location, but ignore failure */
-        rados_conf_read_file(s->cluster, NULL);
-    } else if (conf[0] != '\0') {
-        r = qemu_rbd_set_conf(s->cluster, conf, true, errp);
+    if (mon_host) {
+        r = rados_conf_set(s->cluster, "mon_host", mon_host);
         if (r < 0) {
             goto failed_shutdown;
         }
     }
 
-    if (conf[0] != '\0') {
-        r = qemu_rbd_set_conf(s->cluster, conf, false, errp);
+    if (auth_supported) {
+        r = rados_conf_set(s->cluster, "auth_supported", auth_supported);
         if (r < 0) {
             goto failed_shutdown;
         }
@@ -601,6 +796,8 @@ failed_shutdown:
     g_free(s->snap);
 failed_opts:
     qemu_opts_del(opts);
+    g_free(mon_host);
+    g_free(auth_supported);
     return r;
 }
 
@@ -1004,18 +1201,18 @@ static QemuOptsList qemu_rbd_create_opts = {
 };
 
 static BlockDriver bdrv_rbd = {
-    .format_name        = "rbd",
-    .instance_size      = sizeof(BDRVRBDState),
-    .bdrv_needs_filename = true,
-    .bdrv_file_open     = qemu_rbd_open,
-    .bdrv_close         = qemu_rbd_close,
-    .bdrv_create        = qemu_rbd_create,
-    .bdrv_has_zero_init = bdrv_has_zero_init_1,
-    .bdrv_get_info      = qemu_rbd_getinfo,
-    .create_opts        = &qemu_rbd_create_opts,
-    .bdrv_getlength     = qemu_rbd_getlength,
-    .bdrv_truncate      = qemu_rbd_truncate,
-    .protocol_name      = "rbd",
+    .format_name            = "rbd",
+    .instance_size          = sizeof(BDRVRBDState),
+    .bdrv_parse_filename    = qemu_rbd_parse_filename,
+    .bdrv_file_open         = qemu_rbd_open,
+    .bdrv_close             = qemu_rbd_close,
+    .bdrv_create            = qemu_rbd_create,
+    .bdrv_has_zero_init     = bdrv_has_zero_init_1,
+    .bdrv_get_info          = qemu_rbd_getinfo,
+    .create_opts            = &qemu_rbd_create_opts,
+    .bdrv_getlength         = qemu_rbd_getlength,
+    .bdrv_truncate          = qemu_rbd_truncate,
+    .protocol_name          = "rbd",
 
     .bdrv_aio_readv         = qemu_rbd_aio_readv,
     .bdrv_aio_writev        = qemu_rbd_aio_writev,
diff --git a/block/replication.c b/block/replication.c
index eff85c77ba..22f170fd33 100644
--- a/block/replication.c
+++ b/block/replication.c
@@ -644,7 +644,7 @@ static void replication_stop(ReplicationState *rs, bool failover, Error **errp)
         s->replication_state = BLOCK_REPLICATION_FAILOVER;
         commit_active_start(NULL, s->active_disk->bs, s->secondary_disk->bs,
                             BLOCK_JOB_INTERNAL, 0, BLOCKDEV_ON_ERROR_REPORT,
-                            replication_done, bs, errp, true);
+                            NULL, replication_done, bs, errp, true);
         break;
     default:
         aio_context_release(aio_context);
@@ -660,6 +660,7 @@ BlockDriver bdrv_replication = {
 
     .bdrv_open                  = replication_open,
     .bdrv_close                 = replication_close,
+    .bdrv_child_perm            = bdrv_filter_default_perms,
 
     .bdrv_getlength             = replication_getlength,
     .bdrv_co_readv              = replication_co_readv,
diff --git a/block/sheepdog.c b/block/sheepdog.c
index 860ba61502..743471043e 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -1609,7 +1609,7 @@ static int sd_prealloc(const char *filename, Error **errp)
     int ret;
 
     blk = blk_new_open(filename, NULL, NULL,
-                       BDRV_O_RDWR | BDRV_O_PROTOCOL, errp);
+                       BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp);
     if (blk == NULL) {
         ret = -EIO;
         goto out_with_err_set;
diff --git a/block/stream.c b/block/stream.c
index 1523ba7dfb..0113710845 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -68,6 +68,7 @@ static void stream_complete(BlockJob *job, void *opaque)
     StreamCompleteData *data = opaque;
     BlockDriverState *bs = blk_bs(job->blk);
     BlockDriverState *base = s->base;
+    Error *local_err = NULL;
 
     if (!block_job_is_cancelled(&s->common) && data->reached_end &&
         data->ret == 0) {
@@ -79,11 +80,19 @@ static void stream_complete(BlockJob *job, void *opaque)
             }
         }
         data->ret = bdrv_change_backing_file(bs, base_id, base_fmt);
-        bdrv_set_backing_hd(bs, base);
+        bdrv_set_backing_hd(bs, base, &local_err);
+        if (local_err) {
+            error_report_err(local_err);
+            data->ret = -EPERM;
+            goto out;
+        }
     }
 
+out:
     /* Reopen the image back in read-only mode if necessary */
     if (s->bs_flags != bdrv_get_flags(bs)) {
+        /* Give up write permissions before making it read-only */
+        blk_set_perm(job->blk, 0, BLK_PERM_ALL, &error_abort);
         bdrv_reopen(bs, s->bs_flags, NULL);
     }
 
@@ -229,25 +238,35 @@ void stream_start(const char *job_id, BlockDriverState *bs,
     BlockDriverState *iter;
     int orig_bs_flags;
 
-    s = block_job_create(job_id, &stream_job_driver, bs, speed,
-                         BLOCK_JOB_DEFAULT, NULL, NULL, errp);
-    if (!s) {
-        return;
-    }
-
     /* Make sure that the image is opened in read-write mode */
     orig_bs_flags = bdrv_get_flags(bs);
     if (!(orig_bs_flags & BDRV_O_RDWR)) {
         if (bdrv_reopen(bs, orig_bs_flags | BDRV_O_RDWR, errp) != 0) {
-            block_job_unref(&s->common);
             return;
         }
     }
 
-    /* Block all intermediate nodes between bs and base, because they
-     * will disappear from the chain after this operation */
+    /* Prevent concurrent jobs trying to modify the graph structure here, we
+     * already have our own plans. Also don't allow resize as the image size is
+     * queried only at the job start and then cached. */
+    s = block_job_create(job_id, &stream_job_driver, bs,
+                         BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED |
+                         BLK_PERM_GRAPH_MOD,
+                         BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED |
+                         BLK_PERM_WRITE,
+                         speed, BLOCK_JOB_DEFAULT, NULL, NULL, errp);
+    if (!s) {
+        goto fail;
+    }
+
+    /* Block all intermediate nodes between bs and base, because they will
+     * disappear from the chain after this operation. The streaming job reads
+     * every block only once, assuming that it doesn't change, so block writes
+     * and resizes. */
     for (iter = backing_bs(bs); iter && iter != base; iter = backing_bs(iter)) {
-        block_job_add_bdrv(&s->common, iter);
+        block_job_add_bdrv(&s->common, "intermediate node", iter, 0,
+                           BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED,
+                           &error_abort);
     }
 
     s->base = base;
@@ -257,4 +276,10 @@ void stream_start(const char *job_id, BlockDriverState *bs,
     s->on_error = on_error;
     trace_stream_start(bs, base, s);
     block_job_start(&s->common);
+    return;
+
+fail:
+    if (orig_bs_flags != bdrv_get_flags(bs)) {
+        bdrv_reopen(bs, s->bs_flags, NULL);
+    }
 }
diff --git a/block/vdi.c b/block/vdi.c
index 18b4773aac..9b4f70e977 100644
--- a/block/vdi.c
+++ b/block/vdi.c
@@ -763,7 +763,8 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)
     }
 
     blk = blk_new_open(filename, NULL, NULL,
-                       BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
+                       BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
+                       &local_err);
     if (blk == NULL) {
         error_propagate(errp, local_err);
         ret = -EIO;
@@ -891,6 +892,7 @@ static BlockDriver bdrv_vdi = {
     .bdrv_open = vdi_open,
     .bdrv_close = vdi_close,
     .bdrv_reopen_prepare = vdi_reopen_prepare,
+    .bdrv_child_perm          = bdrv_format_default_perms,
     .bdrv_create = vdi_create,
     .bdrv_has_zero_init = bdrv_has_zero_init_1,
     .bdrv_co_get_block_status = vdi_co_get_block_status,
diff --git a/block/vhdx.c b/block/vhdx.c
index 9918ee98ff..052a753159 100644
--- a/block/vhdx.c
+++ b/block/vhdx.c
@@ -1859,7 +1859,8 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp)
     }
 
     blk = blk_new_open(filename, NULL, NULL,
-                       BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
+                       BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
+                       &local_err);
     if (blk == NULL) {
         error_propagate(errp, local_err);
         ret = -EIO;
@@ -1983,6 +1984,7 @@ static BlockDriver bdrv_vhdx = {
     .bdrv_open              = vhdx_open,
     .bdrv_close             = vhdx_close,
     .bdrv_reopen_prepare    = vhdx_reopen_prepare,
+    .bdrv_child_perm        = bdrv_format_default_perms,
     .bdrv_co_readv          = vhdx_co_readv,
     .bdrv_co_writev         = vhdx_co_writev,
     .bdrv_create            = vhdx_create,
diff --git a/block/vmdk.c b/block/vmdk.c
index 9d68ec5a4e..a9bd22bf93 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -1703,7 +1703,8 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
     }
 
     blk = blk_new_open(filename, NULL, NULL,
-                       BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
+                       BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
+                       &local_err);
     if (blk == NULL) {
         error_propagate(errp, local_err);
         ret = -EIO;
@@ -2071,7 +2072,8 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp)
     }
 
     new_blk = blk_new_open(filename, NULL, NULL,
-                           BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
+                           BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
+                           &local_err);
     if (new_blk == NULL) {
         error_propagate(errp, local_err);
         ret = -EIO;
@@ -2359,6 +2361,7 @@ static BlockDriver bdrv_vmdk = {
     .bdrv_open                    = vmdk_open,
     .bdrv_check                   = vmdk_check,
     .bdrv_reopen_prepare          = vmdk_reopen_prepare,
+    .bdrv_child_perm              = bdrv_format_default_perms,
     .bdrv_co_preadv               = vmdk_co_preadv,
     .bdrv_co_pwritev              = vmdk_co_pwritev,
     .bdrv_co_pwritev_compressed   = vmdk_co_pwritev_compressed,
diff --git a/block/vpc.c b/block/vpc.c
index d0df2a1c54..f591d4be38 100644
--- a/block/vpc.c
+++ b/block/vpc.c
@@ -915,7 +915,8 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp)
     }
 
     blk = blk_new_open(filename, NULL, NULL,
-                       BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
+                       BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
+                       &local_err);
     if (blk == NULL) {
         error_propagate(errp, local_err);
         ret = -EIO;
@@ -1067,6 +1068,7 @@ static BlockDriver bdrv_vpc = {
     .bdrv_open              = vpc_open,
     .bdrv_close             = vpc_close,
     .bdrv_reopen_prepare    = vpc_reopen_prepare,
+    .bdrv_child_perm        = bdrv_format_default_perms,
     .bdrv_create            = vpc_create,
 
     .bdrv_co_preadv             = vpc_co_preadv,
diff --git a/block/vvfat.c b/block/vvfat.c
index 7f230be006..aa61c329e7 100644
--- a/block/vvfat.c
+++ b/block/vvfat.c
@@ -3041,7 +3041,7 @@ static int enable_write_target(BlockDriverState *bs, Error **errp)
                                    &error_abort);
     *(void**) backing->opaque = s;
 
-    bdrv_set_backing_hd(s->bs, backing);
+    bdrv_set_backing_hd(s->bs, backing, &error_abort);
     bdrv_unref(backing);
 
     return 0;
@@ -3052,6 +3052,27 @@ err:
     return ret;
 }
 
+static void vvfat_child_perm(BlockDriverState *bs, BdrvChild *c,
+                             const BdrvChildRole *role,
+                             uint64_t perm, uint64_t shared,
+                             uint64_t *nperm, uint64_t *nshared)
+{
+    BDRVVVFATState *s = bs->opaque;
+
+    assert(c == s->qcow || role == &child_backing);
+
+    if (c == s->qcow) {
+        /* This is a private node, nobody should try to attach to it */
+        *nperm = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE;
+        *nshared = BLK_PERM_WRITE_UNCHANGED;
+    } else {
+        /* The backing file is there so 'commit' can use it. vvfat doesn't
+         * access it in any way. */
+        *nperm = 0;
+        *nshared = BLK_PERM_ALL;
+    }
+}
+
 static void vvfat_close(BlockDriverState *bs)
 {
     BDRVVVFATState *s = bs->opaque;
@@ -3077,6 +3098,7 @@ static BlockDriver bdrv_vvfat = {
     .bdrv_file_open         = vvfat_open,
     .bdrv_refresh_limits    = vvfat_refresh_limits,
     .bdrv_close             = vvfat_close,
+    .bdrv_child_perm        = vvfat_child_perm,
 
     .bdrv_co_preadv         = vvfat_co_preadv,
     .bdrv_co_pwritev        = vvfat_co_pwritev,
diff --git a/blockdev.c b/blockdev.c
index 2b2f6ceef0..8eb4e84fe0 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -52,6 +52,7 @@
 #include "sysemu/arch_init.h"
 #include "qemu/cutils.h"
 #include "qemu/help_option.h"
+#include "qemu/throttle-options.h"
 
 static QTAILQ_HEAD(, BlockDriverState) monitor_bdrv_states =
     QTAILQ_HEAD_INITIALIZER(monitor_bdrv_states);
@@ -557,7 +558,7 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts,
     if ((!file || !*file) && !qdict_size(bs_opts)) {
         BlockBackendRootState *blk_rs;
 
-        blk = blk_new();
+        blk = blk_new(0, BLK_PERM_ALL);
         blk_rs = blk_get_root_state(blk);
         blk_rs->open_flags    = bdrv_flags;
         blk_rs->read_only     = read_only;
@@ -1767,6 +1768,17 @@ static void external_snapshot_prepare(BlkActionState *common,
 
     if (!state->new_bs->drv->supports_backing) {
         error_setg(errp, "The snapshot does not support backing images");
+        return;
+    }
+
+    /* This removes our old bs and adds the new bs. This is an operation that
+     * can fail, so we need to do it in .prepare; undoing it for abort is
+     * always possible. */
+    bdrv_ref(state->new_bs);
+    bdrv_append(state->new_bs, state->old_bs, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
     }
 }
 
@@ -1777,8 +1789,6 @@ static void external_snapshot_commit(BlkActionState *common)
 
     bdrv_set_aio_context(state->new_bs, state->aio_context);
 
-    /* This removes our old bs and adds the new bs */
-    bdrv_append(state->new_bs, state->old_bs);
     /* We don't need (or want) to use the transactional
      * bdrv_reopen_multiple() across all the entries at once, because we
      * don't want to abort all of them if one of them fails the reopen */
@@ -1793,7 +1803,9 @@ static void external_snapshot_abort(BlkActionState *common)
     ExternalSnapshotState *state =
                              DO_UPCAST(ExternalSnapshotState, common, common);
     if (state->new_bs) {
-        bdrv_unref(state->new_bs);
+        if (state->new_bs->backing) {
+            bdrv_replace_in_backing_chain(state->new_bs, state->old_bs);
+        }
     }
 }
 
@@ -1804,6 +1816,7 @@ static void external_snapshot_clean(BlkActionState *common)
     if (state->aio_context) {
         bdrv_drained_end(state->old_bs);
         aio_context_release(state->aio_context);
+        bdrv_unref(state->new_bs);
     }
 }
 
@@ -2310,7 +2323,7 @@ static int do_open_tray(const char *blk_name, const char *qdev_id,
     }
 
     if (!locked || force) {
-        blk_dev_change_media_cb(blk, false);
+        blk_dev_change_media_cb(blk, false, &error_abort);
     }
 
     if (locked && !force) {
@@ -2348,6 +2361,7 @@ void qmp_blockdev_close_tray(bool has_device, const char *device,
                              Error **errp)
 {
     BlockBackend *blk;
+    Error *local_err = NULL;
 
     device = has_device ? device : NULL;
     id = has_id ? id : NULL;
@@ -2371,7 +2385,11 @@ void qmp_blockdev_close_tray(bool has_device, const char *device,
         return;
     }
 
-    blk_dev_change_media_cb(blk, true);
+    blk_dev_change_media_cb(blk, true, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
 }
 
 void qmp_x_blockdev_remove_medium(bool has_device, const char *device,
@@ -2424,7 +2442,7 @@ void qmp_x_blockdev_remove_medium(bool has_device, const char *device,
          * called at all); therefore, the medium needs to be ejected here.
          * Do it after blk_remove_bs() so blk_is_inserted(blk) returns the @load
          * value passed here (i.e. false). */
-        blk_dev_change_media_cb(blk, false);
+        blk_dev_change_media_cb(blk, false, &error_abort);
     }
 
 out:
@@ -2434,7 +2452,9 @@ out:
 static void qmp_blockdev_insert_anon_medium(BlockBackend *blk,
                                             BlockDriverState *bs, Error **errp)
 {
+    Error *local_err = NULL;
     bool has_device;
+    int ret;
 
     /* For BBs without a device, we can exchange the BDS tree at will */
     has_device = blk_get_attached_dev(blk);
@@ -2454,7 +2474,10 @@ static void qmp_blockdev_insert_anon_medium(BlockBackend *blk,
         return;
     }
 
-    blk_insert_bs(blk, bs);
+    ret = blk_insert_bs(blk, bs, errp);
+    if (ret < 0) {
+        return;
+    }
 
     if (!blk_dev_has_tray(blk)) {
         /* For tray-less devices, blockdev-close-tray is a no-op (or may not be
@@ -2462,7 +2485,12 @@ static void qmp_blockdev_insert_anon_medium(BlockBackend *blk,
          * slot here.
          * Do it after blk_insert_bs() so blk_is_inserted(blk) returns the @load
          * value passed here (i.e. true). */
-        blk_dev_change_media_cb(blk, true);
+        blk_dev_change_media_cb(blk, true, &local_err);
+        if (local_err) {
+            error_propagate(errp, local_err);
+            blk_remove_bs(blk);
+            return;
+        }
     }
 }
 
@@ -2889,8 +2917,11 @@ void qmp_block_resize(bool has_device, const char *device,
         goto out;
     }
 
-    blk = blk_new();
-    blk_insert_bs(blk, bs);
+    blk = blk_new(BLK_PERM_RESIZE, BLK_PERM_ALL);
+    ret = blk_insert_bs(blk, bs, errp);
+    if (ret < 0) {
+        goto out;
+    }
 
     /* complete all in-flight operations before resizing the device */
     bdrv_drain_all();
@@ -3013,6 +3044,7 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device,
                       bool has_top, const char *top,
                       bool has_backing_file, const char *backing_file,
                       bool has_speed, int64_t speed,
+                      bool has_filter_node_name, const char *filter_node_name,
                       Error **errp)
 {
     BlockDriverState *bs;
@@ -3028,6 +3060,9 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device,
     if (!has_speed) {
         speed = 0;
     }
+    if (!has_filter_node_name) {
+        filter_node_name = NULL;
+    }
 
     /* Important Note:
      *  libvirt relies on the DeviceNotFound error class in order to probe for
@@ -3102,8 +3137,8 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device,
             goto out;
         }
         commit_active_start(has_job_id ? job_id : NULL, bs, base_bs,
-                            BLOCK_JOB_DEFAULT, speed, on_error, NULL, NULL,
-                            &local_err, false);
+                            BLOCK_JOB_DEFAULT, speed, on_error,
+                            filter_node_name, NULL, NULL, &local_err, false);
     } else {
         BlockDriverState *overlay_bs = bdrv_find_overlay(bs, top_bs);
         if (bdrv_op_is_blocked(overlay_bs, BLOCK_OP_TYPE_COMMIT_TARGET, errp)) {
@@ -3111,7 +3146,7 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device,
         }
         commit_start(has_job_id ? job_id : NULL, bs, base_bs, top_bs, speed,
                      on_error, has_backing_file ? backing_file : NULL,
-                     &local_err);
+                     filter_node_name, &local_err);
     }
     if (local_err != NULL) {
         error_propagate(errp, local_err);
@@ -3347,6 +3382,8 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
                                    bool has_on_target_error,
                                    BlockdevOnError on_target_error,
                                    bool has_unmap, bool unmap,
+                                   bool has_filter_node_name,
+                                   const char *filter_node_name,
                                    Error **errp)
 {
 
@@ -3368,6 +3405,9 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
     if (!has_unmap) {
         unmap = true;
     }
+    if (!has_filter_node_name) {
+        filter_node_name = NULL;
+    }
 
     if (granularity != 0 && (granularity < 512 || granularity > 1048576 * 64)) {
         error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "granularity",
@@ -3397,7 +3437,8 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
     mirror_start(job_id, bs, target,
                  has_replaces ? replaces : NULL,
                  speed, granularity, buf_size, sync, backing_mode,
-                 on_source_error, on_target_error, unmap, errp);
+                 on_source_error, on_target_error, unmap, filter_node_name,
+                 errp);
 }
 
 void qmp_drive_mirror(DriveMirror *arg, Error **errp)
@@ -3535,6 +3576,7 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
                            arg->has_on_source_error, arg->on_source_error,
                            arg->has_on_target_error, arg->on_target_error,
                            arg->has_unmap, arg->unmap,
+                           false, NULL,
                            &local_err);
     bdrv_unref(target_bs);
     error_propagate(errp, local_err);
@@ -3553,6 +3595,8 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id,
                          BlockdevOnError on_source_error,
                          bool has_on_target_error,
                          BlockdevOnError on_target_error,
+                         bool has_filter_node_name,
+                         const char *filter_node_name,
                          Error **errp)
 {
     BlockDriverState *bs;
@@ -3584,6 +3628,7 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id,
                            has_on_source_error, on_source_error,
                            has_on_target_error, on_target_error,
                            true, true,
+                           has_filter_node_name, filter_node_name,
                            &local_err);
     error_propagate(errp, local_err);
 
@@ -4007,83 +4052,11 @@ QemuOptsList qemu_common_drive_opts = {
             .name = BDRV_OPT_READ_ONLY,
             .type = QEMU_OPT_BOOL,
             .help = "open drive file as read-only",
-        },{
-            .name = "throttling.iops-total",
-            .type = QEMU_OPT_NUMBER,
-            .help = "limit total I/O operations per second",
-        },{
-            .name = "throttling.iops-read",
-            .type = QEMU_OPT_NUMBER,
-            .help = "limit read operations per second",
-        },{
-            .name = "throttling.iops-write",
-            .type = QEMU_OPT_NUMBER,
-            .help = "limit write operations per second",
-        },{
-            .name = "throttling.bps-total",
-            .type = QEMU_OPT_NUMBER,
-            .help = "limit total bytes per second",
-        },{
-            .name = "throttling.bps-read",
-            .type = QEMU_OPT_NUMBER,
-            .help = "limit read bytes per second",
-        },{
-            .name = "throttling.bps-write",
-            .type = QEMU_OPT_NUMBER,
-            .help = "limit write bytes per second",
-        },{
-            .name = "throttling.iops-total-max",
-            .type = QEMU_OPT_NUMBER,
-            .help = "I/O operations burst",
-        },{
-            .name = "throttling.iops-read-max",
-            .type = QEMU_OPT_NUMBER,
-            .help = "I/O operations read burst",
-        },{
-            .name = "throttling.iops-write-max",
-            .type = QEMU_OPT_NUMBER,
-            .help = "I/O operations write burst",
-        },{
-            .name = "throttling.bps-total-max",
-            .type = QEMU_OPT_NUMBER,
-            .help = "total bytes burst",
-        },{
-            .name = "throttling.bps-read-max",
-            .type = QEMU_OPT_NUMBER,
-            .help = "total bytes read burst",
-        },{
-            .name = "throttling.bps-write-max",
-            .type = QEMU_OPT_NUMBER,
-            .help = "total bytes write burst",
-        },{
-            .name = "throttling.iops-total-max-length",
-            .type = QEMU_OPT_NUMBER,
-            .help = "length of the iops-total-max burst period, in seconds",
-        },{
-            .name = "throttling.iops-read-max-length",
-            .type = QEMU_OPT_NUMBER,
-            .help = "length of the iops-read-max burst period, in seconds",
-        },{
-            .name = "throttling.iops-write-max-length",
-            .type = QEMU_OPT_NUMBER,
-            .help = "length of the iops-write-max burst period, in seconds",
-        },{
-            .name = "throttling.bps-total-max-length",
-            .type = QEMU_OPT_NUMBER,
-            .help = "length of the bps-total-max burst period, in seconds",
-        },{
-            .name = "throttling.bps-read-max-length",
-            .type = QEMU_OPT_NUMBER,
-            .help = "length of the bps-read-max burst period, in seconds",
-        },{
-            .name = "throttling.bps-write-max-length",
-            .type = QEMU_OPT_NUMBER,
-            .help = "length of the bps-write-max burst period, in seconds",
-        },{
-            .name = "throttling.iops-size",
-            .type = QEMU_OPT_NUMBER,
-            .help = "when limiting by iops max size of an I/O in bytes",
-        },{
+        },
+
+        THROTTLE_OPTS,
+
+        {
             .name = "throttling.group",
             .type = QEMU_OPT_STRING,
             .help = "name of the block throttling group",
diff --git a/blockjob.c b/blockjob.c
index abee11bb08..69126af97f 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -55,6 +55,19 @@ struct BlockJobTxn {
 
 static QLIST_HEAD(, BlockJob) block_jobs = QLIST_HEAD_INITIALIZER(block_jobs);
 
+static char *child_job_get_parent_desc(BdrvChild *c)
+{
+    BlockJob *job = c->opaque;
+    return g_strdup_printf("%s job '%s'",
+                           BlockJobType_lookup[job->driver->job_type],
+                           job->id);
+}
+
+static const BdrvChildRole child_job = {
+    .get_parent_desc    = child_job_get_parent_desc,
+    .stay_at_node       = true,
+};
+
 BlockJob *block_job_next(BlockJob *job)
 {
     if (!job) {
@@ -115,19 +128,44 @@ static void block_job_detach_aio_context(void *opaque)
     block_job_unref(job);
 }
 
-void block_job_add_bdrv(BlockJob *job, BlockDriverState *bs)
+void block_job_remove_all_bdrv(BlockJob *job)
+{
+    GSList *l;
+    for (l = job->nodes; l; l = l->next) {
+        BdrvChild *c = l->data;
+        bdrv_op_unblock_all(c->bs, job->blocker);
+        bdrv_root_unref_child(c);
+    }
+    g_slist_free(job->nodes);
+    job->nodes = NULL;
+}
+
+int block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs,
+                       uint64_t perm, uint64_t shared_perm, Error **errp)
 {
-    job->nodes = g_slist_prepend(job->nodes, bs);
+    BdrvChild *c;
+
+    c = bdrv_root_attach_child(bs, name, &child_job, perm, shared_perm,
+                               job, errp);
+    if (c == NULL) {
+        return -EPERM;
+    }
+
+    job->nodes = g_slist_prepend(job->nodes, c);
     bdrv_ref(bs);
     bdrv_op_block_all(bs, job->blocker);
+
+    return 0;
 }
 
 void *block_job_create(const char *job_id, const BlockJobDriver *driver,
-                       BlockDriverState *bs, int64_t speed, int flags,
+                       BlockDriverState *bs, uint64_t perm,
+                       uint64_t shared_perm, int64_t speed, int flags,
                        BlockCompletionFunc *cb, void *opaque, Error **errp)
 {
     BlockBackend *blk;
     BlockJob *job;
+    int ret;
 
     if (bs->job) {
         error_setg(errp, QERR_DEVICE_IN_USE, bdrv_get_device_name(bs));
@@ -159,13 +197,17 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver,
         }
     }
 
-    blk = blk_new();
-    blk_insert_bs(blk, bs);
+    blk = blk_new(perm, shared_perm);
+    ret = blk_insert_bs(blk, bs, errp);
+    if (ret < 0) {
+        blk_unref(blk);
+        return NULL;
+    }
 
     job = g_malloc0(driver->instance_size);
     error_setg(&job->blocker, "block device is in use by block job: %s",
                BlockJobType_lookup[driver->job_type]);
-    block_job_add_bdrv(job, bs);
+    block_job_add_bdrv(job, "main node", bs, 0, BLK_PERM_ALL, &error_abort);
     bdrv_op_unblock(bs, BLOCK_OP_TYPE_DATAPLANE, job->blocker);
 
     job->driver        = driver;
@@ -228,15 +270,9 @@ void block_job_ref(BlockJob *job)
 void block_job_unref(BlockJob *job)
 {
     if (--job->refcnt == 0) {
-        GSList *l;
         BlockDriverState *bs = blk_bs(job->blk);
         bs->job = NULL;
-        for (l = job->nodes; l; l = l->next) {
-            bs = l->data;
-            bdrv_op_unblock_all(bs, job->blocker);
-            bdrv_unref(bs);
-        }
-        g_slist_free(job->nodes);
+        block_job_remove_all_bdrv(job);
         blk_remove_aio_context_notifier(job->blk,
                                         block_job_attached_aio_context,
                                         block_job_detach_aio_context, job);
diff --git a/configure b/configure
index df58e91285..6c21975f02 100755
--- a/configure
+++ b/configure
@@ -5894,6 +5894,7 @@ case "$target_name" in
     TARGET_BASE_ARCH=i386
   ;;
   alpha)
+    mttcg="yes"
   ;;
   arm|armeb)
     TARGET_ARCH=arm
diff --git a/cputlb.c b/cputlb.c
index 7fa7fefa05..f5d056cc08 100644
--- a/cputlb.c
+++ b/cputlb.c
@@ -769,14 +769,13 @@ tb_page_addr_t get_page_addr_code(CPUArchState *env1, target_ulong addr)
     pd = iotlbentry->addr & ~TARGET_PAGE_MASK;
     mr = iotlb_to_region(cpu, pd, iotlbentry->attrs);
     if (memory_region_is_unassigned(mr)) {
-        CPUClass *cc = CPU_GET_CLASS(cpu);
-
-        if (cc->do_unassigned_access) {
-            cc->do_unassigned_access(cpu, addr, false, true, 0, 4);
-        } else {
-            report_bad_exec(cpu, addr);
-            exit(1);
-        }
+        cpu_unassigned_access(cpu, addr, false, true, 0, 4);
+        /* The CPU's unassigned access hook might have longjumped out
+         * with an exception. If it didn't (or there was no hook) then
+         * we can't proceed further.
+         */
+        report_bad_exec(cpu, addr);
+        exit(1);
     }
     p = (void *)((uintptr_t)addr + env1->tlb_table[mmu_idx][page_index].addend);
     return qemu_ram_addr_from_host_nofail(p);
diff --git a/default-configs/arm-softmmu.mak b/default-configs/arm-softmmu.mak
index fdf40893aa..1e3bd2b8ca 100644
--- a/default-configs/arm-softmmu.mak
+++ b/default-configs/arm-softmmu.mak
@@ -42,6 +42,8 @@ CONFIG_ARM11MPCORE=y
 CONFIG_A9MPCORE=y
 CONFIG_A15MPCORE=y
 
+CONFIG_ARM_V7M=y
+
 CONFIG_ARM_GIC=y
 CONFIG_ARM_GIC_KVM=$(CONFIG_KVM)
 CONFIG_ARM_TIMER=y
diff --git a/default-configs/i386-softmmu.mak b/default-configs/i386-softmmu.mak
index 48b07a4c91..029e95202a 100644
--- a/default-configs/i386-softmmu.mak
+++ b/default-configs/i386-softmmu.mak
@@ -59,3 +59,4 @@ CONFIG_I82801B11=y
 CONFIG_SMBIOS=y
 CONFIG_HYPERV_TESTDEV=$(CONFIG_KVM)
 CONFIG_PXB=y
+CONFIG_ACPI_VMGENID=y
diff --git a/default-configs/x86_64-softmmu.mak b/default-configs/x86_64-softmmu.mak
index fd96345f3c..d1d7432f74 100644
--- a/default-configs/x86_64-softmmu.mak
+++ b/default-configs/x86_64-softmmu.mak
@@ -59,3 +59,4 @@ CONFIG_I82801B11=y
 CONFIG_SMBIOS=y
 CONFIG_HYPERV_TESTDEV=$(CONFIG_KVM)
 CONFIG_PXB=y
+CONFIG_ACPI_VMGENID=y
diff --git a/docs/mach-virt-graphical.cfg b/docs/mach-virt-graphical.cfg
new file mode 100644
index 0000000000..0fdf6846dd
--- /dev/null
+++ b/docs/mach-virt-graphical.cfg
@@ -0,0 +1,281 @@
+# mach-virt - VirtIO guest (graphical console)
+# =========================================================
+#
+# Usage:
+#
+#   $ qemu-system-aarch64 \
+#     -nodefaults \
+#     -readconfig mach-virt-graphical.cfg \
+#     -cpu host
+#
+# You will probably need to tweak the lines marked as
+# CHANGE ME before being able to use this configuration!
+#
+# The guest will have a selection of VirtIO devices
+# tailored towards optimal performance with modern guests,
+# and will be accessed through a graphical console.
+#
+# ---------------------------------------------------------
+#
+# Using -nodefaults is required to have full control over
+# the virtual hardware: when it's specified, QEMU will
+# populate the board with only the builtin peripherals,
+# such as the PL011 UART, plus a PCI Express Root Bus; the
+# user will then have to explicitly add further devices.
+#
+# The PCI Express Root Bus shows up in the guest as:
+#
+#   00:00.0 Host bridge
+#
+# This configuration file adds a number of other useful
+# devices, more specifically:
+#
+#   00:01.0 Display controller
+#   00.1c.* PCI bridge (PCI Express Root Ports)
+#   01:00.0 SCSI storage controller
+#   02:00.0 Ethernet controller
+#   03:00.0 USB controller
+#
+# More information about these devices is available below.
+
+
+# Machine options
+# =========================================================
+#
+# We use the virt machine type and enable KVM acceleration
+# for better performance.
+#
+# Using less than 1 GiB of memory is probably not going to
+# yield good performance in the guest, and might even lead
+# to obscure boot issues in some cases.
+#
+# Unfortunately, there is no way to configure the CPU model
+# in this file, so it will have to be provided on the
+# command line, but we can configure the guest to use the
+# same GIC version as the host.
+
+[machine]
+  type = "virt"
+  accel = "kvm"
+  gic-version = "host"
+
+[memory]
+  size = "1024"
+
+
+# Firmware configuration
+# =========================================================
+#
+# There are two parts to the firmware: a read-only image
+# containing the executable code, which is shared between
+# guests, and a read/write variable store that is owned
+# by one specific guest, exclusively, and is used to
+# record information such as the UEFI boot order.
+#
+# For any new guest, its permanent, private variable store
+# should initially be copied from the template file
+# provided along with the firmware binary.
+#
+# Depending on the OS distribution you're using on the
+# host, the name of the package containing the firmware
+# binary and variable store template, as well as the paths
+# to the files themselves, will be different. For example:
+#
+# Fedora
+#   edk2-aarch64                                      (pkg)
+#   /usr/share/edk2/aarch64/QEMU_EFI-pflash.raw       (bin)
+#   /usr/share/edk2/aarch64/vars-template-pflash.raw  (var)
+#
+# RHEL
+#   AAVMF                                             (pkg)
+#   /usr/share/AAVMF/AAVMF_CODE.fd                    (bin)
+#   /usr/share/AAVMF/AAVMF_VARS.fd                    (var)
+#
+# Debian/Ubuntu
+#   qemu-efi                                          (pkg)
+#   /usr/share/AAVMF/AAVMF_CODE.fd                    (bin)
+#   /usr/share/AAVMF/AAVMF_VARS.fd                    (var)
+
+[drive "uefi-binary"]
+  file = "/usr/share/AAVMF/AAVMF_CODE.fd"       # CHANGE ME
+  format = "raw"
+  if = "pflash"
+  unit = "0"
+  readonly = "on"
+
+[drive "uefi-varstore"]
+  file = "guest_VARS.fd"                        # CHANGE ME
+  format = "raw"
+  if = "pflash"
+  unit = "1"
+
+
+# PCI bridge (PCI Express Root Ports)
+# =========================================================
+#
+# We create eight PCI Express Root Ports, and we plug them
+# all into separate functions of the same slot. Some of
+# them will be used by devices, the rest will remain
+# available for hotplug.
+
+[device "pcie.1"]
+  driver = "pcie-root-port"
+  bus = "pcie.0"
+  addr = "1c.0"
+  port = "1"
+  chassis = "1"
+  multifunction = "on"
+
+[device "pcie.2"]
+  driver = "pcie-root-port"
+  bus = "pcie.0"
+  addr = "1c.1"
+  port = "2"
+  chassis = "2"
+
+[device "pcie.3"]
+  driver = "pcie-root-port"
+  bus = "pcie.0"
+  addr = "1c.2"
+  port = "3"
+  chassis = "3"
+
+[device "pcie.4"]
+  driver = "pcie-root-port"
+  bus = "pcie.0"
+  addr = "1c.3"
+  port = "4"
+  chassis = "4"
+
+[device "pcie.5"]
+  driver = "pcie-root-port"
+  bus = "pcie.0"
+  addr = "1c.4"
+  port = "5"
+  chassis = "5"
+
+[device "pcie.6"]
+  driver = "pcie-root-port"
+  bus = "pcie.0"
+  addr = "1c.5"
+  port = "6"
+  chassis = "6"
+
+[device "pcie.7"]
+  driver = "pcie-root-port"
+  bus = "pcie.0"
+  addr = "1c.6"
+  port = "7"
+  chassis = "7"
+
+[device "pcie.8"]
+  driver = "pcie-root-port"
+  bus = "pcie.0"
+  addr = "1c.7"
+  port = "8"
+  chassis = "8"
+
+
+# SCSI storage controller (and storage)
+# =========================================================
+#
+# We use virtio-scsi here so that we can (hot)plug a large
+# number of disks without running into issues; a SCSI disk,
+# backed by a qcow2 disk image on the host's filesystem, is
+# attached to it.
+#
+# We also create an optical disk, mostly for installation
+# purposes: once the guest OS has been succesfully
+# installed, the guest will no longer boot from optical
+# media. If you don't want, or no longer want, to have an
+# optical disk in the guest you can safely comment out
+# all relevant sections below.
+
+[device "scsi"]
+  driver = "virtio-scsi-pci"
+  bus = "pcie.1"
+  addr = "00.0"
+
+[device "scsi-disk"]
+  driver = "scsi-hd"
+  bus = "scsi.0"
+  drive = "disk"
+  bootindex = "1"
+
+[drive "disk"]
+  file = "guest.qcow2"                          # CHANGE ME
+  format = "qcow2"
+  if = "none"
+
+[device "scsi-optical-disk"]
+  driver = "scsi-cd"
+  bus = "scsi.0"
+  drive = "optical-disk"
+  bootindex = "2"
+
+[drive "optical-disk"]
+  file = "install.iso"                          # CHANGE ME
+  format = "raw"
+  if = "none"
+
+
+# Ethernet controller
+# =========================================================
+#
+# We use virtio-net for improved performance over emulated
+# hardware; on the host side, we take advantage of user
+# networking so that the QEMU process doesn't require any
+# additional privileges.
+
+[netdev "hostnet"]
+  type = "user"
+
+[device "net"]
+  driver = "virtio-net-pci"
+  netdev = "hostnet"
+  bus = "pcie.2"
+  addr = "00.0"
+
+
+# USB controller (and input devices)
+# =========================================================
+#
+# We add a virtualization-friendly USB 3.0 controller and
+# a USB keyboard / USB tablet combo so that graphical
+# guests can be controlled appropriately.
+
+[device "usb"]
+  driver = "nec-usb-xhci"
+  bus = "pcie.3"
+  addr = "00.0"
+
+[device "keyboard"]
+  driver = "usb-kbd"
+  bus = "usb.0"
+
+[device "tablet"]
+  driver = "usb-tablet"
+  bus = "usb.0"
+
+
+# Display controller
+# =========================================================
+#
+# We use virtio-gpu because the legacy VGA framebuffer is
+# very troublesome on aarch64, and virtio-gpu is the only
+# video device that doesn't implement it.
+#
+# If you're running the guest on a remote, potentially
+# headless host, you will probably want to append something
+# like
+#
+#   -display vnc=127.0.0.1:0
+#
+# to the command line in order to prevent QEMU from
+# creating a graphical display window on the host and
+# enable remote access instead.
+
+[device "video"]
+  driver = "virtio-gpu"
+  bus = "pcie.0"
+  addr = "01.0"
diff --git a/docs/mach-virt-serial.cfg b/docs/mach-virt-serial.cfg
new file mode 100644
index 0000000000..aee9f1c5a1
--- /dev/null
+++ b/docs/mach-virt-serial.cfg
@@ -0,0 +1,243 @@
+# mach-virt - VirtIO guest (serial console)
+# =========================================================
+#
+# Usage:
+#
+#   $ qemu-system-aarch64 \
+#     -nodefaults \
+#     -readconfig mach-virt-serial.cfg \
+#     -display none -serial mon:stdio \
+#     -cpu host
+#
+# You will probably need to tweak the lines marked as
+# CHANGE ME before being able to use this configuration!
+#
+# The guest will have a selection of VirtIO devices
+# tailored towards optimal performance with modern guests,
+# and will be accessed through the serial console.
+#
+# ---------------------------------------------------------
+#
+# Using -nodefaults is required to have full control over
+# the virtual hardware: when it's specified, QEMU will
+# populate the board with only the builtin peripherals,
+# such as the PL011 UART, plus a PCI Express Root Bus; the
+# user will then have to explicitly add further devices.
+#
+# The PCI Express Root Bus shows up in the guest as:
+#
+#   00:00.0 Host bridge
+#
+# This configuration file adds a number of other useful
+# devices, more specifically:
+#
+#   00.1c.* PCI bridge (PCI Express Root Ports)
+#   01:00.0 SCSI storage controller
+#   02:00.0 Ethernet controller
+#
+# More information about these devices is available below.
+#
+# We use '-display none' to prevent QEMU from creating a
+# graphical display window, which would serve no use in
+# this specific configuration, and '-serial mon:stdio' to
+# multiplex the guest's serial console and the QEMU monitor
+# to the host's stdio; use 'Ctrl+A h' to learn how to
+# switch between the two and more.
+
+
+# Machine options
+# =========================================================
+#
+# We use the virt machine type and enable KVM acceleration
+# for better performance.
+#
+# Using less than 1 GiB of memory is probably not going to
+# yield good performance in the guest, and might even lead
+# to obscure boot issues in some cases.
+#
+# Unfortunately, there is no way to configure the CPU model
+# in this file, so it will have to be provided on the
+# command line, but we can configure the guest to use the
+# same GIC version as the host.
+
+[machine]
+  type = "virt"
+  accel = "kvm"
+  gic-version = "host"
+
+[memory]
+  size = "1024"
+
+
+# Firmware configuration
+# =========================================================
+#
+# There are two parts to the firmware: a read-only image
+# containing the executable code, which is shared between
+# guests, and a read/write variable store that is owned
+# by one specific guest, exclusively, and is used to
+# record information such as the UEFI boot order.
+#
+# For any new guest, its permanent, private variable store
+# should initially be copied from the template file
+# provided along with the firmware binary.
+#
+# Depending on the OS distribution you're using on the
+# host, the name of the package containing the firmware
+# binary and variable store template, as well as the paths
+# to the files themselves, will be different. For example:
+#
+# Fedora
+#   edk2-aarch64                                      (pkg)
+#   /usr/share/edk2/aarch64/QEMU_EFI-pflash.raw       (bin)
+#   /usr/share/edk2/aarch64/vars-template-pflash.raw  (var)
+#
+# RHEL
+#   AAVMF                                             (pkg)
+#   /usr/share/AAVMF/AAVMF_CODE.fd                    (bin)
+#   /usr/share/AAVMF/AAVMF_VARS.fd                    (var)
+#
+# Debian/Ubuntu
+#   qemu-efi                                          (pkg)
+#   /usr/share/AAVMF/AAVMF_CODE.fd                    (bin)
+#   /usr/share/AAVMF/AAVMF_VARS.fd                    (var)
+
+[drive "uefi-binary"]
+  file = "/usr/share/AAVMF/AAVMF_CODE.fd"       # CHANGE ME
+  format = "raw"
+  if = "pflash"
+  unit = "0"
+  readonly = "on"
+
+[drive "uefi-varstore"]
+  file = "guest_VARS.fd"                        # CHANGE ME
+  format = "raw"
+  if = "pflash"
+  unit = "1"
+
+
+# PCI bridge (PCI Express Root Ports)
+# =========================================================
+#
+# We create eight PCI Express Root Ports, and we plug them
+# all into separate functions of the same slot. Some of
+# them will be used by devices, the rest will remain
+# available for hotplug.
+
+[device "pcie.1"]
+  driver = "pcie-root-port"
+  bus = "pcie.0"
+  addr = "1c.0"
+  port = "1"
+  chassis = "1"
+  multifunction = "on"
+
+[device "pcie.2"]
+  driver = "pcie-root-port"
+  bus = "pcie.0"
+  addr = "1c.1"
+  port = "2"
+  chassis = "2"
+
+[device "pcie.3"]
+  driver = "pcie-root-port"
+  bus = "pcie.0"
+  addr = "1c.2"
+  port = "3"
+  chassis = "3"
+
+[device "pcie.4"]
+  driver = "pcie-root-port"
+  bus = "pcie.0"
+  addr = "1c.3"
+  port = "4"
+  chassis = "4"
+
+[device "pcie.5"]
+  driver = "pcie-root-port"
+  bus = "pcie.0"
+  addr = "1c.4"
+  port = "5"
+  chassis = "5"
+
+[device "pcie.6"]
+  driver = "pcie-root-port"
+  bus = "pcie.0"
+  addr = "1c.5"
+  port = "6"
+  chassis = "6"
+
+[device "pcie.7"]
+  driver = "pcie-root-port"
+  bus = "pcie.0"
+  addr = "1c.6"
+  port = "7"
+  chassis = "7"
+
+[device "pcie.8"]
+  driver = "pcie-root-port"
+  bus = "pcie.0"
+  addr = "1c.7"
+  port = "8"
+  chassis = "8"
+
+
+# SCSI storage controller (and storage)
+# =========================================================
+#
+# We use virtio-scsi here so that we can (hot)plug a large
+# number of disks without running into issues; a SCSI disk,
+# backed by a qcow2 disk image on the host's filesystem, is
+# attached to it.
+#
+# We also create an optical disk, mostly for installation
+# purposes: once the guest OS has been succesfully
+# installed, the guest will no longer boot from optical
+# media. If you don't want, or no longer want, to have an
+# optical disk in the guest you can safely comment out
+# all relevant sections below.
+
+[device "scsi"]
+  driver = "virtio-scsi-pci"
+  bus = "pcie.1"
+  addr = "00.0"
+
+[device "scsi-disk"]
+  driver = "scsi-hd"
+  bus = "scsi.0"
+  drive = "disk"
+  bootindex = "1"
+
+[drive "disk"]
+  file = "guest.qcow2"                          # CHANGE ME
+  format = "qcow2"
+  if = "none"
+
+[device "scsi-optical-disk"]
+  driver = "scsi-cd"
+  bus = "scsi.0"
+  drive = "optical-disk"
+  bootindex = "2"
+
+[drive "optical-disk"]
+  file = "install.iso"                          # CHANGE ME
+  format = "raw"
+  if = "none"
+
+
+# Ethernet controller
+# =========================================================
+#
+# We use virtio-net for improved performance over emulated
+# hardware; on the host side, we take advantage of user
+# networking so that the QEMU process doesn't require any
+# additional privileges.
+
+[netdev "hostnet"]
+  type = "user"
+
+[device "net"]
+  driver = "virtio-net-pci"
+  netdev = "hostnet"
+  bus = "pcie.2"
+  addr = "00.0"
diff --git a/docs/migration.txt b/docs/migration.txt
index 6503c17685..1b940a829b 100644
--- a/docs/migration.txt
+++ b/docs/migration.txt
@@ -161,6 +161,11 @@ include/hw/hw.h.
 
 === More about versions ===
 
+Version numbers are intended for major incompatible changes to the
+migration of a device, and using them breaks backwards-migration
+compatibility; in general most changes can be made by adding Subsections
+(see below) or _TEST macros (see below) which won't break compatibility.
+
 You can see that there are several version fields:
 
 - version_id: the maximum version_id supported by VMState for that device.
@@ -175,6 +180,9 @@ version_id.  And the function load_state_old() (if present) is able to
 load state from minimum_version_id_old to minimum_version_id.  This
 function is deprecated and will be removed when no more users are left.
 
+Saving state will always create a section with the 'version_id' value
+and thus can't be loaded by any older QEMU.
+
 ===  Massaging functions ===
 
 Sometimes, it is not enough to be able to save the state directly
@@ -292,6 +300,56 @@ save/send this state when we are in the middle of a pio operation
 not enabled, the values on that fields are garbage and don't need to
 be sent.
 
+Using a condition function that checks a 'property' to determine whether
+to send a subsection allows backwards migration compatibility when
+new subsections are added.
+
+For example;
+   a) Add a new property using DEFINE_PROP_BOOL - e.g. support-foo and
+      default it to true.
+   b) Add an entry to the HW_COMPAT_ for the previous version
+      that sets the property to false.
+   c) Add a static bool  support_foo function that tests the property.
+   d) Add a subsection with a .needed set to the support_foo function
+   e) (potentially) Add a pre_load that sets up a default value for 'foo'
+      to be used if the subsection isn't loaded.
+
+Now that subsection will not be generated when using an older
+machine type and the migration stream will be accepted by older
+QEMU versions. pre-load functions can be used to initialise state
+on the newer version so that they default to suitable values
+when loading streams created by older QEMU versions that do not
+generate the subsection.
+
+In some cases subsections are added for data that had been accidentally
+omitted by earlier versions; if the missing data causes the migration
+process to succeed but the guest to behave badly then it may be better
+to send the subsection and cause the migration to explicitly fail
+with the unknown subsection error.   If the bad behaviour only happens
+with certain data values, making the subsection conditional on
+the data value (rather than the machine type) allows migrations to succeed
+in most cases.  In general the preference is to tie the subsection to
+the machine type, and allow reliable migrations, unless the behaviour
+from omission of the subsection is really bad.
+
+= Not sending existing elements =
+
+Sometimes members of the VMState are no longer needed;
+  removing them will break migration compatibility
+  making them version dependent and bumping the version will break backwards
+   migration compatibility.
+
+The best way is to:
+  a) Add a new property/compatibility/function in the same way for subsections
+    above.
+  b) replace the VMSTATE macro with the _TEST version of the macro, e.g.:
+     VMSTATE_UINT32(foo, barstruct)
+   becomes
+     VMSTATE_UINT32_TEST(foo, barstruct, pre_version_baz)
+
+  Sometime in the future when we no longer care about the ancient
+versions these can be killed off.
+
 = Return path =
 
 In most migration scenarios there is only a single data path that runs
@@ -482,3 +540,16 @@ request for a page that has already been sent is ignored.  Duplicate requests
 such as this can happen as a page is sent at about the same time the
 destination accesses it.
 
+=== Postcopy with hugepages ===
+
+Postcopy now works with hugetlbfs backed memory:
+  a) The linux kernel on the destination must support userfault on hugepages.
+  b) The huge-page configuration on the source and destination VMs must be
+     identical; i.e. RAMBlocks on both sides must use the same page size.
+  c) Note that -mem-path /dev/hugepages  will fall back to allocating normal
+     RAM if it doesn't have enough hugepages, triggering (b) to fail.
+     Using -mem-prealloc enforces the allocation using hugepages.
+  d) Care should be taken with the size of hugepage used; postcopy with 2MB
+     hugepages works well, however 1GB hugepages are likely to be problematic
+     since it takes ~1 second to transfer a 1GB hugepage across a 10Gbps link,
+     and until the full page is transferred the destination thread is blocked.
diff --git a/docs/q35-chipset.cfg b/docs/q35-chipset.cfg
deleted file mode 100644
index e4ddb7d9cc..0000000000
--- a/docs/q35-chipset.cfg
+++ /dev/null
@@ -1,152 +0,0 @@
-################################################################
-#
-# qemu -M q35 creates a bare machine with just the very essential
-# chipset devices being present:
-#
-#     00.0 - Host bridge
-#     1f.0 - ISA bridge / LPC
-#     1f.2 - SATA (AHCI) controller
-#     1f.3 - SMBus controller
-#
-# This config file documents the other devices and how they are
-# created.  You can simply use "-readconfig $thisfile" to create
-# them all.  Here is a overview:
-#
-#     19.0 - Ethernet controller (not created, our e1000 emulation
-#                                 doesn't emulate the ich9 device).
-#     1a.* - USB Controller #2 (ehci + uhci companions)
-#     1b.0 - HD Audio Controller
-#     1c.* - PCI Express Ports
-#     1d.* - USB Controller #1 (ehci + uhci companions,
-#                               "qemu -M q35 -usb" creates these too)
-#     1e.0 - PCI Bridge
-#
-
-[device "ich9-ehci-2"]
-  driver = "ich9-usb-ehci2"
-  multifunction = "on"
-  bus = "pcie.0"
-  addr = "1a.7"
-
-[device "ich9-uhci-4"]
-  driver = "ich9-usb-uhci4"
-  multifunction = "on"
-  bus = "pcie.0"
-  addr = "1a.0"
-  masterbus = "ich9-ehci-2.0"
-  firstport = "0"
-
-[device "ich9-uhci-5"]
-  driver = "ich9-usb-uhci5"
-  multifunction = "on"
-  bus = "pcie.0"
-  addr = "1a.1"
-  masterbus = "ich9-ehci-2.0"
-  firstport = "2"
-
-[device "ich9-uhci-6"]
-  driver = "ich9-usb-uhci6"
-  multifunction = "on"
-  bus = "pcie.0"
-  addr = "1a.2"
-  masterbus = "ich9-ehci-2.0"
-  firstport = "4"
-
-
-[device "ich9-hda-audio"]
-  driver = "ich9-intel-hda"
-  bus = "pcie.0"
-  addr = "1b.0"
-
-
-[device "ich9-pcie-port-1"]
-  driver = "ioh3420"
-  multifunction = "on"
-  bus = "pcie.0"
-  addr = "1c.0"
-  port = "1"
-  chassis = "1"
-
-[device "ich9-pcie-port-2"]
-  driver = "ioh3420"
-  multifunction = "on"
-  bus = "pcie.0"
-  addr = "1c.1"
-  port = "2"
-  chassis = "2"
-
-[device "ich9-pcie-port-3"]
-  driver = "ioh3420"
-  multifunction = "on"
-  bus = "pcie.0"
-  addr = "1c.2"
-  port = "3"
-  chassis = "3"
-
-[device "ich9-pcie-port-4"]
-  driver = "ioh3420"
-  multifunction = "on"
-  bus = "pcie.0"
-  addr = "1c.3"
-  port = "4"
-  chassis = "4"
-
-##
-# Example PCIe switch with two downstream ports
-#
-#[device "pcie-switch-upstream-port-1"]
-#  driver = "x3130-upstream"
-#  bus = "ich9-pcie-port-4"
-#  addr = "00.0"
-#
-#[device "pcie-switch-downstream-port-1-1"]
-#  driver = "xio3130-downstream"
-#  multifunction = "on"
-#  bus = "pcie-switch-upstream-port-1"
-#  addr = "00.0"
-#  port = "1"
-#  chassis = "5"
-#
-#[device "pcie-switch-downstream-port-1-2"]
-#  driver = "xio3130-downstream"
-#  multifunction = "on"
-#  bus = "pcie-switch-upstream-port-1"
-#  addr = "00.1"
-#  port = "1"
-#  chassis = "6"
-
-[device "ich9-ehci-1"]
-  driver = "ich9-usb-ehci1"
-  multifunction = "on"
-  bus = "pcie.0"
-  addr = "1d.7"
-
-[device "ich9-uhci-1"]
-  driver = "ich9-usb-uhci1"
-  multifunction = "on"
-  bus = "pcie.0"
-  addr = "1d.0"
-  masterbus = "ich9-ehci-1.0"
-  firstport = "0"
-
-[device "ich9-uhci-2"]
-  driver = "ich9-usb-uhci2"
-  multifunction = "on"
-  bus = "pcie.0"
-  addr = "1d.1"
-  masterbus = "ich9-ehci-1.0"
-  firstport = "2"
-
-[device "ich9-uhci-3"]
-  driver = "ich9-usb-uhci3"
-  multifunction = "on"
-  bus = "pcie.0"
-  addr = "1d.2"
-  masterbus = "ich9-ehci-1.0"
-  firstport = "4"
-
-
-[device "ich9-pci-bridge"]
-  driver = "i82801b11-bridge"
-  bus = "pcie.0"
-  addr = "1e.0"
diff --git a/docs/q35-emulated.cfg b/docs/q35-emulated.cfg
new file mode 100644
index 0000000000..c6416d6545
--- /dev/null
+++ b/docs/q35-emulated.cfg
@@ -0,0 +1,288 @@
+# q35 - Emulated guest (graphical console)
+# =========================================================
+#
+# Usage:
+#
+#   $ qemu-system-x86_64 \
+#     -nodefaults \
+#     -readconfig q35-emulated.cfg
+#
+# You will probably need to tweak the lines marked as
+# CHANGE ME before being able to use this configuration!
+#
+# The guest will have a selection of emulated devices that
+# closely resembles that of a physical machine, and will be
+# accessed through a graphical console.
+#
+# ---------------------------------------------------------
+#
+# Using -nodefaults is required to have full control over
+# the virtual hardware: when it's specified, QEMU will
+# populate the board with only the builtin peripherals
+# plus a small selection of core PCI devices and
+# controllers; the user will then have to explicitly add
+# further devices.
+#
+# The core PCI devices show up in the guest as:
+#
+#   00:00.0 Host bridge
+#   00:1f.0 ISA bridge / LPC
+#   00:1f.2 SATA (AHCI) controller
+#   00:1f.3 SMBus controller
+#
+# This configuration file adds a number of devices that
+# are pretty much guaranteed to be present in every single
+# physical machine based on q35, more specifically:
+#
+#   00:01.0 VGA compatible controller
+#   00:19.0 Ethernet controller
+#   00:1a.* USB controller (#2)
+#   00:1b.0 Audio device
+#   00:1c.* PCI bridge (PCI Express Root Ports)
+#   00:1d.* USB Controller (#1)
+#   00:1e.0 PCI bridge (legacy PCI bridge)
+#
+# More information about these devices is available below.
+
+
+# Machine options
+# =========================================================
+#
+# We use the q35 machine type and enable KVM acceleration
+# for better performance.
+#
+# Using less than 1 GiB of memory is probably not going to
+# yield good performance in the guest, and might even lead
+# to obscure boot issues in some cases.
+#
+# Unfortunately, there is no way to configure the CPU model
+# in this file, so it will have to be provided on the
+# command line.
+
+[machine]
+  type = "q35"
+  accel = "kvm"
+
+[memory]
+  size = "1024"
+
+
+# PCI bridge (PCI Express Root Ports)
+# =========================================================
+#
+# We add four PCI Express Root Ports, all sharing the same
+# slot on the PCI Express  Root Bus. These ports support
+# hotplug.
+
+[device "ich9-pcie-port-1"]
+  driver = "ioh3420"
+  multifunction = "on"
+  bus = "pcie.0"
+  addr = "1c.0"
+  port = "1"
+  chassis = "1"
+
+[device "ich9-pcie-port-2"]
+  driver = "ioh3420"
+  multifunction = "on"
+  bus = "pcie.0"
+  addr = "1c.1"
+  port = "2"
+  chassis = "2"
+
+[device "ich9-pcie-port-3"]
+  driver = "ioh3420"
+  multifunction = "on"
+  bus = "pcie.0"
+  addr = "1c.2"
+  port = "3"
+  chassis = "3"
+
+[device "ich9-pcie-port-4"]
+  driver = "ioh3420"
+  multifunction = "on"
+  bus = "pcie.0"
+  addr = "1c.3"
+  port = "4"
+  chassis = "4"
+
+
+# PCI bridge (legacy PCI bridge)
+# =========================================================
+#
+# This bridge can be used to build an independent topology
+# for legacy PCI devices. PCI Express devices should be
+# plugged into PCI Express slots instead, so ideally there
+# will be no devices connected to this bridge.
+
+[device "ich9-pci-bridge"]
+  driver = "i82801b11-bridge"
+  bus = "pcie.0"
+  addr = "1e.0"
+
+
+# SATA storage
+# =========================================================
+#
+# An implicit SATA controller is created automatically for
+# every single q35 guest; here we create a disk, backed by
+# a qcow2 disk image on the host's filesystem, and attach
+# it to that controller so that the guest can use it.
+#
+# We also create an optical disk, mostly for installation
+# purposes: once the guest OS has been succesfully
+# installed, the guest will no longer boot from optical
+# media. If you don't want, or no longer want, to have an
+# optical disk in the guest you can safely comment out
+# all relevant sections below.
+
+[device "sata-disk"]
+  driver = "ide-hd"
+  bus = "ide.0"
+  drive = "disk"
+  bootindex = "1"
+
+[drive "disk"]
+  file = "guest.qcow2"                          # CHANGE ME
+  format = "qcow2"
+  if = "none"
+
+[device "sata-optical-disk"]
+  driver = "ide-cd"
+  bus = "ide.1"
+  drive = "optical-disk"
+  bootindex = "2"
+
+[drive "optical-disk"]
+  file = "install.iso"                          # CHANGE ME
+  format = "raw"
+  if = "none"
+
+
+# USB controller (#1)
+# =========================================================
+#
+# EHCI controller + UHCI companion controllers.
+
+[device "ich9-ehci-1"]
+  driver = "ich9-usb-ehci1"
+  multifunction = "on"
+  bus = "pcie.0"
+  addr = "1d.7"
+
+[device "ich9-uhci-1"]
+  driver = "ich9-usb-uhci1"
+  multifunction = "on"
+  bus = "pcie.0"
+  addr = "1d.0"
+  masterbus = "ich9-ehci-1.0"
+  firstport = "0"
+
+[device "ich9-uhci-2"]
+  driver = "ich9-usb-uhci2"
+  multifunction = "on"
+  bus = "pcie.0"
+  addr = "1d.1"
+  masterbus = "ich9-ehci-1.0"
+  firstport = "2"
+
+[device "ich9-uhci-3"]
+  driver = "ich9-usb-uhci3"
+  multifunction = "on"
+  bus = "pcie.0"
+  addr = "1d.2"
+  masterbus = "ich9-ehci-1.0"
+  firstport = "4"
+
+
+# USB controller (#2)
+# =========================================================
+#
+# EHCI controller + UHCI companion controllers.
+
+[device "ich9-ehci-2"]
+  driver = "ich9-usb-ehci2"
+  multifunction = "on"
+  bus = "pcie.0"
+  addr = "1a.7"
+
+[device "ich9-uhci-4"]
+  driver = "ich9-usb-uhci4"
+  multifunction = "on"
+  bus = "pcie.0"
+  addr = "1a.0"
+  masterbus = "ich9-ehci-2.0"
+  firstport = "0"
+
+[device "ich9-uhci-5"]
+  driver = "ich9-usb-uhci5"
+  multifunction = "on"
+  bus = "pcie.0"
+  addr = "1a.1"
+  masterbus = "ich9-ehci-2.0"
+  firstport = "2"
+
+[device "ich9-uhci-6"]
+  driver = "ich9-usb-uhci6"
+  multifunction = "on"
+  bus = "pcie.0"
+  addr = "1a.2"
+  masterbus = "ich9-ehci-2.0"
+  firstport = "4"
+
+
+# Ethernet controller
+# =========================================================
+#
+# We add a Gigabit Ethernet interface to the guest; on the
+# host side, we take advantage of user networking so that
+# the QEMU process doesn't require any additional
+# privileges.
+
+[netdev "hostnet"]
+  type = "user"
+
+[device "net"]
+  driver = "e1000"
+  netdev = "hostnet"
+  bus = "pcie.0"
+  addr = "19.0"
+
+
+# VGA compatible controller
+# =========================================================
+#
+# We use stdvga instead of Cirrus as it supports more video
+# modes and is closer to what actual hardware looks like.
+#
+# If you're running the guest on a remote, potentially
+# headless host, you will probably want to append something
+# like
+#
+#   -display vnc=127.0.0.1:0
+#
+# to the command line in order to prevent QEMU from
+# creating a graphical display window on the host and
+# enable remote access instead.
+
+[device "video"]
+  driver = "VGA"
+  bus = "pcie.0"
+  addr = "01.0"
+
+
+# Audio device
+# =========================================================
+#
+# The sound card is a legacy PCI device that is plugged
+# directly into the PCI Express Root Bus.
+
+[device "ich9-hda-audio"]
+  driver = "ich9-intel-hda"
+  bus = "pcie.0"
+  addr = "1b.0"
+
+[device "ich9-hda-duplex"]
+  driver = "hda-duplex"
+  bus = "ich9-hda-audio.0"
+  cad = "0"
diff --git a/docs/q35-virtio-graphical.cfg b/docs/q35-virtio-graphical.cfg
new file mode 100644
index 0000000000..28bde2fc57
--- /dev/null
+++ b/docs/q35-virtio-graphical.cfg
@@ -0,0 +1,248 @@
+# q35 - VirtIO guest (graphical console)
+# =========================================================
+#
+# Usage:
+#
+#   $ qemu-system-x86_64 \
+#     -nodefaults \
+#     -readconfig q35-virtio-graphical.cfg
+#
+# You will probably need to tweak the lines marked as
+# CHANGE ME before being able to use this configuration!
+#
+# The guest will have a selection of VirtIO devices
+# tailored towards optimal performance with modern guests,
+# and will be accessed through a graphical console.
+#
+# ---------------------------------------------------------
+#
+# Using -nodefaults is required to have full control over
+# the virtual hardware: when it's specified, QEMU will
+# populate the board with only the builtin peripherals
+# plus a small selection of core PCI devices and
+# controllers; the user will then have to explicitly add
+# further devices.
+#
+# The core PCI devices show up in the guest as:
+#
+#   00:00.0 Host bridge
+#   00:1f.0 ISA bridge / LPC
+#   00:1f.2 SATA (AHCI) controller
+#   00:1f.3 SMBus controller
+#
+# This configuration file adds a number of other useful
+# devices, more specifically:
+#
+#   00:01.0 VGA compatible controller
+#   00:1b.0 Audio device
+#   00.1c.* PCI bridge (PCI Express Root Ports)
+#   01:00.0 SCSI storage controller
+#   02:00.0 Ethernet controller
+#   03:00.0 USB controller
+#
+# More information about these devices is available below.
+
+
+# Machine options
+# =========================================================
+#
+# We use the q35 machine type and enable KVM acceleration
+# for better performance.
+#
+# Using less than 1 GiB of memory is probably not going to
+# yield good performance in the guest, and might even lead
+# to obscure boot issues in some cases.
+
+[machine]
+  type = "q35"
+  accel = "kvm"
+
+[memory]
+  size = "1024"
+
+
+# PCI bridge (PCI Express Root Ports)
+# =========================================================
+#
+# We create eight PCI Express Root Ports, and we plug them
+# all into separate functions of the same slot. Some of
+# them will be used by devices, the rest will remain
+# available for hotplug.
+
+[device "pcie.1"]
+  driver = "pcie-root-port"
+  bus = "pcie.0"
+  addr = "1c.0"
+  port = "1"
+  chassis = "1"
+  multifunction = "on"
+
+[device "pcie.2"]
+  driver = "pcie-root-port"
+  bus = "pcie.0"
+  addr = "1c.1"
+  port = "2"
+  chassis = "2"
+
+[device "pcie.3"]
+  driver = "pcie-root-port"
+  bus = "pcie.0"
+  addr = "1c.2"
+  port = "3"
+  chassis = "3"
+
+[device "pcie.4"]
+  driver = "pcie-root-port"
+  bus = "pcie.0"
+  addr = "1c.3"
+  port = "4"
+  chassis = "4"
+
+[device "pcie.5"]
+  driver = "pcie-root-port"
+  bus = "pcie.0"
+  addr = "1c.4"
+  port = "5"
+  chassis = "5"
+
+[device "pcie.6"]
+  driver = "pcie-root-port"
+  bus = "pcie.0"
+  addr = "1c.5"
+  port = "6"
+  chassis = "6"
+
+[device "pcie.7"]
+  driver = "pcie-root-port"
+  bus = "pcie.0"
+  addr = "1c.6"
+  port = "7"
+  chassis = "7"
+
+[device "pcie.8"]
+  driver = "pcie-root-port"
+  bus = "pcie.0"
+  addr = "1c.7"
+  port = "8"
+  chassis = "8"
+
+
+# SCSI storage controller (and storage)
+# =========================================================
+#
+# We use virtio-scsi here so that we can (hot)plug a large
+# number of disks without running into issues; a SCSI disk,
+# backed by a qcow2 disk image on the host's filesystem, is
+# attached to it.
+#
+# We also create an optical disk, mostly for installation
+# purposes: once the guest OS has been succesfully
+# installed, the guest will no longer boot from optical
+# media. If you don't want, or no longer want, to have an
+# optical disk in the guest you can safely comment out
+# all relevant sections below.
+
+[device "scsi"]
+  driver = "virtio-scsi-pci"
+  bus = "pcie.1"
+  addr = "00.0"
+
+[device "scsi-disk"]
+  driver = "scsi-hd"
+  bus = "scsi.0"
+  drive = "disk"
+  bootindex = "1"
+
+[drive "disk"]
+  file = "guest.qcow2"                          # CHANGE ME
+  format = "qcow2"
+  if = "none"
+
+[device "scsi-optical-disk"]
+  driver = "scsi-cd"
+  bus = "scsi.0"
+  drive = "optical-disk"
+  bootindex = "2"
+
+[drive "optical-disk"]
+  file = "install.iso"                          # CHANGE ME
+  format = "raw"
+  if = "none"
+
+
+# Ethernet controller
+# =========================================================
+#
+# We use virtio-net for improved performance over emulated
+# hardware; on the host side, we take advantage of user
+# networking so that the QEMU process doesn't require any
+# additional privileges.
+
+[netdev "hostnet"]
+  type = "user"
+
+[device "net"]
+  driver = "virtio-net-pci"
+  netdev = "hostnet"
+  bus = "pcie.2"
+  addr = "00.0"
+
+
+# USB controller (and input devices)
+# =========================================================
+#
+# We add a virtualization-friendly USB 3.0 controller and
+# a USB tablet so that graphical guests can be controlled
+# appropriately. A USB keyboard is not needed, as q35
+# guests get a PS/2 one added automatically.
+
+[device "usb"]
+  driver = "nec-usb-xhci"
+  bus = "pcie.3"
+  addr = "00.0"
+
+[device "tablet"]
+  driver = "usb-tablet"
+  bus = "usb.0"
+
+
+# VGA compatible controller
+# =========================================================
+#
+# We plug the QXL video card directly into the PCI Express
+# Root Bus as it is a legacy PCI device; this way, we can
+# reduce the number of PCI Express controllers in the
+# guest.
+#
+# If you're running the guest on a remote, potentially
+# headless host, you will probably want to append something
+# like
+#
+#   -display vnc=127.0.0.1:0
+#
+# to the command line in order to prevent QEMU from
+# creating a graphical display window on the host and
+# enable remote access instead.
+
+[device "video"]
+  driver = "qxl-vga"
+  bus = "pcie.0"
+  addr = "01.0"
+
+
+# Audio device
+# =========================================================
+#
+# Like the video card, the sound card is a legacy PCI
+# device and as such can be plugged directly into the PCI
+# Express Root Bus.
+
+[device "sound"]
+  driver = "ich9-intel-hda"
+  bus = "pcie.0"
+  addr = "1b.0"
+
+[device "duplex"]
+  driver = "hda-duplex"
+  bus = "sound.0"
+  cad = "0"
diff --git a/docs/q35-virtio-serial.cfg b/docs/q35-virtio-serial.cfg
new file mode 100644
index 0000000000..c33c9cc07a
--- /dev/null
+++ b/docs/q35-virtio-serial.cfg
@@ -0,0 +1,193 @@
+# q35 - VirtIO guest (serial console)
+# =========================================================
+#
+# Usage:
+#
+#   $ qemu-system-x86_64 \
+#     -nodefaults \
+#     -readconfig q35-virtio-serial.cfg \
+#     -display none -serial mon:stdio
+#
+# You will probably need to tweak the lines marked as
+# CHANGE ME before being able to use this configuration!
+#
+# The guest will have a selection of VirtIO devices
+# tailored towards optimal performance with modern guests,
+# and will be accessed through the serial console.
+#
+# ---------------------------------------------------------
+#
+# Using -nodefaults is required to have full control over
+# the virtual hardware: when it's specified, QEMU will
+# populate the board with only the builtin peripherals
+# plus a small selection of core PCI devices and
+# controllers; the user will then have to explicitly add
+# further devices.
+#
+# The core PCI devices show up in the guest as:
+#
+#   00:00.0 Host bridge
+#   00:1f.0 ISA bridge / LPC
+#   00:1f.2 SATA (AHCI) controller
+#   00:1f.3 SMBus controller
+#
+# This configuration file adds a number of other useful
+# devices, more specifically:
+#
+#   00.1c.* PCI bridge (PCI Express Root Ports)
+#   01:00.0 SCSI storage controller
+#   02:00.0 Ethernet controller
+#
+# More information about these devices is available below.
+#
+# We use '-display none' to prevent QEMU from creating a
+# graphical display window, which would serve no use in
+# this specific configuration, and '-serial mon:stdio' to
+# multiplex the guest's serial console and the QEMU monitor
+# to the host's stdio; use 'Ctrl+A h' to learn how to
+# switch between the two and more.
+
+
+# Machine options
+# =========================================================
+#
+# We use the q35 machine type and enable KVM acceleration
+# for better performance.
+#
+# Using less than 1 GiB of memory is probably not going to
+# yield good performance in the guest, and might even lead
+# to obscure boot issues in some cases.
+
+[machine]
+  type = "q35"
+  accel = "kvm"
+
+[memory]
+  size = "1024"
+
+
+# PCI bridge (PCI Express Root Ports)
+# =========================================================
+#
+# We create eight PCI Express Root Ports, and we plug them
+# all into separate functions of the same slot. Some of
+# them will be used by devices, the rest will remain
+# available for hotplug.
+
+[device "pcie.1"]
+  driver = "pcie-root-port"
+  bus = "pcie.0"
+  addr = "1c.0"
+  port = "1"
+  chassis = "1"
+  multifunction = "on"
+
+[device "pcie.2"]
+  driver = "pcie-root-port"
+  bus = "pcie.0"
+  addr = "1c.1"
+  port = "2"
+  chassis = "2"
+
+[device "pcie.3"]
+  driver = "pcie-root-port"
+  bus = "pcie.0"
+  addr = "1c.2"
+  port = "3"
+  chassis = "3"
+
+[device "pcie.4"]
+  driver = "pcie-root-port"
+  bus = "pcie.0"
+  addr = "1c.3"
+  port = "4"
+  chassis = "4"
+
+[device "pcie.5"]
+  driver = "pcie-root-port"
+  bus = "pcie.0"
+  addr = "1c.4"
+  port = "5"
+  chassis = "5"
+
+[device "pcie.6"]
+  driver = "pcie-root-port"
+  bus = "pcie.0"
+  addr = "1c.5"
+  port = "6"
+  chassis = "6"
+
+[device "pcie.7"]
+  driver = "pcie-root-port"
+  bus = "pcie.0"
+  addr = "1c.6"
+  port = "7"
+  chassis = "7"
+
+[device "pcie.8"]
+  driver = "pcie-root-port"
+  bus = "pcie.0"
+  addr = "1c.7"
+  port = "8"
+  chassis = "8"
+
+
+# SCSI storage controller (and storage)
+# =========================================================
+#
+# We use virtio-scsi here so that we can (hot)plug a large
+# number of disks without running into issues; a SCSI disk,
+# backed by a qcow2 disk image on the host's filesystem, is
+# attached to it.
+#
+# We also create an optical disk, mostly for installation
+# purposes: once the guest OS has been succesfully
+# installed, the guest will no longer boot from optical
+# media. If you don't want, or no longer want, to have an
+# optical disk in the guest you can safely comment out
+# all relevant sections below.
+
+[device "scsi"]
+  driver = "virtio-scsi-pci"
+  bus = "pcie.1"
+  addr = "00.0"
+
+[device "scsi-disk"]
+  driver = "scsi-hd"
+  bus = "scsi.0"
+  drive = "disk"
+  bootindex = "1"
+
+[drive "disk"]
+  file = "guest.qcow2"                          # CHANGE ME
+  format = "qcow2"
+  if = "none"
+
+[device "scsi-optical-disk"]
+  driver = "scsi-cd"
+  bus = "scsi.0"
+  drive = "optical-disk"
+  bootindex = "2"
+
+[drive "optical-disk"]
+  file = "install.iso"                          # CHANGE ME
+  format = "raw"
+  if = "none"
+
+
+# Ethernet controller
+# =========================================================
+#
+# We use virtio-net for improved performance over emulated
+# hardware; on the host side, we take advantage of user
+# networking so that the QEMU process doesn't require any
+# additional privileges.
+
+[netdev "hostnet"]
+  type = "user"
+
+[device "net"]
+  driver = "virtio-net-pci"
+  netdev = "hostnet"
+  bus = "pcie.2"
+  addr = "00.0"
diff --git a/docs/replay.txt b/docs/replay.txt
index 03e193193f..486c1e0e9d 100644
--- a/docs/replay.txt
+++ b/docs/replay.txt
@@ -225,3 +225,10 @@ recording the virtual machine this filter puts all packets coming from
 the outer world into the log. In replay mode packets from the log are
 injected into the network device. All interactions with network backend
 in replay mode are disabled.
+
+Audio devices
+-------------
+
+Audio data is recorded and replay automatically. The command line for recording
+and replaying must contain identical specifications of audio hardware, e.g.:
+ -soundhw ac97
diff --git a/docs/specs/vmgenid.txt b/docs/specs/vmgenid.txt
new file mode 100644
index 0000000000..aa9f518676
--- /dev/null
+++ b/docs/specs/vmgenid.txt
@@ -0,0 +1,245 @@
+VIRTUAL MACHINE GENERATION ID
+=============================
+
+Copyright (C) 2016 Red Hat, Inc.
+Copyright (C) 2017 Skyport Systems, Inc.
+
+This work is licensed under the terms of the GNU GPL, version 2 or later.
+See the COPYING file in the top-level directory.
+
+===
+
+The VM generation ID (vmgenid) device is an emulated device which
+exposes a 128-bit, cryptographically random, integer value identifier,
+referred to as a Globally Unique Identifier, or GUID.
+
+This allows management applications (e.g. libvirt) to notify the guest
+operating system when the virtual machine is executed with a different
+configuration (e.g. snapshot execution or creation from a template).  The
+guest operating system notices the change, and is then able to react as
+appropriate by marking its copies of distributed databases as dirty,
+re-initializing its random number generator etc.
+
+
+Requirements
+------------
+
+These requirements are extracted from the "How to implement virtual machine
+generation ID support in a virtualization platform" section of the
+specification, dated August 1, 2012.
+
+
+The document may be found on the web at:
+  http://go.microsoft.com/fwlink/?LinkId=260709
+
+R1a. The generation ID shall live in an 8-byte aligned buffer.
+
+R1b. The buffer holding the generation ID shall be in guest RAM, ROM, or device
+     MMIO range.
+
+R1c. The buffer holding the generation ID shall be kept separate from areas
+     used by the operating system.
+
+R1d. The buffer shall not be covered by an AddressRangeMemory or
+     AddressRangeACPI entry in the E820 or UEFI memory map.
+
+R1e. The generation ID shall not live in a page frame that could be mapped with
+     caching disabled. (In other words, regardless of whether the generation ID
+     lives in RAM, ROM or MMIO, it shall only be mapped as cacheable.)
+
+R2 to R5. [These AML requirements are isolated well enough in the Microsoft
+          specification for us to simply refer to them here.]
+
+R6. The hypervisor shall expose a _HID (hardware identifier) object in the
+    VMGenId device's scope that is unique to the hypervisor vendor.
+
+
+QEMU Implementation
+-------------------
+
+The above-mentioned specification does not dictate which ACPI descriptor table
+will contain the VM Generation ID device.  Other implementations (Hyper-V and
+Xen) put it in the main descriptor table (Differentiated System Description
+Table or DSDT).  For ease of debugging and implementation, we have decided to
+put it in its own Secondary System Description Table, or SSDT.
+
+The following is a dump of the contents from a running system:
+
+# iasl -p ./SSDT -d /sys/firmware/acpi/tables/SSDT
+
+Intel ACPI Component Architecture
+ASL+ Optimizing Compiler version 20150717-64
+Copyright (c) 2000 - 2015 Intel Corporation
+
+Reading ACPI table from file /sys/firmware/acpi/tables/SSDT - Length
+00000198 (0x0000C6)
+ACPI: SSDT 0x0000000000000000 0000C6 (v01 BOCHS  VMGENID  00000001 BXPC
+00000001)
+Acpi table [SSDT] successfully installed and loaded
+Pass 1 parse of [SSDT]
+Pass 2 parse of [SSDT]
+Parsing Deferred Opcodes (Methods/Buffers/Packages/Regions)
+
+Parsing completed
+Disassembly completed
+ASL Output:    ./SSDT.dsl - 1631 bytes
+# cat SSDT.dsl
+/*
+ * Intel ACPI Component Architecture
+ * AML/ASL+ Disassembler version 20150717-64
+ * Copyright (c) 2000 - 2015 Intel Corporation
+ *
+ * Disassembling to symbolic ASL+ operators
+ *
+ * Disassembly of /sys/firmware/acpi/tables/SSDT, Sun Feb  5 00:19:37 2017
+ *
+ * Original Table Header:
+ *     Signature        "SSDT"
+ *     Length           0x000000CA (202)
+ *     Revision         0x01
+ *     Checksum         0x4B
+ *     OEM ID           "BOCHS "
+ *     OEM Table ID     "VMGENID"
+ *     OEM Revision     0x00000001 (1)
+ *     Compiler ID      "BXPC"
+ *     Compiler Version 0x00000001 (1)
+ */
+DefinitionBlock ("/sys/firmware/acpi/tables/SSDT.aml", "SSDT", 1, "BOCHS ",
+"VMGENID", 0x00000001)
+{
+    Name (VGIA, 0x07FFF000)
+    Scope (\_SB)
+    {
+        Device (VGEN)
+        {
+            Name (_HID, "QEMUVGID")  // _HID: Hardware ID
+            Name (_CID, "VM_Gen_Counter")  // _CID: Compatible ID
+            Name (_DDN, "VM_Gen_Counter")  // _DDN: DOS Device Name
+            Method (_STA, 0, NotSerialized)  // _STA: Status
+            {
+                Local0 = 0x0F
+                If ((VGIA == Zero))
+                {
+                    Local0 = Zero
+                }
+
+                Return (Local0)
+            }
+
+            Method (ADDR, 0, NotSerialized)
+            {
+                Local0 = Package (0x02) {}
+                Index (Local0, Zero) = (VGIA + 0x28)
+                Index (Local0, One) = Zero
+                Return (Local0)
+            }
+        }
+    }
+
+    Method (\_GPE._E05, 0, NotSerialized)  // _Exx: Edge-Triggered GPE
+    {
+        Notify (\_SB.VGEN, 0x80) // Status Change
+    }
+}
+
+
+Design Details:
+---------------
+
+Requirements R1a through R1e dictate that the memory holding the
+VM Generation ID must be allocated and owned by the guest firmware,
+in this case BIOS or UEFI.  However, to be useful, QEMU must be able to
+change the contents of the memory at runtime, specifically when starting a
+backed-up or snapshotted image.  In order to do this, QEMU must know the
+address that has been allocated.
+
+The mechanism chosen for this memory sharing is writeable fw_cfg blobs.
+These are data object that are visible to both QEMU and guests, and are
+addressable as sequential files.
+
+More information about fw_cfg can be found in "docs/specs/fw_cfg.txt"
+
+Two fw_cfg blobs are used in this case:
+
+/etc/vmgenid_guid - contains the actual VM Generation ID GUID
+                  - read-only to the guest
+/etc/vmgenid_addr - contains the address of the downloaded vmgenid blob
+                  - writeable by the guest
+
+
+QEMU sends the following commands to the guest at startup:
+
+1. Allocate memory for vmgenid_guid fw_cfg blob.
+2. Write the address of vmgenid_guid into the SSDT (VGIA ACPI variable as
+   shown above in the iasl dump).  Note that this change is not propagated
+   back to QEMU.
+3. Write the address of vmgenid_guid back to QEMU's copy of vmgenid_addr
+   via the fw_cfg DMA interface.
+
+After step 3, QEMU is able to update the contents of vmgenid_guid at will.
+
+Since BIOS or UEFI does not necessarily run when we wish to change the GUID,
+the value of VGIA is persisted via the VMState mechanism.
+
+As spelled out in the specification, any change to the GUID executes an
+ACPI notification.  The exact handler to use is not specified, so the vmgenid
+device uses the first unused one:  \_GPE._E05.
+
+
+Endian-ness Considerations:
+---------------------------
+
+Although not specified in Microsoft's document, it is assumed that the
+device is expected to use little-endian format.
+
+All GUID passed in via command line or monitor are treated as big-endian.
+GUID values displayed via monitor are shown in big-endian format.
+
+
+GUID Storage Format:
+--------------------
+
+In order to implement an OVMF "SDT Header Probe Suppressor", the contents of
+the vmgenid_guid fw_cfg blob are not simply a 128-bit GUID.  There is also
+significant padding in order to align and fill a memory page, as shown in the
+following diagram:
+
++----------------------------------+
+| SSDT with OEM Table ID = VMGENID |
++----------------------------------+
+| ...                              |       TOP OF PAGE
+| VGIA dword object ---------------|-----> +---------------------------+
+| ...                              |       | fw-allocated array for    |
+| _STA method referring to VGIA    |       | "etc/vmgenid_guid"        |
+| ...                              |       +---------------------------+
+| ADDR method referring to VGIA    |       |  0: OVMF SDT Header probe |
+| ...                              |       |     suppressor            |
++----------------------------------+       | 36: padding for 8-byte    |
+                                           |     alignment             |
+                                           | 40: GUID                  |
+                                           | 56: padding to page size  |
+                                           +---------------------------+
+                                           END OF PAGE
+
+
+Device Usage:
+-------------
+
+The device has one property, which may be only be set using the command line:
+
+  guid - sets the value of the GUID.  A special value "auto" instructs
+         QEMU to generate a new random GUID.
+
+For example:
+
+  QEMU  -device vmgenid,guid="324e6eaf-d1d1-4bf6-bf41-b9bb6c91fb87"
+  QEMU  -device vmgenid,guid=auto
+
+The property may be queried via QMP/HMP:
+
+  (QEMU) query-vm-generation-id
+  {"return": {"guid": "324e6eaf-d1d1-4bf6-bf41-b9bb6c91fb87"}}
+
+Setting of this parameter is intentionally left out from the QMP/HMP
+interfaces.  There are no known use cases for changing the GUID once QEMU is
+running, and adding this capability would greatly increase the complexity.
diff --git a/exec.c b/exec.c
index 3adf2b1861..785d20f648 100644
--- a/exec.c
+++ b/exec.c
@@ -45,6 +45,12 @@
 #include "exec/address-spaces.h"
 #include "sysemu/xen-mapcache.h"
 #include "trace-root.h"
+
+#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
+#include <fcntl.h>
+#include <linux/falloc.h>
+#endif
+
 #endif
 #include "exec/cpu-all.h"
 #include "qemu/rcu_queue.h"
@@ -1518,6 +1524,19 @@ size_t qemu_ram_pagesize(RAMBlock *rb)
     return rb->page_size;
 }
 
+/* Returns the largest size of page in use */
+size_t qemu_ram_pagesize_largest(void)
+{
+    RAMBlock *block;
+    size_t largest = 0;
+
+    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
+        largest = MAX(largest, qemu_ram_pagesize(block));
+    }
+
+    return largest;
+}
+
 static int memory_try_enable_merging(void *addr, size_t len)
 {
     if (!machine_mem_merge(current_machine)) {
@@ -3294,4 +3313,68 @@ int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
     rcu_read_unlock();
     return ret;
 }
+
+/*
+ * Unmap pages of memory from start to start+length such that
+ * they a) read as 0, b) Trigger whatever fault mechanism
+ * the OS provides for postcopy.
+ * The pages must be unmapped by the end of the function.
+ * Returns: 0 on success, none-0 on failure
+ *
+ */
+int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length)
+{
+    int ret = -1;
+
+    uint8_t *host_startaddr = rb->host + start;
+
+    if ((uintptr_t)host_startaddr & (rb->page_size - 1)) {
+        error_report("ram_block_discard_range: Unaligned start address: %p",
+                     host_startaddr);
+        goto err;
+    }
+
+    if ((start + length) <= rb->used_length) {
+        uint8_t *host_endaddr = host_startaddr + length;
+        if ((uintptr_t)host_endaddr & (rb->page_size - 1)) {
+            error_report("ram_block_discard_range: Unaligned end address: %p",
+                         host_endaddr);
+            goto err;
+        }
+
+        errno = ENOTSUP; /* If we are missing MADVISE etc */
+
+        if (rb->page_size == qemu_host_page_size) {
+#if defined(CONFIG_MADVISE)
+            /* Note: We need the madvise MADV_DONTNEED behaviour of definitely
+             * freeing the page.
+             */
+            ret = madvise(host_startaddr, length, MADV_DONTNEED);
+#endif
+        } else {
+            /* Huge page case  - unfortunately it can't do DONTNEED, but
+             * it can do the equivalent by FALLOC_FL_PUNCH_HOLE in the
+             * huge page file.
+             */
+#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
+            ret = fallocate(rb->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
+                            start, length);
+#endif
+        }
+        if (ret) {
+            ret = -errno;
+            error_report("ram_block_discard_range: Failed to discard range "
+                         "%s:%" PRIx64 " +%zx (%d)",
+                         rb->idstr, start, length, ret);
+        }
+    } else {
+        error_report("ram_block_discard_range: Overrun block '%s' (%" PRIu64
+                     "/%zx/" RAM_ADDR_FMT")",
+                     rb->idstr, start, length, rb->used_length);
+    }
+
+err:
+    return ret;
+}
+
 #endif
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index 485a006aa7..7af14e29aa 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -7492,7 +7492,7 @@ uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *status)
 {
     signed char current_rounding_mode = status->float_rounding_mode;
     set_float_rounding_mode(float_round_to_zero, status);
-    int64_t v = float64_to_uint64(a, status);
+    uint64_t v = float64_to_uint64(a, status);
     set_float_rounding_mode(current_rounding_mode, status);
     return v;
 }
diff --git a/fsdev/Makefile.objs b/fsdev/Makefile.objs
index 1b120a4a7d..659df6e187 100644
--- a/fsdev/Makefile.objs
+++ b/fsdev/Makefile.objs
@@ -5,7 +5,7 @@ common-obj-y = qemu-fsdev.o 9p-marshal.o 9p-iov-marshal.o
 else
 common-obj-y = qemu-fsdev-dummy.o
 endif
-common-obj-y += qemu-fsdev-opts.o
+common-obj-y += qemu-fsdev-opts.o qemu-fsdev-throttle.o
 
 # Toplevel always builds this; targets without virtio will put it in
 # common-obj-y
diff --git a/fsdev/file-op-9p.h b/fsdev/file-op-9p.h
index a56dc8488d..0844a403dc 100644
--- a/fsdev/file-op-9p.h
+++ b/fsdev/file-op-9p.h
@@ -17,6 +17,7 @@
 #include <dirent.h>
 #include <utime.h>
 #include <sys/vfs.h>
+#include "qemu-fsdev-throttle.h"
 
 #define SM_LOCAL_MODE_BITS    0600
 #define SM_LOCAL_DIR_MODE_BITS    0700
@@ -74,6 +75,7 @@ typedef struct FsDriverEntry {
     char *path;
     int export_flags;
     FileOperations *ops;
+    FsThrottle fst;
 } FsDriverEntry;
 
 typedef struct FsContext
@@ -83,6 +85,7 @@ typedef struct FsContext
     int export_flags;
     struct xattr_operations **xops;
     struct extended_ops exops;
+    FsThrottle *fst;
     /* fs driver specific data */
     void *private;
 } FsContext;
diff --git a/fsdev/qemu-fsdev-opts.c b/fsdev/qemu-fsdev-opts.c
index 1dd8c7a24c..bf5713008a 100644
--- a/fsdev/qemu-fsdev-opts.c
+++ b/fsdev/qemu-fsdev-opts.c
@@ -9,6 +9,7 @@
 #include "qemu/config-file.h"
 #include "qemu/option.h"
 #include "qemu/module.h"
+#include "qemu/throttle-options.h"
 
 static QemuOptsList qemu_fsdev_opts = {
     .name = "fsdev",
@@ -39,6 +40,8 @@ static QemuOptsList qemu_fsdev_opts = {
             .type = QEMU_OPT_NUMBER,
         },
 
+        THROTTLE_OPTS,
+
         { /*End of list */ }
     },
 };
diff --git a/fsdev/qemu-fsdev-throttle.c b/fsdev/qemu-fsdev-throttle.c
new file mode 100644
index 0000000000..7ae4e86646
--- /dev/null
+++ b/fsdev/qemu-fsdev-throttle.c
@@ -0,0 +1,118 @@
+/*
+ * Fsdev Throttle
+ *
+ * Copyright (C) 2016 Huawei Technologies Duesseldorf GmbH
+ *
+ * Author: Pradeep Jagadeesh <pradeep.jagadeesh@huawei.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version.
+ *
+ * See the COPYING file in the top-level directory for details.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/error-report.h"
+#include "qemu-fsdev-throttle.h"
+#include "qemu/iov.h"
+
+static void fsdev_throttle_read_timer_cb(void *opaque)
+{
+    FsThrottle *fst = opaque;
+    qemu_co_enter_next(&fst->throttled_reqs[false]);
+}
+
+static void fsdev_throttle_write_timer_cb(void *opaque)
+{
+    FsThrottle *fst = opaque;
+    qemu_co_enter_next(&fst->throttled_reqs[true]);
+}
+
+void fsdev_throttle_parse_opts(QemuOpts *opts, FsThrottle *fst, Error **errp)
+{
+    throttle_config_init(&fst->cfg);
+    fst->cfg.buckets[THROTTLE_BPS_TOTAL].avg =
+        qemu_opt_get_number(opts, "throttling.bps-total", 0);
+    fst->cfg.buckets[THROTTLE_BPS_READ].avg  =
+        qemu_opt_get_number(opts, "throttling.bps-read", 0);
+    fst->cfg.buckets[THROTTLE_BPS_WRITE].avg =
+        qemu_opt_get_number(opts, "throttling.bps-write", 0);
+    fst->cfg.buckets[THROTTLE_OPS_TOTAL].avg =
+        qemu_opt_get_number(opts, "throttling.iops-total", 0);
+    fst->cfg.buckets[THROTTLE_OPS_READ].avg =
+        qemu_opt_get_number(opts, "throttling.iops-read", 0);
+    fst->cfg.buckets[THROTTLE_OPS_WRITE].avg =
+        qemu_opt_get_number(opts, "throttling.iops-write", 0);
+
+    fst->cfg.buckets[THROTTLE_BPS_TOTAL].max =
+        qemu_opt_get_number(opts, "throttling.bps-total-max", 0);
+    fst->cfg.buckets[THROTTLE_BPS_READ].max  =
+        qemu_opt_get_number(opts, "throttling.bps-read-max", 0);
+    fst->cfg.buckets[THROTTLE_BPS_WRITE].max =
+        qemu_opt_get_number(opts, "throttling.bps-write-max", 0);
+    fst->cfg.buckets[THROTTLE_OPS_TOTAL].max =
+        qemu_opt_get_number(opts, "throttling.iops-total-max", 0);
+    fst->cfg.buckets[THROTTLE_OPS_READ].max =
+        qemu_opt_get_number(opts, "throttling.iops-read-max", 0);
+    fst->cfg.buckets[THROTTLE_OPS_WRITE].max =
+        qemu_opt_get_number(opts, "throttling.iops-write-max", 0);
+
+    fst->cfg.buckets[THROTTLE_BPS_TOTAL].burst_length =
+        qemu_opt_get_number(opts, "throttling.bps-total-max-length", 1);
+    fst->cfg.buckets[THROTTLE_BPS_READ].burst_length  =
+        qemu_opt_get_number(opts, "throttling.bps-read-max-length", 1);
+    fst->cfg.buckets[THROTTLE_BPS_WRITE].burst_length =
+        qemu_opt_get_number(opts, "throttling.bps-write-max-length", 1);
+    fst->cfg.buckets[THROTTLE_OPS_TOTAL].burst_length =
+        qemu_opt_get_number(opts, "throttling.iops-total-max-length", 1);
+    fst->cfg.buckets[THROTTLE_OPS_READ].burst_length =
+        qemu_opt_get_number(opts, "throttling.iops-read-max-length", 1);
+    fst->cfg.buckets[THROTTLE_OPS_WRITE].burst_length =
+        qemu_opt_get_number(opts, "throttling.iops-write-max-length", 1);
+    fst->cfg.op_size =
+        qemu_opt_get_number(opts, "throttling.iops-size", 0);
+
+    throttle_is_valid(&fst->cfg, errp);
+}
+
+void fsdev_throttle_init(FsThrottle *fst)
+{
+    if (throttle_enabled(&fst->cfg)) {
+        throttle_init(&fst->ts);
+        throttle_timers_init(&fst->tt,
+                             qemu_get_aio_context(),
+                             QEMU_CLOCK_REALTIME,
+                             fsdev_throttle_read_timer_cb,
+                             fsdev_throttle_write_timer_cb,
+                             fst);
+        throttle_config(&fst->ts, &fst->tt, &fst->cfg);
+        qemu_co_queue_init(&fst->throttled_reqs[0]);
+        qemu_co_queue_init(&fst->throttled_reqs[1]);
+    }
+}
+
+void coroutine_fn fsdev_co_throttle_request(FsThrottle *fst, bool is_write,
+                                            struct iovec *iov, int iovcnt)
+{
+    if (throttle_enabled(&fst->cfg)) {
+        if (throttle_schedule_timer(&fst->ts, &fst->tt, is_write) ||
+            !qemu_co_queue_empty(&fst->throttled_reqs[is_write])) {
+            qemu_co_queue_wait(&fst->throttled_reqs[is_write], NULL);
+        }
+
+        throttle_account(&fst->ts, is_write, iov_size(iov, iovcnt));
+
+        if (!qemu_co_queue_empty(&fst->throttled_reqs[is_write]) &&
+            !throttle_schedule_timer(&fst->ts, &fst->tt, is_write)) {
+            qemu_co_queue_next(&fst->throttled_reqs[is_write]);
+        }
+    }
+}
+
+void fsdev_throttle_cleanup(FsThrottle *fst)
+{
+    if (throttle_enabled(&fst->cfg)) {
+        throttle_timers_destroy(&fst->tt);
+    }
+}
diff --git a/fsdev/qemu-fsdev-throttle.h b/fsdev/qemu-fsdev-throttle.h
new file mode 100644
index 0000000000..e418643ccb
--- /dev/null
+++ b/fsdev/qemu-fsdev-throttle.h
@@ -0,0 +1,39 @@
+/*
+ * Fsdev Throttle
+ *
+ * Copyright (C) 2016 Huawei Technologies Duesseldorf GmbH
+ *
+ * Author: Pradeep Jagadeesh <pradeep.jagadeesh@huawei.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version.
+ *
+ * See the COPYING file in the top-level directory for details.
+ *
+ */
+
+#ifndef _FSDEV_THROTTLE_H
+#define _FSDEV_THROTTLE_H
+
+#include "block/aio.h"
+#include "qemu/main-loop.h"
+#include "qemu/coroutine.h"
+#include "qapi/error.h"
+#include "qemu/throttle.h"
+
+typedef struct FsThrottle {
+    ThrottleState ts;
+    ThrottleTimers tt;
+    ThrottleConfig cfg;
+    CoQueue      throttled_reqs[2];
+} FsThrottle;
+
+void fsdev_throttle_parse_opts(QemuOpts *, FsThrottle *, Error **);
+
+void fsdev_throttle_init(FsThrottle *);
+
+void coroutine_fn fsdev_co_throttle_request(FsThrottle *, bool ,
+                                            struct iovec *, int);
+
+void fsdev_throttle_cleanup(FsThrottle *);
+#endif /* _FSDEV_THROTTLE_H */
diff --git a/hmp-commands-info.hx b/hmp-commands-info.hx
index b0f35e6829..a53f105c52 100644
--- a/hmp-commands-info.hx
+++ b/hmp-commands-info.hx
@@ -802,6 +802,20 @@ Show information about hotpluggable CPUs
 ETEXI
 
 STEXI
+@item info vm-generation-id
+@findex vm-generation-id
+Show Virtual Machine Generation ID
+ETEXI
+
+    {
+        .name       = "vm-generation-id",
+        .args_type  = "",
+        .params     = "",
+        .help       = "Show Virtual Machine Generation ID",
+        .cmd = hmp_info_vm_generation_id,
+    },
+
+STEXI
 @end table
 ETEXI
 
diff --git a/hmp.c b/hmp.c
index 83e287e0a4..261843f7a2 100644
--- a/hmp.c
+++ b/hmp.c
@@ -2045,13 +2045,17 @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict)
     const char* device = qdict_get_str(qdict, "device");
     const char* command = qdict_get_str(qdict, "command");
     Error *err = NULL;
+    int ret;
 
     blk = blk_by_name(device);
     if (!blk) {
         BlockDriverState *bs = bdrv_lookup_bs(NULL, device, &err);
         if (bs) {
-            blk = local_blk = blk_new();
-            blk_insert_bs(blk, bs);
+            blk = local_blk = blk_new(0, BLK_PERM_ALL);
+            ret = blk_insert_bs(blk, bs, &err);
+            if (ret < 0) {
+                goto fail;
+            }
         } else {
             goto fail;
         }
@@ -2060,6 +2064,31 @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict)
     aio_context = blk_get_aio_context(blk);
     aio_context_acquire(aio_context);
 
+    /*
+     * Notably absent: Proper permission management. This is sad, but it seems
+     * almost impossible to achieve without changing the semantics and thereby
+     * limiting the use cases of the qemu-io HMP command.
+     *
+     * In an ideal world we would unconditionally create a new BlockBackend for
+     * qemuio_command(), but we have commands like 'reopen' and want them to
+     * take effect on the exact BlockBackend whose name the user passed instead
+     * of just on a temporary copy of it.
+     *
+     * Another problem is that deleting the temporary BlockBackend involves
+     * draining all requests on it first, but some qemu-iotests cases want to
+     * issue multiple aio_read/write requests and expect them to complete in
+     * the background while the monitor has already returned.
+     *
+     * This is also what prevents us from saving the original permissions and
+     * restoring them later: We can't revoke permissions until all requests
+     * have completed, and we don't know when that is nor can we really let
+     * anything else run before we have revoken them to avoid race conditions.
+     *
+     * What happens now is that command() in qemu-io-cmds.c can extend the
+     * permissions if necessary for the qemu-io command. And they simply stay
+     * extended, possibly resulting in a read-only guest device keeping write
+     * permissions. Ugly, but it appears to be the lesser evil.
+     */
     qemuio_command(blk, command);
 
     aio_context_release(aio_context);
@@ -2576,3 +2605,12 @@ void hmp_hotpluggable_cpus(Monitor *mon, const QDict *qdict)
 
     qapi_free_HotpluggableCPUList(saved);
 }
+
+void hmp_info_vm_generation_id(Monitor *mon, const QDict *qdict)
+{
+    GuidInfo *info = qmp_query_vm_generation_id(NULL);
+    if (info) {
+        monitor_printf(mon, "%s\n", info->guid);
+    }
+    qapi_free_GuidInfo(info);
+}
diff --git a/hmp.h b/hmp.h
index 05daf7cd5c..799fd371fa 100644
--- a/hmp.h
+++ b/hmp.h
@@ -137,5 +137,6 @@ void hmp_rocker_of_dpa_flows(Monitor *mon, const QDict *qdict);
 void hmp_rocker_of_dpa_groups(Monitor *mon, const QDict *qdict);
 void hmp_info_dump(Monitor *mon, const QDict *qdict);
 void hmp_hotpluggable_cpus(Monitor *mon, const QDict *qdict);
+void hmp_info_vm_generation_id(Monitor *mon, const QDict *qdict);
 
 #endif
diff --git a/hw/9pfs/9p-local.c b/hw/9pfs/9p-local.c
index 7de07e1ba6..f22a3c3654 100644
--- a/hw/9pfs/9p-local.c
+++ b/hw/9pfs/9p-local.c
@@ -13,7 +13,9 @@
 
 #include "qemu/osdep.h"
 #include "9p.h"
+#include "9p-local.h"
 #include "9p-xattr.h"
+#include "9p-util.h"
 #include "fsdev/qemu-fsdev.h"   /* local_ops */
 #include <arpa/inet.h>
 #include <pwd.h>
@@ -43,40 +45,62 @@
 #define BTRFS_SUPER_MAGIC 0x9123683E
 #endif
 
-#define VIRTFS_META_DIR ".virtfs_metadata"
+typedef struct {
+    int mountfd;
+} LocalData;
 
-static char *local_mapped_attr_path(FsContext *ctx, const char *path)
+int local_open_nofollow(FsContext *fs_ctx, const char *path, int flags,
+                        mode_t mode)
 {
-    int dirlen;
-    const char *name = strrchr(path, '/');
-    if (name) {
-        dirlen = name - path;
-        ++name;
-    } else {
-        name = path;
-        dirlen = 0;
+    LocalData *data = fs_ctx->private;
+
+    /* All paths are relative to the path data->mountfd points to */
+    while (*path == '/') {
+        path++;
     }
-    return g_strdup_printf("%s/%.*s/%s/%s", ctx->fs_root,
-                           dirlen, path, VIRTFS_META_DIR, name);
+
+    return relative_openat_nofollow(data->mountfd, path, flags, mode);
+}
+
+int local_opendir_nofollow(FsContext *fs_ctx, const char *path)
+{
+    return local_open_nofollow(fs_ctx, path, O_DIRECTORY | O_RDONLY, 0);
+}
+
+static void renameat_preserve_errno(int odirfd, const char *opath, int ndirfd,
+                                    const char *npath)
+{
+    int serrno = errno;
+    renameat(odirfd, opath, ndirfd, npath);
+    errno = serrno;
 }
 
-static FILE *local_fopen(const char *path, const char *mode)
+static void unlinkat_preserve_errno(int dirfd, const char *path, int flags)
+{
+    int serrno = errno;
+    unlinkat(dirfd, path, flags);
+    errno = serrno;
+}
+
+#define VIRTFS_META_DIR ".virtfs_metadata"
+
+static FILE *local_fopenat(int dirfd, const char *name, const char *mode)
 {
     int fd, o_mode = 0;
     FILE *fp;
-    int flags = O_NOFOLLOW;
+    int flags;
     /*
      * only supports two modes
      */
     if (mode[0] == 'r') {
-        flags |= O_RDONLY;
+        flags = O_RDONLY;
     } else if (mode[0] == 'w') {
-        flags |= O_WRONLY | O_TRUNC | O_CREAT;
+        flags = O_WRONLY | O_TRUNC | O_CREAT;
         o_mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH;
     } else {
         return NULL;
     }
-    fd = open(path, flags, o_mode);
+    fd = openat_file(dirfd, name, flags, o_mode);
     if (fd == -1) {
         return NULL;
     }
@@ -88,16 +112,20 @@ static FILE *local_fopen(const char *path, const char *mode)
 }
 
 #define ATTR_MAX 100
-static void local_mapped_file_attr(FsContext *ctx, const char *path,
+static void local_mapped_file_attr(int dirfd, const char *name,
                                    struct stat *stbuf)
 {
     FILE *fp;
     char buf[ATTR_MAX];
-    char *attr_path;
+    int map_dirfd;
 
-    attr_path = local_mapped_attr_path(ctx, path);
-    fp = local_fopen(attr_path, "r");
-    g_free(attr_path);
+    map_dirfd = openat_dir(dirfd, VIRTFS_META_DIR);
+    if (map_dirfd == -1) {
+        return;
+    }
+
+    fp = local_fopenat(map_dirfd, name, "r");
+    close_preserve_errno(map_dirfd);
     if (!fp) {
         return;
     }
@@ -119,12 +147,17 @@ static void local_mapped_file_attr(FsContext *ctx, const char *path,
 
 static int local_lstat(FsContext *fs_ctx, V9fsPath *fs_path, struct stat *stbuf)
 {
-    int err;
-    char *buffer;
-    char *path = fs_path->data;
+    int err = -1;
+    char *dirpath = g_path_get_dirname(fs_path->data);
+    char *name = g_path_get_basename(fs_path->data);
+    int dirfd;
 
-    buffer = rpath(fs_ctx, path);
-    err =  lstat(buffer, stbuf);
+    dirfd = local_opendir_nofollow(fs_ctx, dirpath);
+    if (dirfd == -1) {
+        goto out;
+    }
+
+    err = fstatat(dirfd, name, stbuf, AT_SYMLINK_NOFOLLOW);
     if (err) {
         goto err_out;
     }
@@ -134,87 +167,83 @@ static int local_lstat(FsContext *fs_ctx, V9fsPath *fs_path, struct stat *stbuf)
         gid_t tmp_gid;
         mode_t tmp_mode;
         dev_t tmp_dev;
-        if (getxattr(buffer, "user.virtfs.uid", &tmp_uid, sizeof(uid_t)) > 0) {
+
+        if (fgetxattrat_nofollow(dirfd, name, "user.virtfs.uid", &tmp_uid,
+                                 sizeof(uid_t)) > 0) {
             stbuf->st_uid = le32_to_cpu(tmp_uid);
         }
-        if (getxattr(buffer, "user.virtfs.gid", &tmp_gid, sizeof(gid_t)) > 0) {
+        if (fgetxattrat_nofollow(dirfd, name, "user.virtfs.gid", &tmp_gid,
+                                 sizeof(gid_t)) > 0) {
             stbuf->st_gid = le32_to_cpu(tmp_gid);
         }
-        if (getxattr(buffer, "user.virtfs.mode",
-                    &tmp_mode, sizeof(mode_t)) > 0) {
+        if (fgetxattrat_nofollow(dirfd, name, "user.virtfs.mode", &tmp_mode,
+                                 sizeof(mode_t)) > 0) {
             stbuf->st_mode = le32_to_cpu(tmp_mode);
         }
-        if (getxattr(buffer, "user.virtfs.rdev", &tmp_dev, sizeof(dev_t)) > 0) {
+        if (fgetxattrat_nofollow(dirfd, name, "user.virtfs.rdev", &tmp_dev,
+                                 sizeof(dev_t)) > 0) {
             stbuf->st_rdev = le64_to_cpu(tmp_dev);
         }
     } else if (fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) {
-        local_mapped_file_attr(fs_ctx, path, stbuf);
+        local_mapped_file_attr(dirfd, name, stbuf);
     }
 
 err_out:
-    g_free(buffer);
-    return err;
-}
-
-static int local_create_mapped_attr_dir(FsContext *ctx, const char *path)
-{
-    int err;
-    char *attr_dir;
-    char *tmp_path = g_strdup(path);
-
-    attr_dir = g_strdup_printf("%s/%s/%s",
-             ctx->fs_root, dirname(tmp_path), VIRTFS_META_DIR);
-
-    err = mkdir(attr_dir, 0700);
-    if (err < 0 && errno == EEXIST) {
-        err = 0;
-    }
-    g_free(attr_dir);
-    g_free(tmp_path);
+    close_preserve_errno(dirfd);
+out:
+    g_free(name);
+    g_free(dirpath);
     return err;
 }
 
-static int local_set_mapped_file_attr(FsContext *ctx,
-                                      const char *path, FsCred *credp)
+static int local_set_mapped_file_attrat(int dirfd, const char *name,
+                                        FsCred *credp)
 {
     FILE *fp;
-    int ret = 0;
+    int ret;
     char buf[ATTR_MAX];
-    char *attr_path;
     int uid = -1, gid = -1, mode = -1, rdev = -1;
+    int map_dirfd;
+
+    ret = mkdirat(dirfd, VIRTFS_META_DIR, 0700);
+    if (ret < 0 && errno != EEXIST) {
+        return -1;
+    }
 
-    attr_path = local_mapped_attr_path(ctx, path);
-    fp = local_fopen(attr_path, "r");
+    map_dirfd = openat_dir(dirfd, VIRTFS_META_DIR);
+    if (map_dirfd == -1) {
+        return -1;
+    }
+
+    fp = local_fopenat(map_dirfd, name, "r");
     if (!fp) {
-        goto create_map_file;
+        if (errno == ENOENT) {
+            goto update_map_file;
+        } else {
+            close_preserve_errno(map_dirfd);
+            return -1;
+        }
     }
     memset(buf, 0, ATTR_MAX);
     while (fgets(buf, ATTR_MAX, fp)) {
         if (!strncmp(buf, "virtfs.uid", 10)) {
-            uid = atoi(buf+11);
+            uid = atoi(buf + 11);
         } else if (!strncmp(buf, "virtfs.gid", 10)) {
-            gid = atoi(buf+11);
+            gid = atoi(buf + 11);
         } else if (!strncmp(buf, "virtfs.mode", 11)) {
-            mode = atoi(buf+12);
+            mode = atoi(buf + 12);
         } else if (!strncmp(buf, "virtfs.rdev", 11)) {
-            rdev = atoi(buf+12);
+            rdev = atoi(buf + 12);
         }
         memset(buf, 0, ATTR_MAX);
     }
     fclose(fp);
-    goto update_map_file;
-
-create_map_file:
-    ret = local_create_mapped_attr_dir(ctx, path);
-    if (ret < 0) {
-        goto err_out;
-    }
 
 update_map_file:
-    fp = local_fopen(attr_path, "w");
+    fp = local_fopenat(map_dirfd, name, "w");
+    close_preserve_errno(map_dirfd);
     if (!fp) {
-        ret = -1;
-        goto err_out;
+        return -1;
     }
 
     if (credp->fc_uid != -1) {
@@ -230,7 +259,6 @@ update_map_file:
         rdev = credp->fc_rdev;
     }
 
-
     if (uid != -1) {
         fprintf(fp, "virtfs.uid=%d\n", uid);
     }
@@ -245,39 +273,71 @@ update_map_file:
     }
     fclose(fp);
 
-err_out:
-    g_free(attr_path);
+    return 0;
+}
+
+static int fchmodat_nofollow(int dirfd, const char *name, mode_t mode)
+{
+    int fd, ret;
+
+    /* FIXME: this should be handled with fchmodat(AT_SYMLINK_NOFOLLOW).
+     * Unfortunately, the linux kernel doesn't implement it yet. As an
+     * alternative, let's open the file and use fchmod() instead. This
+     * may fail depending on the permissions of the file, but it is the
+     * best we can do to avoid TOCTTOU. We first try to open read-only
+     * in case name points to a directory. If that fails, we try write-only
+     * in case name doesn't point to a directory.
+     */
+    fd = openat_file(dirfd, name, O_RDONLY, 0);
+    if (fd == -1) {
+        /* In case the file is writable-only and isn't a directory. */
+        if (errno == EACCES) {
+            fd = openat_file(dirfd, name, O_WRONLY, 0);
+        }
+        if (fd == -1 && errno == EISDIR) {
+            errno = EACCES;
+        }
+    }
+    if (fd == -1) {
+        return -1;
+    }
+    ret = fchmod(fd, mode);
+    close_preserve_errno(fd);
     return ret;
 }
 
-static int local_set_xattr(const char *path, FsCred *credp)
+static int local_set_xattrat(int dirfd, const char *path, FsCred *credp)
 {
     int err;
 
     if (credp->fc_uid != -1) {
         uint32_t tmp_uid = cpu_to_le32(credp->fc_uid);
-        err = setxattr(path, "user.virtfs.uid", &tmp_uid, sizeof(uid_t), 0);
+        err = fsetxattrat_nofollow(dirfd, path, "user.virtfs.uid", &tmp_uid,
+                                   sizeof(uid_t), 0);
         if (err) {
             return err;
         }
     }
     if (credp->fc_gid != -1) {
         uint32_t tmp_gid = cpu_to_le32(credp->fc_gid);
-        err = setxattr(path, "user.virtfs.gid", &tmp_gid, sizeof(gid_t), 0);
+        err = fsetxattrat_nofollow(dirfd, path, "user.virtfs.gid", &tmp_gid,
+                                   sizeof(gid_t), 0);
         if (err) {
             return err;
         }
     }
     if (credp->fc_mode != -1) {
         uint32_t tmp_mode = cpu_to_le32(credp->fc_mode);
-        err = setxattr(path, "user.virtfs.mode", &tmp_mode, sizeof(mode_t), 0);
+        err = fsetxattrat_nofollow(dirfd, path, "user.virtfs.mode", &tmp_mode,
+                                   sizeof(mode_t), 0);
         if (err) {
             return err;
         }
     }
     if (credp->fc_rdev != -1) {
         uint64_t tmp_rdev = cpu_to_le64(credp->fc_rdev);
-        err = setxattr(path, "user.virtfs.rdev", &tmp_rdev, sizeof(dev_t), 0);
+        err = fsetxattrat_nofollow(dirfd, path, "user.virtfs.rdev", &tmp_rdev,
+                                   sizeof(dev_t), 0);
         if (err) {
             return err;
         }
@@ -285,58 +345,56 @@ static int local_set_xattr(const char *path, FsCred *credp)
     return 0;
 }
 
-static int local_post_create_passthrough(FsContext *fs_ctx, const char *path,
-                                         FsCred *credp)
+static int local_set_cred_passthrough(FsContext *fs_ctx, int dirfd,
+                                      const char *name, FsCred *credp)
 {
-    char *buffer;
-
-    buffer = rpath(fs_ctx, path);
-    if (lchown(buffer, credp->fc_uid, credp->fc_gid) < 0) {
+    if (fchownat(dirfd, name, credp->fc_uid, credp->fc_gid,
+                 AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH) < 0) {
         /*
          * If we fail to change ownership and if we are
          * using security model none. Ignore the error
          */
         if ((fs_ctx->export_flags & V9FS_SEC_MASK) != V9FS_SM_NONE) {
-            goto err;
+            return -1;
         }
     }
 
-    if (chmod(buffer, credp->fc_mode & 07777) < 0) {
-        goto err;
-    }
-
-    g_free(buffer);
-    return 0;
-err:
-    g_free(buffer);
-    return -1;
+    return fchmodat_nofollow(dirfd, name, credp->fc_mode & 07777);
 }
 
 static ssize_t local_readlink(FsContext *fs_ctx, V9fsPath *fs_path,
                               char *buf, size_t bufsz)
 {
     ssize_t tsize = -1;
-    char *buffer;
-    char *path = fs_path->data;
 
     if ((fs_ctx->export_flags & V9FS_SM_MAPPED) ||
         (fs_ctx->export_flags & V9FS_SM_MAPPED_FILE)) {
         int fd;
-        buffer = rpath(fs_ctx, path);
-        fd = open(buffer, O_RDONLY | O_NOFOLLOW);
-        g_free(buffer);
+
+        fd = local_open_nofollow(fs_ctx, fs_path->data, O_RDONLY, 0);
         if (fd == -1) {
             return -1;
         }
         do {
             tsize = read(fd, (void *)buf, bufsz);
         } while (tsize == -1 && errno == EINTR);
-        close(fd);
+        close_preserve_errno(fd);
     } else if ((fs_ctx->export_flags & V9FS_SM_PASSTHROUGH) ||
                (fs_ctx->export_flags & V9FS_SM_NONE)) {
-        buffer = rpath(fs_ctx, path);
-        tsize = readlink(buffer, buf, bufsz);
-        g_free(buffer);
+        char *dirpath = g_path_get_dirname(fs_path->data);
+        char *name = g_path_get_basename(fs_path->data);
+        int dirfd;
+
+        dirfd = local_opendir_nofollow(fs_ctx, dirpath);
+        if (dirfd == -1) {
+            goto out;
+        }
+
+        tsize = readlinkat(dirfd, name, buf, bufsz);
+        close_preserve_errno(dirfd);
+    out:
+        g_free(name);
+        g_free(dirpath);
     }
     return tsize;
 }
@@ -354,27 +412,32 @@ static int local_closedir(FsContext *ctx, V9fsFidOpenState *fs)
 static int local_open(FsContext *ctx, V9fsPath *fs_path,
                       int flags, V9fsFidOpenState *fs)
 {
-    char *buffer;
-    char *path = fs_path->data;
+    int fd;
 
-    buffer = rpath(ctx, path);
-    fs->fd = open(buffer, flags | O_NOFOLLOW);
-    g_free(buffer);
+    fd = local_open_nofollow(ctx, fs_path->data, flags, 0);
+    if (fd == -1) {
+        return -1;
+    }
+    fs->fd = fd;
     return fs->fd;
 }
 
 static int local_opendir(FsContext *ctx,
                          V9fsPath *fs_path, V9fsFidOpenState *fs)
 {
-    char *buffer;
-    char *path = fs_path->data;
+    int dirfd;
+    DIR *stream;
+
+    dirfd = local_opendir_nofollow(ctx, fs_path->data);
+    if (dirfd == -1) {
+        return -1;
+    }
 
-    buffer = rpath(ctx, path);
-    fs->dir.stream = opendir(buffer);
-    g_free(buffer);
-    if (!fs->dir.stream) {
+    stream = fdopendir(dirfd);
+    if (!stream) {
         return -1;
     }
+    fs->dir.stream = stream;
     return 0;
 }
 
@@ -463,145 +526,122 @@ static ssize_t local_pwritev(FsContext *ctx, V9fsFidOpenState *fs,
 
 static int local_chmod(FsContext *fs_ctx, V9fsPath *fs_path, FsCred *credp)
 {
-    char *buffer;
+    char *dirpath = g_path_get_dirname(fs_path->data);
+    char *name = g_path_get_basename(fs_path->data);
     int ret = -1;
-    char *path = fs_path->data;
+    int dirfd;
+
+    dirfd = local_opendir_nofollow(fs_ctx, dirpath);
+    if (dirfd == -1) {
+        goto out;
+    }
 
     if (fs_ctx->export_flags & V9FS_SM_MAPPED) {
-        buffer = rpath(fs_ctx, path);
-        ret = local_set_xattr(buffer, credp);
-        g_free(buffer);
+        ret = local_set_xattrat(dirfd, name, credp);
     } else if (fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) {
-        return local_set_mapped_file_attr(fs_ctx, path, credp);
-    } else if ((fs_ctx->export_flags & V9FS_SM_PASSTHROUGH) ||
-               (fs_ctx->export_flags & V9FS_SM_NONE)) {
-        buffer = rpath(fs_ctx, path);
-        ret = chmod(buffer, credp->fc_mode);
-        g_free(buffer);
+        ret = local_set_mapped_file_attrat(dirfd, name, credp);
+    } else if (fs_ctx->export_flags & V9FS_SM_PASSTHROUGH ||
+               fs_ctx->export_flags & V9FS_SM_NONE) {
+        ret = fchmodat_nofollow(dirfd, name, credp->fc_mode);
     }
+    close_preserve_errno(dirfd);
+
+out:
+    g_free(dirpath);
+    g_free(name);
     return ret;
 }
 
 static int local_mknod(FsContext *fs_ctx, V9fsPath *dir_path,
                        const char *name, FsCred *credp)
 {
-    char *path;
     int err = -1;
-    int serrno = 0;
-    V9fsString fullname;
-    char *buffer = NULL;
+    int dirfd;
 
-    v9fs_string_init(&fullname);
-    v9fs_string_sprintf(&fullname, "%s/%s", dir_path->data, name);
-    path = fullname.data;
+    dirfd = local_opendir_nofollow(fs_ctx, dir_path->data);
+    if (dirfd == -1) {
+        return -1;
+    }
 
-    /* Determine the security model */
-    if (fs_ctx->export_flags & V9FS_SM_MAPPED) {
-        buffer = rpath(fs_ctx, path);
-        err = mknod(buffer, SM_LOCAL_MODE_BITS|S_IFREG, 0);
+    if (fs_ctx->export_flags & V9FS_SM_MAPPED ||
+        fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) {
+        err = mknodat(dirfd, name, SM_LOCAL_MODE_BITS | S_IFREG, 0);
         if (err == -1) {
             goto out;
         }
-        err = local_set_xattr(buffer, credp);
-        if (err == -1) {
-            serrno = errno;
-            goto err_end;
-        }
-    } else if (fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) {
 
-        buffer = rpath(fs_ctx, path);
-        err = mknod(buffer, SM_LOCAL_MODE_BITS|S_IFREG, 0);
-        if (err == -1) {
-            goto out;
+        if (fs_ctx->export_flags & V9FS_SM_MAPPED) {
+            err = local_set_xattrat(dirfd, name, credp);
+        } else {
+            err = local_set_mapped_file_attrat(dirfd, name, credp);
         }
-        err = local_set_mapped_file_attr(fs_ctx, path, credp);
         if (err == -1) {
-            serrno = errno;
             goto err_end;
         }
-    } else if ((fs_ctx->export_flags & V9FS_SM_PASSTHROUGH) ||
-               (fs_ctx->export_flags & V9FS_SM_NONE)) {
-        buffer = rpath(fs_ctx, path);
-        err = mknod(buffer, credp->fc_mode, credp->fc_rdev);
+    } else if (fs_ctx->export_flags & V9FS_SM_PASSTHROUGH ||
+               fs_ctx->export_flags & V9FS_SM_NONE) {
+        err = mknodat(dirfd, name, credp->fc_mode, credp->fc_rdev);
         if (err == -1) {
             goto out;
         }
-        err = local_post_create_passthrough(fs_ctx, path, credp);
+        err = local_set_cred_passthrough(fs_ctx, dirfd, name, credp);
         if (err == -1) {
-            serrno = errno;
             goto err_end;
         }
     }
     goto out;
 
 err_end:
-    remove(buffer);
-    errno = serrno;
+    unlinkat_preserve_errno(dirfd, name, 0);
 out:
-    g_free(buffer);
-    v9fs_string_free(&fullname);
+    close_preserve_errno(dirfd);
     return err;
 }
 
 static int local_mkdir(FsContext *fs_ctx, V9fsPath *dir_path,
                        const char *name, FsCred *credp)
 {
-    char *path;
     int err = -1;
-    int serrno = 0;
-    V9fsString fullname;
-    char *buffer = NULL;
+    int dirfd;
 
-    v9fs_string_init(&fullname);
-    v9fs_string_sprintf(&fullname, "%s/%s", dir_path->data, name);
-    path = fullname.data;
+    dirfd = local_opendir_nofollow(fs_ctx, dir_path->data);
+    if (dirfd == -1) {
+        return -1;
+    }
 
-    /* Determine the security model */
-    if (fs_ctx->export_flags & V9FS_SM_MAPPED) {
-        buffer = rpath(fs_ctx, path);
-        err = mkdir(buffer, SM_LOCAL_DIR_MODE_BITS);
+    if (fs_ctx->export_flags & V9FS_SM_MAPPED ||
+        fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) {
+        err = mkdirat(dirfd, name, SM_LOCAL_DIR_MODE_BITS);
         if (err == -1) {
             goto out;
         }
-        credp->fc_mode = credp->fc_mode|S_IFDIR;
-        err = local_set_xattr(buffer, credp);
-        if (err == -1) {
-            serrno = errno;
-            goto err_end;
-        }
-    } else if (fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) {
-        buffer = rpath(fs_ctx, path);
-        err = mkdir(buffer, SM_LOCAL_DIR_MODE_BITS);
-        if (err == -1) {
-            goto out;
+        credp->fc_mode = credp->fc_mode | S_IFDIR;
+
+        if (fs_ctx->export_flags & V9FS_SM_MAPPED) {
+            err = local_set_xattrat(dirfd, name, credp);
+        } else {
+            err = local_set_mapped_file_attrat(dirfd, name, credp);
         }
-        credp->fc_mode = credp->fc_mode|S_IFDIR;
-        err = local_set_mapped_file_attr(fs_ctx, path, credp);
         if (err == -1) {
-            serrno = errno;
             goto err_end;
         }
-    } else if ((fs_ctx->export_flags & V9FS_SM_PASSTHROUGH) ||
-               (fs_ctx->export_flags & V9FS_SM_NONE)) {
-        buffer = rpath(fs_ctx, path);
-        err = mkdir(buffer, credp->fc_mode);
+    } else if (fs_ctx->export_flags & V9FS_SM_PASSTHROUGH ||
+               fs_ctx->export_flags & V9FS_SM_NONE) {
+        err = mkdirat(dirfd, name, credp->fc_mode);
         if (err == -1) {
             goto out;
         }
-        err = local_post_create_passthrough(fs_ctx, path, credp);
+        err = local_set_cred_passthrough(fs_ctx, dirfd, name, credp);
         if (err == -1) {
-            serrno = errno;
             goto err_end;
         }
     }
     goto out;
 
 err_end:
-    remove(buffer);
-    errno = serrno;
+    unlinkat_preserve_errno(dirfd, name, AT_REMOVEDIR);
 out:
-    g_free(buffer);
-    v9fs_string_free(&fullname);
+    close_preserve_errno(dirfd);
     return err;
 }
 
@@ -649,62 +689,45 @@ static int local_fstat(FsContext *fs_ctx, int fid_type,
 static int local_open2(FsContext *fs_ctx, V9fsPath *dir_path, const char *name,
                        int flags, FsCred *credp, V9fsFidOpenState *fs)
 {
-    char *path;
     int fd = -1;
     int err = -1;
-    int serrno = 0;
-    V9fsString fullname;
-    char *buffer = NULL;
+    int dirfd;
 
     /*
      * Mark all the open to not follow symlinks
      */
     flags |= O_NOFOLLOW;
 
-    v9fs_string_init(&fullname);
-    v9fs_string_sprintf(&fullname, "%s/%s", dir_path->data, name);
-    path = fullname.data;
+    dirfd = local_opendir_nofollow(fs_ctx, dir_path->data);
+    if (dirfd == -1) {
+        return -1;
+    }
 
     /* Determine the security model */
-    if (fs_ctx->export_flags & V9FS_SM_MAPPED) {
-        buffer = rpath(fs_ctx, path);
-        fd = open(buffer, flags, SM_LOCAL_MODE_BITS);
+    if (fs_ctx->export_flags & V9FS_SM_MAPPED ||
+        fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) {
+        fd = openat_file(dirfd, name, flags, SM_LOCAL_MODE_BITS);
         if (fd == -1) {
-            err = fd;
             goto out;
         }
         credp->fc_mode = credp->fc_mode|S_IFREG;
-        /* Set cleint credentials in xattr */
-        err = local_set_xattr(buffer, credp);
-        if (err == -1) {
-            serrno = errno;
-            goto err_end;
+        if (fs_ctx->export_flags & V9FS_SM_MAPPED) {
+            /* Set cleint credentials in xattr */
+            err = local_set_xattrat(dirfd, name, credp);
+        } else {
+            err = local_set_mapped_file_attrat(dirfd, name, credp);
         }
-    } else if (fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) {
-        buffer = rpath(fs_ctx, path);
-        fd = open(buffer, flags, SM_LOCAL_MODE_BITS);
-        if (fd == -1) {
-            err = fd;
-            goto out;
-        }
-        credp->fc_mode = credp->fc_mode|S_IFREG;
-        /* Set client credentials in .virtfs_metadata directory files */
-        err = local_set_mapped_file_attr(fs_ctx, path, credp);
         if (err == -1) {
-            serrno = errno;
             goto err_end;
         }
     } else if ((fs_ctx->export_flags & V9FS_SM_PASSTHROUGH) ||
                (fs_ctx->export_flags & V9FS_SM_NONE)) {
-        buffer = rpath(fs_ctx, path);
-        fd = open(buffer, flags, credp->fc_mode);
+        fd = openat_file(dirfd, name, flags, credp->fc_mode);
         if (fd == -1) {
-            err = fd;
             goto out;
         }
-        err = local_post_create_passthrough(fs_ctx, path, credp);
+        err = local_set_cred_passthrough(fs_ctx, dirfd, name, credp);
         if (err == -1) {
-            serrno = errno;
             goto err_end;
         }
     }
@@ -713,12 +736,11 @@ static int local_open2(FsContext *fs_ctx, V9fsPath *dir_path, const char *name,
     goto out;
 
 err_end:
-    close(fd);
-    remove(buffer);
-    errno = serrno;
+    unlinkat_preserve_errno(dirfd, name,
+                            flags & O_DIRECTORY ? AT_REMOVEDIR : 0);
+    close_preserve_errno(fd);
 out:
-    g_free(buffer);
-    v9fs_string_free(&fullname);
+    close_preserve_errno(dirfd);
     return err;
 }
 
@@ -727,23 +749,22 @@ static int local_symlink(FsContext *fs_ctx, const char *oldpath,
                          V9fsPath *dir_path, const char *name, FsCred *credp)
 {
     int err = -1;
-    int serrno = 0;
-    char *newpath;
-    V9fsString fullname;
-    char *buffer = NULL;
+    int dirfd;
 
-    v9fs_string_init(&fullname);
-    v9fs_string_sprintf(&fullname, "%s/%s", dir_path->data, name);
-    newpath = fullname.data;
+    dirfd = local_opendir_nofollow(fs_ctx, dir_path->data);
+    if (dirfd == -1) {
+        return -1;
+    }
 
     /* Determine the security model */
-    if (fs_ctx->export_flags & V9FS_SM_MAPPED) {
+    if (fs_ctx->export_flags & V9FS_SM_MAPPED ||
+        fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) {
         int fd;
         ssize_t oldpath_size, write_size;
-        buffer = rpath(fs_ctx, newpath);
-        fd = open(buffer, O_CREAT|O_EXCL|O_RDWR|O_NOFOLLOW, SM_LOCAL_MODE_BITS);
+
+        fd = openat_file(dirfd, name, O_CREAT | O_EXCL | O_RDWR,
+                         SM_LOCAL_MODE_BITS);
         if (fd == -1) {
-            err = fd;
             goto out;
         }
         /* Write the oldpath (target) to the file. */
@@ -751,218 +772,204 @@ static int local_symlink(FsContext *fs_ctx, const char *oldpath,
         do {
             write_size = write(fd, (void *)oldpath, oldpath_size);
         } while (write_size == -1 && errno == EINTR);
+        close_preserve_errno(fd);
 
         if (write_size != oldpath_size) {
-            serrno = errno;
-            close(fd);
-            err = -1;
             goto err_end;
         }
-        close(fd);
         /* Set cleint credentials in symlink's xattr */
-        credp->fc_mode = credp->fc_mode|S_IFLNK;
-        err = local_set_xattr(buffer, credp);
-        if (err == -1) {
-            serrno = errno;
-            goto err_end;
-        }
-    } else if (fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) {
-        int fd;
-        ssize_t oldpath_size, write_size;
-        buffer = rpath(fs_ctx, newpath);
-        fd = open(buffer, O_CREAT|O_EXCL|O_RDWR|O_NOFOLLOW, SM_LOCAL_MODE_BITS);
-        if (fd == -1) {
-            err = fd;
-            goto out;
-        }
-        /* Write the oldpath (target) to the file. */
-        oldpath_size = strlen(oldpath);
-        do {
-            write_size = write(fd, (void *)oldpath, oldpath_size);
-        } while (write_size == -1 && errno == EINTR);
+        credp->fc_mode = credp->fc_mode | S_IFLNK;
 
-        if (write_size != oldpath_size) {
-            serrno = errno;
-            close(fd);
-            err = -1;
-            goto err_end;
+        if (fs_ctx->export_flags & V9FS_SM_MAPPED) {
+            err = local_set_xattrat(dirfd, name, credp);
+        } else {
+            err = local_set_mapped_file_attrat(dirfd, name, credp);
         }
-        close(fd);
-        /* Set cleint credentials in symlink's xattr */
-        credp->fc_mode = credp->fc_mode|S_IFLNK;
-        err = local_set_mapped_file_attr(fs_ctx, newpath, credp);
         if (err == -1) {
-            serrno = errno;
             goto err_end;
         }
-    } else if ((fs_ctx->export_flags & V9FS_SM_PASSTHROUGH) ||
-               (fs_ctx->export_flags & V9FS_SM_NONE)) {
-        buffer = rpath(fs_ctx, newpath);
-        err = symlink(oldpath, buffer);
+    } else if (fs_ctx->export_flags & V9FS_SM_PASSTHROUGH ||
+               fs_ctx->export_flags & V9FS_SM_NONE) {
+        err = symlinkat(oldpath, dirfd, name);
         if (err) {
             goto out;
         }
-        err = lchown(buffer, credp->fc_uid, credp->fc_gid);
+        err = fchownat(dirfd, name, credp->fc_uid, credp->fc_gid,
+                       AT_SYMLINK_NOFOLLOW);
         if (err == -1) {
             /*
              * If we fail to change ownership and if we are
              * using security model none. Ignore the error
              */
             if ((fs_ctx->export_flags & V9FS_SEC_MASK) != V9FS_SM_NONE) {
-                serrno = errno;
                 goto err_end;
-            } else
+            } else {
                 err = 0;
+            }
         }
     }
     goto out;
 
 err_end:
-    remove(buffer);
-    errno = serrno;
+    unlinkat_preserve_errno(dirfd, name, 0);
 out:
-    g_free(buffer);
-    v9fs_string_free(&fullname);
+    close_preserve_errno(dirfd);
     return err;
 }
 
 static int local_link(FsContext *ctx, V9fsPath *oldpath,
                       V9fsPath *dirpath, const char *name)
 {
-    int ret;
-    V9fsString newpath;
-    char *buffer, *buffer1;
+    char *odirpath = g_path_get_dirname(oldpath->data);
+    char *oname = g_path_get_basename(oldpath->data);
+    int ret = -1;
+    int odirfd, ndirfd;
 
-    v9fs_string_init(&newpath);
-    v9fs_string_sprintf(&newpath, "%s/%s", dirpath->data, name);
+    odirfd = local_opendir_nofollow(ctx, odirpath);
+    if (odirfd == -1) {
+        goto out;
+    }
+
+    ndirfd = local_opendir_nofollow(ctx, dirpath->data);
+    if (ndirfd == -1) {
+        close_preserve_errno(odirfd);
+        goto out;
+    }
 
-    buffer = rpath(ctx, oldpath->data);
-    buffer1 = rpath(ctx, newpath.data);
-    ret = link(buffer, buffer1);
-    g_free(buffer);
-    g_free(buffer1);
+    ret = linkat(odirfd, oname, ndirfd, name, 0);
+    if (ret < 0) {
+        goto out_close;
+    }
 
     /* now link the virtfs_metadata files */
-    if (!ret && (ctx->export_flags & V9FS_SM_MAPPED_FILE)) {
-        /* Link the .virtfs_metadata files. Create the metada directory */
-        ret = local_create_mapped_attr_dir(ctx, newpath.data);
-        if (ret < 0) {
-            goto err_out;
+    if (ctx->export_flags & V9FS_SM_MAPPED_FILE) {
+        int omap_dirfd, nmap_dirfd;
+
+        ret = mkdirat(ndirfd, VIRTFS_META_DIR, 0700);
+        if (ret < 0 && errno != EEXIST) {
+            goto err_undo_link;
+        }
+
+        omap_dirfd = openat_dir(odirfd, VIRTFS_META_DIR);
+        if (omap_dirfd == -1) {
+            goto err;
+        }
+
+        nmap_dirfd = openat_dir(ndirfd, VIRTFS_META_DIR);
+        if (nmap_dirfd == -1) {
+            close_preserve_errno(omap_dirfd);
+            goto err;
         }
-        buffer = local_mapped_attr_path(ctx, oldpath->data);
-        buffer1 = local_mapped_attr_path(ctx, newpath.data);
-        ret = link(buffer, buffer1);
-        g_free(buffer);
-        g_free(buffer1);
+
+        ret = linkat(omap_dirfd, oname, nmap_dirfd, name, 0);
+        close_preserve_errno(nmap_dirfd);
+        close_preserve_errno(omap_dirfd);
         if (ret < 0 && errno != ENOENT) {
-            goto err_out;
+            goto err_undo_link;
         }
-    }
-err_out:
-    v9fs_string_free(&newpath);
-    return ret;
-}
 
-static int local_truncate(FsContext *ctx, V9fsPath *fs_path, off_t size)
-{
-    char *buffer;
-    int ret;
-    char *path = fs_path->data;
+        ret = 0;
+    }
+    goto out_close;
 
-    buffer = rpath(ctx, path);
-    ret = truncate(buffer, size);
-    g_free(buffer);
+err:
+    ret = -1;
+err_undo_link:
+    unlinkat_preserve_errno(ndirfd, name, 0);
+out_close:
+    close_preserve_errno(ndirfd);
+    close_preserve_errno(odirfd);
+out:
+    g_free(oname);
+    g_free(odirpath);
     return ret;
 }
 
-static int local_rename(FsContext *ctx, const char *oldpath,
-                        const char *newpath)
+static int local_truncate(FsContext *ctx, V9fsPath *fs_path, off_t size)
 {
-    int err;
-    char *buffer, *buffer1;
+    int fd, ret;
 
-    if (ctx->export_flags & V9FS_SM_MAPPED_FILE) {
-        err = local_create_mapped_attr_dir(ctx, newpath);
-        if (err < 0) {
-            return err;
-        }
-        /* rename the .virtfs_metadata files */
-        buffer = local_mapped_attr_path(ctx, oldpath);
-        buffer1 = local_mapped_attr_path(ctx, newpath);
-        err = rename(buffer, buffer1);
-        g_free(buffer);
-        g_free(buffer1);
-        if (err < 0 && errno != ENOENT) {
-            return err;
-        }
+    fd = local_open_nofollow(ctx, fs_path->data, O_WRONLY, 0);
+    if (fd == -1) {
+        return -1;
     }
-
-    buffer = rpath(ctx, oldpath);
-    buffer1 = rpath(ctx, newpath);
-    err = rename(buffer, buffer1);
-    g_free(buffer);
-    g_free(buffer1);
-    return err;
+    ret = ftruncate(fd, size);
+    close_preserve_errno(fd);
+    return ret;
 }
 
 static int local_chown(FsContext *fs_ctx, V9fsPath *fs_path, FsCred *credp)
 {
-    char *buffer;
+    char *dirpath = g_path_get_dirname(fs_path->data);
+    char *name = g_path_get_basename(fs_path->data);
     int ret = -1;
-    char *path = fs_path->data;
+    int dirfd;
+
+    dirfd = local_opendir_nofollow(fs_ctx, dirpath);
+    if (dirfd == -1) {
+        goto out;
+    }
 
     if ((credp->fc_uid == -1 && credp->fc_gid == -1) ||
         (fs_ctx->export_flags & V9FS_SM_PASSTHROUGH) ||
         (fs_ctx->export_flags & V9FS_SM_NONE)) {
-        buffer = rpath(fs_ctx, path);
-        ret = lchown(buffer, credp->fc_uid, credp->fc_gid);
-        g_free(buffer);
+        ret = fchownat(dirfd, name, credp->fc_uid, credp->fc_gid,
+                       AT_SYMLINK_NOFOLLOW);
     } else if (fs_ctx->export_flags & V9FS_SM_MAPPED) {
-        buffer = rpath(fs_ctx, path);
-        ret = local_set_xattr(buffer, credp);
-        g_free(buffer);
+        ret = local_set_xattrat(dirfd, name, credp);
     } else if (fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) {
-        return local_set_mapped_file_attr(fs_ctx, path, credp);
+        ret = local_set_mapped_file_attrat(dirfd, name, credp);
     }
+
+    close_preserve_errno(dirfd);
+out:
+    g_free(name);
+    g_free(dirpath);
     return ret;
 }
 
 static int local_utimensat(FsContext *s, V9fsPath *fs_path,
                            const struct timespec *buf)
 {
-    char *buffer;
-    int ret;
-    char *path = fs_path->data;
+    char *dirpath = g_path_get_dirname(fs_path->data);
+    char *name = g_path_get_basename(fs_path->data);
+    int dirfd, ret = -1;
+
+    dirfd = local_opendir_nofollow(s, dirpath);
+    if (dirfd == -1) {
+        goto out;
+    }
 
-    buffer = rpath(s, path);
-    ret = qemu_utimens(buffer, buf);
-    g_free(buffer);
+    ret = utimensat(dirfd, name, buf, AT_SYMLINK_NOFOLLOW);
+    close_preserve_errno(dirfd);
+out:
+    g_free(dirpath);
+    g_free(name);
     return ret;
 }
 
-static int local_remove(FsContext *ctx, const char *path)
+static int local_unlinkat_common(FsContext *ctx, int dirfd, const char *name,
+                                 int flags)
 {
-    int err;
-    struct stat stbuf;
-    char *buffer;
+    int ret = -1;
 
     if (ctx->export_flags & V9FS_SM_MAPPED_FILE) {
-        buffer = rpath(ctx, path);
-        err =  lstat(buffer, &stbuf);
-        g_free(buffer);
-        if (err) {
-            goto err_out;
-        }
-        /*
-         * If directory remove .virtfs_metadata contained in the
-         * directory
-         */
-        if (S_ISDIR(stbuf.st_mode)) {
-            buffer = g_strdup_printf("%s/%s/%s", ctx->fs_root,
-                                     path, VIRTFS_META_DIR);
-            err = remove(buffer);
-            g_free(buffer);
-            if (err < 0 && errno != ENOENT) {
+        int map_dirfd;
+
+        if (flags == AT_REMOVEDIR) {
+            int fd;
+
+            fd = openat(dirfd, name, O_RDONLY | O_DIRECTORY | O_PATH);
+            if (fd == -1) {
+                goto err_out;
+            }
+            /*
+             * If directory remove .virtfs_metadata contained in the
+             * directory
+             */
+            ret = unlinkat(fd, VIRTFS_META_DIR, AT_REMOVEDIR);
+            close_preserve_errno(fd);
+            if (ret < 0 && errno != ENOENT) {
                 /*
                  * We didn't had the .virtfs_metadata file. May be file created
                  * in non-mapped mode ?. Ignore ENOENT.
@@ -972,12 +979,12 @@ static int local_remove(FsContext *ctx, const char *path)
         }
         /*
          * Now remove the name from parent directory
-         * .virtfs_metadata directory
+         * .virtfs_metadata directory.
          */
-        buffer = local_mapped_attr_path(ctx, path);
-        err = remove(buffer);
-        g_free(buffer);
-        if (err < 0 && errno != ENOENT) {
+        map_dirfd = openat_dir(dirfd, VIRTFS_META_DIR);
+        ret = unlinkat(map_dirfd, name, 0);
+        close_preserve_errno(map_dirfd);
+        if (ret < 0 && errno != ENOENT) {
             /*
              * We didn't had the .virtfs_metadata file. May be file created
              * in non-mapped mode ?. Ignore ENOENT.
@@ -986,10 +993,39 @@ static int local_remove(FsContext *ctx, const char *path)
         }
     }
 
-    buffer = rpath(ctx, path);
-    err = remove(buffer);
-    g_free(buffer);
+    ret = unlinkat(dirfd, name, flags);
+err_out:
+    return ret;
+}
+
+static int local_remove(FsContext *ctx, const char *path)
+{
+    struct stat stbuf;
+    char *dirpath = g_path_get_dirname(path);
+    char *name = g_path_get_basename(path);
+    int flags = 0;
+    int dirfd;
+    int err = -1;
+
+    dirfd = local_opendir_nofollow(ctx, dirpath);
+    if (dirfd) {
+        goto out;
+    }
+
+    if (fstatat(dirfd, path, &stbuf, AT_SYMLINK_NOFOLLOW) < 0) {
+        goto err_out;
+    }
+
+    if (S_ISDIR(stbuf.st_mode)) {
+        flags |= AT_REMOVEDIR;
+    }
+
+    err = local_unlinkat_common(ctx, dirfd, name, flags);
 err_out:
+    close_preserve_errno(dirfd);
+out:
+    g_free(name);
+    g_free(dirpath);
     return err;
 }
 
@@ -1013,13 +1049,11 @@ static int local_fsync(FsContext *ctx, int fid_type,
 
 static int local_statfs(FsContext *s, V9fsPath *fs_path, struct statfs *stbuf)
 {
-    char *buffer;
-    int ret;
-    char *path = fs_path->data;
+    int fd, ret;
 
-    buffer = rpath(s, path);
-    ret = statfs(buffer, stbuf);
-    g_free(buffer);
+    fd = local_open_nofollow(s, fs_path->data, O_RDONLY, 0);
+    ret = fstatfs(fd, stbuf);
+    close_preserve_errno(fd);
     return ret;
 }
 
@@ -1071,70 +1105,105 @@ static int local_renameat(FsContext *ctx, V9fsPath *olddir,
                           const char *new_name)
 {
     int ret;
-    V9fsString old_full_name, new_full_name;
+    int odirfd, ndirfd;
+
+    odirfd = local_opendir_nofollow(ctx, olddir->data);
+    if (odirfd == -1) {
+        return -1;
+    }
+
+    ndirfd = local_opendir_nofollow(ctx, newdir->data);
+    if (ndirfd == -1) {
+        close_preserve_errno(odirfd);
+        return -1;
+    }
+
+    ret = renameat(odirfd, old_name, ndirfd, new_name);
+    if (ret < 0) {
+        goto out;
+    }
+
+    if (ctx->export_flags & V9FS_SM_MAPPED_FILE) {
+        int omap_dirfd, nmap_dirfd;
 
-    v9fs_string_init(&old_full_name);
-    v9fs_string_init(&new_full_name);
+        ret = mkdirat(ndirfd, VIRTFS_META_DIR, 0700);
+        if (ret < 0 && errno != EEXIST) {
+            goto err_undo_rename;
+        }
+
+        omap_dirfd = openat_dir(odirfd, VIRTFS_META_DIR);
+        if (omap_dirfd == -1) {
+            goto err;
+        }
+
+        nmap_dirfd = openat_dir(ndirfd, VIRTFS_META_DIR);
+        if (nmap_dirfd == -1) {
+            close_preserve_errno(omap_dirfd);
+            goto err;
+        }
 
-    v9fs_string_sprintf(&old_full_name, "%s/%s", olddir->data, old_name);
-    v9fs_string_sprintf(&new_full_name, "%s/%s", newdir->data, new_name);
+        /* rename the .virtfs_metadata files */
+        ret = renameat(omap_dirfd, old_name, nmap_dirfd, new_name);
+        close_preserve_errno(nmap_dirfd);
+        close_preserve_errno(omap_dirfd);
+        if (ret < 0 && errno != ENOENT) {
+            goto err_undo_rename;
+        }
 
-    ret = local_rename(ctx, old_full_name.data, new_full_name.data);
-    v9fs_string_free(&old_full_name);
-    v9fs_string_free(&new_full_name);
+        ret = 0;
+    }
+    goto out;
+
+err:
+    ret = -1;
+err_undo_rename:
+    renameat_preserve_errno(ndirfd, new_name, odirfd, old_name);
+out:
+    close_preserve_errno(ndirfd);
+    close_preserve_errno(odirfd);
     return ret;
 }
 
+static void v9fs_path_init_dirname(V9fsPath *path, const char *str)
+{
+    path->data = g_path_get_dirname(str);
+    path->size = strlen(path->data) + 1;
+}
+
+static int local_rename(FsContext *ctx, const char *oldpath,
+                        const char *newpath)
+{
+    int err;
+    char *oname = g_path_get_basename(oldpath);
+    char *nname = g_path_get_basename(newpath);
+    V9fsPath olddir, newdir;
+
+    v9fs_path_init_dirname(&olddir, oldpath);
+    v9fs_path_init_dirname(&newdir, newpath);
+
+    err = local_renameat(ctx, &olddir, oname, &newdir, nname);
+
+    v9fs_path_free(&newdir);
+    v9fs_path_free(&olddir);
+    g_free(nname);
+    g_free(oname);
+
+    return err;
+}
+
 static int local_unlinkat(FsContext *ctx, V9fsPath *dir,
                           const char *name, int flags)
 {
     int ret;
-    V9fsString fullname;
-    char *buffer;
+    int dirfd;
 
-    v9fs_string_init(&fullname);
-
-    v9fs_string_sprintf(&fullname, "%s/%s", dir->data, name);
-    if (ctx->export_flags & V9FS_SM_MAPPED_FILE) {
-        if (flags == AT_REMOVEDIR) {
-            /*
-             * If directory remove .virtfs_metadata contained in the
-             * directory
-             */
-            buffer = g_strdup_printf("%s/%s/%s", ctx->fs_root,
-                                     fullname.data, VIRTFS_META_DIR);
-            ret = remove(buffer);
-            g_free(buffer);
-            if (ret < 0 && errno != ENOENT) {
-                /*
-                 * We didn't had the .virtfs_metadata file. May be file created
-                 * in non-mapped mode ?. Ignore ENOENT.
-                 */
-                goto err_out;
-            }
-        }
-        /*
-         * Now remove the name from parent directory
-         * .virtfs_metadata directory.
-         */
-        buffer = local_mapped_attr_path(ctx, fullname.data);
-        ret = remove(buffer);
-        g_free(buffer);
-        if (ret < 0 && errno != ENOENT) {
-            /*
-             * We didn't had the .virtfs_metadata file. May be file created
-             * in non-mapped mode ?. Ignore ENOENT.
-             */
-            goto err_out;
-        }
+    dirfd = local_opendir_nofollow(ctx, dir->data);
+    if (dirfd == -1) {
+        return -1;
     }
-    /* Remove the name finally */
-    buffer = rpath(ctx, fullname.data);
-    ret = remove(buffer);
-    g_free(buffer);
 
-err_out:
-    v9fs_string_free(&fullname);
+    ret = local_unlinkat_common(ctx, dirfd, name, flags);
+    close_preserve_errno(dirfd);
     return ret;
 }
 
@@ -1168,8 +1237,31 @@ static int local_ioc_getversion(FsContext *ctx, V9fsPath *path,
 
 static int local_init(FsContext *ctx)
 {
-    int err = 0;
     struct statfs stbuf;
+    LocalData *data = g_malloc(sizeof(*data));
+
+    data->mountfd = open(ctx->fs_root, O_DIRECTORY | O_RDONLY);
+    if (data->mountfd == -1) {
+        goto err;
+    }
+
+#ifdef FS_IOC_GETVERSION
+    /*
+     * use ioc_getversion only if the ioctl is definied
+     */
+    if (fstatfs(data->mountfd, &stbuf) < 0) {
+        close_preserve_errno(data->mountfd);
+        goto err;
+    }
+    switch (stbuf.f_type) {
+    case EXT2_SUPER_MAGIC:
+    case BTRFS_SUPER_MAGIC:
+    case REISERFS_SUPER_MAGIC:
+    case XFS_SUPER_MAGIC:
+        ctx->exops.get_st_gen = local_ioc_getversion;
+        break;
+    }
+#endif
 
     if (ctx->export_flags & V9FS_SM_PASSTHROUGH) {
         ctx->xops = passthrough_xattr_ops;
@@ -1185,29 +1277,28 @@ static int local_init(FsContext *ctx)
         ctx->xops = passthrough_xattr_ops;
     }
     ctx->export_flags |= V9FS_PATHNAME_FSCONTEXT;
-#ifdef FS_IOC_GETVERSION
-    /*
-     * use ioc_getversion only if the iocl is definied
-     */
-    err = statfs(ctx->fs_root, &stbuf);
-    if (!err) {
-        switch (stbuf.f_type) {
-        case EXT2_SUPER_MAGIC:
-        case BTRFS_SUPER_MAGIC:
-        case REISERFS_SUPER_MAGIC:
-        case XFS_SUPER_MAGIC:
-            ctx->exops.get_st_gen = local_ioc_getversion;
-            break;
-        }
-    }
-#endif
-    return err;
+
+    ctx->private = data;
+    return 0;
+
+err:
+    g_free(data);
+    return -1;
+}
+
+static void local_cleanup(FsContext *ctx)
+{
+    LocalData *data = ctx->private;
+
+    close(data->mountfd);
+    g_free(data);
 }
 
 static int local_parse_opts(QemuOpts *opts, struct FsDriverEntry *fse)
 {
     const char *sec_model = qemu_opt_get(opts, "security_model");
     const char *path = qemu_opt_get(opts, "path");
+    Error *err = NULL;
 
     if (!sec_model) {
         error_report("Security model not specified, local fs needs security model");
@@ -1236,6 +1327,13 @@ static int local_parse_opts(QemuOpts *opts, struct FsDriverEntry *fse)
         error_report("fsdev: No path specified");
         return -1;
     }
+
+    fsdev_throttle_parse_opts(opts, &fse->fst, &err);
+    if (err) {
+        error_reportf_err(err, "Throttle configuration is not valid: ");
+        return -1;
+    }
+
     fse->path = g_strdup(path);
 
     return 0;
@@ -1244,6 +1342,7 @@ static int local_parse_opts(QemuOpts *opts, struct FsDriverEntry *fse)
 FileOperations local_ops = {
     .parse_opts = local_parse_opts,
     .init  = local_init,
+    .cleanup = local_cleanup,
     .lstat = local_lstat,
     .readlink = local_readlink,
     .close = local_close,
diff --git a/hw/9pfs/9p-local.h b/hw/9pfs/9p-local.h
new file mode 100644
index 0000000000..32c72749d9
--- /dev/null
+++ b/hw/9pfs/9p-local.h
@@ -0,0 +1,20 @@
+/*
+ * 9p local backend utilities
+ *
+ * Copyright IBM, Corp. 2017
+ *
+ * Authors:
+ *  Greg Kurz <groug@kaod.org>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef QEMU_9P_LOCAL_H
+#define QEMU_9P_LOCAL_H
+
+int local_open_nofollow(FsContext *fs_ctx, const char *path, int flags,
+                        mode_t mode);
+int local_opendir_nofollow(FsContext *fs_ctx, const char *path);
+
+#endif
diff --git a/hw/9pfs/9p-posix-acl.c b/hw/9pfs/9p-posix-acl.c
index ec003181cd..bbf89064f7 100644
--- a/hw/9pfs/9p-posix-acl.c
+++ b/hw/9pfs/9p-posix-acl.c
@@ -25,13 +25,7 @@
 static ssize_t mp_pacl_getxattr(FsContext *ctx, const char *path,
                                 const char *name, void *value, size_t size)
 {
-    char *buffer;
-    ssize_t ret;
-
-    buffer = rpath(ctx, path);
-    ret = lgetxattr(buffer, MAP_ACL_ACCESS, value, size);
-    g_free(buffer);
-    return ret;
+    return local_getxattr_nofollow(ctx, path, MAP_ACL_ACCESS, value, size);
 }
 
 static ssize_t mp_pacl_listxattr(FsContext *ctx, const char *path,
@@ -56,23 +50,16 @@ static ssize_t mp_pacl_listxattr(FsContext *ctx, const char *path,
 static int mp_pacl_setxattr(FsContext *ctx, const char *path, const char *name,
                             void *value, size_t size, int flags)
 {
-    char *buffer;
-    int ret;
-
-    buffer = rpath(ctx, path);
-    ret = lsetxattr(buffer, MAP_ACL_ACCESS, value, size, flags);
-    g_free(buffer);
-    return ret;
+    return local_setxattr_nofollow(ctx, path, MAP_ACL_ACCESS, value, size,
+                                   flags);
 }
 
 static int mp_pacl_removexattr(FsContext *ctx,
                                const char *path, const char *name)
 {
     int ret;
-    char *buffer;
 
-    buffer = rpath(ctx, path);
-    ret  = lremovexattr(buffer, MAP_ACL_ACCESS);
+    ret = local_removexattr_nofollow(ctx, path, MAP_ACL_ACCESS);
     if (ret == -1 && errno == ENODATA) {
         /*
          * We don't get ENODATA error when trying to remove a
@@ -82,20 +69,13 @@ static int mp_pacl_removexattr(FsContext *ctx,
         errno = 0;
         ret = 0;
     }
-    g_free(buffer);
     return ret;
 }
 
 static ssize_t mp_dacl_getxattr(FsContext *ctx, const char *path,
                                 const char *name, void *value, size_t size)
 {
-    char *buffer;
-    ssize_t ret;
-
-    buffer = rpath(ctx, path);
-    ret = lgetxattr(buffer, MAP_ACL_DEFAULT, value, size);
-    g_free(buffer);
-    return ret;
+    return local_getxattr_nofollow(ctx, path, MAP_ACL_DEFAULT, value, size);
 }
 
 static ssize_t mp_dacl_listxattr(FsContext *ctx, const char *path,
@@ -120,23 +100,16 @@ static ssize_t mp_dacl_listxattr(FsContext *ctx, const char *path,
 static int mp_dacl_setxattr(FsContext *ctx, const char *path, const char *name,
                             void *value, size_t size, int flags)
 {
-    char *buffer;
-    int ret;
-
-    buffer = rpath(ctx, path);
-    ret = lsetxattr(buffer, MAP_ACL_DEFAULT, value, size, flags);
-    g_free(buffer);
-    return ret;
+    return local_setxattr_nofollow(ctx, path, MAP_ACL_DEFAULT, value, size,
+                                   flags);
 }
 
 static int mp_dacl_removexattr(FsContext *ctx,
                                const char *path, const char *name)
 {
     int ret;
-    char *buffer;
 
-    buffer = rpath(ctx, path);
-    ret  = lremovexattr(buffer, MAP_ACL_DEFAULT);
+    ret = local_removexattr_nofollow(ctx, path, MAP_ACL_DEFAULT);
     if (ret == -1 && errno == ENODATA) {
         /*
          * We don't get ENODATA error when trying to remove a
@@ -146,7 +119,6 @@ static int mp_dacl_removexattr(FsContext *ctx,
         errno = 0;
         ret = 0;
     }
-    g_free(buffer);
     return ret;
 }
 
diff --git a/hw/9pfs/9p-util.c b/hw/9pfs/9p-util.c
new file mode 100644
index 0000000000..fdb4d57376
--- /dev/null
+++ b/hw/9pfs/9p-util.c
@@ -0,0 +1,69 @@
+/*
+ * 9p utilities
+ *
+ * Copyright IBM, Corp. 2017
+ *
+ * Authors:
+ *  Greg Kurz <groug@kaod.org>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/xattr.h"
+#include "9p-util.h"
+
+int relative_openat_nofollow(int dirfd, const char *path, int flags,
+                             mode_t mode)
+{
+    int fd;
+
+    fd = dup(dirfd);
+    if (fd == -1) {
+        return -1;
+    }
+
+    while (*path) {
+        const char *c;
+        int next_fd;
+        char *head;
+
+        /* Only relative paths without consecutive slashes */
+        assert(path[0] != '/');
+
+        head = g_strdup(path);
+        c = strchr(path, '/');
+        if (c) {
+            head[c - path] = 0;
+            next_fd = openat_dir(fd, head);
+        } else {
+            next_fd = openat_file(fd, head, flags, mode);
+        }
+        g_free(head);
+        if (next_fd == -1) {
+            close_preserve_errno(fd);
+            return -1;
+        }
+        close(fd);
+        fd = next_fd;
+
+        if (!c) {
+            break;
+        }
+        path = c + 1;
+    }
+
+    return fd;
+}
+
+ssize_t fgetxattrat_nofollow(int dirfd, const char *filename, const char *name,
+                             void *value, size_t size)
+{
+    char *proc_path = g_strdup_printf("/proc/self/fd/%d/%s", dirfd, filename);
+    int ret;
+
+    ret = lgetxattr(proc_path, name, value, size);
+    g_free(proc_path);
+    return ret;
+}
diff --git a/hw/9pfs/9p-util.h b/hw/9pfs/9p-util.h
new file mode 100644
index 0000000000..091f3ce88e
--- /dev/null
+++ b/hw/9pfs/9p-util.h
@@ -0,0 +1,54 @@
+/*
+ * 9p utilities
+ *
+ * Copyright IBM, Corp. 2017
+ *
+ * Authors:
+ *  Greg Kurz <groug@kaod.org>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef QEMU_9P_UTIL_H
+#define QEMU_9P_UTIL_H
+
+static inline void close_preserve_errno(int fd)
+{
+    int serrno = errno;
+    close(fd);
+    errno = serrno;
+}
+
+static inline int openat_dir(int dirfd, const char *name)
+{
+    return openat(dirfd, name, O_DIRECTORY | O_RDONLY | O_PATH);
+}
+
+static inline int openat_file(int dirfd, const char *name, int flags,
+                              mode_t mode)
+{
+    int fd, serrno, ret;
+
+    fd = openat(dirfd, name, flags | O_NOFOLLOW | O_NOCTTY | O_NONBLOCK,
+                mode);
+    if (fd == -1) {
+        return -1;
+    }
+
+    serrno = errno;
+    /* O_NONBLOCK was only needed to open the file. Let's drop it. */
+    ret = fcntl(fd, F_SETFL, flags);
+    assert(!ret);
+    errno = serrno;
+    return fd;
+}
+
+int relative_openat_nofollow(int dirfd, const char *path, int flags,
+                             mode_t mode);
+ssize_t fgetxattrat_nofollow(int dirfd, const char *path, const char *name,
+                             void *value, size_t size);
+int fsetxattrat_nofollow(int dirfd, const char *path, const char *name,
+                         void *value, size_t size, int flags);
+
+#endif
diff --git a/hw/9pfs/9p-xattr-user.c b/hw/9pfs/9p-xattr-user.c
index f87530c8b5..2c90817b75 100644
--- a/hw/9pfs/9p-xattr-user.c
+++ b/hw/9pfs/9p-xattr-user.c
@@ -20,9 +20,6 @@
 static ssize_t mp_user_getxattr(FsContext *ctx, const char *path,
                                 const char *name, void *value, size_t size)
 {
-    char *buffer;
-    ssize_t ret;
-
     if (strncmp(name, "user.virtfs.", 12) == 0) {
         /*
          * Don't allow fetch of user.virtfs namesapce
@@ -31,10 +28,7 @@ static ssize_t mp_user_getxattr(FsContext *ctx, const char *path,
         errno = ENOATTR;
         return -1;
     }
-    buffer = rpath(ctx, path);
-    ret = lgetxattr(buffer, name, value, size);
-    g_free(buffer);
-    return ret;
+    return local_getxattr_nofollow(ctx, path, name, value, size);
 }
 
 static ssize_t mp_user_listxattr(FsContext *ctx, const char *path,
@@ -73,9 +67,6 @@ static ssize_t mp_user_listxattr(FsContext *ctx, const char *path,
 static int mp_user_setxattr(FsContext *ctx, const char *path, const char *name,
                             void *value, size_t size, int flags)
 {
-    char *buffer;
-    int ret;
-
     if (strncmp(name, "user.virtfs.", 12) == 0) {
         /*
          * Don't allow fetch of user.virtfs namesapce
@@ -84,18 +75,12 @@ static int mp_user_setxattr(FsContext *ctx, const char *path, const char *name,
         errno = EACCES;
         return -1;
     }
-    buffer = rpath(ctx, path);
-    ret = lsetxattr(buffer, name, value, size, flags);
-    g_free(buffer);
-    return ret;
+    return local_setxattr_nofollow(ctx, path, name, value, size, flags);
 }
 
 static int mp_user_removexattr(FsContext *ctx,
                                const char *path, const char *name)
 {
-    char *buffer;
-    int ret;
-
     if (strncmp(name, "user.virtfs.", 12) == 0) {
         /*
          * Don't allow fetch of user.virtfs namesapce
@@ -104,10 +89,7 @@ static int mp_user_removexattr(FsContext *ctx,
         errno = EACCES;
         return -1;
     }
-    buffer = rpath(ctx, path);
-    ret = lremovexattr(buffer, name);
-    g_free(buffer);
-    return ret;
+    return local_removexattr_nofollow(ctx, path, name);
 }
 
 XattrOperations mapped_user_xattr = {
diff --git a/hw/9pfs/9p-xattr.c b/hw/9pfs/9p-xattr.c
index 5d8595ed93..eec160b3c2 100644
--- a/hw/9pfs/9p-xattr.c
+++ b/hw/9pfs/9p-xattr.c
@@ -15,6 +15,8 @@
 #include "9p.h"
 #include "fsdev/file-op-9p.h"
 #include "9p-xattr.h"
+#include "9p-util.h"
+#include "9p-local.h"
 
 
 static XattrOperations *get_xattr_operations(XattrOperations **h,
@@ -58,6 +60,16 @@ ssize_t pt_listxattr(FsContext *ctx, const char *path,
     return name_size;
 }
 
+static ssize_t flistxattrat_nofollow(int dirfd, const char *filename,
+                                     char *list, size_t size)
+{
+    char *proc_path = g_strdup_printf("/proc/self/fd/%d/%s", dirfd, filename);
+    int ret;
+
+    ret = llistxattr(proc_path, list, size);
+    g_free(proc_path);
+    return ret;
+}
 
 /*
  * Get the list and pass to each layer to find out whether
@@ -67,24 +79,37 @@ ssize_t v9fs_list_xattr(FsContext *ctx, const char *path,
                         void *value, size_t vsize)
 {
     ssize_t size = 0;
-    char *buffer;
     void *ovalue = value;
     XattrOperations *xops;
     char *orig_value, *orig_value_start;
     ssize_t xattr_len, parsed_len = 0, attr_len;
+    char *dirpath, *name;
+    int dirfd;
 
     /* Get the actual len */
-    buffer = rpath(ctx, path);
-    xattr_len = llistxattr(buffer, value, 0);
+    dirpath = g_path_get_dirname(path);
+    dirfd = local_opendir_nofollow(ctx, dirpath);
+    g_free(dirpath);
+    if (dirfd == -1) {
+        return -1;
+    }
+
+    name = g_path_get_basename(path);
+    xattr_len = flistxattrat_nofollow(dirfd, name, value, 0);
     if (xattr_len <= 0) {
-        g_free(buffer);
+        g_free(name);
+        close_preserve_errno(dirfd);
         return xattr_len;
     }
 
     /* Now fetch the xattr and find the actual size */
     orig_value = g_malloc(xattr_len);
-    xattr_len = llistxattr(buffer, orig_value, xattr_len);
-    g_free(buffer);
+    xattr_len = flistxattrat_nofollow(dirfd, name, orig_value, xattr_len);
+    g_free(name);
+    close_preserve_errno(dirfd);
+    if (xattr_len < 0) {
+        return -1;
+    }
 
     /* store the orig pointer */
     orig_value_start = orig_value;
@@ -143,6 +168,135 @@ int v9fs_remove_xattr(FsContext *ctx,
 
 }
 
+ssize_t local_getxattr_nofollow(FsContext *ctx, const char *path,
+                                const char *name, void *value, size_t size)
+{
+    char *dirpath = g_path_get_dirname(path);
+    char *filename = g_path_get_basename(path);
+    int dirfd;
+    ssize_t ret = -1;
+
+    dirfd = local_opendir_nofollow(ctx, dirpath);
+    if (dirfd == -1) {
+        goto out;
+    }
+
+    ret = fgetxattrat_nofollow(dirfd, filename, name, value, size);
+    close_preserve_errno(dirfd);
+out:
+    g_free(dirpath);
+    g_free(filename);
+    return ret;
+}
+
+ssize_t pt_getxattr(FsContext *ctx, const char *path, const char *name,
+                    void *value, size_t size)
+{
+    return local_getxattr_nofollow(ctx, path, name, value, size);
+}
+
+int fsetxattrat_nofollow(int dirfd, const char *filename, const char *name,
+                         void *value, size_t size, int flags)
+{
+    char *proc_path = g_strdup_printf("/proc/self/fd/%d/%s", dirfd, filename);
+    int ret;
+
+    ret = lsetxattr(proc_path, name, value, size, flags);
+    g_free(proc_path);
+    return ret;
+}
+
+ssize_t local_setxattr_nofollow(FsContext *ctx, const char *path,
+                                const char *name, void *value, size_t size,
+                                int flags)
+{
+    char *dirpath = g_path_get_dirname(path);
+    char *filename = g_path_get_basename(path);
+    int dirfd;
+    ssize_t ret = -1;
+
+    dirfd = local_opendir_nofollow(ctx, dirpath);
+    if (dirfd == -1) {
+        goto out;
+    }
+
+    ret = fsetxattrat_nofollow(dirfd, filename, name, value, size, flags);
+    close_preserve_errno(dirfd);
+out:
+    g_free(dirpath);
+    g_free(filename);
+    return ret;
+}
+
+int pt_setxattr(FsContext *ctx, const char *path, const char *name, void *value,
+                size_t size, int flags)
+{
+    return local_setxattr_nofollow(ctx, path, name, value, size, flags);
+}
+
+static ssize_t fremovexattrat_nofollow(int dirfd, const char *filename,
+                                       const char *name)
+{
+    char *proc_path = g_strdup_printf("/proc/self/fd/%d/%s", dirfd, filename);
+    int ret;
+
+    ret = lremovexattr(proc_path, name);
+    g_free(proc_path);
+    return ret;
+}
+
+ssize_t local_removexattr_nofollow(FsContext *ctx, const char *path,
+                                   const char *name)
+{
+    char *dirpath = g_path_get_dirname(path);
+    char *filename = g_path_get_basename(path);
+    int dirfd;
+    ssize_t ret = -1;
+
+    dirfd = local_opendir_nofollow(ctx, dirpath);
+    if (dirfd == -1) {
+        goto out;
+    }
+
+    ret = fremovexattrat_nofollow(dirfd, filename, name);
+    close_preserve_errno(dirfd);
+out:
+    g_free(dirpath);
+    g_free(filename);
+    return ret;
+}
+
+int pt_removexattr(FsContext *ctx, const char *path, const char *name)
+{
+    return local_removexattr_nofollow(ctx, path, name);
+}
+
+ssize_t notsup_getxattr(FsContext *ctx, const char *path, const char *name,
+                        void *value, size_t size)
+{
+    errno = ENOTSUP;
+    return -1;
+}
+
+int notsup_setxattr(FsContext *ctx, const char *path, const char *name,
+                    void *value, size_t size, int flags)
+{
+    errno = ENOTSUP;
+    return -1;
+}
+
+ssize_t notsup_listxattr(FsContext *ctx, const char *path, char *name,
+                         void *value, size_t size)
+{
+    return 0;
+}
+
+int notsup_removexattr(FsContext *ctx, const char *path, const char *name)
+{
+    errno = ENOTSUP;
+    return -1;
+}
+
 XattrOperations *mapped_xattr_ops[] = {
     &mapped_user_xattr,
     &mapped_pacl_xattr,
diff --git a/hw/9pfs/9p-xattr.h b/hw/9pfs/9p-xattr.h
index a853ea641c..0d83996575 100644
--- a/hw/9pfs/9p-xattr.h
+++ b/hw/9pfs/9p-xattr.h
@@ -29,6 +29,13 @@ typedef struct xattr_operations
                        const char *path, const char *name);
 } XattrOperations;
 
+ssize_t local_getxattr_nofollow(FsContext *ctx, const char *path,
+                                const char *name, void *value, size_t size);
+ssize_t local_setxattr_nofollow(FsContext *ctx, const char *path,
+                                const char *name, void *value, size_t size,
+                                int flags);
+ssize_t local_removexattr_nofollow(FsContext *ctx, const char *path,
+                                   const char *name);
 
 extern XattrOperations mapped_user_xattr;
 extern XattrOperations passthrough_user_xattr;
@@ -49,73 +56,21 @@ ssize_t v9fs_list_xattr(FsContext *ctx, const char *path, void *value,
 int v9fs_set_xattr(FsContext *ctx, const char *path, const char *name,
                           void *value, size_t size, int flags);
 int v9fs_remove_xattr(FsContext *ctx, const char *path, const char *name);
+
 ssize_t pt_listxattr(FsContext *ctx, const char *path, char *name, void *value,
                      size_t size);
-
-static inline ssize_t pt_getxattr(FsContext *ctx, const char *path,
-                                  const char *name, void *value, size_t size)
-{
-    char *buffer;
-    ssize_t ret;
-
-    buffer = rpath(ctx, path);
-    ret = lgetxattr(buffer, name, value, size);
-    g_free(buffer);
-    return ret;
-}
-
-static inline int pt_setxattr(FsContext *ctx, const char *path,
-                              const char *name, void *value,
-                              size_t size, int flags)
-{
-    char *buffer;
-    int ret;
-
-    buffer = rpath(ctx, path);
-    ret = lsetxattr(buffer, name, value, size, flags);
-    g_free(buffer);
-    return ret;
-}
-
-static inline int pt_removexattr(FsContext *ctx,
-                                 const char *path, const char *name)
-{
-    char *buffer;
-    int ret;
-
-    buffer = rpath(ctx, path);
-    ret = lremovexattr(path, name);
-    g_free(buffer);
-    return ret;
-}
-
-static inline ssize_t notsup_getxattr(FsContext *ctx, const char *path,
-                                      const char *name, void *value,
-                                      size_t size)
-{
-    errno = ENOTSUP;
-    return -1;
-}
-
-static inline int notsup_setxattr(FsContext *ctx, const char *path,
-                                  const char *name, void *value,
-                                  size_t size, int flags)
-{
-    errno = ENOTSUP;
-    return -1;
-}
-
-static inline ssize_t notsup_listxattr(FsContext *ctx, const char *path,
-                                       char *name, void *value, size_t size)
-{
-    return 0;
-}
-
-static inline int notsup_removexattr(FsContext *ctx,
-                                     const char *path, const char *name)
-{
-    errno = ENOTSUP;
-    return -1;
-}
+ssize_t pt_getxattr(FsContext *ctx, const char *path, const char *name,
+                    void *value, size_t size);
+int pt_setxattr(FsContext *ctx, const char *path, const char *name, void *value,
+                size_t size, int flags);
+int pt_removexattr(FsContext *ctx, const char *path, const char *name);
+
+ssize_t notsup_getxattr(FsContext *ctx, const char *path, const char *name,
+                        void *value, size_t size);
+int notsup_setxattr(FsContext *ctx, const char *path, const char *name,
+                    void *value, size_t size, int flags);
+ssize_t notsup_listxattr(FsContext *ctx, const char *path, char *name,
+                         void *value, size_t size);
+int notsup_removexattr(FsContext *ctx, const char *path, const char *name);
 
 #endif
diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c
index 3af1c93dc8..76c9247c77 100644
--- a/hw/9pfs/9p.c
+++ b/hw/9pfs/9p.c
@@ -3010,7 +3010,6 @@ out_nofid:
  */
 static void coroutine_fn v9fs_lock(void *opaque)
 {
-    int8_t status;
     V9fsFlock flock;
     size_t offset = 7;
     struct stat stbuf;
@@ -3018,7 +3017,6 @@ static void coroutine_fn v9fs_lock(void *opaque)
     int32_t fid, err = 0;
     V9fsPDU *pdu = opaque;
 
-    status = P9_LOCK_ERROR;
     v9fs_string_init(&flock.client_id);
     err = pdu_unmarshal(pdu, offset, "dbdqqds", &fid, &flock.type,
                         &flock.flags, &flock.start, &flock.length,
@@ -3044,15 +3042,15 @@ static void coroutine_fn v9fs_lock(void *opaque)
     if (err < 0) {
         goto out;
     }
-    status = P9_LOCK_SUCCESS;
+    err = pdu_marshal(pdu, offset, "b", P9_LOCK_SUCCESS);
+    if (err < 0) {
+        goto out;
+    }
+    err += offset;
+    trace_v9fs_lock_return(pdu->tag, pdu->id, P9_LOCK_SUCCESS);
 out:
     put_fid(pdu, fidp);
 out_nofid:
-    err = pdu_marshal(pdu, offset, "b", status);
-    if (err > 0) {
-        err += offset;
-    }
-    trace_v9fs_lock_return(pdu->tag, pdu->id, status);
     pdu_complete(pdu, err);
     v9fs_string_free(&flock.client_id);
 }
@@ -3531,6 +3529,10 @@ int v9fs_device_realize_common(V9fsState *s, Error **errp)
         error_setg(errp, "share path %s is not a directory", fse->path);
         goto out;
     }
+
+    s->ctx.fst = &fse->fst;
+    fsdev_throttle_init(s->ctx.fst);
+
     v9fs_path_free(&path);
 
     rc = 0;
@@ -3551,6 +3553,7 @@ void v9fs_device_unrealize_common(V9fsState *s, Error **errp)
     if (s->ops->cleanup) {
         s->ops->cleanup(&s->ctx);
     }
+    fsdev_throttle_cleanup(s->ctx.fst);
     g_free(s->tag);
     g_free(s->ctx.fs_root);
 }
diff --git a/hw/9pfs/Makefile.objs b/hw/9pfs/Makefile.objs
index da0ae0cfdb..32197e6671 100644
--- a/hw/9pfs/Makefile.objs
+++ b/hw/9pfs/Makefile.objs
@@ -1,4 +1,4 @@
-common-obj-y  = 9p.o
+common-obj-y  = 9p.o 9p-util.o
 common-obj-y += 9p-local.o 9p-xattr.o
 common-obj-y += 9p-xattr-user.o 9p-posix-acl.o
 common-obj-y += coth.o cofs.o codir.o cofile.o
diff --git a/hw/9pfs/cofile.c b/hw/9pfs/cofile.c
index 120e267108..88791bc327 100644
--- a/hw/9pfs/cofile.c
+++ b/hw/9pfs/cofile.c
@@ -247,6 +247,7 @@ int coroutine_fn v9fs_co_pwritev(V9fsPDU *pdu, V9fsFidState *fidp,
     if (v9fs_request_cancelled(pdu)) {
         return -EINTR;
     }
+    fsdev_co_throttle_request(s->ctx.fst, true, iov, iovcnt);
     v9fs_co_run_in_worker(
         {
             err = s->ops->pwritev(&s->ctx, &fidp->fs, iov, iovcnt, offset);
@@ -266,6 +267,7 @@ int coroutine_fn v9fs_co_preadv(V9fsPDU *pdu, V9fsFidState *fidp,
     if (v9fs_request_cancelled(pdu)) {
         return -EINTR;
     }
+    fsdev_co_throttle_request(s->ctx.fst, false, iov, iovcnt);
     v9fs_co_run_in_worker(
         {
             err = s->ops->preadv(&s->ctx, &fidp->fs, iov, iovcnt, offset);
diff --git a/hw/acpi/Makefile.objs b/hw/acpi/Makefile.objs
index 6acf79860a..11c35bcb44 100644
--- a/hw/acpi/Makefile.objs
+++ b/hw/acpi/Makefile.objs
@@ -5,6 +5,7 @@ common-obj-$(CONFIG_ACPI_CPU_HOTPLUG) += cpu_hotplug.o
 common-obj-$(CONFIG_ACPI_MEMORY_HOTPLUG) += memory_hotplug.o
 common-obj-$(CONFIG_ACPI_CPU_HOTPLUG) += cpu.o
 common-obj-$(CONFIG_ACPI_NVDIMM) += nvdimm.o
+common-obj-$(CONFIG_ACPI_VMGENID) += vmgenid.o
 common-obj-$(call lnot,$(CONFIG_ACPI_X86)) += acpi-stub.o
 
 common-obj-y += acpi_interface.o
diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c
index b2a1e4033b..c6f2032dec 100644
--- a/hw/acpi/aml-build.c
+++ b/hw/acpi/aml-build.c
@@ -1559,6 +1559,7 @@ void acpi_build_tables_init(AcpiBuildTables *tables)
     tables->rsdp = g_array_new(false, true /* clear */, 1);
     tables->table_data = g_array_new(false, true /* clear */, 1);
     tables->tcpalog = g_array_new(false, true /* clear */, 1);
+    tables->vmgenid = g_array_new(false, true /* clear */, 1);
     tables->linker = bios_linker_loader_init();
 }
 
@@ -1568,6 +1569,7 @@ void acpi_build_tables_cleanup(AcpiBuildTables *tables, bool mfre)
     g_array_free(tables->rsdp, true);
     g_array_free(tables->table_data, true);
     g_array_free(tables->tcpalog, mfre);
+    g_array_free(tables->vmgenid, mfre);
 }
 
 /* Build rsdt table */
diff --git a/hw/acpi/bios-linker-loader.c b/hw/acpi/bios-linker-loader.c
index d963ebe241..046183a0f1 100644
--- a/hw/acpi/bios-linker-loader.c
+++ b/hw/acpi/bios-linker-loader.c
@@ -78,6 +78,21 @@ struct BiosLinkerLoaderEntry {
             uint32_t length;
         } cksum;
 
+        /*
+         * COMMAND_WRITE_POINTER - write the fw_cfg file (originating from
+         * @dest_file) at @wr_pointer.offset, by adding a pointer to
+         * @src_offset within the table originating from @src_file.
+         * 1,2,4 or 8 byte unsigned addition is used depending on
+         * @wr_pointer.size.
+         */
+        struct {
+            char dest_file[BIOS_LINKER_LOADER_FILESZ];
+            char src_file[BIOS_LINKER_LOADER_FILESZ];
+            uint32_t dst_offset;
+            uint32_t src_offset;
+            uint8_t size;
+        } wr_pointer;
+
         /* padding */
         char pad[124];
     };
@@ -85,9 +100,10 @@ struct BiosLinkerLoaderEntry {
 typedef struct BiosLinkerLoaderEntry BiosLinkerLoaderEntry;
 
 enum {
-    BIOS_LINKER_LOADER_COMMAND_ALLOCATE     = 0x1,
-    BIOS_LINKER_LOADER_COMMAND_ADD_POINTER  = 0x2,
-    BIOS_LINKER_LOADER_COMMAND_ADD_CHECKSUM = 0x3,
+    BIOS_LINKER_LOADER_COMMAND_ALLOCATE          = 0x1,
+    BIOS_LINKER_LOADER_COMMAND_ADD_POINTER       = 0x2,
+    BIOS_LINKER_LOADER_COMMAND_ADD_CHECKSUM      = 0x3,
+    BIOS_LINKER_LOADER_COMMAND_WRITE_POINTER     = 0x4,
 };
 
 enum {
@@ -278,3 +294,47 @@ void bios_linker_loader_add_pointer(BIOSLinker *linker,
 
     g_array_append_vals(linker->cmd_blob, &entry, sizeof entry);
 }
+
+/*
+ * bios_linker_loader_write_pointer: ask guest to write a pointer to the
+ * source file into the destination file, and write it back to QEMU via
+ * fw_cfg DMA.
+ *
+ * @linker: linker object instance
+ * @dest_file: destination file that must be written
+ * @dst_patched_offset: location within destination file blob to be patched
+ *                      with the pointer to @src_file, in bytes
+ * @dst_patched_offset_size: size of the pointer to be patched
+ *                      at @dst_patched_offset in @dest_file blob, in bytes
+ * @src_file: source file who's address must be taken
+ * @src_offset: location within source file blob to which
+ *              @dest_file+@dst_patched_offset will point to after
+ *              firmware's executed WRITE_POINTER command
+ */
+void bios_linker_loader_write_pointer(BIOSLinker *linker,
+                                    const char *dest_file,
+                                    uint32_t dst_patched_offset,
+                                    uint8_t dst_patched_size,
+                                    const char *src_file,
+                                    uint32_t src_offset)
+{
+    BiosLinkerLoaderEntry entry;
+    const BiosLinkerFileEntry *source_file =
+        bios_linker_find_file(linker, src_file);
+
+    assert(source_file);
+    assert(src_offset < source_file->blob->len);
+    memset(&entry, 0, sizeof entry);
+    strncpy(entry.wr_pointer.dest_file, dest_file,
+            sizeof entry.wr_pointer.dest_file - 1);
+    strncpy(entry.wr_pointer.src_file, src_file,
+            sizeof entry.wr_pointer.src_file - 1);
+    entry.command = cpu_to_le32(BIOS_LINKER_LOADER_COMMAND_WRITE_POINTER);
+    entry.wr_pointer.dst_offset = cpu_to_le32(dst_patched_offset);
+    entry.wr_pointer.src_offset = cpu_to_le32(src_offset);
+    entry.wr_pointer.size = dst_patched_size;
+    assert(dst_patched_size == 1 || dst_patched_size == 2 ||
+           dst_patched_size == 4 || dst_patched_size == 8);
+
+    g_array_append_vals(linker->cmd_blob, &entry, sizeof entry);
+}
diff --git a/hw/acpi/pcihp.c b/hw/acpi/pcihp.c
index d957d1e30d..2b0f3e1bfb 100644
--- a/hw/acpi/pcihp.c
+++ b/hw/acpi/pcihp.c
@@ -49,7 +49,6 @@
 
 #define ACPI_PCIHP_ADDR 0xae00
 #define ACPI_PCIHP_SIZE 0x0014
-#define ACPI_PCIHP_LEGACY_SIZE 0x000f
 #define PCI_UP_BASE 0x0000
 #define PCI_DOWN_BASE 0x0004
 #define PCI_EJ_BASE 0x0008
@@ -302,16 +301,6 @@ void acpi_pcihp_init(Object *owner, AcpiPciHpState *s, PCIBus *root_bus,
     s->root= root_bus;
     s->legacy_piix = !bridges_enabled;
 
-    if (s->legacy_piix) {
-        unsigned *bus_bsel = g_malloc(sizeof *bus_bsel);
-
-        s->io_len = ACPI_PCIHP_LEGACY_SIZE;
-
-        *bus_bsel = ACPI_PCIHP_BSEL_DEFAULT;
-        object_property_add_uint32_ptr(OBJECT(root_bus), ACPI_PCIHP_PROP_BSEL,
-                                       bus_bsel, NULL);
-    }
-
     memory_region_init_io(&s->io, owner, &acpi_pcihp_io_ops, s,
                           "acpi-pci-hotplug", s->io_len);
     memory_region_add_subregion(address_space_io, s->io_base, &s->io);
diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c
index 6d99fe407c..a553a7e110 100644
--- a/hw/acpi/piix4.c
+++ b/hw/acpi/piix4.c
@@ -440,6 +440,8 @@ static void piix4_update_bus_hotplug(PCIBus *pci_bus, void *opaque)
 {
     PIIX4PMState *s = opaque;
 
+    /* pci_bus cannot outlive PIIX4PMState, because /machine keeps it alive
+     * and it's not hot-unpluggable */
     qbus_set_hotplug_handler(BUS(pci_bus), DEVICE(s), &error_abort);
 }
 
diff --git a/hw/acpi/tco.c b/hw/acpi/tco.c
index 8ce7daf23a..b4adac88cd 100644
--- a/hw/acpi/tco.c
+++ b/hw/acpi/tco.c
@@ -49,6 +49,7 @@ static inline void tco_timer_reload(TCOIORegs *tr)
 static inline void tco_timer_stop(TCOIORegs *tr)
 {
     tr->expire_time = -1;
+    timer_del(tr->tco_timer);
 }
 
 static void tco_timer_expired(void *opaque)
diff --git a/hw/acpi/vmgenid.c b/hw/acpi/vmgenid.c
new file mode 100644
index 0000000000..744f2847da
--- /dev/null
+++ b/hw/acpi/vmgenid.c
@@ -0,0 +1,258 @@
+/*
+ *  Virtual Machine Generation ID Device
+ *
+ *  Copyright (C) 2017 Skyport Systems.
+ *
+ *  Author: Ben Warren <ben@skyportsystems.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qmp-commands.h"
+#include "hw/acpi/acpi.h"
+#include "hw/acpi/aml-build.h"
+#include "hw/acpi/vmgenid.h"
+#include "hw/nvram/fw_cfg.h"
+#include "sysemu/sysemu.h"
+
+void vmgenid_build_acpi(VmGenIdState *vms, GArray *table_data, GArray *guid,
+                        BIOSLinker *linker)
+{
+    Aml *ssdt, *dev, *scope, *method, *addr, *if_ctx;
+    uint32_t vgia_offset;
+    QemuUUID guid_le;
+
+    /* Fill in the GUID values.  These need to be converted to little-endian
+     * first, since that's what the guest expects
+     */
+    g_array_set_size(guid, VMGENID_FW_CFG_SIZE - ARRAY_SIZE(guid_le.data));
+    guid_le = vms->guid;
+    qemu_uuid_bswap(&guid_le);
+    /* The GUID is written at a fixed offset into the fw_cfg file
+     * in order to implement the "OVMF SDT Header probe suppressor"
+     * see docs/specs/vmgenid.txt for more details
+     */
+    g_array_insert_vals(guid, VMGENID_GUID_OFFSET, guid_le.data,
+                        ARRAY_SIZE(guid_le.data));
+
+    /* Put this in a separate SSDT table */
+    ssdt = init_aml_allocator();
+
+    /* Reserve space for header */
+    acpi_data_push(ssdt->buf, sizeof(AcpiTableHeader));
+
+    /* Storage for the GUID address */
+    vgia_offset = table_data->len +
+        build_append_named_dword(ssdt->buf, "VGIA");
+    scope = aml_scope("\\_SB");
+    dev = aml_device("VGEN");
+    aml_append(dev, aml_name_decl("_HID", aml_string("QEMUVGID")));
+    aml_append(dev, aml_name_decl("_CID", aml_string("VM_Gen_Counter")));
+    aml_append(dev, aml_name_decl("_DDN", aml_string("VM_Gen_Counter")));
+
+    /* Simple status method to check that address is linked and non-zero */
+    method = aml_method("_STA", 0, AML_NOTSERIALIZED);
+    addr = aml_local(0);
+    aml_append(method, aml_store(aml_int(0xf), addr));
+    if_ctx = aml_if(aml_equal(aml_name("VGIA"), aml_int(0)));
+    aml_append(if_ctx, aml_store(aml_int(0), addr));
+    aml_append(method, if_ctx);
+    aml_append(method, aml_return(addr));
+    aml_append(dev, method);
+
+    /* the ADDR method returns two 32-bit words representing the lower and
+     * upper halves * of the physical address of the fw_cfg blob
+     * (holding the GUID)
+     */
+    method = aml_method("ADDR", 0, AML_NOTSERIALIZED);
+
+    addr = aml_local(0);
+    aml_append(method, aml_store(aml_package(2), addr));
+
+    aml_append(method, aml_store(aml_add(aml_name("VGIA"),
+                                         aml_int(VMGENID_GUID_OFFSET), NULL),
+                                 aml_index(addr, aml_int(0))));
+    aml_append(method, aml_store(aml_int(0), aml_index(addr, aml_int(1))));
+    aml_append(method, aml_return(addr));
+
+    aml_append(dev, method);
+    aml_append(scope, dev);
+    aml_append(ssdt, scope);
+
+    /* attach an ACPI notify */
+    method = aml_method("\\_GPE._E05", 0, AML_NOTSERIALIZED);
+    aml_append(method, aml_notify(aml_name("\\_SB.VGEN"), aml_int(0x80)));
+    aml_append(ssdt, method);
+
+    g_array_append_vals(table_data, ssdt->buf->data, ssdt->buf->len);
+
+    /* Allocate guest memory for the Data fw_cfg blob */
+    bios_linker_loader_alloc(linker, VMGENID_GUID_FW_CFG_FILE, guid, 4096,
+                             false /* page boundary, high memory */);
+
+    /* Patch address of GUID fw_cfg blob into the ADDR fw_cfg blob
+     * so QEMU can write the GUID there.  The address is expected to be
+     * < 4GB, but write 64 bits anyway.
+     * The address that is patched in is offset in order to implement
+     * the "OVMF SDT Header probe suppressor"
+     * see docs/specs/vmgenid.txt for more details.
+     */
+    bios_linker_loader_write_pointer(linker,
+        VMGENID_ADDR_FW_CFG_FILE, 0, sizeof(uint64_t),
+        VMGENID_GUID_FW_CFG_FILE, VMGENID_GUID_OFFSET);
+
+    /* Patch address of GUID fw_cfg blob into the AML so OSPM can retrieve
+     * and read it.  Note that while we provide storage for 64 bits, only
+     * the least-signficant 32 get patched into AML.
+     */
+    bios_linker_loader_add_pointer(linker,
+        ACPI_BUILD_TABLE_FILE, vgia_offset, sizeof(uint32_t),
+        VMGENID_GUID_FW_CFG_FILE, 0);
+
+    build_header(linker, table_data,
+        (void *)(table_data->data + table_data->len - ssdt->buf->len),
+        "SSDT", ssdt->buf->len, 1, NULL, "VMGENID");
+    free_aml_allocator();
+}
+
+void vmgenid_add_fw_cfg(VmGenIdState *vms, FWCfgState *s, GArray *guid)
+{
+    /* Create a read-only fw_cfg file for GUID */
+    fw_cfg_add_file(s, VMGENID_GUID_FW_CFG_FILE, guid->data,
+                    VMGENID_FW_CFG_SIZE);
+    /* Create a read-write fw_cfg file for Address */
+    fw_cfg_add_file_callback(s, VMGENID_ADDR_FW_CFG_FILE, NULL, NULL,
+                             vms->vmgenid_addr_le,
+                             ARRAY_SIZE(vms->vmgenid_addr_le), false);
+}
+
+static void vmgenid_update_guest(VmGenIdState *vms)
+{
+    Object *obj = object_resolve_path_type("", TYPE_ACPI_DEVICE_IF, NULL);
+    uint32_t vmgenid_addr;
+    QemuUUID guid_le;
+
+    if (obj) {
+        /* Write the GUID to guest memory */
+        memcpy(&vmgenid_addr, vms->vmgenid_addr_le, sizeof(vmgenid_addr));
+        vmgenid_addr = le32_to_cpu(vmgenid_addr);
+        /* A zero value in vmgenid_addr means that BIOS has not yet written
+         * the address
+         */
+        if (vmgenid_addr) {
+            /* QemuUUID has the first three words as big-endian, and expect
+             * that any GUIDs passed in will always be BE.  The guest,
+             * however, will expect the fields to be little-endian.
+             * Perform a byte swap immediately before writing.
+             */
+            guid_le = vms->guid;
+            qemu_uuid_bswap(&guid_le);
+            /* The GUID is written at a fixed offset into the fw_cfg file
+             * in order to implement the "OVMF SDT Header probe suppressor"
+             * see docs/specs/vmgenid.txt for more details.
+             */
+            cpu_physical_memory_write(vmgenid_addr, guid_le.data,
+                                      sizeof(guid_le.data));
+            /* Send _GPE.E05 event */
+            acpi_send_event(DEVICE(obj), ACPI_VMGENID_CHANGE_STATUS);
+        }
+    }
+}
+
+static void vmgenid_set_guid(Object *obj, const char *value, Error **errp)
+{
+    VmGenIdState *vms = VMGENID(obj);
+
+    if (!strcmp(value, "auto")) {
+        qemu_uuid_generate(&vms->guid);
+    } else if (qemu_uuid_parse(value, &vms->guid) < 0) {
+        error_setg(errp, "'%s. %s': Failed to parse GUID string: %s",
+                   object_get_typename(OBJECT(vms)), VMGENID_GUID, value);
+        return;
+    }
+
+    vmgenid_update_guest(vms);
+}
+
+/* After restoring an image, we need to update the guest memory and notify
+ * it of a potential change to VM Generation ID
+ */
+static int vmgenid_post_load(void *opaque, int version_id)
+{
+    VmGenIdState *vms = opaque;
+    vmgenid_update_guest(vms);
+    return 0;
+}
+
+static const VMStateDescription vmstate_vmgenid = {
+    .name = "vmgenid",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .post_load = vmgenid_post_load,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT8_ARRAY(vmgenid_addr_le, VmGenIdState, sizeof(uint64_t)),
+        VMSTATE_END_OF_LIST()
+    },
+};
+
+static void vmgenid_handle_reset(void *opaque)
+{
+    VmGenIdState *vms = VMGENID(opaque);
+    /* Clear the guest-allocated GUID address when the VM resets */
+    memset(vms->vmgenid_addr_le, 0, ARRAY_SIZE(vms->vmgenid_addr_le));
+}
+
+static void vmgenid_realize(DeviceState *dev, Error **errp)
+{
+    VmGenIdState *vms = VMGENID(dev);
+    qemu_register_reset(vmgenid_handle_reset, vms);
+}
+
+static void vmgenid_device_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+
+    dc->vmsd = &vmstate_vmgenid;
+    dc->realize = vmgenid_realize;
+    dc->hotpluggable = false;
+
+    object_class_property_add_str(klass, VMGENID_GUID, NULL,
+                                  vmgenid_set_guid, NULL);
+    object_class_property_set_description(klass, VMGENID_GUID,
+                                    "Set Global Unique Identifier "
+                                    "(big-endian) or auto for random value",
+                                    NULL);
+}
+
+static const TypeInfo vmgenid_device_info = {
+    .name          = VMGENID_DEVICE,
+    .parent        = TYPE_DEVICE,
+    .instance_size = sizeof(VmGenIdState),
+    .class_init    = vmgenid_device_class_init,
+};
+
+static void vmgenid_register_types(void)
+{
+    type_register_static(&vmgenid_device_info);
+}
+
+type_init(vmgenid_register_types)
+
+GuidInfo *qmp_query_vm_generation_id(Error **errp)
+{
+    GuidInfo *info;
+    VmGenIdState *vms;
+    Object *obj = find_vmgenid_dev();
+
+    if (!obj) {
+        return NULL;
+    }
+    vms = VMGENID(obj);
+
+    info = g_malloc0(sizeof(*info));
+    info->guid = qemu_uuid_unparse_strdup(&vms->guid);
+    return info;
+}
diff --git a/hw/arm/armv7m.c b/hw/arm/armv7m.c
index 0c9ca7bfa0..c8a11f2b53 100644
--- a/hw/arm/armv7m.c
+++ b/hw/arm/armv7m.c
@@ -8,6 +8,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "hw/arm/armv7m.h"
 #include "qapi/error.h"
 #include "qemu-common.h"
 #include "cpu.h"
@@ -17,147 +18,260 @@
 #include "elf.h"
 #include "sysemu/qtest.h"
 #include "qemu/error-report.h"
+#include "exec/address-spaces.h"
 
 /* Bitbanded IO.  Each word corresponds to a single bit.  */
 
 /* Get the byte address of the real memory for a bitband access.  */
-static inline uint32_t bitband_addr(void * opaque, uint32_t addr)
+static inline hwaddr bitband_addr(BitBandState *s, hwaddr offset)
 {
-    uint32_t res;
-
-    res = *(uint32_t *)opaque;
-    res |= (addr & 0x1ffffff) >> 5;
-    return res;
-
+    return s->base | (offset & 0x1ffffff) >> 5;
 }
 
-static uint32_t bitband_readb(void *opaque, hwaddr offset)
+static MemTxResult bitband_read(void *opaque, hwaddr offset,
+                                uint64_t *data, unsigned size, MemTxAttrs attrs)
 {
-    uint8_t v;
-    cpu_physical_memory_read(bitband_addr(opaque, offset), &v, 1);
-    return (v & (1 << ((offset >> 2) & 7))) != 0;
+    BitBandState *s = opaque;
+    uint8_t buf[4];
+    MemTxResult res;
+    int bitpos, bit;
+    hwaddr addr;
+
+    assert(size <= 4);
+
+    /* Find address in underlying memory and round down to multiple of size */
+    addr = bitband_addr(s, offset) & (-size);
+    res = address_space_read(s->source_as, addr, attrs, buf, size);
+    if (res) {
+        return res;
+    }
+    /* Bit position in the N bytes read... */
+    bitpos = (offset >> 2) & ((size * 8) - 1);
+    /* ...converted to byte in buffer and bit in byte */
+    bit = (buf[bitpos >> 3] >> (bitpos & 7)) & 1;
+    *data = bit;
+    return MEMTX_OK;
 }
 
-static void bitband_writeb(void *opaque, hwaddr offset,
-                           uint32_t value)
+static MemTxResult bitband_write(void *opaque, hwaddr offset, uint64_t value,
+                                 unsigned size, MemTxAttrs attrs)
 {
-    uint32_t addr;
-    uint8_t mask;
-    uint8_t v;
-    addr = bitband_addr(opaque, offset);
-    mask = (1 << ((offset >> 2) & 7));
-    cpu_physical_memory_read(addr, &v, 1);
-    if (value & 1)
-        v |= mask;
-    else
-        v &= ~mask;
-    cpu_physical_memory_write(addr, &v, 1);
+    BitBandState *s = opaque;
+    uint8_t buf[4];
+    MemTxResult res;
+    int bitpos, bit;
+    hwaddr addr;
+
+    assert(size <= 4);
+
+    /* Find address in underlying memory and round down to multiple of size */
+    addr = bitband_addr(s, offset) & (-size);
+    res = address_space_read(s->source_as, addr, attrs, buf, size);
+    if (res) {
+        return res;
+    }
+    /* Bit position in the N bytes read... */
+    bitpos = (offset >> 2) & ((size * 8) - 1);
+    /* ...converted to byte in buffer and bit in byte */
+    bit = 1 << (bitpos & 7);
+    if (value & 1) {
+        buf[bitpos >> 3] |= bit;
+    } else {
+        buf[bitpos >> 3] &= ~bit;
+    }
+    return address_space_write(s->source_as, addr, attrs, buf, size);
 }
 
-static uint32_t bitband_readw(void *opaque, hwaddr offset)
+static const MemoryRegionOps bitband_ops = {
+    .read_with_attrs = bitband_read,
+    .write_with_attrs = bitband_write,
+    .endianness = DEVICE_NATIVE_ENDIAN,
+    .impl.min_access_size = 1,
+    .impl.max_access_size = 4,
+    .valid.min_access_size = 1,
+    .valid.max_access_size = 4,
+};
+
+static void bitband_init(Object *obj)
 {
-    uint32_t addr;
-    uint16_t mask;
-    uint16_t v;
-    addr = bitband_addr(opaque, offset) & ~1;
-    mask = (1 << ((offset >> 2) & 15));
-    mask = tswap16(mask);
-    cpu_physical_memory_read(addr, &v, 2);
-    return (v & mask) != 0;
+    BitBandState *s = BITBAND(obj);
+    SysBusDevice *dev = SYS_BUS_DEVICE(obj);
+
+    object_property_add_link(obj, "source-memory",
+                             TYPE_MEMORY_REGION,
+                             (Object **)&s->source_memory,
+                             qdev_prop_allow_set_link_before_realize,
+                             OBJ_PROP_LINK_UNREF_ON_RELEASE,
+                             &error_abort);
+    memory_region_init_io(&s->iomem, obj, &bitband_ops, s,
+                          "bitband", 0x02000000);
+    sysbus_init_mmio(dev, &s->iomem);
 }
 
-static void bitband_writew(void *opaque, hwaddr offset,
-                           uint32_t value)
+static void bitband_realize(DeviceState *dev, Error **errp)
 {
-    uint32_t addr;
-    uint16_t mask;
-    uint16_t v;
-    addr = bitband_addr(opaque, offset) & ~1;
-    mask = (1 << ((offset >> 2) & 15));
-    mask = tswap16(mask);
-    cpu_physical_memory_read(addr, &v, 2);
-    if (value & 1)
-        v |= mask;
-    else
-        v &= ~mask;
-    cpu_physical_memory_write(addr, &v, 2);
+    BitBandState *s = BITBAND(dev);
+
+    if (!s->source_memory) {
+        error_setg(errp, "source-memory property not set");
+        return;
+    }
+
+    s->source_as = address_space_init_shareable(s->source_memory,
+                                                "bitband-source");
 }
 
-static uint32_t bitband_readl(void *opaque, hwaddr offset)
+/* Board init.  */
+
+static const hwaddr bitband_input_addr[ARMV7M_NUM_BITBANDS] = {
+    0x20000000, 0x40000000
+};
+
+static const hwaddr bitband_output_addr[ARMV7M_NUM_BITBANDS] = {
+    0x22000000, 0x42000000
+};
+
+static void armv7m_instance_init(Object *obj)
 {
-    uint32_t addr;
-    uint32_t mask;
-    uint32_t v;
-    addr = bitband_addr(opaque, offset) & ~3;
-    mask = (1 << ((offset >> 2) & 31));
-    mask = tswap32(mask);
-    cpu_physical_memory_read(addr, &v, 4);
-    return (v & mask) != 0;
+    ARMv7MState *s = ARMV7M(obj);
+    int i;
+
+    /* Can't init the cpu here, we don't yet know which model to use */
+
+    object_property_add_link(obj, "memory",
+                             TYPE_MEMORY_REGION,
+                             (Object **)&s->board_memory,
+                             qdev_prop_allow_set_link_before_realize,
+                             OBJ_PROP_LINK_UNREF_ON_RELEASE,
+                             &error_abort);
+    memory_region_init(&s->container, obj, "armv7m-container", UINT64_MAX);
+
+    object_initialize(&s->nvic, sizeof(s->nvic), "armv7m_nvic");
+    qdev_set_parent_bus(DEVICE(&s->nvic), sysbus_get_default());
+    object_property_add_alias(obj, "num-irq",
+                              OBJECT(&s->nvic), "num-irq", &error_abort);
+
+    for (i = 0; i < ARRAY_SIZE(s->bitband); i++) {
+        object_initialize(&s->bitband[i], sizeof(s->bitband[i]), TYPE_BITBAND);
+        qdev_set_parent_bus(DEVICE(&s->bitband[i]), sysbus_get_default());
+    }
 }
 
-static void bitband_writel(void *opaque, hwaddr offset,
-                           uint32_t value)
+static void armv7m_realize(DeviceState *dev, Error **errp)
 {
-    uint32_t addr;
-    uint32_t mask;
-    uint32_t v;
-    addr = bitband_addr(opaque, offset) & ~3;
-    mask = (1 << ((offset >> 2) & 31));
-    mask = tswap32(mask);
-    cpu_physical_memory_read(addr, &v, 4);
-    if (value & 1)
-        v |= mask;
-    else
-        v &= ~mask;
-    cpu_physical_memory_write(addr, &v, 4);
-}
+    ARMv7MState *s = ARMV7M(dev);
+    SysBusDevice *sbd;
+    Error *err = NULL;
+    int i;
+    char **cpustr;
+    ObjectClass *oc;
+    const char *typename;
+    CPUClass *cc;
+
+    if (!s->board_memory) {
+        error_setg(errp, "memory property was not set");
+        return;
+    }
 
-static const MemoryRegionOps bitband_ops = {
-    .old_mmio = {
-        .read = { bitband_readb, bitband_readw, bitband_readl, },
-        .write = { bitband_writeb, bitband_writew, bitband_writel, },
-    },
-    .endianness = DEVICE_NATIVE_ENDIAN,
-};
+    memory_region_add_subregion_overlap(&s->container, 0, s->board_memory, -1);
 
-#define TYPE_BITBAND "ARM,bitband-memory"
-#define BITBAND(obj) OBJECT_CHECK(BitBandState, (obj), TYPE_BITBAND)
+    cpustr = g_strsplit(s->cpu_model, ",", 2);
 
-typedef struct {
-    /*< private >*/
-    SysBusDevice parent_obj;
-    /*< public >*/
+    oc = cpu_class_by_name(TYPE_ARM_CPU, cpustr[0]);
+    if (!oc) {
+        error_setg(errp, "Unknown CPU model %s", cpustr[0]);
+        g_strfreev(cpustr);
+        return;
+    }
 
-    MemoryRegion iomem;
-    uint32_t base;
-} BitBandState;
+    cc = CPU_CLASS(oc);
+    typename = object_class_get_name(oc);
+    cc->parse_features(typename, cpustr[1], &err);
+    g_strfreev(cpustr);
+    if (err) {
+        error_propagate(errp, err);
+        return;
+    }
 
-static void bitband_init(Object *obj)
-{
-    BitBandState *s = BITBAND(obj);
-    SysBusDevice *dev = SYS_BUS_DEVICE(obj);
+    s->cpu = ARM_CPU(object_new(typename));
+    if (!s->cpu) {
+        error_setg(errp, "Unknown CPU model %s", s->cpu_model);
+        return;
+    }
 
-    memory_region_init_io(&s->iomem, obj, &bitband_ops, &s->base,
-                          "bitband", 0x02000000);
-    sysbus_init_mmio(dev, &s->iomem);
+    object_property_set_link(OBJECT(s->cpu), OBJECT(&s->container), "memory",
+                             &error_abort);
+    object_property_set_bool(OBJECT(s->cpu), true, "realized", &err);
+    if (err != NULL) {
+        error_propagate(errp, err);
+        return;
+    }
+
+    /* Note that we must realize the NVIC after the CPU */
+    object_property_set_bool(OBJECT(&s->nvic), true, "realized", &err);
+    if (err != NULL) {
+        error_propagate(errp, err);
+        return;
+    }
+
+    /* Alias the NVIC's input and output GPIOs as our own so the board
+     * code can wire them up. (We do this in realize because the
+     * NVIC doesn't create the input GPIO array until realize.)
+     */
+    qdev_pass_gpios(DEVICE(&s->nvic), dev, NULL);
+    qdev_pass_gpios(DEVICE(&s->nvic), dev, "SYSRESETREQ");
+
+    /* Wire the NVIC up to the CPU */
+    sbd = SYS_BUS_DEVICE(&s->nvic);
+    sysbus_connect_irq(sbd, 0,
+                       qdev_get_gpio_in(DEVICE(s->cpu), ARM_CPU_IRQ));
+    s->cpu->env.nvic = &s->nvic;
+
+    memory_region_add_subregion(&s->container, 0xe000e000,
+                                sysbus_mmio_get_region(sbd, 0));
+
+    for (i = 0; i < ARRAY_SIZE(s->bitband); i++) {
+        Object *obj = OBJECT(&s->bitband[i]);
+        SysBusDevice *sbd = SYS_BUS_DEVICE(&s->bitband[i]);
+
+        object_property_set_int(obj, bitband_input_addr[i], "base", &err);
+        if (err != NULL) {
+            error_propagate(errp, err);
+            return;
+        }
+        object_property_set_link(obj, OBJECT(s->board_memory),
+                                 "source-memory", &error_abort);
+        object_property_set_bool(obj, true, "realized", &err);
+        if (err != NULL) {
+            error_propagate(errp, err);
+            return;
+        }
+
+        memory_region_add_subregion(&s->container, bitband_output_addr[i],
+                                    sysbus_mmio_get_region(sbd, 0));
+    }
 }
 
-static void armv7m_bitband_init(void)
-{
-    DeviceState *dev;
+static Property armv7m_properties[] = {
+    DEFINE_PROP_STRING("cpu-model", ARMv7MState, cpu_model),
+    DEFINE_PROP_END_OF_LIST(),
+};
 
-    dev = qdev_create(NULL, TYPE_BITBAND);
-    qdev_prop_set_uint32(dev, "base", 0x20000000);
-    qdev_init_nofail(dev);
-    sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, 0x22000000);
+static void armv7m_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
 
-    dev = qdev_create(NULL, TYPE_BITBAND);
-    qdev_prop_set_uint32(dev, "base", 0x40000000);
-    qdev_init_nofail(dev);
-    sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, 0x42000000);
+    dc->realize = armv7m_realize;
+    dc->props = armv7m_properties;
 }
 
-/* Board init.  */
+static const TypeInfo armv7m_info = {
+    .name = TYPE_ARMV7M,
+    .parent = TYPE_SYS_BUS_DEVICE,
+    .instance_size = sizeof(ARMv7MState),
+    .instance_init = armv7m_instance_init,
+    .class_init = armv7m_class_init,
+};
 
 static void armv7m_reset(void *opaque)
 {
@@ -168,37 +282,35 @@ static void armv7m_reset(void *opaque)
 
 /* Init CPU and memory for a v7-M based board.
    mem_size is in bytes.
-   Returns the NVIC array.  */
+   Returns the ARMv7M device.  */
 
 DeviceState *armv7m_init(MemoryRegion *system_memory, int mem_size, int num_irq,
                       const char *kernel_filename, const char *cpu_model)
 {
-    ARMCPU *cpu;
-    CPUARMState *env;
-    DeviceState *nvic;
-    int image_size;
-    uint64_t entry;
-    uint64_t lowaddr;
-    int big_endian;
+    DeviceState *armv7m;
 
     if (cpu_model == NULL) {
-	cpu_model = "cortex-m3";
-    }
-    cpu = cpu_arm_init(cpu_model);
-    if (cpu == NULL) {
-        fprintf(stderr, "Unable to find CPU definition\n");
-        exit(1);
+        cpu_model = "cortex-m3";
     }
-    env = &cpu->env;
 
-    armv7m_bitband_init();
+    armv7m = qdev_create(NULL, "armv7m");
+    qdev_prop_set_uint32(armv7m, "num-irq", num_irq);
+    qdev_prop_set_string(armv7m, "cpu-model", cpu_model);
+    object_property_set_link(OBJECT(armv7m), OBJECT(get_system_memory()),
+                                     "memory", &error_abort);
+    /* This will exit with an error if the user passed us a bad cpu_model */
+    qdev_init_nofail(armv7m);
+
+    armv7m_load_kernel(ARM_CPU(first_cpu), kernel_filename, mem_size);
+    return armv7m;
+}
 
-    nvic = qdev_create(NULL, "armv7m_nvic");
-    qdev_prop_set_uint32(nvic, "num-irq", num_irq);
-    env->nvic = nvic;
-    qdev_init_nofail(nvic);
-    sysbus_connect_irq(SYS_BUS_DEVICE(nvic), 0,
-                       qdev_get_gpio_in(DEVICE(cpu), ARM_CPU_IRQ));
+void armv7m_load_kernel(ARMCPU *cpu, const char *kernel_filename, int mem_size)
+{
+    int image_size;
+    uint64_t entry;
+    uint64_t lowaddr;
+    int big_endian;
 
 #ifdef TARGET_WORDS_BIGENDIAN
     big_endian = 1;
@@ -224,8 +336,15 @@ DeviceState *armv7m_init(MemoryRegion *system_memory, int mem_size, int num_irq,
         }
     }
 
+    /* CPU objects (unlike devices) are not automatically reset on system
+     * reset, so we must always register a handler to do so. Unlike
+     * A-profile CPUs, we don't need to do anything special in the
+     * handler to arrange that it starts correctly.
+     * This is arguably the wrong place to do this, but it matches the
+     * way A-profile does it. Note that this means that every M profile
+     * board must call this function!
+     */
     qemu_register_reset(armv7m_reset, cpu);
-    return nvic;
 }
 
 static Property bitband_properties[] = {
@@ -237,6 +356,7 @@ static void bitband_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
 
+    dc->realize = bitband_realize;
     dc->props = bitband_properties;
 }
 
@@ -251,6 +371,7 @@ static const TypeInfo bitband_info = {
 static void armv7m_register_types(void)
 {
     type_register_static(&bitband_info);
+    type_register_static(&armv7m_info);
 }
 
 type_init(armv7m_register_types)
diff --git a/hw/arm/bcm2835_peripherals.c b/hw/arm/bcm2835_peripherals.c
index 2e641a3989..369ef1e3bd 100644
--- a/hw/arm/bcm2835_peripherals.c
+++ b/hw/arm/bcm2835_peripherals.c
@@ -86,11 +86,21 @@ static void bcm2835_peripherals_init(Object *obj)
     object_property_add_const_link(OBJECT(&s->property), "dma-mr",
                                    OBJECT(&s->gpu_bus_mr), &error_abort);
 
+    /* Random Number Generator */
+    object_initialize(&s->rng, sizeof(s->rng), TYPE_BCM2835_RNG);
+    object_property_add_child(obj, "rng", OBJECT(&s->rng), NULL);
+    qdev_set_parent_bus(DEVICE(&s->rng), sysbus_get_default());
+
     /* Extended Mass Media Controller */
     object_initialize(&s->sdhci, sizeof(s->sdhci), TYPE_SYSBUS_SDHCI);
     object_property_add_child(obj, "sdhci", OBJECT(&s->sdhci), NULL);
     qdev_set_parent_bus(DEVICE(&s->sdhci), sysbus_get_default());
 
+    /* SDHOST */
+    object_initialize(&s->sdhost, sizeof(s->sdhost), TYPE_BCM2835_SDHOST);
+    object_property_add_child(obj, "sdhost", OBJECT(&s->sdhost), NULL);
+    qdev_set_parent_bus(DEVICE(&s->sdhost), sysbus_get_default());
+
     /* DMA Channels */
     object_initialize(&s->dma, sizeof(s->dma), TYPE_BCM2835_DMA);
     object_property_add_child(obj, "dma", OBJECT(&s->dma), NULL);
@@ -98,6 +108,16 @@ static void bcm2835_peripherals_init(Object *obj)
 
     object_property_add_const_link(OBJECT(&s->dma), "dma-mr",
                                    OBJECT(&s->gpu_bus_mr), &error_abort);
+
+    /* GPIO */
+    object_initialize(&s->gpio, sizeof(s->gpio), TYPE_BCM2835_GPIO);
+    object_property_add_child(obj, "gpio", OBJECT(&s->gpio), NULL);
+    qdev_set_parent_bus(DEVICE(&s->gpio), sysbus_get_default());
+
+    object_property_add_const_link(OBJECT(&s->gpio), "sdbus-sdhci",
+                                   OBJECT(&s->sdhci.sdbus), &error_abort);
+    object_property_add_const_link(OBJECT(&s->gpio), "sdbus-sdhost",
+                                   OBJECT(&s->sdhost.sdbus), &error_abort);
 }
 
 static void bcm2835_peripherals_realize(DeviceState *dev, Error **errp)
@@ -226,6 +246,16 @@ static void bcm2835_peripherals_realize(DeviceState *dev, Error **errp)
     sysbus_connect_irq(SYS_BUS_DEVICE(&s->property), 0,
                       qdev_get_gpio_in(DEVICE(&s->mboxes), MBOX_CHAN_PROPERTY));
 
+    /* Random Number Generator */
+    object_property_set_bool(OBJECT(&s->rng), true, "realized", &err);
+    if (err) {
+        error_propagate(errp, err);
+        return;
+    }
+
+    memory_region_add_subregion(&s->peri_mr, RNG_OFFSET,
+                sysbus_mmio_get_region(SYS_BUS_DEVICE(&s->rng), 0));
+
     /* Extended Mass Media Controller */
     object_property_set_int(OBJECT(&s->sdhci), BCM2835_SDHC_CAPAREG, "capareg",
                             &err);
@@ -252,13 +282,20 @@ static void bcm2835_peripherals_realize(DeviceState *dev, Error **errp)
     sysbus_connect_irq(SYS_BUS_DEVICE(&s->sdhci), 0,
         qdev_get_gpio_in_named(DEVICE(&s->ic), BCM2835_IC_GPU_IRQ,
                                INTERRUPT_ARASANSDIO));
-    object_property_add_alias(OBJECT(s), "sd-bus", OBJECT(&s->sdhci), "sd-bus",
-                              &err);
+
+    /* SDHOST */
+    object_property_set_bool(OBJECT(&s->sdhost), true, "realized", &err);
     if (err) {
         error_propagate(errp, err);
         return;
     }
 
+    memory_region_add_subregion(&s->peri_mr, MMCI0_OFFSET,
+                sysbus_mmio_get_region(SYS_BUS_DEVICE(&s->sdhost), 0));
+    sysbus_connect_irq(SYS_BUS_DEVICE(&s->sdhost), 0,
+        qdev_get_gpio_in_named(DEVICE(&s->ic), BCM2835_IC_GPU_IRQ,
+                               INTERRUPT_SDIO));
+
     /* DMA Channels */
     object_property_set_bool(OBJECT(&s->dma), true, "realized", &err);
     if (err) {
@@ -277,6 +314,23 @@ static void bcm2835_peripherals_realize(DeviceState *dev, Error **errp)
                                                   BCM2835_IC_GPU_IRQ,
                                                   INTERRUPT_DMA0 + n));
     }
+
+    /* GPIO */
+    object_property_set_bool(OBJECT(&s->gpio), true, "realized", &err);
+    if (err) {
+        error_propagate(errp, err);
+        return;
+    }
+
+    memory_region_add_subregion(&s->peri_mr, GPIO_OFFSET,
+                sysbus_mmio_get_region(SYS_BUS_DEVICE(&s->gpio), 0));
+
+    object_property_add_alias(OBJECT(s), "sd-bus", OBJECT(&s->gpio), "sd-bus",
+                              &err);
+    if (err) {
+        error_propagate(errp, err);
+        return;
+    }
 }
 
 static void bcm2835_peripherals_class_init(ObjectClass *oc, void *data)
diff --git a/hw/arm/exynos4210.c b/hw/arm/exynos4210.c
index be3c96d21e..1d2b50cc4e 100644
--- a/hw/arm/exynos4210.c
+++ b/hw/arm/exynos4210.c
@@ -24,6 +24,7 @@
 #include "qemu/osdep.h"
 #include "qapi/error.h"
 #include "qemu-common.h"
+#include "qemu/log.h"
 #include "cpu.h"
 #include "hw/boards.h"
 #include "sysemu/sysemu.h"
@@ -74,6 +75,9 @@
 /* PMU SFR base address */
 #define EXYNOS4210_PMU_BASE_ADDR            0x10020000
 
+/* Clock controller SFR base address */
+#define EXYNOS4210_CLK_BASE_ADDR            0x10030000
+
 /* Display controllers (FIMD) */
 #define EXYNOS4210_FIMD0_BASE_ADDR          0x11C00000
 
@@ -138,6 +142,16 @@ void exynos4210_write_secondary(ARMCPU *cpu,
                        info->smp_loader_start);
 }
 
+static uint64_t exynos4210_calc_affinity(int cpu)
+{
+    uint64_t mp_affinity;
+
+    /* Exynos4210 has 0x9 as cluster ID */
+    mp_affinity = (0x9 << ARM_AFF1_SHIFT) | cpu;
+
+    return mp_affinity;
+}
+
 Exynos4210State *exynos4210_init(MemoryRegion *system_mem,
         unsigned long ram_size)
 {
@@ -163,6 +177,8 @@ Exynos4210State *exynos4210_init(MemoryRegion *system_mem,
         }
 
         s->cpu[n] = ARM_CPU(cpuobj);
+        object_property_set_int(cpuobj, exynos4210_calc_affinity(n),
+                                "mp-affinity", &error_abort);
         object_property_set_int(cpuobj, EXYNOS4210_SMP_PRIVATE_BASE_ADDR,
                                 "reset-cbar", &error_abort);
         object_property_set_bool(cpuobj, true, "realized", &error_fatal);
@@ -297,6 +313,8 @@ Exynos4210State *exynos4210_init(MemoryRegion *system_mem,
     */
     sysbus_create_simple("exynos4210.pmu", EXYNOS4210_PMU_BASE_ADDR, NULL);
 
+    sysbus_create_simple("exynos4210.clk", EXYNOS4210_CLK_BASE_ADDR, NULL);
+
     /* PWM */
     sysbus_create_varargs("exynos4210.pwm", EXYNOS4210_PWM_BASE_ADDR,
                           s->irq_table[exynos4210_get_irq(22, 0)],
diff --git a/hw/arm/netduino2.c b/hw/arm/netduino2.c
index 23d792837f..3cfe332dd1 100644
--- a/hw/arm/netduino2.c
+++ b/hw/arm/netduino2.c
@@ -27,17 +27,18 @@
 #include "hw/boards.h"
 #include "qemu/error-report.h"
 #include "hw/arm/stm32f205_soc.h"
+#include "hw/arm/arm.h"
 
 static void netduino2_init(MachineState *machine)
 {
     DeviceState *dev;
 
     dev = qdev_create(NULL, TYPE_STM32F205_SOC);
-    if (machine->kernel_filename) {
-        qdev_prop_set_string(dev, "kernel-filename", machine->kernel_filename);
-    }
     qdev_prop_set_string(dev, "cpu-model", "cortex-m3");
     object_property_set_bool(OBJECT(dev), true, "realized", &error_fatal);
+
+    armv7m_load_kernel(ARM_CPU(first_cpu), machine->kernel_filename,
+                       FLASH_SIZE);
 }
 
 static void netduino2_machine_init(MachineClass *mc)
diff --git a/hw/arm/stm32f205_soc.c b/hw/arm/stm32f205_soc.c
index 38425bda6c..6e1260d2ed 100644
--- a/hw/arm/stm32f205_soc.c
+++ b/hw/arm/stm32f205_soc.c
@@ -49,6 +49,9 @@ static void stm32f205_soc_initfn(Object *obj)
     STM32F205State *s = STM32F205_SOC(obj);
     int i;
 
+    object_initialize(&s->armv7m, sizeof(s->armv7m), TYPE_ARMV7M);
+    qdev_set_parent_bus(DEVICE(&s->armv7m), sysbus_get_default());
+
     object_initialize(&s->syscfg, sizeof(s->syscfg), TYPE_STM32F2XX_SYSCFG);
     qdev_set_parent_bus(DEVICE(&s->syscfg), sysbus_get_default());
 
@@ -82,7 +85,7 @@ static void stm32f205_soc_initfn(Object *obj)
 static void stm32f205_soc_realize(DeviceState *dev_soc, Error **errp)
 {
     STM32F205State *s = STM32F205_SOC(dev_soc);
-    DeviceState *dev, *nvic;
+    DeviceState *dev, *armv7m;
     SysBusDevice *busdev;
     Error *err = NULL;
     int i;
@@ -110,8 +113,16 @@ static void stm32f205_soc_realize(DeviceState *dev_soc, Error **errp)
     vmstate_register_ram_global(sram);
     memory_region_add_subregion(system_memory, SRAM_BASE_ADDRESS, sram);
 
-    nvic = armv7m_init(get_system_memory(), FLASH_SIZE, 96,
-                       s->kernel_filename, s->cpu_model);
+    armv7m = DEVICE(&s->armv7m);
+    qdev_prop_set_uint32(armv7m, "num-irq", 96);
+    qdev_prop_set_string(armv7m, "cpu-model", s->cpu_model);
+    object_property_set_link(OBJECT(&s->armv7m), OBJECT(get_system_memory()),
+                                     "memory", &error_abort);
+    object_property_set_bool(OBJECT(&s->armv7m), true, "realized", &err);
+    if (err != NULL) {
+        error_propagate(errp, err);
+        return;
+    }
 
     /* System configuration controller */
     dev = DEVICE(&s->syscfg);
@@ -122,7 +133,7 @@ static void stm32f205_soc_realize(DeviceState *dev_soc, Error **errp)
     }
     busdev = SYS_BUS_DEVICE(dev);
     sysbus_mmio_map(busdev, 0, 0x40013800);
-    sysbus_connect_irq(busdev, 0, qdev_get_gpio_in(nvic, 71));
+    sysbus_connect_irq(busdev, 0, qdev_get_gpio_in(armv7m, 71));
 
     /* Attach UART (uses USART registers) and USART controllers */
     for (i = 0; i < STM_NUM_USARTS; i++) {
@@ -136,7 +147,7 @@ static void stm32f205_soc_realize(DeviceState *dev_soc, Error **errp)
         }
         busdev = SYS_BUS_DEVICE(dev);
         sysbus_mmio_map(busdev, 0, usart_addr[i]);
-        sysbus_connect_irq(busdev, 0, qdev_get_gpio_in(nvic, usart_irq[i]));
+        sysbus_connect_irq(busdev, 0, qdev_get_gpio_in(armv7m, usart_irq[i]));
     }
 
     /* Timer 2 to 5 */
@@ -150,7 +161,7 @@ static void stm32f205_soc_realize(DeviceState *dev_soc, Error **errp)
         }
         busdev = SYS_BUS_DEVICE(dev);
         sysbus_mmio_map(busdev, 0, timer_addr[i]);
-        sysbus_connect_irq(busdev, 0, qdev_get_gpio_in(nvic, timer_irq[i]));
+        sysbus_connect_irq(busdev, 0, qdev_get_gpio_in(armv7m, timer_irq[i]));
     }
 
     /* ADC 1 to 3 */
@@ -162,7 +173,7 @@ static void stm32f205_soc_realize(DeviceState *dev_soc, Error **errp)
         return;
     }
     qdev_connect_gpio_out(DEVICE(s->adc_irqs), 0,
-                          qdev_get_gpio_in(nvic, ADC_IRQ));
+                          qdev_get_gpio_in(armv7m, ADC_IRQ));
 
     for (i = 0; i < STM_NUM_ADCS; i++) {
         dev = DEVICE(&(s->adc[i]));
@@ -187,12 +198,11 @@ static void stm32f205_soc_realize(DeviceState *dev_soc, Error **errp)
         }
         busdev = SYS_BUS_DEVICE(dev);
         sysbus_mmio_map(busdev, 0, spi_addr[i]);
-        sysbus_connect_irq(busdev, 0, qdev_get_gpio_in(nvic, spi_irq[i]));
+        sysbus_connect_irq(busdev, 0, qdev_get_gpio_in(armv7m, spi_irq[i]));
     }
 }
 
 static Property stm32f205_soc_properties[] = {
-    DEFINE_PROP_STRING("kernel-filename", STM32F205State, kernel_filename),
     DEFINE_PROP_STRING("cpu-model", STM32F205State, cpu_model),
     DEFINE_PROP_END_OF_LIST(),
 };
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index f3440f2ccb..5f62a0321e 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -535,7 +535,6 @@ static void create_v2m(VirtMachineState *vms, qemu_irq *pic)
 static void create_gic(VirtMachineState *vms, qemu_irq *pic)
 {
     /* We create a standalone GIC */
-    VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms);
     DeviceState *gicdev;
     SysBusDevice *gicbusdev;
     const char *gictype;
@@ -605,7 +604,7 @@ static void create_gic(VirtMachineState *vms, qemu_irq *pic)
 
     fdt_add_gic_node(vms);
 
-    if (type == 3 && !vmc->no_its) {
+    if (type == 3 && vms->its) {
         create_its(vms, gicdev);
     } else if (type == 2) {
         create_v2m(vms, pic);
@@ -1378,6 +1377,7 @@ static void machvirt_init(MachineState *machine)
         }
 
         object_property_set_bool(cpuobj, true, "realized", NULL);
+        object_unref(cpuobj);
     }
     fdt_add_timer_nodes(vms);
     fdt_add_cpu_nodes(vms);
@@ -1480,6 +1480,20 @@ static void virt_set_highmem(Object *obj, bool value, Error **errp)
     vms->highmem = value;
 }
 
+static bool virt_get_its(Object *obj, Error **errp)
+{
+    VirtMachineState *vms = VIRT_MACHINE(obj);
+
+    return vms->its;
+}
+
+static void virt_set_its(Object *obj, bool value, Error **errp)
+{
+    VirtMachineState *vms = VIRT_MACHINE(obj);
+
+    vms->its = value;
+}
+
 static char *virt_get_gic_version(Object *obj, Error **errp)
 {
     VirtMachineState *vms = VIRT_MACHINE(obj);
@@ -1540,6 +1554,7 @@ type_init(machvirt_machine_init);
 static void virt_2_9_instance_init(Object *obj)
 {
     VirtMachineState *vms = VIRT_MACHINE(obj);
+    VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms);
 
     /* EL3 is disabled by default on virt: this makes us consistent
      * between KVM and TCG for this board, and it also allows us to
@@ -1579,6 +1594,19 @@ static void virt_2_9_instance_init(Object *obj)
                                     "Set GIC version. "
                                     "Valid values are 2, 3 and host", NULL);
 
+    if (vmc->no_its) {
+        vms->its = false;
+    } else {
+        /* Default allows ITS instantiation */
+        vms->its = true;
+        object_property_add_bool(obj, "its", virt_get_its,
+                                 virt_set_its, NULL);
+        object_property_set_description(obj, "its",
+                                        "Set on/off to enable/disable "
+                                        "ITS instantiation",
+                                        NULL);
+    }
+
     vms->memmap = a15memmap;
     vms->irqmap = a15irqmap;
 }
diff --git a/hw/block/block.c b/hw/block/block.c
index 8dc9d84a39..27878d0087 100644
--- a/hw/block/block.c
+++ b/hw/block/block.c
@@ -51,11 +51,33 @@ void blkconf_blocksizes(BlockConf *conf)
     }
 }
 
-void blkconf_apply_backend_options(BlockConf *conf)
+void blkconf_apply_backend_options(BlockConf *conf, bool readonly,
+                                   bool resizable, Error **errp)
 {
     BlockBackend *blk = conf->blk;
     BlockdevOnError rerror, werror;
+    uint64_t perm, shared_perm;
     bool wce;
+    int ret;
+
+    perm = BLK_PERM_CONSISTENT_READ;
+    if (!readonly) {
+        perm |= BLK_PERM_WRITE;
+    }
+
+    shared_perm = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED |
+                  BLK_PERM_GRAPH_MOD;
+    if (resizable) {
+        shared_perm |= BLK_PERM_RESIZE;
+    }
+    if (conf->share_rw) {
+        shared_perm |= BLK_PERM_WRITE;
+    }
+
+    ret = blk_set_perm(blk, perm, shared_perm, errp);
+    if (ret < 0) {
+        return;
+    }
 
     switch (conf->wce) {
     case ON_OFF_AUTO_ON:    wce = true; break;
diff --git a/hw/block/fdc.c b/hw/block/fdc.c
index 17d29e7bc5..a328693d15 100644
--- a/hw/block/fdc.c
+++ b/hw/block/fdc.c
@@ -186,6 +186,7 @@ typedef enum FDiskFlags {
 struct FDrive {
     FDCtrl *fdctrl;
     BlockBackend *blk;
+    BlockConf *conf;
     /* Drive status */
     FloppyDriveType drive;    /* CMOS drive type        */
     uint8_t perpendicular;    /* 2.88 MB access mode    */
@@ -469,9 +470,22 @@ static void fd_revalidate(FDrive *drv)
     }
 }
 
-static void fd_change_cb(void *opaque, bool load)
+static void fd_change_cb(void *opaque, bool load, Error **errp)
 {
     FDrive *drive = opaque;
+    Error *local_err = NULL;
+
+    if (!load) {
+        blk_set_perm(drive->blk, 0, BLK_PERM_ALL, &error_abort);
+    } else {
+        blkconf_apply_backend_options(drive->conf,
+                                      blk_is_read_only(drive->blk), false,
+                                      &local_err);
+        if (local_err) {
+            error_propagate(errp, local_err);
+            return;
+        }
+    }
 
     drive->media_changed = 1;
     drive->media_validated = false;
@@ -508,6 +522,7 @@ static int floppy_drive_init(DeviceState *qdev)
     FloppyDrive *dev = FLOPPY_DRIVE(qdev);
     FloppyBus *bus = FLOPPY_BUS(qdev->parent_bus);
     FDrive *drive;
+    Error *local_err = NULL;
     int ret;
 
     if (dev->unit == -1) {
@@ -533,7 +548,7 @@ static int floppy_drive_init(DeviceState *qdev)
 
     if (!dev->conf.blk) {
         /* Anonymous BlockBackend for an empty drive */
-        dev->conf.blk = blk_new();
+        dev->conf.blk = blk_new(0, BLK_PERM_ALL);
         ret = blk_attach_dev(dev->conf.blk, qdev);
         assert(ret == 0);
     }
@@ -551,7 +566,13 @@ static int floppy_drive_init(DeviceState *qdev)
      * blkconf_apply_backend_options(). */
     dev->conf.rerror = BLOCKDEV_ON_ERROR_AUTO;
     dev->conf.werror = BLOCKDEV_ON_ERROR_AUTO;
-    blkconf_apply_backend_options(&dev->conf);
+
+    blkconf_apply_backend_options(&dev->conf, blk_is_read_only(dev->conf.blk),
+                                  false, &local_err);
+    if (local_err) {
+        error_report_err(local_err);
+        return -1;
+    }
 
     /* 'enospc' is the default for -drive, 'report' is what blk_new() gives us
      * for empty drives. */
@@ -565,6 +586,7 @@ static int floppy_drive_init(DeviceState *qdev)
         return -1;
     }
 
+    drive->conf = &dev->conf;
     drive->blk = dev->conf.blk;
     drive->fdctrl = bus->fdc;
 
diff --git a/hw/block/m25p80.c b/hw/block/m25p80.c
index 2d6eb46a04..190573cefa 100644
--- a/hw/block/m25p80.c
+++ b/hw/block/m25p80.c
@@ -1215,6 +1215,7 @@ static void m25p80_realize(SSISlave *ss, Error **errp)
 {
     Flash *s = M25P80(ss);
     M25P80Class *mc = M25P80_GET_CLASS(s);
+    int ret;
 
     s->pi = mc->pi;
 
@@ -1222,6 +1223,13 @@ static void m25p80_realize(SSISlave *ss, Error **errp)
     s->dirty_page = -1;
 
     if (s->blk) {
+        uint64_t perm = BLK_PERM_CONSISTENT_READ |
+                        (blk_is_read_only(s->blk) ? 0 : BLK_PERM_WRITE);
+        ret = blk_set_perm(s->blk, perm, BLK_PERM_ALL, errp);
+        if (ret < 0) {
+            return;
+        }
+
         DB_PRINT_L(0, "Binding to IF_MTD drive\n");
         s->storage = blk_blockalign(s->blk, s->size);
 
diff --git a/hw/block/nand.c b/hw/block/nand.c
index c69e6755d9..0d33ac281f 100644
--- a/hw/block/nand.c
+++ b/hw/block/nand.c
@@ -373,6 +373,8 @@ static void nand_realize(DeviceState *dev, Error **errp)
 {
     int pagesize;
     NANDFlashState *s = NAND(dev);
+    int ret;
+
 
     s->buswidth = nand_flash_ids[s->chip_id].width >> 3;
     s->size = nand_flash_ids[s->chip_id].size << 20;
@@ -407,6 +409,11 @@ static void nand_realize(DeviceState *dev, Error **errp)
             error_setg(errp, "Can't use a read-only drive");
             return;
         }
+        ret = blk_set_perm(s->blk, BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE,
+                           BLK_PERM_ALL, errp);
+        if (ret < 0) {
+            return;
+        }
         if (blk_getlength(s->blk) >=
                 (s->pages << s->page_shift) + (s->pages << s->oob_shift)) {
             pagesize = 0;
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index ae91a18f17..ae303d44e5 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -835,6 +835,7 @@ static int nvme_init(PCIDevice *pci_dev)
     int i;
     int64_t bs_size;
     uint8_t *pci_conf;
+    Error *local_err = NULL;
 
     if (!n->conf.blk) {
         return -1;
@@ -850,7 +851,12 @@ static int nvme_init(PCIDevice *pci_dev)
         return -1;
     }
     blkconf_blocksizes(&n->conf);
-    blkconf_apply_backend_options(&n->conf);
+    blkconf_apply_backend_options(&n->conf, blk_is_read_only(n->conf.blk),
+                                  false, &local_err);
+    if (local_err) {
+        error_report_err(local_err);
+        return -1;
+    }
 
     pci_conf = pci_dev->config;
     pci_conf[PCI_INTERRUPT_PIN] = 1;
diff --git a/hw/block/onenand.c b/hw/block/onenand.c
index 8d8422739e..ddf5492426 100644
--- a/hw/block/onenand.c
+++ b/hw/block/onenand.c
@@ -778,6 +778,7 @@ static int onenand_initfn(SysBusDevice *sbd)
     OneNANDState *s = ONE_NAND(dev);
     uint32_t size = 1 << (24 + ((s->id.dev >> 4) & 7));
     void *ram;
+    Error *local_err = NULL;
 
     s->base = (hwaddr)-1;
     s->rdy = NULL;
@@ -796,6 +797,12 @@ static int onenand_initfn(SysBusDevice *sbd)
             error_report("Can't use a read-only drive");
             return -1;
         }
+        blk_set_perm(s->blk, BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE,
+                     BLK_PERM_ALL, &local_err);
+        if (local_err) {
+            error_report_err(local_err);
+            return -1;
+        }
         s->blk_cur = s->blk;
     }
     s->otp = memset(g_malloc((64 + 2) << PAGE_SHIFT),
diff --git a/hw/block/pflash_cfi01.c b/hw/block/pflash_cfi01.c
index 71b98a3eef..594d4cf6fe 100644
--- a/hw/block/pflash_cfi01.c
+++ b/hw/block/pflash_cfi01.c
@@ -758,6 +758,18 @@ static void pflash_cfi01_realize(DeviceState *dev, Error **errp)
     sysbus_init_mmio(SYS_BUS_DEVICE(dev), &pfl->mem);
 
     if (pfl->blk) {
+        uint64_t perm;
+        pfl->ro = blk_is_read_only(pfl->blk);
+        perm = BLK_PERM_CONSISTENT_READ | (pfl->ro ? 0 : BLK_PERM_WRITE);
+        ret = blk_set_perm(pfl->blk, perm, BLK_PERM_ALL, errp);
+        if (ret < 0) {
+            return;
+        }
+    } else {
+        pfl->ro = 0;
+    }
+
+    if (pfl->blk) {
         /* read the initial flash content */
         ret = blk_pread(pfl->blk, 0, pfl->storage, total_len);
 
@@ -768,12 +780,6 @@ static void pflash_cfi01_realize(DeviceState *dev, Error **errp)
         }
     }
 
-    if (pfl->blk) {
-        pfl->ro = blk_is_read_only(pfl->blk);
-    } else {
-        pfl->ro = 0;
-    }
-
     /* Default to devices being used at their maximum device width. This was
      * assumed before the device_width support was added.
      */
diff --git a/hw/block/pflash_cfi02.c b/hw/block/pflash_cfi02.c
index ef71322759..e6c5c6c25d 100644
--- a/hw/block/pflash_cfi02.c
+++ b/hw/block/pflash_cfi02.c
@@ -632,6 +632,19 @@ static void pflash_cfi02_realize(DeviceState *dev, Error **errp)
     vmstate_register_ram(&pfl->orig_mem, DEVICE(pfl));
     pfl->storage = memory_region_get_ram_ptr(&pfl->orig_mem);
     pfl->chip_len = chip_len;
+
+    if (pfl->blk) {
+        uint64_t perm;
+        pfl->ro = blk_is_read_only(pfl->blk);
+        perm = BLK_PERM_CONSISTENT_READ | (pfl->ro ? 0 : BLK_PERM_WRITE);
+        ret = blk_set_perm(pfl->blk, perm, BLK_PERM_ALL, errp);
+        if (ret < 0) {
+            return;
+        }
+    } else {
+        pfl->ro = 0;
+    }
+
     if (pfl->blk) {
         /* read the initial flash content */
         ret = blk_pread(pfl->blk, 0, pfl->storage, chip_len);
@@ -646,12 +659,6 @@ static void pflash_cfi02_realize(DeviceState *dev, Error **errp)
     pfl->rom_mode = 1;
     sysbus_init_mmio(SYS_BUS_DEVICE(dev), &pfl->mem);
 
-    if (pfl->blk) {
-        pfl->ro = blk_is_read_only(pfl->blk);
-    } else {
-        pfl->ro = 0;
-    }
-
     pfl->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, pflash_timer, pfl);
     pfl->wcycle = 0;
     pfl->cmd = 0;
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index 843bd2fa73..98c16a7a9a 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -928,7 +928,13 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp)
     }
 
     blkconf_serial(&conf->conf, &conf->serial);
-    blkconf_apply_backend_options(&conf->conf);
+    blkconf_apply_backend_options(&conf->conf,
+                                  blk_is_read_only(conf->conf.blk), true,
+                                  &err);
+    if (err) {
+        error_propagate(errp, err);
+        return;
+    }
     s->original_wce = blk_enable_write_cache(conf->conf.blk);
     blkconf_geometry(&conf->conf, NULL, 65535, 255, 255, &err);
     if (err) {
diff --git a/hw/core/bus.c b/hw/core/bus.c
index cf383fc1af..4651f24486 100644
--- a/hw/core/bus.c
+++ b/hw/core/bus.c
@@ -197,7 +197,7 @@ static void qbus_initfn(Object *obj)
                              TYPE_HOTPLUG_HANDLER,
                              (Object **)&bus->hotplug_handler,
                              object_property_allow_set_link,
-                             OBJ_PROP_LINK_UNREF_ON_RELEASE,
+                             0,
                              NULL);
     object_property_add_bool(obj, "realized",
                              bus_get_realized, bus_set_realized, NULL);
diff --git a/hw/core/loader.c b/hw/core/loader.c
index 8b980e91fb..bf17b42cbe 100644
--- a/hw/core/loader.c
+++ b/hw/core/loader.c
@@ -435,6 +435,19 @@ int load_elf_as(const char *filename,
                 uint64_t *highaddr, int big_endian, int elf_machine,
                 int clear_lsb, int data_swab, AddressSpace *as)
 {
+    return load_elf_ram(filename, translate_fn, translate_opaque,
+                        pentry, lowaddr, highaddr, big_endian, elf_machine,
+                        clear_lsb, data_swab, as, true);
+}
+
+/* return < 0 if error, otherwise the number of bytes loaded in memory */
+int load_elf_ram(const char *filename,
+                 uint64_t (*translate_fn)(void *, uint64_t),
+                 void *translate_opaque, uint64_t *pentry, uint64_t *lowaddr,
+                 uint64_t *highaddr, int big_endian, int elf_machine,
+                 int clear_lsb, int data_swab, AddressSpace *as,
+                 bool load_rom)
+{
     int fd, data_order, target_data_order, must_swab, ret = ELF_LOAD_FAILED;
     uint8_t e_ident[EI_NIDENT];
 
@@ -473,11 +486,11 @@ int load_elf_as(const char *filename,
     if (e_ident[EI_CLASS] == ELFCLASS64) {
         ret = load_elf64(filename, fd, translate_fn, translate_opaque, must_swab,
                          pentry, lowaddr, highaddr, elf_machine, clear_lsb,
-                         data_swab, as);
+                         data_swab, as, load_rom);
     } else {
         ret = load_elf32(filename, fd, translate_fn, translate_opaque, must_swab,
                          pentry, lowaddr, highaddr, elf_machine, clear_lsb,
-                         data_swab, as);
+                         data_swab, as, load_rom);
     }
 
  fail:
diff --git a/hw/core/or-irq.c b/hw/core/or-irq.c
index 1ac090d1a4..1485d5b285 100644
--- a/hw/core/or-irq.c
+++ b/hw/core/or-irq.c
@@ -89,6 +89,9 @@ static void or_irq_class_init(ObjectClass *klass, void *data)
     dc->props = or_irq_properties;
     dc->realize = or_irq_realize;
     dc->vmsd = &vmstate_or_irq;
+
+    /* Reason: Needs to be wired up to work, e.g. see stm32f205_soc.c */
+    dc->cannot_instantiate_with_device_add_yet = true;
 }
 
 static const TypeInfo or_irq_type_info = {
diff --git a/hw/core/ptimer.c b/hw/core/ptimer.c
index 3af82afe78..59ccb00550 100644
--- a/hw/core/ptimer.c
+++ b/hw/core/ptimer.c
@@ -12,6 +12,7 @@
 #include "qemu/host-utils.h"
 #include "sysemu/replay.h"
 #include "sysemu/qtest.h"
+#include "block/aio.h"
 
 #define DELTA_ADJUST     1
 #define DELTA_NO_ADJUST -1
@@ -353,3 +354,10 @@ ptimer_state *ptimer_init(QEMUBH *bh, uint8_t policy_mask)
     s->policy_mask = policy_mask;
     return s;
 }
+
+void ptimer_free(ptimer_state *s)
+{
+    qemu_bh_delete(s->bh);
+    timer_free(s->timer);
+    g_free(s);
+}
diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c
index 94f4d8bde4..c34be1c1ba 100644
--- a/hw/core/qdev-properties-system.c
+++ b/hw/core/qdev-properties-system.c
@@ -73,14 +73,19 @@ static void parse_drive(DeviceState *dev, const char *str, void **ptr,
 {
     BlockBackend *blk;
     bool blk_created = false;
+    int ret;
 
     blk = blk_by_name(str);
     if (!blk) {
         BlockDriverState *bs = bdrv_lookup_bs(NULL, str, NULL);
         if (bs) {
-            blk = blk_new();
-            blk_insert_bs(blk, bs);
+            blk = blk_new(0, BLK_PERM_ALL);
             blk_created = true;
+
+            ret = blk_insert_bs(blk, bs, errp);
+            if (ret < 0) {
+                goto fail;
+            }
         }
     }
     if (!blk) {
diff --git a/hw/core/qdev.c b/hw/core/qdev.c
index 06ba02e2a3..1e7fb33246 100644
--- a/hw/core/qdev.c
+++ b/hw/core/qdev.c
@@ -37,6 +37,7 @@
 #include "hw/boards.h"
 #include "hw/sysbus.h"
 #include "qapi-event.h"
+#include "migration/migration.h"
 
 int qdev_hotplug = 0;
 static bool qdev_hot_added = false;
@@ -102,9 +103,23 @@ static void bus_add_child(BusState *bus, DeviceState *child)
 
 void qdev_set_parent_bus(DeviceState *dev, BusState *bus)
 {
+    bool replugging = dev->parent_bus != NULL;
+
+    if (replugging) {
+        /* Keep a reference to the device while it's not plugged into
+         * any bus, to avoid it potentially evaporating when it is
+         * dereffed in bus_remove_child().
+         */
+        object_ref(OBJECT(dev));
+        bus_remove_child(dev->parent_bus, dev);
+        object_unref(OBJECT(dev->parent_bus));
+    }
     dev->parent_bus = bus;
     object_ref(OBJECT(bus));
     bus_add_child(bus, dev);
+    if (replugging) {
+        object_unref(OBJECT(dev));
+    }
 }
 
 /* Create a new device.  This only initializes the device state
@@ -889,6 +904,7 @@ static void device_set_realized(Object *obj, bool value, Error **errp)
     Error *local_err = NULL;
     bool unattached_parent = false;
     static int unattached_count;
+    int ret;
 
     if (dev->hotplugged && !dc->hotpluggable) {
         error_setg(errp, QERR_DEVICE_NO_HOTPLUG, object_get_typename(obj));
@@ -896,6 +912,11 @@ static void device_set_realized(Object *obj, bool value, Error **errp)
     }
 
     if (value && !dev->realized) {
+        ret = check_migratable(obj, &local_err);
+        if (ret < 0) {
+            goto fail;
+        }
+
         if (!obj->parent) {
             gchar *name = g_strdup_printf("device[%d]", unattached_count++);
 
diff --git a/hw/core/register.c b/hw/core/register.c
index 4bfbc508de..dc335a79a9 100644
--- a/hw/core/register.c
+++ b/hw/core/register.c
@@ -59,6 +59,15 @@ static inline uint64_t register_read_val(RegisterInfo *reg)
     return 0; /* unreachable */
 }
 
+static inline uint64_t register_enabled_mask(int data_size, unsigned size)
+{
+    if (data_size < size) {
+        size = data_size;
+    }
+
+    return MAKE_64BIT_MASK(0, size * 8);
+}
+
 void register_write(RegisterInfo *reg, uint64_t val, uint64_t we,
                     const char *prefix, bool debug)
 {
@@ -192,11 +201,7 @@ void register_write_memory(void *opaque, hwaddr addr,
     }
 
     /* Generate appropriate write enable mask */
-    if (reg->data_size < size) {
-        we = MAKE_64BIT_MASK(0, reg->data_size * 8);
-    } else {
-        we = MAKE_64BIT_MASK(0, size * 8);
-    }
+    we = register_enabled_mask(reg->data_size, size);
 
     register_write(reg, value, we, reg_array->prefix,
                    reg_array->debug);
@@ -208,6 +213,7 @@ uint64_t register_read_memory(void *opaque, hwaddr addr,
     RegisterInfoArray *reg_array = opaque;
     RegisterInfo *reg = NULL;
     uint64_t read_val;
+    uint64_t re;
     int i;
 
     for (i = 0; i < reg_array->num_elements; i++) {
@@ -223,7 +229,10 @@ uint64_t register_read_memory(void *opaque, hwaddr addr,
         return 0;
     }
 
-    read_val = register_read(reg, size * 8, reg_array->prefix,
+    /* Generate appropriate read enable mask */
+    re = register_enabled_mask(reg->data_size, size);
+
+    read_val = register_read(reg, re, reg_array->prefix,
                              reg_array->debug);
 
     return extract64(read_val, 0, size * 8);
@@ -274,9 +283,18 @@ void register_finalize_block(RegisterInfoArray *r_array)
     g_free(r_array);
 }
 
+static void register_class_init(ObjectClass *oc, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(oc);
+
+    /* Reason: needs to be wired up to work */
+    dc->cannot_instantiate_with_device_add_yet = true;
+}
+
 static const TypeInfo register_info = {
     .name  = TYPE_REGISTER,
     .parent = TYPE_DEVICE,
+    .class_init = register_class_init,
 };
 
 static void register_register_types(void)
diff --git a/hw/display/milkymist-tmu2.c b/hw/display/milkymist-tmu2.c
index 7528665510..59120ddb67 100644
--- a/hw/display/milkymist-tmu2.c
+++ b/hw/display/milkymist-tmu2.c
@@ -293,7 +293,7 @@ static void tmu2_start(MilkymistTMU2State *s)
     cpu_physical_memory_unmap(mesh, mesh_len, 0, mesh_len);
 
     /* Write back the OpenGL framebuffer to the QEMU framebuffer */
-    fb_len = 2 * s->regs[R_DSTHRES] * s->regs[R_DSTVRES];
+    fb_len = 2ULL * s->regs[R_DSTHRES] * s->regs[R_DSTVRES];
     fb = cpu_physical_memory_map(s->regs[R_DSTFBUF], &fb_len, 1);
     if (fb == NULL) {
         glDeleteTextures(1, &texture);
diff --git a/hw/display/virtio-gpu-3d.c b/hw/display/virtio-gpu-3d.c
index ecb09d17a1..f49b7fe8cd 100644
--- a/hw/display/virtio-gpu-3d.c
+++ b/hw/display/virtio-gpu-3d.c
@@ -177,16 +177,15 @@ static void virgl_cmd_set_scanout(VirtIOGPU *g,
         qemu_console_resize(g->scanout[ss.scanout_id].con,
                             ss.r.width, ss.r.height);
         virgl_renderer_force_ctx_0();
-        dpy_gl_scanout(g->scanout[ss.scanout_id].con, info.tex_id,
-                       info.flags & 1 /* FIXME: Y_0_TOP */,
-                       info.width, info.height,
-                       ss.r.x, ss.r.y, ss.r.width, ss.r.height);
+        dpy_gl_scanout_texture(g->scanout[ss.scanout_id].con, info.tex_id,
+                               info.flags & 1 /* FIXME: Y_0_TOP */,
+                               info.width, info.height,
+                               ss.r.x, ss.r.y, ss.r.width, ss.r.height);
     } else {
         if (ss.scanout_id != 0) {
             dpy_gfx_replace_surface(g->scanout[ss.scanout_id].con, NULL);
         }
-        dpy_gl_scanout(g->scanout[ss.scanout_id].con, 0, false,
-                       0, 0, 0, 0, 0, 0);
+        dpy_gl_scanout_disable(g->scanout[ss.scanout_id].con);
     }
     g->scanout[ss.scanout_id].resource_id = ss.resource_id;
 }
@@ -597,7 +596,7 @@ void virtio_gpu_virgl_reset(VirtIOGPU *g)
         if (i != 0) {
             dpy_gfx_replace_surface(g->scanout[i].con, NULL);
         }
-        dpy_gl_scanout(g->scanout[i].con, 0, false, 0, 0, 0, 0, 0, 0);
+        dpy_gl_scanout_disable(g->scanout[i].con);
     }
 }
 
diff --git a/hw/gpio/Makefile.objs b/hw/gpio/Makefile.objs
index a43c7cf442..fa0a72e6d0 100644
--- a/hw/gpio/Makefile.objs
+++ b/hw/gpio/Makefile.objs
@@ -7,3 +7,4 @@ common-obj-$(CONFIG_GPIO_KEY) += gpio_key.o
 
 obj-$(CONFIG_OMAP) += omap_gpio.o
 obj-$(CONFIG_IMX) += imx_gpio.o
+obj-$(CONFIG_RASPI) += bcm2835_gpio.o
diff --git a/hw/gpio/bcm2835_gpio.c b/hw/gpio/bcm2835_gpio.c
new file mode 100644
index 0000000000..acc2e3cf9e
--- /dev/null
+++ b/hw/gpio/bcm2835_gpio.c
@@ -0,0 +1,353 @@
+/*
+ * Raspberry Pi (BCM2835) GPIO Controller
+ *
+ * Copyright (c) 2017 Antfield SAS
+ *
+ * Authors:
+ *  Clement Deschamps <clement.deschamps@antfield.fr>
+ *  Luc Michel <luc.michel@antfield.fr>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/log.h"
+#include "qemu/timer.h"
+#include "qapi/error.h"
+#include "hw/sysbus.h"
+#include "hw/sd/sd.h"
+#include "hw/gpio/bcm2835_gpio.h"
+
+#define GPFSEL0   0x00
+#define GPFSEL1   0x04
+#define GPFSEL2   0x08
+#define GPFSEL3   0x0C
+#define GPFSEL4   0x10
+#define GPFSEL5   0x14
+#define GPSET0    0x1C
+#define GPSET1    0x20
+#define GPCLR0    0x28
+#define GPCLR1    0x2C
+#define GPLEV0    0x34
+#define GPLEV1    0x38
+#define GPEDS0    0x40
+#define GPEDS1    0x44
+#define GPREN0    0x4C
+#define GPREN1    0x50
+#define GPFEN0    0x58
+#define GPFEN1    0x5C
+#define GPHEN0    0x64
+#define GPHEN1    0x68
+#define GPLEN0    0x70
+#define GPLEN1    0x74
+#define GPAREN0   0x7C
+#define GPAREN1   0x80
+#define GPAFEN0   0x88
+#define GPAFEN1   0x8C
+#define GPPUD     0x94
+#define GPPUDCLK0 0x98
+#define GPPUDCLK1 0x9C
+
+static uint32_t gpfsel_get(BCM2835GpioState *s, uint8_t reg)
+{
+    int i;
+    uint32_t value = 0;
+    for (i = 0; i < 10; i++) {
+        uint32_t index = 10 * reg + i;
+        if (index < sizeof(s->fsel)) {
+            value |= (s->fsel[index] & 0x7) << (3 * i);
+        }
+    }
+    return value;
+}
+
+static void gpfsel_set(BCM2835GpioState *s, uint8_t reg, uint32_t value)
+{
+    int i;
+    for (i = 0; i < 10; i++) {
+        uint32_t index = 10 * reg + i;
+        if (index < sizeof(s->fsel)) {
+            int fsel = (value >> (3 * i)) & 0x7;
+            s->fsel[index] = fsel;
+        }
+    }
+
+    /* SD controller selection (48-53) */
+    if (s->sd_fsel != 0
+            && (s->fsel[48] == 0) /* SD_CLK_R */
+            && (s->fsel[49] == 0) /* SD_CMD_R */
+            && (s->fsel[50] == 0) /* SD_DATA0_R */
+            && (s->fsel[51] == 0) /* SD_DATA1_R */
+            && (s->fsel[52] == 0) /* SD_DATA2_R */
+            && (s->fsel[53] == 0) /* SD_DATA3_R */
+            ) {
+        /* SDHCI controller selected */
+        sdbus_reparent_card(s->sdbus_sdhost, s->sdbus_sdhci);
+        s->sd_fsel = 0;
+    } else if (s->sd_fsel != 4
+            && (s->fsel[48] == 4) /* SD_CLK_R */
+            && (s->fsel[49] == 4) /* SD_CMD_R */
+            && (s->fsel[50] == 4) /* SD_DATA0_R */
+            && (s->fsel[51] == 4) /* SD_DATA1_R */
+            && (s->fsel[52] == 4) /* SD_DATA2_R */
+            && (s->fsel[53] == 4) /* SD_DATA3_R */
+            ) {
+        /* SDHost controller selected */
+        sdbus_reparent_card(s->sdbus_sdhci, s->sdbus_sdhost);
+        s->sd_fsel = 4;
+    }
+}
+
+static int gpfsel_is_out(BCM2835GpioState *s, int index)
+{
+    if (index >= 0 && index < 54) {
+        return s->fsel[index] == 1;
+    }
+    return 0;
+}
+
+static void gpset(BCM2835GpioState *s,
+        uint32_t val, uint8_t start, uint8_t count, uint32_t *lev)
+{
+    uint32_t changes = val & ~*lev;
+    uint32_t cur = 1;
+
+    int i;
+    for (i = 0; i < count; i++) {
+        if ((changes & cur) && (gpfsel_is_out(s, start + i))) {
+            qemu_set_irq(s->out[start + i], 1);
+        }
+        cur <<= 1;
+    }
+
+    *lev |= val;
+}
+
+static void gpclr(BCM2835GpioState *s,
+        uint32_t val, uint8_t start, uint8_t count, uint32_t *lev)
+{
+    uint32_t changes = val & *lev;
+    uint32_t cur = 1;
+
+    int i;
+    for (i = 0; i < count; i++) {
+        if ((changes & cur) && (gpfsel_is_out(s, start + i))) {
+            qemu_set_irq(s->out[start + i], 0);
+        }
+        cur <<= 1;
+    }
+
+    *lev &= ~val;
+}
+
+static uint64_t bcm2835_gpio_read(void *opaque, hwaddr offset,
+        unsigned size)
+{
+    BCM2835GpioState *s = (BCM2835GpioState *)opaque;
+
+    switch (offset) {
+    case GPFSEL0:
+    case GPFSEL1:
+    case GPFSEL2:
+    case GPFSEL3:
+    case GPFSEL4:
+    case GPFSEL5:
+        return gpfsel_get(s, offset / 4);
+    case GPSET0:
+    case GPSET1:
+        /* Write Only */
+        return 0;
+    case GPCLR0:
+    case GPCLR1:
+        /* Write Only */
+        return 0;
+    case GPLEV0:
+        return s->lev0;
+    case GPLEV1:
+        return s->lev1;
+    case GPEDS0:
+    case GPEDS1:
+    case GPREN0:
+    case GPREN1:
+    case GPFEN0:
+    case GPFEN1:
+    case GPHEN0:
+    case GPHEN1:
+    case GPLEN0:
+    case GPLEN1:
+    case GPAREN0:
+    case GPAREN1:
+    case GPAFEN0:
+    case GPAFEN1:
+    case GPPUD:
+    case GPPUDCLK0:
+    case GPPUDCLK1:
+        /* Not implemented */
+        return 0;
+    default:
+        qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad offset %"HWADDR_PRIx"\n",
+                __func__, offset);
+        break;
+    }
+
+    return 0;
+}
+
+static void bcm2835_gpio_write(void *opaque, hwaddr offset,
+        uint64_t value, unsigned size)
+{
+    BCM2835GpioState *s = (BCM2835GpioState *)opaque;
+
+    switch (offset) {
+    case GPFSEL0:
+    case GPFSEL1:
+    case GPFSEL2:
+    case GPFSEL3:
+    case GPFSEL4:
+    case GPFSEL5:
+        gpfsel_set(s, offset / 4, value);
+        break;
+    case GPSET0:
+        gpset(s, value, 0, 32, &s->lev0);
+        break;
+    case GPSET1:
+        gpset(s, value, 32, 22, &s->lev1);
+        break;
+    case GPCLR0:
+        gpclr(s, value, 0, 32, &s->lev0);
+        break;
+    case GPCLR1:
+        gpclr(s, value, 32, 22, &s->lev1);
+        break;
+    case GPLEV0:
+    case GPLEV1:
+        /* Read Only */
+        break;
+    case GPEDS0:
+    case GPEDS1:
+    case GPREN0:
+    case GPREN1:
+    case GPFEN0:
+    case GPFEN1:
+    case GPHEN0:
+    case GPHEN1:
+    case GPLEN0:
+    case GPLEN1:
+    case GPAREN0:
+    case GPAREN1:
+    case GPAFEN0:
+    case GPAFEN1:
+    case GPPUD:
+    case GPPUDCLK0:
+    case GPPUDCLK1:
+        /* Not implemented */
+        break;
+    default:
+        goto err_out;
+    }
+    return;
+
+err_out:
+    qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad offset %"HWADDR_PRIx"\n",
+            __func__, offset);
+}
+
+static void bcm2835_gpio_reset(DeviceState *dev)
+{
+    BCM2835GpioState *s = BCM2835_GPIO(dev);
+
+    int i;
+    for (i = 0; i < 6; i++) {
+        gpfsel_set(s, i, 0);
+    }
+
+    s->sd_fsel = 0;
+
+    /* SDHCI is selected by default */
+    sdbus_reparent_card(&s->sdbus, s->sdbus_sdhci);
+
+    s->lev0 = 0;
+    s->lev1 = 0;
+}
+
+static const MemoryRegionOps bcm2835_gpio_ops = {
+    .read = bcm2835_gpio_read,
+    .write = bcm2835_gpio_write,
+    .endianness = DEVICE_NATIVE_ENDIAN,
+};
+
+static const VMStateDescription vmstate_bcm2835_gpio = {
+    .name = "bcm2835_gpio",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT8_ARRAY(fsel, BCM2835GpioState, 54),
+        VMSTATE_UINT32(lev0, BCM2835GpioState),
+        VMSTATE_UINT32(lev1, BCM2835GpioState),
+        VMSTATE_UINT8(sd_fsel, BCM2835GpioState),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static void bcm2835_gpio_init(Object *obj)
+{
+    BCM2835GpioState *s = BCM2835_GPIO(obj);
+    DeviceState *dev = DEVICE(obj);
+    SysBusDevice *sbd = SYS_BUS_DEVICE(obj);
+
+    qbus_create_inplace(&s->sdbus, sizeof(s->sdbus),
+                        TYPE_SD_BUS, DEVICE(s), "sd-bus");
+
+    memory_region_init_io(&s->iomem, obj,
+            &bcm2835_gpio_ops, s, "bcm2835_gpio", 0x1000);
+    sysbus_init_mmio(sbd, &s->iomem);
+    qdev_init_gpio_out(dev, s->out, 54);
+}
+
+static void bcm2835_gpio_realize(DeviceState *dev, Error **errp)
+{
+    BCM2835GpioState *s = BCM2835_GPIO(dev);
+    Object *obj;
+    Error *err = NULL;
+
+    obj = object_property_get_link(OBJECT(dev), "sdbus-sdhci", &err);
+    if (obj == NULL) {
+        error_setg(errp, "%s: required sdhci link not found: %s",
+                __func__, error_get_pretty(err));
+        return;
+    }
+    s->sdbus_sdhci = SD_BUS(obj);
+
+    obj = object_property_get_link(OBJECT(dev), "sdbus-sdhost", &err);
+    if (obj == NULL) {
+        error_setg(errp, "%s: required sdhost link not found: %s",
+                __func__, error_get_pretty(err));
+        return;
+    }
+    s->sdbus_sdhost = SD_BUS(obj);
+}
+
+static void bcm2835_gpio_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+
+    dc->vmsd = &vmstate_bcm2835_gpio;
+    dc->realize = &bcm2835_gpio_realize;
+    dc->reset = &bcm2835_gpio_reset;
+}
+
+static const TypeInfo bcm2835_gpio_info = {
+    .name          = TYPE_BCM2835_GPIO,
+    .parent        = TYPE_SYS_BUS_DEVICE,
+    .instance_size = sizeof(BCM2835GpioState),
+    .instance_init = bcm2835_gpio_init,
+    .class_init    = bcm2835_gpio_class_init,
+};
+
+static void bcm2835_gpio_register_types(void)
+{
+    type_register_static(&bcm2835_gpio_info);
+}
+
+type_init(bcm2835_gpio_register_types)
diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index 1c928abb28..2073108577 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -42,6 +42,7 @@
 #include "hw/acpi/memory_hotplug.h"
 #include "sysemu/tpm.h"
 #include "hw/acpi/tpm.h"
+#include "hw/acpi/vmgenid.h"
 #include "sysemu/tpm_backend.h"
 #include "hw/timer/mc146818rtc_regs.h"
 #include "sysemu/numa.h"
@@ -462,7 +463,7 @@ static void *acpi_set_bsel(PCIBus *bus, void *opaque)
 
         *bus_bsel = (*bsel_alloc)++;
         object_property_add_uint32_ptr(OBJECT(bus), ACPI_PCIHP_PROP_BSEL,
-                                       bus_bsel, NULL);
+                                       bus_bsel, &error_abort);
     }
 
     return bsel_alloc;
@@ -471,7 +472,7 @@ static void *acpi_set_bsel(PCIBus *bus, void *opaque)
 static void acpi_set_pci_info(void)
 {
     PCIBus *bus = find_i440fx(); /* TODO: Q35 support */
-    unsigned bsel_alloc = 0;
+    unsigned bsel_alloc = ACPI_PCIHP_BSEL_DEFAULT;
 
     if (bus) {
         /* Scan all PCI buses. Set property to enable acpi based hotplug. */
@@ -1803,7 +1804,7 @@ static Aml *build_q35_osc_method(void)
     Aml *else_ctx;
     Aml *method;
     Aml *a_cwd1 = aml_name("CDW1");
-    Aml *a_ctrl = aml_name("CTRL");
+    Aml *a_ctrl = aml_local(0);
 
     method = aml_method("_OSC", 4, AML_NOTSERIALIZED);
     aml_append(method, aml_create_dword_field(aml_arg(3), aml_int(0), "CDW1"));
@@ -1813,7 +1814,6 @@ static Aml *build_q35_osc_method(void)
     aml_append(if_ctx, aml_create_dword_field(aml_arg(3), aml_int(4), "CDW2"));
     aml_append(if_ctx, aml_create_dword_field(aml_arg(3), aml_int(8), "CDW3"));
 
-    aml_append(if_ctx, aml_store(aml_name("CDW2"), aml_name("SUPP")));
     aml_append(if_ctx, aml_store(aml_name("CDW3"), a_ctrl));
 
     /*
@@ -1898,8 +1898,6 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
         aml_append(dev, aml_name_decl("_CID", aml_eisaid("PNP0A03")));
         aml_append(dev, aml_name_decl("_ADR", aml_int(0)));
         aml_append(dev, aml_name_decl("_UID", aml_int(1)));
-        aml_append(dev, aml_name_decl("SUPP", aml_int(0)));
-        aml_append(dev, aml_name_decl("CTRL", aml_int(0)));
         aml_append(dev, build_q35_osc_method());
         aml_append(sb_scope, dev);
         aml_append(dsdt, sb_scope);
@@ -1964,6 +1962,9 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
             aml_append(dev, aml_name_decl("_UID", aml_int(bus_num)));
             aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0A03")));
             aml_append(dev, aml_name_decl("_BBN", aml_int(bus_num)));
+            if (pci_bus_is_express(bus)) {
+                aml_append(dev, build_q35_osc_method());
+            }
 
             if (numa_node != NUMA_NODE_UNASSIGNED) {
                 aml_append(dev, aml_name_decl("_PXM", aml_int(numa_node)));
@@ -2610,6 +2611,7 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine)
     size_t aml_len = 0;
     GArray *tables_blob = tables->table_data;
     AcpiSlicOem slic_oem = { .id = NULL, .table_id = NULL };
+    Object *vmgenid_dev;
 
     acpi_get_pm_info(&pm);
     acpi_get_misc_info(&misc);
@@ -2653,6 +2655,13 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine)
     acpi_add_table(table_offsets, tables_blob);
     build_madt(tables_blob, tables->linker, pcms);
 
+    vmgenid_dev = find_vmgenid_dev();
+    if (vmgenid_dev) {
+        acpi_add_table(table_offsets, tables_blob);
+        vmgenid_build_acpi(VMGENID(vmgenid_dev), tables_blob,
+                           tables->vmgenid, tables->linker);
+    }
+
     if (misc.has_hpet) {
         acpi_add_table(table_offsets, tables_blob);
         build_hpet(tables_blob, tables->linker);
@@ -2823,6 +2832,7 @@ void acpi_setup(void)
     PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
     AcpiBuildTables tables;
     AcpiBuildState *build_state;
+    Object *vmgenid_dev;
 
     if (!pcms->fw_cfg) {
         ACPI_BUILD_DPRINTF("No fw cfg. Bailing out.\n");
@@ -2859,6 +2869,12 @@ void acpi_setup(void)
     fw_cfg_add_file(pcms->fw_cfg, ACPI_BUILD_TPMLOG_FILE,
                     tables.tcpalog->data, acpi_data_len(tables.tcpalog));
 
+    vmgenid_dev = find_vmgenid_dev();
+    if (vmgenid_dev) {
+        vmgenid_add_fw_cfg(VMGENID(vmgenid_dev), pcms->fw_cfg,
+                           tables.vmgenid);
+    }
+
     if (!pcmc->rsdp_in_ram) {
         /*
          * Keep for compatibility with old machine types.
diff --git a/hw/ide/core.c b/hw/ide/core.c
index cfa5de6ebf..db509b3e15 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -1120,7 +1120,7 @@ static void ide_cfata_metadata_write(IDEState *s)
 }
 
 /* called when the inserted state of the media has changed */
-static void ide_cd_change_cb(void *opaque, bool load)
+static void ide_cd_change_cb(void *opaque, bool load, Error **errp)
 {
     IDEState *s = opaque;
     uint64_t nb_sectors;
diff --git a/hw/ide/qdev.c b/hw/ide/qdev.c
index dbaa75cf59..4383cd111d 100644
--- a/hw/ide/qdev.c
+++ b/hw/ide/qdev.c
@@ -170,7 +170,7 @@ static int ide_dev_initfn(IDEDevice *dev, IDEDriveKind kind)
             return -1;
         } else {
             /* Anonymous BlockBackend for an empty drive */
-            dev->conf.blk = blk_new();
+            dev->conf.blk = blk_new(0, BLK_PERM_ALL);
         }
     }
 
@@ -196,7 +196,12 @@ static int ide_dev_initfn(IDEDevice *dev, IDEDriveKind kind)
             return -1;
         }
     }
-    blkconf_apply_backend_options(&dev->conf);
+    blkconf_apply_backend_options(&dev->conf, kind == IDE_CD, kind != IDE_CD,
+                                  &err);
+    if (err) {
+        error_report_err(err);
+        return -1;
+    }
 
     if (ide_init_drive(s, dev->conf.blk, kind,
                        dev->version, dev->serial, dev->model, dev->wwn,
diff --git a/hw/intc/Makefile.objs b/hw/intc/Makefile.objs
index 8948106ac4..adedd0da5f 100644
--- a/hw/intc/Makefile.objs
+++ b/hw/intc/Makefile.objs
@@ -24,7 +24,7 @@ obj-$(CONFIG_APIC) += apic.o apic_common.o
 obj-$(CONFIG_ARM_GIC_KVM) += arm_gic_kvm.o
 obj-$(call land,$(CONFIG_ARM_GIC_KVM),$(TARGET_AARCH64)) += arm_gicv3_kvm.o
 obj-$(call land,$(CONFIG_ARM_GIC_KVM),$(TARGET_AARCH64)) += arm_gicv3_its_kvm.o
-obj-$(CONFIG_STELLARIS) += armv7m_nvic.o
+obj-$(CONFIG_ARM_V7M) += armv7m_nvic.o
 obj-$(CONFIG_EXYNOS4) += exynos4210_gic.o exynos4210_combiner.o
 obj-$(CONFIG_GRLIB) += grlib_irqmp.o
 obj-$(CONFIG_IOAPIC) += ioapic.o
diff --git a/hw/intc/arm_gic.c b/hw/intc/arm_gic.c
index 521aac3cc6..8e5a9d8a3e 100644
--- a/hw/intc/arm_gic.c
+++ b/hw/intc/arm_gic.c
@@ -156,17 +156,6 @@ static void gic_set_irq_11mpcore(GICState *s, int irq, int level,
     }
 }
 
-static void gic_set_irq_nvic(GICState *s, int irq, int level,
-                                 int cm, int target)
-{
-    if (level) {
-        GIC_SET_LEVEL(irq, cm);
-        GIC_SET_PENDING(irq, target);
-    } else {
-        GIC_CLEAR_LEVEL(irq, cm);
-    }
-}
-
 static void gic_set_irq_generic(GICState *s, int irq, int level,
                                 int cm, int target)
 {
@@ -214,8 +203,6 @@ static void gic_set_irq(void *opaque, int irq, int level)
 
     if (s->revision == REV_11MPCORE) {
         gic_set_irq_11mpcore(s, irq, level, cm, target);
-    } else if (s->revision == REV_NVIC) {
-        gic_set_irq_nvic(s, irq, level, cm, target);
     } else {
         gic_set_irq_generic(s, irq, level, cm, target);
     }
@@ -367,7 +354,7 @@ uint32_t gic_acknowledge_irq(GICState *s, int cpu, MemTxAttrs attrs)
         return 1023;
     }
 
-    if (s->revision == REV_11MPCORE || s->revision == REV_NVIC) {
+    if (s->revision == REV_11MPCORE) {
         /* Clear pending flags for both level and edge triggered interrupts.
          * Level triggered IRQs will be reasserted once they become inactive.
          */
@@ -589,11 +576,6 @@ void gic_complete_irq(GICState *s, int cpu, int irq, MemTxAttrs attrs)
             DPRINTF("Set %d pending mask %x\n", irq, cm);
             GIC_SET_PENDING(irq, cm);
         }
-    } else if (s->revision == REV_NVIC) {
-        if (GIC_TEST_LEVEL(irq, cm)) {
-            DPRINTF("Set nvic %d pending mask %x\n", irq, cm);
-            GIC_SET_PENDING(irq, cm);
-        }
     }
 
     group = gic_has_groups(s) && GIC_TEST_GROUP(irq, cm);
@@ -768,7 +750,7 @@ static uint32_t gic_dist_readb(void *opaque, hwaddr offset, MemTxAttrs attrs)
     } else if (offset < 0xf10) {
         goto bad_reg;
     } else if (offset < 0xf30) {
-        if (s->revision == REV_11MPCORE || s->revision == REV_NVIC) {
+        if (s->revision == REV_11MPCORE) {
             goto bad_reg;
         }
 
@@ -802,9 +784,6 @@ static uint32_t gic_dist_readb(void *opaque, hwaddr offset, MemTxAttrs attrs)
             case 2:
                 res = gic_id_gicv2[(offset - 0xfd0) >> 2];
                 break;
-            case REV_NVIC:
-                /* Shouldn't be able to get here */
-                abort();
             default:
                 res = 0;
             }
@@ -1028,7 +1007,7 @@ static void gic_dist_writeb(void *opaque, hwaddr offset,
                 continue; /* Ignore Non-secure access of Group0 IRQ */
             }
 
-            if (s->revision == REV_11MPCORE || s->revision == REV_NVIC) {
+            if (s->revision == REV_11MPCORE) {
                 if (value & (1 << (i * 2))) {
                     GIC_SET_MODEL(irq + i);
                 } else {
@@ -1046,7 +1025,7 @@ static void gic_dist_writeb(void *opaque, hwaddr offset,
         goto bad_reg;
     } else if (offset < 0xf20) {
         /* GICD_CPENDSGIRn */
-        if (s->revision == REV_11MPCORE || s->revision == REV_NVIC) {
+        if (s->revision == REV_11MPCORE) {
             goto bad_reg;
         }
         irq = (offset - 0xf10);
@@ -1060,7 +1039,7 @@ static void gic_dist_writeb(void *opaque, hwaddr offset,
         }
     } else if (offset < 0xf30) {
         /* GICD_SPENDSGIRn */
-        if (s->revision == REV_11MPCORE || s->revision == REV_NVIC) {
+        if (s->revision == REV_11MPCORE) {
             goto bad_reg;
         }
         irq = (offset - 0xf20);
diff --git a/hw/intc/arm_gic_common.c b/hw/intc/arm_gic_common.c
index 4a8df44fb1..70f1134823 100644
--- a/hw/intc/arm_gic_common.c
+++ b/hw/intc/arm_gic_common.c
@@ -99,9 +99,7 @@ void gic_init_irqs_and_mmio(GICState *s, qemu_irq_handler handler,
      *  [N+32..N+63] PPIs for CPU 1
      *   ...
      */
-    if (s->revision != REV_NVIC) {
-        i += (GIC_INTERNAL * s->num_cpu);
-    }
+    i += (GIC_INTERNAL * s->num_cpu);
     qdev_init_gpio_in(DEVICE(s), handler, i);
 
     for (i = 0; i < s->num_cpu; i++) {
@@ -121,16 +119,12 @@ void gic_init_irqs_and_mmio(GICState *s, qemu_irq_handler handler,
     memory_region_init_io(&s->iomem, OBJECT(s), ops, s, "gic_dist", 0x1000);
     sysbus_init_mmio(sbd, &s->iomem);
 
-    if (s->revision != REV_NVIC) {
-        /* This is the main CPU interface "for this core". It is always
-         * present because it is required by both software emulation and KVM.
-         * NVIC is not handled here because its CPU interface is different,
-         * neither it can use KVM.
-         */
-        memory_region_init_io(&s->cpuiomem[0], OBJECT(s), ops ? &ops[1] : NULL,
-                              s, "gic_cpu", s->revision == 2 ? 0x2000 : 0x100);
-        sysbus_init_mmio(sbd, &s->cpuiomem[0]);
-    }
+    /* This is the main CPU interface "for this core". It is always
+     * present because it is required by both software emulation and KVM.
+     */
+    memory_region_init_io(&s->cpuiomem[0], OBJECT(s), ops ? &ops[1] : NULL,
+                          s, "gic_cpu", s->revision == 2 ? 0x2000 : 0x100);
+    sysbus_init_mmio(sbd, &s->cpuiomem[0]);
 }
 
 static void arm_gic_common_realize(DeviceState *dev, Error **errp)
@@ -162,7 +156,7 @@ static void arm_gic_common_realize(DeviceState *dev, Error **errp)
     }
 
     if (s->security_extn &&
-        (s->revision == REV_11MPCORE || s->revision == REV_NVIC)) {
+        (s->revision == REV_11MPCORE)) {
         error_setg(errp, "this GIC revision does not implement "
                    "the security extensions");
         return;
@@ -255,7 +249,6 @@ static Property arm_gic_common_properties[] = {
     DEFINE_PROP_UINT32("num-irq", GICState, num_irq, 32),
     /* Revision can be 1 or 2 for GIC architecture specification
      * versions 1 or 2, or 0 to indicate the legacy 11MPCore GIC.
-     * (Internally, 0xffffffff also indicates "not a GIC but an NVIC".)
      */
     DEFINE_PROP_UINT32("revision", GICState, revision, 1),
     /* True if the GIC should implement the security extensions */
diff --git a/hw/intc/arm_gicv3_common.c b/hw/intc/arm_gicv3_common.c
index 16b9b0f7eb..c6493d6c07 100644
--- a/hw/intc/arm_gicv3_common.c
+++ b/hw/intc/arm_gicv3_common.c
@@ -70,6 +70,38 @@ static const VMStateDescription vmstate_gicv3_cpu_virt = {
     }
 };
 
+static int icc_sre_el1_reg_pre_load(void *opaque)
+{
+    GICv3CPUState *cs = opaque;
+
+   /*
+    * If the sre_el1 subsection is not transferred this
+    * means SRE_EL1 is 0x7 (which might not be the same as
+    * our reset value).
+    */
+    cs->icc_sre_el1 = 0x7;
+    return 0;
+}
+
+static bool icc_sre_el1_reg_needed(void *opaque)
+{
+    GICv3CPUState *cs = opaque;
+
+    return cs->icc_sre_el1 != 7;
+}
+
+const VMStateDescription vmstate_gicv3_cpu_sre_el1 = {
+    .name = "arm_gicv3_cpu/sre_el1",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .pre_load = icc_sre_el1_reg_pre_load,
+    .needed = icc_sre_el1_reg_needed,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT64(icc_sre_el1, GICv3CPUState),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
 static const VMStateDescription vmstate_gicv3_cpu = {
     .name = "arm_gicv3_cpu",
     .version_id = 1,
@@ -100,6 +132,10 @@ static const VMStateDescription vmstate_gicv3_cpu = {
     .subsections = (const VMStateDescription * []) {
         &vmstate_gicv3_cpu_virt,
         NULL
+    },
+    .subsections = (const VMStateDescription * []) {
+        &vmstate_gicv3_cpu_sre_el1,
+        NULL
     }
 };
 
@@ -216,6 +252,8 @@ static void arm_gicv3_common_realize(DeviceState *dev, Error **errp)
 
         s->cpu[i].cpu = cpu;
         s->cpu[i].gic = s;
+        /* Store GICv3CPUState in CPUARMState gicv3state pointer */
+        gicv3_set_gicv3state(cpu, &s->cpu[i]);
 
         /* Pre-construct the GICR_TYPER:
          * For our implementation:
diff --git a/hw/intc/arm_gicv3_cpuif.c b/hw/intc/arm_gicv3_cpuif.c
index f775aba507..0b208560bd 100644
--- a/hw/intc/arm_gicv3_cpuif.c
+++ b/hw/intc/arm_gicv3_cpuif.c
@@ -19,6 +19,14 @@
 #include "gicv3_internal.h"
 #include "cpu.h"
 
+void gicv3_set_gicv3state(CPUState *cpu, GICv3CPUState *s)
+{
+    ARMCPU *arm_cpu = ARM_CPU(cpu);
+    CPUARMState *env = &arm_cpu->env;
+
+    env->gicv3state = (void *)s;
+};
+
 static GICv3CPUState *icc_cs_from_env(CPUARMState *env)
 {
     /* Given the CPU, find the right GICv3CPUState struct.
diff --git a/hw/intc/arm_gicv3_kvm.c b/hw/intc/arm_gicv3_kvm.c
index d69dc47370..81f0403117 100644
--- a/hw/intc/arm_gicv3_kvm.c
+++ b/hw/intc/arm_gicv3_kvm.c
@@ -23,8 +23,10 @@
 #include "qapi/error.h"
 #include "hw/intc/arm_gicv3_common.h"
 #include "hw/sysbus.h"
+#include "qemu/error-report.h"
 #include "sysemu/kvm.h"
 #include "kvm_arm.h"
+#include "gicv3_internal.h"
 #include "vgic_common.h"
 #include "migration/migration.h"
 
@@ -44,6 +46,32 @@
 #define KVM_ARM_GICV3_GET_CLASS(obj) \
      OBJECT_GET_CLASS(KVMARMGICv3Class, (obj), TYPE_KVM_ARM_GICV3)
 
+#define   KVM_DEV_ARM_VGIC_SYSREG(op0, op1, crn, crm, op2)         \
+                             (ARM64_SYS_REG_SHIFT_MASK(op0, OP0) | \
+                              ARM64_SYS_REG_SHIFT_MASK(op1, OP1) | \
+                              ARM64_SYS_REG_SHIFT_MASK(crn, CRN) | \
+                              ARM64_SYS_REG_SHIFT_MASK(crm, CRM) | \
+                              ARM64_SYS_REG_SHIFT_MASK(op2, OP2))
+
+#define ICC_PMR_EL1     \
+    KVM_DEV_ARM_VGIC_SYSREG(3, 0, 4, 6, 0)
+#define ICC_BPR0_EL1    \
+    KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 8, 3)
+#define ICC_AP0R_EL1(n) \
+    KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 8, 4 | n)
+#define ICC_AP1R_EL1(n) \
+    KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 9, n)
+#define ICC_BPR1_EL1    \
+    KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 12, 3)
+#define ICC_CTLR_EL1    \
+    KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 12, 4)
+#define ICC_SRE_EL1 \
+    KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 12, 5)
+#define ICC_IGRPEN0_EL1 \
+    KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 12, 6)
+#define ICC_IGRPEN1_EL1 \
+    KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 12, 7)
+
 typedef struct KVMARMGICv3Class {
     ARMGICv3CommonClass parent_class;
     DeviceRealize parent_realize;
@@ -57,16 +85,549 @@ static void kvm_arm_gicv3_set_irq(void *opaque, int irq, int level)
     kvm_arm_gic_set_irq(s->num_irq, irq, level);
 }
 
+#define KVM_VGIC_ATTR(reg, typer) \
+    ((typer & KVM_DEV_ARM_VGIC_V3_MPIDR_MASK) | (reg))
+
+static inline void kvm_gicd_access(GICv3State *s, int offset,
+                                   uint32_t *val, bool write)
+{
+    kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_DIST_REGS,
+                      KVM_VGIC_ATTR(offset, 0),
+                      val, write);
+}
+
+static inline void kvm_gicr_access(GICv3State *s, int offset, int cpu,
+                                   uint32_t *val, bool write)
+{
+    kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_REDIST_REGS,
+                      KVM_VGIC_ATTR(offset, s->cpu[cpu].gicr_typer),
+                      val, write);
+}
+
+static inline void kvm_gicc_access(GICv3State *s, uint64_t reg, int cpu,
+                                   uint64_t *val, bool write)
+{
+    kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS,
+                      KVM_VGIC_ATTR(reg, s->cpu[cpu].gicr_typer),
+                      val, write);
+}
+
+static inline void kvm_gic_line_level_access(GICv3State *s, int irq, int cpu,
+                                             uint32_t *val, bool write)
+{
+    kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO,
+                      KVM_VGIC_ATTR(irq, s->cpu[cpu].gicr_typer) |
+                      (VGIC_LEVEL_INFO_LINE_LEVEL <<
+                       KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT),
+                      val, write);
+}
+
+/* Loop through each distributor IRQ related register; since bits
+ * corresponding to SPIs and PPIs are RAZ/WI when affinity routing
+ * is enabled, we skip those.
+ */
+#define for_each_dist_irq_reg(_irq, _max, _field_width) \
+    for (_irq = GIC_INTERNAL; _irq < _max; _irq += (32 / _field_width))
+
+static void kvm_dist_get_priority(GICv3State *s, uint32_t offset, uint8_t *bmp)
+{
+    uint32_t reg, *field;
+    int irq;
+
+    field = (uint32_t *)bmp;
+    for_each_dist_irq_reg(irq, s->num_irq, 8) {
+        kvm_gicd_access(s, offset, &reg, false);
+        *field = reg;
+        offset += 4;
+        field++;
+    }
+}
+
+static void kvm_dist_put_priority(GICv3State *s, uint32_t offset, uint8_t *bmp)
+{
+    uint32_t reg, *field;
+    int irq;
+
+    field = (uint32_t *)bmp;
+    for_each_dist_irq_reg(irq, s->num_irq, 8) {
+        reg = *field;
+        kvm_gicd_access(s, offset, &reg, true);
+        offset += 4;
+        field++;
+    }
+}
+
+static void kvm_dist_get_edge_trigger(GICv3State *s, uint32_t offset,
+                                      uint32_t *bmp)
+{
+    uint32_t reg;
+    int irq;
+
+    for_each_dist_irq_reg(irq, s->num_irq, 2) {
+        kvm_gicd_access(s, offset, &reg, false);
+        reg = half_unshuffle32(reg >> 1);
+        if (irq % 32 != 0) {
+            reg = (reg << 16);
+        }
+        *gic_bmp_ptr32(bmp, irq) |=  reg;
+        offset += 4;
+    }
+}
+
+static void kvm_dist_put_edge_trigger(GICv3State *s, uint32_t offset,
+                                      uint32_t *bmp)
+{
+    uint32_t reg;
+    int irq;
+
+    for_each_dist_irq_reg(irq, s->num_irq, 2) {
+        reg = *gic_bmp_ptr32(bmp, irq);
+        if (irq % 32 != 0) {
+            reg = (reg & 0xffff0000) >> 16;
+        } else {
+            reg = reg & 0xffff;
+        }
+        reg = half_shuffle32(reg) << 1;
+        kvm_gicd_access(s, offset, &reg, true);
+        offset += 4;
+    }
+}
+
+static void kvm_gic_get_line_level_bmp(GICv3State *s, uint32_t *bmp)
+{
+    uint32_t reg;
+    int irq;
+
+    for_each_dist_irq_reg(irq, s->num_irq, 1) {
+        kvm_gic_line_level_access(s, irq, 0, &reg, false);
+        *gic_bmp_ptr32(bmp, irq) = reg;
+    }
+}
+
+static void kvm_gic_put_line_level_bmp(GICv3State *s, uint32_t *bmp)
+{
+    uint32_t reg;
+    int irq;
+
+    for_each_dist_irq_reg(irq, s->num_irq, 1) {
+        reg = *gic_bmp_ptr32(bmp, irq);
+        kvm_gic_line_level_access(s, irq, 0, &reg, true);
+    }
+}
+
+/* Read a bitmap register group from the kernel VGIC. */
+static void kvm_dist_getbmp(GICv3State *s, uint32_t offset, uint32_t *bmp)
+{
+    uint32_t reg;
+    int irq;
+
+    for_each_dist_irq_reg(irq, s->num_irq, 1) {
+        kvm_gicd_access(s, offset, &reg, false);
+        *gic_bmp_ptr32(bmp, irq) = reg;
+        offset += 4;
+    }
+}
+
+static void kvm_dist_putbmp(GICv3State *s, uint32_t offset,
+                            uint32_t clroffset, uint32_t *bmp)
+{
+    uint32_t reg;
+    int irq;
+
+    for_each_dist_irq_reg(irq, s->num_irq, 1) {
+        /* If this bitmap is a set/clear register pair, first write to the
+         * clear-reg to clear all bits before using the set-reg to write
+         * the 1 bits.
+         */
+        if (clroffset != 0) {
+            reg = 0;
+            kvm_gicd_access(s, clroffset, &reg, true);
+        }
+        reg = *gic_bmp_ptr32(bmp, irq);
+        kvm_gicd_access(s, offset, &reg, true);
+        offset += 4;
+    }
+}
+
+static void kvm_arm_gicv3_check(GICv3State *s)
+{
+    uint32_t reg;
+    uint32_t num_irq;
+
+    /* Sanity checking s->num_irq */
+    kvm_gicd_access(s, GICD_TYPER, &reg, false);
+    num_irq = ((reg & 0x1f) + 1) * 32;
+
+    if (num_irq < s->num_irq) {
+        error_report("Model requests %u IRQs, but kernel supports max %u",
+                     s->num_irq, num_irq);
+        abort();
+    }
+}
+
 static void kvm_arm_gicv3_put(GICv3State *s)
 {
-    /* TODO */
-    DPRINTF("Cannot put kernel gic state, no kernel interface\n");
+    uint32_t regl, regh, reg;
+    uint64_t reg64, redist_typer;
+    int ncpu, i;
+
+    kvm_arm_gicv3_check(s);
+
+    kvm_gicr_access(s, GICR_TYPER, 0, &regl, false);
+    kvm_gicr_access(s, GICR_TYPER + 4, 0, &regh, false);
+    redist_typer = ((uint64_t)regh << 32) | regl;
+
+    reg = s->gicd_ctlr;
+    kvm_gicd_access(s, GICD_CTLR, &reg, true);
+
+    if (redist_typer & GICR_TYPER_PLPIS) {
+        /* Set base addresses before LPIs are enabled by GICR_CTLR write */
+        for (ncpu = 0; ncpu < s->num_cpu; ncpu++) {
+            GICv3CPUState *c = &s->cpu[ncpu];
+
+            reg64 = c->gicr_propbaser;
+            regl = (uint32_t)reg64;
+            kvm_gicr_access(s, GICR_PROPBASER, ncpu, &regl, true);
+            regh = (uint32_t)(reg64 >> 32);
+            kvm_gicr_access(s, GICR_PROPBASER + 4, ncpu, &regh, true);
+
+            reg64 = c->gicr_pendbaser;
+            if (!c->gicr_ctlr & GICR_CTLR_ENABLE_LPIS) {
+                /* Setting PTZ is advised if LPIs are disabled, to reduce
+                 * GIC initialization time.
+                 */
+                reg64 |= GICR_PENDBASER_PTZ;
+            }
+            regl = (uint32_t)reg64;
+            kvm_gicr_access(s, GICR_PENDBASER, ncpu, &regl, true);
+            regh = (uint32_t)(reg64 >> 32);
+            kvm_gicr_access(s, GICR_PENDBASER + 4, ncpu, &regh, true);
+        }
+    }
+
+    /* Redistributor state (one per CPU) */
+
+    for (ncpu = 0; ncpu < s->num_cpu; ncpu++) {
+        GICv3CPUState *c = &s->cpu[ncpu];
+
+        reg = c->gicr_ctlr;
+        kvm_gicr_access(s, GICR_CTLR, ncpu, &reg, true);
+
+        reg = c->gicr_statusr[GICV3_NS];
+        kvm_gicr_access(s, GICR_STATUSR, ncpu, &reg, true);
+
+        reg = c->gicr_waker;
+        kvm_gicr_access(s, GICR_WAKER, ncpu, &reg, true);
+
+        reg = c->gicr_igroupr0;
+        kvm_gicr_access(s, GICR_IGROUPR0, ncpu, &reg, true);
+
+        reg = ~0;
+        kvm_gicr_access(s, GICR_ICENABLER0, ncpu, &reg, true);
+        reg = c->gicr_ienabler0;
+        kvm_gicr_access(s, GICR_ISENABLER0, ncpu, &reg, true);
+
+        /* Restore config before pending so we treat level/edge correctly */
+        reg = half_shuffle32(c->edge_trigger >> 16) << 1;
+        kvm_gicr_access(s, GICR_ICFGR1, ncpu, &reg, true);
+
+        reg = c->level;
+        kvm_gic_line_level_access(s, 0, ncpu, &reg, true);
+
+        reg = ~0;
+        kvm_gicr_access(s, GICR_ICPENDR0, ncpu, &reg, true);
+        reg = c->gicr_ipendr0;
+        kvm_gicr_access(s, GICR_ISPENDR0, ncpu, &reg, true);
+
+        reg = ~0;
+        kvm_gicr_access(s, GICR_ICACTIVER0, ncpu, &reg, true);
+        reg = c->gicr_iactiver0;
+        kvm_gicr_access(s, GICR_ISACTIVER0, ncpu, &reg, true);
+
+        for (i = 0; i < GIC_INTERNAL; i += 4) {
+            reg = c->gicr_ipriorityr[i] |
+                (c->gicr_ipriorityr[i + 1] << 8) |
+                (c->gicr_ipriorityr[i + 2] << 16) |
+                (c->gicr_ipriorityr[i + 3] << 24);
+            kvm_gicr_access(s, GICR_IPRIORITYR + i, ncpu, &reg, true);
+        }
+    }
+
+    /* Distributor state (shared between all CPUs */
+    reg = s->gicd_statusr[GICV3_NS];
+    kvm_gicd_access(s, GICD_STATUSR, &reg, true);
+
+    /* s->enable bitmap -> GICD_ISENABLERn */
+    kvm_dist_putbmp(s, GICD_ISENABLER, GICD_ICENABLER, s->enabled);
+
+    /* s->group bitmap -> GICD_IGROUPRn */
+    kvm_dist_putbmp(s, GICD_IGROUPR, 0, s->group);
+
+    /* Restore targets before pending to ensure the pending state is set on
+     * the appropriate CPU interfaces in the kernel
+     */
+
+    /* s->gicd_irouter[irq] -> GICD_IROUTERn
+     * We can't use kvm_dist_put() here because the registers are 64-bit
+     */
+    for (i = GIC_INTERNAL; i < s->num_irq; i++) {
+        uint32_t offset;
+
+        offset = GICD_IROUTER + (sizeof(uint32_t) * i);
+        reg = (uint32_t)s->gicd_irouter[i];
+        kvm_gicd_access(s, offset, &reg, true);
+
+        offset = GICD_IROUTER + (sizeof(uint32_t) * i) + 4;
+        reg = (uint32_t)(s->gicd_irouter[i] >> 32);
+        kvm_gicd_access(s, offset, &reg, true);
+    }
+
+    /* s->trigger bitmap -> GICD_ICFGRn
+     * (restore configuration registers before pending IRQs so we treat
+     * level/edge correctly)
+     */
+    kvm_dist_put_edge_trigger(s, GICD_ICFGR, s->edge_trigger);
+
+    /* s->level bitmap ->  line_level */
+    kvm_gic_put_line_level_bmp(s, s->level);
+
+    /* s->pending bitmap -> GICD_ISPENDRn */
+    kvm_dist_putbmp(s, GICD_ISPENDR, GICD_ICPENDR, s->pending);
+
+    /* s->active bitmap -> GICD_ISACTIVERn */
+    kvm_dist_putbmp(s, GICD_ISACTIVER, GICD_ICACTIVER, s->active);
+
+    /* s->gicd_ipriority[] -> GICD_IPRIORITYRn */
+    kvm_dist_put_priority(s, GICD_IPRIORITYR, s->gicd_ipriority);
+
+    /* CPU Interface state (one per CPU) */
+
+    for (ncpu = 0; ncpu < s->num_cpu; ncpu++) {
+        GICv3CPUState *c = &s->cpu[ncpu];
+        int num_pri_bits;
+
+        kvm_gicc_access(s, ICC_SRE_EL1, ncpu, &c->icc_sre_el1, true);
+        kvm_gicc_access(s, ICC_CTLR_EL1, ncpu,
+                        &c->icc_ctlr_el1[GICV3_NS], true);
+        kvm_gicc_access(s, ICC_IGRPEN0_EL1, ncpu,
+                        &c->icc_igrpen[GICV3_G0], true);
+        kvm_gicc_access(s, ICC_IGRPEN1_EL1, ncpu,
+                        &c->icc_igrpen[GICV3_G1NS], true);
+        kvm_gicc_access(s, ICC_PMR_EL1, ncpu, &c->icc_pmr_el1, true);
+        kvm_gicc_access(s, ICC_BPR0_EL1, ncpu, &c->icc_bpr[GICV3_G0], true);
+        kvm_gicc_access(s, ICC_BPR1_EL1, ncpu, &c->icc_bpr[GICV3_G1NS], true);
+
+        num_pri_bits = ((c->icc_ctlr_el1[GICV3_NS] &
+                        ICC_CTLR_EL1_PRIBITS_MASK) >>
+                        ICC_CTLR_EL1_PRIBITS_SHIFT) + 1;
+
+        switch (num_pri_bits) {
+        case 7:
+            reg64 = c->icc_apr[GICV3_G0][3];
+            kvm_gicc_access(s, ICC_AP0R_EL1(3), ncpu, &reg64, true);
+            reg64 = c->icc_apr[GICV3_G0][2];
+            kvm_gicc_access(s, ICC_AP0R_EL1(2), ncpu, &reg64, true);
+        case 6:
+            reg64 = c->icc_apr[GICV3_G0][1];
+            kvm_gicc_access(s, ICC_AP0R_EL1(1), ncpu, &reg64, true);
+        default:
+            reg64 = c->icc_apr[GICV3_G0][0];
+            kvm_gicc_access(s, ICC_AP0R_EL1(0), ncpu, &reg64, true);
+        }
+
+        switch (num_pri_bits) {
+        case 7:
+            reg64 = c->icc_apr[GICV3_G1NS][3];
+            kvm_gicc_access(s, ICC_AP1R_EL1(3), ncpu, &reg64, true);
+            reg64 = c->icc_apr[GICV3_G1NS][2];
+            kvm_gicc_access(s, ICC_AP1R_EL1(2), ncpu, &reg64, true);
+        case 6:
+            reg64 = c->icc_apr[GICV3_G1NS][1];
+            kvm_gicc_access(s, ICC_AP1R_EL1(1), ncpu, &reg64, true);
+        default:
+            reg64 = c->icc_apr[GICV3_G1NS][0];
+            kvm_gicc_access(s, ICC_AP1R_EL1(0), ncpu, &reg64, true);
+        }
+    }
 }
 
 static void kvm_arm_gicv3_get(GICv3State *s)
 {
-    /* TODO */
-    DPRINTF("Cannot get kernel gic state, no kernel interface\n");
+    uint32_t regl, regh, reg;
+    uint64_t reg64, redist_typer;
+    int ncpu, i;
+
+    kvm_arm_gicv3_check(s);
+
+    kvm_gicr_access(s, GICR_TYPER, 0, &regl, false);
+    kvm_gicr_access(s, GICR_TYPER + 4, 0, &regh, false);
+    redist_typer = ((uint64_t)regh << 32) | regl;
+
+    kvm_gicd_access(s, GICD_CTLR, &reg, false);
+    s->gicd_ctlr = reg;
+
+    /* Redistributor state (one per CPU) */
+
+    for (ncpu = 0; ncpu < s->num_cpu; ncpu++) {
+        GICv3CPUState *c = &s->cpu[ncpu];
+
+        kvm_gicr_access(s, GICR_CTLR, ncpu, &reg, false);
+        c->gicr_ctlr = reg;
+
+        kvm_gicr_access(s, GICR_STATUSR, ncpu, &reg, false);
+        c->gicr_statusr[GICV3_NS] = reg;
+
+        kvm_gicr_access(s, GICR_WAKER, ncpu, &reg, false);
+        c->gicr_waker = reg;
+
+        kvm_gicr_access(s, GICR_IGROUPR0, ncpu, &reg, false);
+        c->gicr_igroupr0 = reg;
+        kvm_gicr_access(s, GICR_ISENABLER0, ncpu, &reg, false);
+        c->gicr_ienabler0 = reg;
+        kvm_gicr_access(s, GICR_ICFGR1, ncpu, &reg, false);
+        c->edge_trigger = half_unshuffle32(reg >> 1) << 16;
+        kvm_gic_line_level_access(s, 0, ncpu, &reg, false);
+        c->level = reg;
+        kvm_gicr_access(s, GICR_ISPENDR0, ncpu, &reg, false);
+        c->gicr_ipendr0 = reg;
+        kvm_gicr_access(s, GICR_ISACTIVER0, ncpu, &reg, false);
+        c->gicr_iactiver0 = reg;
+
+        for (i = 0; i < GIC_INTERNAL; i += 4) {
+            kvm_gicr_access(s, GICR_IPRIORITYR + i, ncpu, &reg, false);
+            c->gicr_ipriorityr[i] = extract32(reg, 0, 8);
+            c->gicr_ipriorityr[i + 1] = extract32(reg, 8, 8);
+            c->gicr_ipriorityr[i + 2] = extract32(reg, 16, 8);
+            c->gicr_ipriorityr[i + 3] = extract32(reg, 24, 8);
+        }
+    }
+
+    if (redist_typer & GICR_TYPER_PLPIS) {
+        for (ncpu = 0; ncpu < s->num_cpu; ncpu++) {
+            GICv3CPUState *c = &s->cpu[ncpu];
+
+            kvm_gicr_access(s, GICR_PROPBASER, ncpu, &regl, false);
+            kvm_gicr_access(s, GICR_PROPBASER + 4, ncpu, &regh, false);
+            c->gicr_propbaser = ((uint64_t)regh << 32) | regl;
+
+            kvm_gicr_access(s, GICR_PENDBASER, ncpu, &regl, false);
+            kvm_gicr_access(s, GICR_PENDBASER + 4, ncpu, &regh, false);
+            c->gicr_pendbaser = ((uint64_t)regh << 32) | regl;
+        }
+    }
+
+    /* Distributor state (shared between all CPUs */
+
+    kvm_gicd_access(s, GICD_STATUSR, &reg, false);
+    s->gicd_statusr[GICV3_NS] = reg;
+
+    /* GICD_IGROUPRn -> s->group bitmap */
+    kvm_dist_getbmp(s, GICD_IGROUPR, s->group);
+
+    /* GICD_ISENABLERn -> s->enabled bitmap */
+    kvm_dist_getbmp(s, GICD_ISENABLER, s->enabled);
+
+    /* Line level of irq */
+    kvm_gic_get_line_level_bmp(s, s->level);
+    /* GICD_ISPENDRn -> s->pending bitmap */
+    kvm_dist_getbmp(s, GICD_ISPENDR, s->pending);
+
+    /* GICD_ISACTIVERn -> s->active bitmap */
+    kvm_dist_getbmp(s, GICD_ISACTIVER, s->active);
+
+    /* GICD_ICFGRn -> s->trigger bitmap */
+    kvm_dist_get_edge_trigger(s, GICD_ICFGR, s->edge_trigger);
+
+    /* GICD_IPRIORITYRn -> s->gicd_ipriority[] */
+    kvm_dist_get_priority(s, GICD_IPRIORITYR, s->gicd_ipriority);
+
+    /* GICD_IROUTERn -> s->gicd_irouter[irq] */
+    for (i = GIC_INTERNAL; i < s->num_irq; i++) {
+        uint32_t offset;
+
+        offset = GICD_IROUTER + (sizeof(uint32_t) * i);
+        kvm_gicd_access(s, offset, &regl, false);
+        offset = GICD_IROUTER + (sizeof(uint32_t) * i) + 4;
+        kvm_gicd_access(s, offset, &regh, false);
+        s->gicd_irouter[i] = ((uint64_t)regh << 32) | regl;
+    }
+
+    /*****************************************************************
+     * CPU Interface(s) State
+     */
+
+    for (ncpu = 0; ncpu < s->num_cpu; ncpu++) {
+        GICv3CPUState *c = &s->cpu[ncpu];
+        int num_pri_bits;
+
+        kvm_gicc_access(s, ICC_SRE_EL1, ncpu, &c->icc_sre_el1, false);
+        kvm_gicc_access(s, ICC_CTLR_EL1, ncpu,
+                        &c->icc_ctlr_el1[GICV3_NS], false);
+        kvm_gicc_access(s, ICC_IGRPEN0_EL1, ncpu,
+                        &c->icc_igrpen[GICV3_G0], false);
+        kvm_gicc_access(s, ICC_IGRPEN1_EL1, ncpu,
+                        &c->icc_igrpen[GICV3_G1NS], false);
+        kvm_gicc_access(s, ICC_PMR_EL1, ncpu, &c->icc_pmr_el1, false);
+        kvm_gicc_access(s, ICC_BPR0_EL1, ncpu, &c->icc_bpr[GICV3_G0], false);
+        kvm_gicc_access(s, ICC_BPR1_EL1, ncpu, &c->icc_bpr[GICV3_G1NS], false);
+        num_pri_bits = ((c->icc_ctlr_el1[GICV3_NS] &
+                        ICC_CTLR_EL1_PRIBITS_MASK) >>
+                        ICC_CTLR_EL1_PRIBITS_SHIFT) + 1;
+
+        switch (num_pri_bits) {
+        case 7:
+            kvm_gicc_access(s, ICC_AP0R_EL1(3), ncpu, &reg64, false);
+            c->icc_apr[GICV3_G0][3] = reg64;
+            kvm_gicc_access(s, ICC_AP0R_EL1(2), ncpu, &reg64, false);
+            c->icc_apr[GICV3_G0][2] = reg64;
+        case 6:
+            kvm_gicc_access(s, ICC_AP0R_EL1(1), ncpu, &reg64, false);
+            c->icc_apr[GICV3_G0][1] = reg64;
+        default:
+            kvm_gicc_access(s, ICC_AP0R_EL1(0), ncpu, &reg64, false);
+            c->icc_apr[GICV3_G0][0] = reg64;
+        }
+
+        switch (num_pri_bits) {
+        case 7:
+            kvm_gicc_access(s, ICC_AP1R_EL1(3), ncpu, &reg64, false);
+            c->icc_apr[GICV3_G1NS][3] = reg64;
+            kvm_gicc_access(s, ICC_AP1R_EL1(2), ncpu, &reg64, false);
+            c->icc_apr[GICV3_G1NS][2] = reg64;
+        case 6:
+            kvm_gicc_access(s, ICC_AP1R_EL1(1), ncpu, &reg64, false);
+            c->icc_apr[GICV3_G1NS][1] = reg64;
+        default:
+            kvm_gicc_access(s, ICC_AP1R_EL1(0), ncpu, &reg64, false);
+            c->icc_apr[GICV3_G1NS][0] = reg64;
+        }
+    }
+}
+
+static void arm_gicv3_icc_reset(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+    ARMCPU *cpu;
+    GICv3State *s;
+    GICv3CPUState *c;
+
+    c = (GICv3CPUState *)env->gicv3state;
+    s = c->gic;
+    cpu = ARM_CPU(c->cpu);
+
+    /* Initialize to actual HW supported configuration */
+    kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS,
+                      KVM_VGIC_ATTR(ICC_CTLR_EL1, cpu->mp_affinity),
+                      &c->icc_ctlr_el1[GICV3_NS], false);
+
+    c->icc_ctlr_el1[GICV3_S] = c->icc_ctlr_el1[GICV3_NS];
+    c->icc_pmr_el1 = 0;
+    c->icc_bpr[GICV3_G0] = GIC_MIN_BPR;
+    c->icc_bpr[GICV3_G1] = GIC_MIN_BPR;
+    c->icc_bpr[GICV3_G1NS] = GIC_MIN_BPR;
+
+    c->icc_sre_el1 = 0x7;
+    memset(c->icc_apr, 0, sizeof(c->icc_apr));
+    memset(c->icc_igrpen, 0, sizeof(c->icc_igrpen));
 }
 
 static void kvm_arm_gicv3_reset(DeviceState *dev)
@@ -77,9 +638,43 @@ static void kvm_arm_gicv3_reset(DeviceState *dev)
     DPRINTF("Reset\n");
 
     kgc->parent_reset(dev);
+
+    if (s->migration_blocker) {
+        DPRINTF("Cannot put kernel gic state, no kernel interface\n");
+        return;
+    }
+
     kvm_arm_gicv3_put(s);
 }
 
+/*
+ * CPU interface registers of GIC needs to be reset on CPU reset.
+ * For the calling arm_gicv3_icc_reset() on CPU reset, we register
+ * below ARMCPRegInfo. As we reset the whole cpu interface under single
+ * register reset, we define only one register of CPU interface instead
+ * of defining all the registers.
+ */
+static const ARMCPRegInfo gicv3_cpuif_reginfo[] = {
+    { .name = "ICC_CTLR_EL1", .state = ARM_CP_STATE_BOTH,
+      .opc0 = 3, .opc1 = 0, .crn = 12, .crm = 12, .opc2 = 4,
+      /*
+       * If ARM_CP_NOP is used, resetfn is not called,
+       * So ARM_CP_NO_RAW is appropriate type.
+       */
+      .type = ARM_CP_NO_RAW,
+      .access = PL1_RW,
+      .readfn = arm_cp_read_zero,
+      .writefn = arm_cp_write_ignore,
+      /*
+       * We hang the whole cpu interface reset routine off here
+       * rather than parcelling it out into one little function
+       * per register
+       */
+      .resetfn = arm_gicv3_icc_reset,
+    },
+    REGINFO_SENTINEL
+};
+
 static void kvm_arm_gicv3_realize(DeviceState *dev, Error **errp)
 {
     GICv3State *s = KVM_ARM_GICV3(dev);
@@ -103,16 +698,10 @@ static void kvm_arm_gicv3_realize(DeviceState *dev, Error **errp)
 
     gicv3_init_irqs_and_mmio(s, kvm_arm_gicv3_set_irq, NULL);
 
-    /* Block migration of a KVM GICv3 device: the API for saving and restoring
-     * the state in the kernel is not yet finalised in the kernel or
-     * implemented in QEMU.
-     */
-    error_setg(&s->migration_blocker, "vGICv3 migration is not implemented");
-    migrate_add_blocker(s->migration_blocker, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        error_free(s->migration_blocker);
-        return;
+    for (i = 0; i < s->num_cpu; i++) {
+        ARMCPU *cpu = ARM_CPU(qemu_get_cpu(i));
+
+        define_arm_cp_regs(cpu, gicv3_cpuif_reginfo);
     }
 
     /* Try to create the device via the device control API */
@@ -145,6 +734,18 @@ static void kvm_arm_gicv3_realize(DeviceState *dev, Error **errp)
 
         kvm_irqchip_commit_routes(kvm_state);
     }
+
+    if (!kvm_device_check_attr(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_DIST_REGS,
+                               GICD_CTLR)) {
+        error_setg(&s->migration_blocker, "This operating system kernel does "
+                                          "not support vGICv3 migration");
+        migrate_add_blocker(s->migration_blocker, &local_err);
+        if (local_err) {
+            error_propagate(errp, local_err);
+            error_free(s->migration_blocker);
+            return;
+        }
+    }
 }
 
 static void kvm_arm_gicv3_class_init(ObjectClass *klass, void *data)
diff --git a/hw/intc/armv7m_nvic.c b/hw/intc/armv7m_nvic.c
index fe5c303de9..32ffa0bf35 100644
--- a/hw/intc/armv7m_nvic.c
+++ b/hw/intc/armv7m_nvic.c
@@ -17,213 +17,425 @@
 #include "hw/sysbus.h"
 #include "qemu/timer.h"
 #include "hw/arm/arm.h"
-#include "exec/address-spaces.h"
-#include "gic_internal.h"
+#include "hw/arm/armv7m_nvic.h"
+#include "target/arm/cpu.h"
 #include "qemu/log.h"
+#include "trace.h"
 
-typedef struct {
-    GICState gic;
-    ARMCPU *cpu;
-    struct {
-        uint32_t control;
-        uint32_t reload;
-        int64_t tick;
-        QEMUTimer *timer;
-    } systick;
-    MemoryRegion sysregmem;
-    MemoryRegion gic_iomem_alias;
-    MemoryRegion container;
-    uint32_t num_irq;
-    qemu_irq sysresetreq;
-} nvic_state;
-
-#define TYPE_NVIC "armv7m_nvic"
-/**
- * NVICClass:
- * @parent_reset: the parent class' reset handler.
+/* IRQ number counting:
  *
- * A model of the v7M NVIC and System Controller
+ * the num-irq property counts the number of external IRQ lines
+ *
+ * NVICState::num_irq counts the total number of exceptions
+ * (external IRQs, the 15 internal exceptions including reset,
+ * and one for the unused exception number 0).
+ *
+ * NVIC_MAX_IRQ is the highest permitted number of external IRQ lines.
+ *
+ * NVIC_MAX_VECTORS is the highest permitted number of exceptions.
+ *
+ * Iterating through all exceptions should typically be done with
+ * for (i = 1; i < s->num_irq; i++) to avoid the unused slot 0.
+ *
+ * The external qemu_irq lines are the NVIC's external IRQ lines,
+ * so line 0 is exception 16.
+ *
+ * In the terminology of the architecture manual, "interrupts" are
+ * a subcategory of exception referring to the external interrupts
+ * (which are exception numbers NVIC_FIRST_IRQ and upward).
+ * For historical reasons QEMU tends to use "interrupt" and
+ * "exception" more or less interchangeably.
+ */
+#define NVIC_FIRST_IRQ 16
+#define NVIC_MAX_IRQ (NVIC_MAX_VECTORS - NVIC_FIRST_IRQ)
+
+/* Effective running priority of the CPU when no exception is active
+ * (higher than the highest possible priority value)
  */
-typedef struct NVICClass {
-    /*< private >*/
-    ARMGICClass parent_class;
-    /*< public >*/
-    DeviceRealize parent_realize;
-    void (*parent_reset)(DeviceState *dev);
-} NVICClass;
-
-#define NVIC_CLASS(klass) \
-    OBJECT_CLASS_CHECK(NVICClass, (klass), TYPE_NVIC)
-#define NVIC_GET_CLASS(obj) \
-    OBJECT_GET_CLASS(NVICClass, (obj), TYPE_NVIC)
-#define NVIC(obj) \
-    OBJECT_CHECK(nvic_state, (obj), TYPE_NVIC)
+#define NVIC_NOEXC_PRIO 0x100
 
 static const uint8_t nvic_id[] = {
     0x00, 0xb0, 0x1b, 0x00, 0x0d, 0xe0, 0x05, 0xb1
 };
 
-/* qemu timers run at 1GHz.   We want something closer to 1MHz.  */
-#define SYSTICK_SCALE 1000ULL
+static int nvic_pending_prio(NVICState *s)
+{
+    /* return the priority of the current pending interrupt,
+     * or NVIC_NOEXC_PRIO if no interrupt is pending
+     */
+    return s->vectpending ? s->vectors[s->vectpending].prio : NVIC_NOEXC_PRIO;
+}
 
-#define SYSTICK_ENABLE    (1 << 0)
-#define SYSTICK_TICKINT   (1 << 1)
-#define SYSTICK_CLKSOURCE (1 << 2)
-#define SYSTICK_COUNTFLAG (1 << 16)
+/* Return the value of the ISCR RETTOBASE bit:
+ * 1 if there is exactly one active exception
+ * 0 if there is more than one active exception
+ * UNKNOWN if there are no active exceptions (we choose 1,
+ * which matches the choice Cortex-M3 is documented as making).
+ *
+ * NB: some versions of the documentation talk about this
+ * counting "active exceptions other than the one shown by IPSR";
+ * this is only different in the obscure corner case where guest
+ * code has manually deactivated an exception and is about
+ * to fail an exception-return integrity check. The definition
+ * above is the one from the v8M ARM ARM and is also in line
+ * with the behaviour documented for the Cortex-M3.
+ */
+static bool nvic_rettobase(NVICState *s)
+{
+    int irq, nhand = 0;
 
-int system_clock_scale;
+    for (irq = ARMV7M_EXCP_RESET; irq < s->num_irq; irq++) {
+        if (s->vectors[irq].active) {
+            nhand++;
+            if (nhand == 2) {
+                return 0;
+            }
+        }
+    }
 
-/* Conversion factor from qemu timer to SysTick frequencies.  */
-static inline int64_t systick_scale(nvic_state *s)
-{
-    if (s->systick.control & SYSTICK_CLKSOURCE)
-        return system_clock_scale;
-    else
-        return 1000;
+    return 1;
 }
 
-static void systick_reload(nvic_state *s, int reset)
+/* Return the value of the ISCR ISRPENDING bit:
+ * 1 if an external interrupt is pending
+ * 0 if no external interrupt is pending
+ */
+static bool nvic_isrpending(NVICState *s)
 {
-    /* The Cortex-M3 Devices Generic User Guide says that "When the
-     * ENABLE bit is set to 1, the counter loads the RELOAD value from the
-     * SYST RVR register and then counts down". So, we need to check the
-     * ENABLE bit before reloading the value.
+    int irq;
+
+    /* We can shortcut if the highest priority pending interrupt
+     * happens to be external or if there is nothing pending.
      */
-    if ((s->systick.control & SYSTICK_ENABLE) == 0) {
-        return;
+    if (s->vectpending > NVIC_FIRST_IRQ) {
+        return true;
+    }
+    if (s->vectpending == 0) {
+        return false;
+    }
+
+    for (irq = NVIC_FIRST_IRQ; irq < s->num_irq; irq++) {
+        if (s->vectors[irq].pending) {
+            return true;
+        }
     }
+    return false;
+}
 
-    if (reset)
-        s->systick.tick = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
-    s->systick.tick += (s->systick.reload + 1) * systick_scale(s);
-    timer_mod(s->systick.timer, s->systick.tick);
+/* Return a mask word which clears the subpriority bits from
+ * a priority value for an M-profile exception, leaving only
+ * the group priority.
+ */
+static inline uint32_t nvic_gprio_mask(NVICState *s)
+{
+    return ~0U << (s->prigroup + 1);
 }
 
-static void systick_timer_tick(void * opaque)
+/* Recompute vectpending and exception_prio */
+static void nvic_recompute_state(NVICState *s)
 {
-    nvic_state *s = (nvic_state *)opaque;
-    s->systick.control |= SYSTICK_COUNTFLAG;
-    if (s->systick.control & SYSTICK_TICKINT) {
-        /* Trigger the interrupt.  */
-        armv7m_nvic_set_pending(s, ARMV7M_EXCP_SYSTICK);
+    int i;
+    int pend_prio = NVIC_NOEXC_PRIO;
+    int active_prio = NVIC_NOEXC_PRIO;
+    int pend_irq = 0;
+
+    for (i = 1; i < s->num_irq; i++) {
+        VecInfo *vec = &s->vectors[i];
+
+        if (vec->enabled && vec->pending && vec->prio < pend_prio) {
+            pend_prio = vec->prio;
+            pend_irq = i;
+        }
+        if (vec->active && vec->prio < active_prio) {
+            active_prio = vec->prio;
+        }
     }
-    if (s->systick.reload == 0) {
-        s->systick.control &= ~SYSTICK_ENABLE;
+
+    s->vectpending = pend_irq;
+    s->exception_prio = active_prio & nvic_gprio_mask(s);
+
+    trace_nvic_recompute_state(s->vectpending, s->exception_prio);
+}
+
+/* Return the current execution priority of the CPU
+ * (equivalent to the pseudocode ExecutionPriority function).
+ * This is a value between -2 (NMI priority) and NVIC_NOEXC_PRIO.
+ */
+static inline int nvic_exec_prio(NVICState *s)
+{
+    CPUARMState *env = &s->cpu->env;
+    int running;
+
+    if (env->daif & PSTATE_F) { /* FAULTMASK */
+        running = -1;
+    } else if (env->daif & PSTATE_I) { /* PRIMASK */
+        running = 0;
+    } else if (env->v7m.basepri > 0) {
+        running = env->v7m.basepri & nvic_gprio_mask(s);
     } else {
-        systick_reload(s, 0);
+        running = NVIC_NOEXC_PRIO; /* lower than any possible priority */
     }
+    /* consider priority of active handler */
+    return MIN(running, s->exception_prio);
 }
 
-static void systick_reset(nvic_state *s)
+bool armv7m_nvic_can_take_pending_exception(void *opaque)
 {
-    s->systick.control = 0;
-    s->systick.reload = 0;
-    s->systick.tick = 0;
-    timer_del(s->systick.timer);
+    NVICState *s = opaque;
+
+    return nvic_exec_prio(s) > nvic_pending_prio(s);
+}
+
+/* caller must call nvic_irq_update() after this */
+static void set_prio(NVICState *s, unsigned irq, uint8_t prio)
+{
+    assert(irq > ARMV7M_EXCP_NMI); /* only use for configurable prios */
+    assert(irq < s->num_irq);
+
+    s->vectors[irq].prio = prio;
+
+    trace_nvic_set_prio(irq, prio);
+}
+
+/* Recompute state and assert irq line accordingly.
+ * Must be called after changes to:
+ *  vec->active, vec->enabled, vec->pending or vec->prio for any vector
+ *  prigroup
+ */
+static void nvic_irq_update(NVICState *s)
+{
+    int lvl;
+    int pend_prio;
+
+    nvic_recompute_state(s);
+    pend_prio = nvic_pending_prio(s);
+
+    /* Raise NVIC output if this IRQ would be taken, except that we
+     * ignore the effects of the BASEPRI, FAULTMASK and PRIMASK (which
+     * will be checked for in arm_v7m_cpu_exec_interrupt()); changes
+     * to those CPU registers don't cause us to recalculate the NVIC
+     * pending info.
+     */
+    lvl = (pend_prio < s->exception_prio);
+    trace_nvic_irq_update(s->vectpending, pend_prio, s->exception_prio, lvl);
+    qemu_set_irq(s->excpout, lvl);
+}
+
+static void armv7m_nvic_clear_pending(void *opaque, int irq)
+{
+    NVICState *s = (NVICState *)opaque;
+    VecInfo *vec;
+
+    assert(irq > ARMV7M_EXCP_RESET && irq < s->num_irq);
+
+    vec = &s->vectors[irq];
+    trace_nvic_clear_pending(irq, vec->enabled, vec->prio);
+    if (vec->pending) {
+        vec->pending = 0;
+        nvic_irq_update(s);
+    }
 }
 
-/* The external routines use the hardware vector numbering, ie. the first
-   IRQ is #16.  The internal GIC routines use #32 as the first IRQ.  */
 void armv7m_nvic_set_pending(void *opaque, int irq)
 {
-    nvic_state *s = (nvic_state *)opaque;
-    if (irq >= 16)
-        irq += 16;
-    gic_set_pending_private(&s->gic, 0, irq);
+    NVICState *s = (NVICState *)opaque;
+    VecInfo *vec;
+
+    assert(irq > ARMV7M_EXCP_RESET && irq < s->num_irq);
+
+    vec = &s->vectors[irq];
+    trace_nvic_set_pending(irq, vec->enabled, vec->prio);
+
+
+    if (irq >= ARMV7M_EXCP_HARD && irq < ARMV7M_EXCP_PENDSV) {
+        /* If a synchronous exception is pending then it may be
+         * escalated to HardFault if:
+         *  * it is equal or lower priority to current execution
+         *  * it is disabled
+         * (ie we need to take it immediately but we can't do so).
+         * Asynchronous exceptions (and interrupts) simply remain pending.
+         *
+         * For QEMU, we don't have any imprecise (asynchronous) faults,
+         * so we can assume that PREFETCH_ABORT and DATA_ABORT are always
+         * synchronous.
+         * Debug exceptions are awkward because only Debug exceptions
+         * resulting from the BKPT instruction should be escalated,
+         * but we don't currently implement any Debug exceptions other
+         * than those that result from BKPT, so we treat all debug exceptions
+         * as needing escalation.
+         *
+         * This all means we can identify whether to escalate based only on
+         * the exception number and don't (yet) need the caller to explicitly
+         * tell us whether this exception is synchronous or not.
+         */
+        int running = nvic_exec_prio(s);
+        bool escalate = false;
+
+        if (vec->prio >= running) {
+            trace_nvic_escalate_prio(irq, vec->prio, running);
+            escalate = true;
+        } else if (!vec->enabled) {
+            trace_nvic_escalate_disabled(irq);
+            escalate = true;
+        }
+
+        if (escalate) {
+            if (running < 0) {
+                /* We want to escalate to HardFault but we can't take a
+                 * synchronous HardFault at this point either. This is a
+                 * Lockup condition due to a guest bug. We don't model
+                 * Lockup, so report via cpu_abort() instead.
+                 */
+                cpu_abort(&s->cpu->parent_obj,
+                          "Lockup: can't escalate %d to HardFault "
+                          "(current priority %d)\n", irq, running);
+            }
+
+            /* We can do the escalation, so we take HardFault instead */
+            irq = ARMV7M_EXCP_HARD;
+            vec = &s->vectors[irq];
+            s->cpu->env.v7m.hfsr |= R_V7M_HFSR_FORCED_MASK;
+        }
+    }
+
+    if (!vec->pending) {
+        vec->pending = 1;
+        nvic_irq_update(s);
+    }
 }
 
 /* Make pending IRQ active.  */
-int armv7m_nvic_acknowledge_irq(void *opaque)
+void armv7m_nvic_acknowledge_irq(void *opaque)
 {
-    nvic_state *s = (nvic_state *)opaque;
-    uint32_t irq;
-
-    irq = gic_acknowledge_irq(&s->gic, 0, MEMTXATTRS_UNSPECIFIED);
-    if (irq == 1023)
-        hw_error("Interrupt but no vector\n");
-    if (irq >= 32)
-        irq -= 16;
-    return irq;
+    NVICState *s = (NVICState *)opaque;
+    CPUARMState *env = &s->cpu->env;
+    const int pending = s->vectpending;
+    const int running = nvic_exec_prio(s);
+    int pendgroupprio;
+    VecInfo *vec;
+
+    assert(pending > ARMV7M_EXCP_RESET && pending < s->num_irq);
+
+    vec = &s->vectors[pending];
+
+    assert(vec->enabled);
+    assert(vec->pending);
+
+    pendgroupprio = vec->prio & nvic_gprio_mask(s);
+    assert(pendgroupprio < running);
+
+    trace_nvic_acknowledge_irq(pending, vec->prio);
+
+    vec->active = 1;
+    vec->pending = 0;
+
+    env->v7m.exception = s->vectpending;
+
+    nvic_irq_update(s);
 }
 
-void armv7m_nvic_complete_irq(void *opaque, int irq)
+int armv7m_nvic_complete_irq(void *opaque, int irq)
 {
-    nvic_state *s = (nvic_state *)opaque;
-    if (irq >= 16)
-        irq += 16;
-    gic_complete_irq(&s->gic, 0, irq, MEMTXATTRS_UNSPECIFIED);
+    NVICState *s = (NVICState *)opaque;
+    VecInfo *vec;
+    int ret;
+
+    assert(irq > ARMV7M_EXCP_RESET && irq < s->num_irq);
+
+    vec = &s->vectors[irq];
+
+    trace_nvic_complete_irq(irq);
+
+    if (!vec->active) {
+        /* Tell the caller this was an illegal exception return */
+        return -1;
+    }
+
+    ret = nvic_rettobase(s);
+
+    vec->active = 0;
+    if (vec->level) {
+        /* Re-pend the exception if it's still held high; only
+         * happens for extenal IRQs
+         */
+        assert(irq >= NVIC_FIRST_IRQ);
+        vec->pending = 1;
+    }
+
+    nvic_irq_update(s);
+
+    return ret;
+}
+
+/* callback when external interrupt line is changed */
+static void set_irq_level(void *opaque, int n, int level)
+{
+    NVICState *s = opaque;
+    VecInfo *vec;
+
+    n += NVIC_FIRST_IRQ;
+
+    assert(n >= NVIC_FIRST_IRQ && n < s->num_irq);
+
+    trace_nvic_set_irq_level(n, level);
+
+    /* The pending status of an external interrupt is
+     * latched on rising edge and exception handler return.
+     *
+     * Pulsing the IRQ will always run the handler
+     * once, and the handler will re-run until the
+     * level is low when the handler completes.
+     */
+    vec = &s->vectors[n];
+    if (level != vec->level) {
+        vec->level = level;
+        if (level) {
+            armv7m_nvic_set_pending(s, n);
+        }
+    }
 }
 
-static uint32_t nvic_readl(nvic_state *s, uint32_t offset)
+static uint32_t nvic_readl(NVICState *s, uint32_t offset)
 {
     ARMCPU *cpu = s->cpu;
     uint32_t val;
-    int irq;
 
     switch (offset) {
     case 4: /* Interrupt Control Type.  */
-        return (s->num_irq / 32) - 1;
-    case 0x10: /* SysTick Control and Status.  */
-        val = s->systick.control;
-        s->systick.control &= ~SYSTICK_COUNTFLAG;
-        return val;
-    case 0x14: /* SysTick Reload Value.  */
-        return s->systick.reload;
-    case 0x18: /* SysTick Current Value.  */
-        {
-            int64_t t;
-            if ((s->systick.control & SYSTICK_ENABLE) == 0)
-                return 0;
-            t = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
-            if (t >= s->systick.tick)
-                return 0;
-            val = ((s->systick.tick - (t + 1)) / systick_scale(s)) + 1;
-            /* The interrupt in triggered when the timer reaches zero.
-               However the counter is not reloaded until the next clock
-               tick.  This is a hack to return zero during the first tick.  */
-            if (val > s->systick.reload)
-                val = 0;
-            return val;
-        }
-    case 0x1c: /* SysTick Calibration Value.  */
-        return 10000;
+        return ((s->num_irq - NVIC_FIRST_IRQ) / 32) - 1;
     case 0xd00: /* CPUID Base.  */
         return cpu->midr;
     case 0xd04: /* Interrupt Control State.  */
         /* VECTACTIVE */
         val = cpu->env.v7m.exception;
-        if (val == 1023) {
-            val = 0;
-        } else if (val >= 32) {
-            val -= 16;
-        }
         /* VECTPENDING */
-        if (s->gic.current_pending[0] != 1023)
-            val |= (s->gic.current_pending[0] << 12);
-        /* ISRPENDING and RETTOBASE */
-        for (irq = 32; irq < s->num_irq; irq++) {
-            if (s->gic.irq_state[irq].pending) {
-                val |= (1 << 22);
-                break;
-            }
-            if (irq != cpu->env.v7m.exception && s->gic.irq_state[irq].active) {
-                val |= (1 << 11);
-            }
+        val |= (s->vectpending & 0xff) << 12;
+        /* ISRPENDING - set if any external IRQ is pending */
+        if (nvic_isrpending(s)) {
+            val |= (1 << 22);
+        }
+        /* RETTOBASE - set if only one handler is active */
+        if (nvic_rettobase(s)) {
+            val |= (1 << 11);
         }
         /* PENDSTSET */
-        if (s->gic.irq_state[ARMV7M_EXCP_SYSTICK].pending)
+        if (s->vectors[ARMV7M_EXCP_SYSTICK].pending) {
             val |= (1 << 26);
+        }
         /* PENDSVSET */
-        if (s->gic.irq_state[ARMV7M_EXCP_PENDSV].pending)
+        if (s->vectors[ARMV7M_EXCP_PENDSV].pending) {
             val |= (1 << 28);
+        }
         /* NMIPENDSET */
-        if (s->gic.irq_state[ARMV7M_EXCP_NMI].pending)
+        if (s->vectors[ARMV7M_EXCP_NMI].pending) {
             val |= (1 << 31);
+        }
+        /* ISRPREEMPT not implemented */
         return val;
     case 0xd08: /* Vector Table Offset.  */
         return cpu->env.v7m.vecbase;
     case 0xd0c: /* Application Interrupt/Reset Control.  */
-        return 0xfa050000;
+        return 0xfa050000 | (s->prigroup << 8);
     case 0xd10: /* System Control.  */
         /* TODO: Implement SLEEPONEXIT.  */
         return 0;
@@ -231,20 +443,48 @@ static uint32_t nvic_readl(nvic_state *s, uint32_t offset)
         return cpu->env.v7m.ccr;
     case 0xd24: /* System Handler Status.  */
         val = 0;
-        if (s->gic.irq_state[ARMV7M_EXCP_MEM].active) val |= (1 << 0);
-        if (s->gic.irq_state[ARMV7M_EXCP_BUS].active) val |= (1 << 1);
-        if (s->gic.irq_state[ARMV7M_EXCP_USAGE].active) val |= (1 << 3);
-        if (s->gic.irq_state[ARMV7M_EXCP_SVC].active) val |= (1 << 7);
-        if (s->gic.irq_state[ARMV7M_EXCP_DEBUG].active) val |= (1 << 8);
-        if (s->gic.irq_state[ARMV7M_EXCP_PENDSV].active) val |= (1 << 10);
-        if (s->gic.irq_state[ARMV7M_EXCP_SYSTICK].active) val |= (1 << 11);
-        if (s->gic.irq_state[ARMV7M_EXCP_USAGE].pending) val |= (1 << 12);
-        if (s->gic.irq_state[ARMV7M_EXCP_MEM].pending) val |= (1 << 13);
-        if (s->gic.irq_state[ARMV7M_EXCP_BUS].pending) val |= (1 << 14);
-        if (s->gic.irq_state[ARMV7M_EXCP_SVC].pending) val |= (1 << 15);
-        if (s->gic.irq_state[ARMV7M_EXCP_MEM].enabled) val |= (1 << 16);
-        if (s->gic.irq_state[ARMV7M_EXCP_BUS].enabled) val |= (1 << 17);
-        if (s->gic.irq_state[ARMV7M_EXCP_USAGE].enabled) val |= (1 << 18);
+        if (s->vectors[ARMV7M_EXCP_MEM].active) {
+            val |= (1 << 0);
+        }
+        if (s->vectors[ARMV7M_EXCP_BUS].active) {
+            val |= (1 << 1);
+        }
+        if (s->vectors[ARMV7M_EXCP_USAGE].active) {
+            val |= (1 << 3);
+        }
+        if (s->vectors[ARMV7M_EXCP_SVC].active) {
+            val |= (1 << 7);
+        }
+        if (s->vectors[ARMV7M_EXCP_DEBUG].active) {
+            val |= (1 << 8);
+        }
+        if (s->vectors[ARMV7M_EXCP_PENDSV].active) {
+            val |= (1 << 10);
+        }
+        if (s->vectors[ARMV7M_EXCP_SYSTICK].active) {
+            val |= (1 << 11);
+        }
+        if (s->vectors[ARMV7M_EXCP_USAGE].pending) {
+            val |= (1 << 12);
+        }
+        if (s->vectors[ARMV7M_EXCP_MEM].pending) {
+            val |= (1 << 13);
+        }
+        if (s->vectors[ARMV7M_EXCP_BUS].pending) {
+            val |= (1 << 14);
+        }
+        if (s->vectors[ARMV7M_EXCP_SVC].pending) {
+            val |= (1 << 15);
+        }
+        if (s->vectors[ARMV7M_EXCP_MEM].enabled) {
+            val |= (1 << 16);
+        }
+        if (s->vectors[ARMV7M_EXCP_BUS].enabled) {
+            val |= (1 << 17);
+        }
+        if (s->vectors[ARMV7M_EXCP_USAGE].enabled) {
+            val |= (1 << 18);
+        }
         return val;
     case 0xd28: /* Configurable Fault Status.  */
         return cpu->env.v7m.cfsr;
@@ -294,43 +534,11 @@ static uint32_t nvic_readl(nvic_state *s, uint32_t offset)
     }
 }
 
-static void nvic_writel(nvic_state *s, uint32_t offset, uint32_t value)
+static void nvic_writel(NVICState *s, uint32_t offset, uint32_t value)
 {
     ARMCPU *cpu = s->cpu;
-    uint32_t oldval;
+
     switch (offset) {
-    case 0x10: /* SysTick Control and Status.  */
-        oldval = s->systick.control;
-        s->systick.control &= 0xfffffff8;
-        s->systick.control |= value & 7;
-        if ((oldval ^ value) & SYSTICK_ENABLE) {
-            int64_t now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
-            if (value & SYSTICK_ENABLE) {
-                if (s->systick.tick) {
-                    s->systick.tick += now;
-                    timer_mod(s->systick.timer, s->systick.tick);
-                } else {
-                    systick_reload(s, 1);
-                }
-            } else {
-                timer_del(s->systick.timer);
-                s->systick.tick -= now;
-                if (s->systick.tick < 0)
-                  s->systick.tick = 0;
-            }
-        } else if ((oldval ^ value) & SYSTICK_CLKSOURCE) {
-            /* This is a hack. Force the timer to be reloaded
-               when the reference clock is changed.  */
-            systick_reload(s, 1);
-        }
-        break;
-    case 0x14: /* SysTick Reload Value.  */
-        s->systick.reload = value;
-        break;
-    case 0x18: /* SysTick Current Value.  Writes reload the timer.  */
-        systick_reload(s, 1);
-        s->systick.control &= ~SYSTICK_COUNTFLAG;
-        break;
     case 0xd04: /* Interrupt Control State.  */
         if (value & (1 << 31)) {
             armv7m_nvic_set_pending(s, ARMV7M_EXCP_NMI);
@@ -338,14 +546,12 @@ static void nvic_writel(nvic_state *s, uint32_t offset, uint32_t value)
         if (value & (1 << 28)) {
             armv7m_nvic_set_pending(s, ARMV7M_EXCP_PENDSV);
         } else if (value & (1 << 27)) {
-            s->gic.irq_state[ARMV7M_EXCP_PENDSV].pending = 0;
-            gic_update(&s->gic);
+            armv7m_nvic_clear_pending(s, ARMV7M_EXCP_PENDSV);
         }
         if (value & (1 << 26)) {
             armv7m_nvic_set_pending(s, ARMV7M_EXCP_SYSTICK);
         } else if (value & (1 << 25)) {
-            s->gic.irq_state[ARMV7M_EXCP_SYSTICK].pending = 0;
-            gic_update(&s->gic);
+            armv7m_nvic_clear_pending(s, ARMV7M_EXCP_SYSTICK);
         }
         break;
     case 0xd08: /* Vector Table Offset.  */
@@ -357,14 +563,17 @@ static void nvic_writel(nvic_state *s, uint32_t offset, uint32_t value)
                 qemu_irq_pulse(s->sysresetreq);
             }
             if (value & 2) {
-                qemu_log_mask(LOG_UNIMP, "VECTCLRACTIVE unimplemented\n");
+                qemu_log_mask(LOG_GUEST_ERROR,
+                              "Setting VECTCLRACTIVE when not in DEBUG mode "
+                              "is UNPREDICTABLE\n");
             }
             if (value & 1) {
-                qemu_log_mask(LOG_UNIMP, "AIRCR system reset unimplemented\n");
-            }
-            if (value & 0x700) {
-                qemu_log_mask(LOG_UNIMP, "PRIGROUP unimplemented\n");
+                qemu_log_mask(LOG_GUEST_ERROR,
+                              "Setting VECTRESET when not in DEBUG mode "
+                              "is UNPREDICTABLE\n");
             }
+            s->prigroup = extract32(value, 8, 3);
+            nvic_irq_update(s);
         }
         break;
     case 0xd10: /* System Control.  */
@@ -383,11 +592,21 @@ static void nvic_writel(nvic_state *s, uint32_t offset, uint32_t value)
         cpu->env.v7m.ccr = value;
         break;
     case 0xd24: /* System Handler Control.  */
-        /* TODO: Real hardware allows you to set/clear the active bits
-           under some circumstances.  We don't implement this.  */
-        s->gic.irq_state[ARMV7M_EXCP_MEM].enabled = (value & (1 << 16)) != 0;
-        s->gic.irq_state[ARMV7M_EXCP_BUS].enabled = (value & (1 << 17)) != 0;
-        s->gic.irq_state[ARMV7M_EXCP_USAGE].enabled = (value & (1 << 18)) != 0;
+        s->vectors[ARMV7M_EXCP_MEM].active = (value & (1 << 0)) != 0;
+        s->vectors[ARMV7M_EXCP_BUS].active = (value & (1 << 1)) != 0;
+        s->vectors[ARMV7M_EXCP_USAGE].active = (value & (1 << 3)) != 0;
+        s->vectors[ARMV7M_EXCP_SVC].active = (value & (1 << 7)) != 0;
+        s->vectors[ARMV7M_EXCP_DEBUG].active = (value & (1 << 8)) != 0;
+        s->vectors[ARMV7M_EXCP_PENDSV].active = (value & (1 << 10)) != 0;
+        s->vectors[ARMV7M_EXCP_SYSTICK].active = (value & (1 << 11)) != 0;
+        s->vectors[ARMV7M_EXCP_USAGE].pending = (value & (1 << 12)) != 0;
+        s->vectors[ARMV7M_EXCP_MEM].pending = (value & (1 << 13)) != 0;
+        s->vectors[ARMV7M_EXCP_BUS].pending = (value & (1 << 14)) != 0;
+        s->vectors[ARMV7M_EXCP_SVC].pending = (value & (1 << 15)) != 0;
+        s->vectors[ARMV7M_EXCP_MEM].enabled = (value & (1 << 16)) != 0;
+        s->vectors[ARMV7M_EXCP_BUS].enabled = (value & (1 << 17)) != 0;
+        s->vectors[ARMV7M_EXCP_USAGE].enabled = (value & (1 << 18)) != 0;
+        nvic_irq_update(s);
         break;
     case 0xd28: /* Configurable Fault Status.  */
         cpu->env.v7m.cfsr &= ~value; /* W1C */
@@ -409,13 +628,16 @@ static void nvic_writel(nvic_state *s, uint32_t offset, uint32_t value)
                       "NVIC: Aux fault status registers unimplemented\n");
         break;
     case 0xf00: /* Software Triggered Interrupt Register */
+    {
         /* user mode can only write to STIR if CCR.USERSETMPEND permits it */
-        if ((value & 0x1ff) < s->num_irq &&
+        int excnum = (value & 0x1ff) + NVIC_FIRST_IRQ;
+        if (excnum < s->num_irq &&
             (arm_current_el(&cpu->env) ||
              (cpu->env.v7m.ccr & R_V7M_CCR_USERSETMPEND_MASK))) {
-            gic_set_pending_private(&s->gic, 0, value & 0x1ff);
+            armv7m_nvic_set_pending(s, excnum);
         }
         break;
+    }
     default:
         qemu_log_mask(LOG_GUEST_ERROR,
                       "NVIC: Bad write offset 0x%x\n", offset);
@@ -425,46 +647,142 @@ static void nvic_writel(nvic_state *s, uint32_t offset, uint32_t value)
 static uint64_t nvic_sysreg_read(void *opaque, hwaddr addr,
                                  unsigned size)
 {
-    nvic_state *s = (nvic_state *)opaque;
+    NVICState *s = (NVICState *)opaque;
     uint32_t offset = addr;
-    int i;
+    unsigned i, startvec, end;
     uint32_t val;
 
     switch (offset) {
+    /* reads of set and clear both return the status */
+    case 0x100 ... 0x13f: /* NVIC Set enable */
+        offset += 0x80;
+        /* fall through */
+    case 0x180 ... 0x1bf: /* NVIC Clear enable */
+        val = 0;
+        startvec = offset - 0x180 + NVIC_FIRST_IRQ; /* vector # */
+
+        for (i = 0, end = size * 8; i < end && startvec + i < s->num_irq; i++) {
+            if (s->vectors[startvec + i].enabled) {
+                val |= (1 << i);
+            }
+        }
+        break;
+    case 0x200 ... 0x23f: /* NVIC Set pend */
+        offset += 0x80;
+        /* fall through */
+    case 0x280 ... 0x2bf: /* NVIC Clear pend */
+        val = 0;
+        startvec = offset - 0x280 + NVIC_FIRST_IRQ; /* vector # */
+        for (i = 0, end = size * 8; i < end && startvec + i < s->num_irq; i++) {
+            if (s->vectors[startvec + i].pending) {
+                val |= (1 << i);
+            }
+        }
+        break;
+    case 0x300 ... 0x33f: /* NVIC Active */
+        val = 0;
+        startvec = offset - 0x300 + NVIC_FIRST_IRQ; /* vector # */
+
+        for (i = 0, end = size * 8; i < end && startvec + i < s->num_irq; i++) {
+            if (s->vectors[startvec + i].active) {
+                val |= (1 << i);
+            }
+        }
+        break;
+    case 0x400 ... 0x5ef: /* NVIC Priority */
+        val = 0;
+        startvec = offset - 0x400 + NVIC_FIRST_IRQ; /* vector # */
+
+        for (i = 0; i < size && startvec + i < s->num_irq; i++) {
+            val |= s->vectors[startvec + i].prio << (8 * i);
+        }
+        break;
     case 0xd18 ... 0xd23: /* System Handler Priority.  */
         val = 0;
         for (i = 0; i < size; i++) {
-            val |= s->gic.priority1[(offset - 0xd14) + i][0] << (i * 8);
+            val |= s->vectors[(offset - 0xd14) + i].prio << (i * 8);
         }
-        return val;
+        break;
     case 0xfe0 ... 0xfff: /* ID.  */
         if (offset & 3) {
-            return 0;
+            val = 0;
+        } else {
+            val = nvic_id[(offset - 0xfe0) >> 2];
+        }
+        break;
+    default:
+        if (size == 4) {
+            val = nvic_readl(s, offset);
+        } else {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "NVIC: Bad read of size %d at offset 0x%x\n",
+                          size, offset);
+            val = 0;
         }
-        return nvic_id[(offset - 0xfe0) >> 2];
-    }
-    if (size == 4) {
-        return nvic_readl(s, offset);
     }
-    qemu_log_mask(LOG_GUEST_ERROR,
-                  "NVIC: Bad read of size %d at offset 0x%x\n", size, offset);
-    return 0;
+
+    trace_nvic_sysreg_read(addr, val, size);
+    return val;
 }
 
 static void nvic_sysreg_write(void *opaque, hwaddr addr,
                               uint64_t value, unsigned size)
 {
-    nvic_state *s = (nvic_state *)opaque;
+    NVICState *s = (NVICState *)opaque;
     uint32_t offset = addr;
-    int i;
+    unsigned i, startvec, end;
+    unsigned setval = 0;
+
+    trace_nvic_sysreg_write(addr, value, size);
 
     switch (offset) {
+    case 0x100 ... 0x13f: /* NVIC Set enable */
+        offset += 0x80;
+        setval = 1;
+        /* fall through */
+    case 0x180 ... 0x1bf: /* NVIC Clear enable */
+        startvec = 8 * (offset - 0x180) + NVIC_FIRST_IRQ;
+
+        for (i = 0, end = size * 8; i < end && startvec + i < s->num_irq; i++) {
+            if (value & (1 << i)) {
+                s->vectors[startvec + i].enabled = setval;
+            }
+        }
+        nvic_irq_update(s);
+        return;
+    case 0x200 ... 0x23f: /* NVIC Set pend */
+        /* the special logic in armv7m_nvic_set_pending()
+         * is not needed since IRQs are never escalated
+         */
+        offset += 0x80;
+        setval = 1;
+        /* fall through */
+    case 0x280 ... 0x2bf: /* NVIC Clear pend */
+        startvec = 8 * (offset - 0x280) + NVIC_FIRST_IRQ; /* vector # */
+
+        for (i = 0, end = size * 8; i < end && startvec + i < s->num_irq; i++) {
+            if (value & (1 << i)) {
+                s->vectors[startvec + i].pending = setval;
+            }
+        }
+        nvic_irq_update(s);
+        return;
+    case 0x300 ... 0x33f: /* NVIC Active */
+        return; /* R/O */
+    case 0x400 ... 0x5ef: /* NVIC Priority */
+        startvec = 8 * (offset - 0x400) + NVIC_FIRST_IRQ; /* vector # */
+
+        for (i = 0; i < size && startvec + i < s->num_irq; i++) {
+            set_prio(s, startvec + i, (value >> (i * 8)) & 0xff);
+        }
+        nvic_irq_update(s);
+        return;
     case 0xd18 ... 0xd23: /* System Handler Priority.  */
         for (i = 0; i < size; i++) {
-            s->gic.priority1[(offset - 0xd14) + i][0] =
-                (value >> (i * 8)) & 0xff;
+            unsigned hdlidx = (offset - 0xd14) + i;
+            set_prio(s, hdlidx, (value >> (i * 8)) & 0xff);
         }
-        gic_update(&s->gic);
+        nvic_irq_update(s);
         return;
     }
     if (size == 4) {
@@ -481,61 +799,143 @@ static const MemoryRegionOps nvic_sysreg_ops = {
     .endianness = DEVICE_NATIVE_ENDIAN,
 };
 
-static const VMStateDescription vmstate_nvic = {
-    .name = "armv7m_nvic",
+static int nvic_post_load(void *opaque, int version_id)
+{
+    NVICState *s = opaque;
+    unsigned i;
+
+    /* Check for out of range priority settings */
+    if (s->vectors[ARMV7M_EXCP_RESET].prio != -3 ||
+        s->vectors[ARMV7M_EXCP_NMI].prio != -2 ||
+        s->vectors[ARMV7M_EXCP_HARD].prio != -1) {
+        return 1;
+    }
+    for (i = ARMV7M_EXCP_MEM; i < s->num_irq; i++) {
+        if (s->vectors[i].prio & ~0xff) {
+            return 1;
+        }
+    }
+
+    nvic_recompute_state(s);
+
+    return 0;
+}
+
+static const VMStateDescription vmstate_VecInfo = {
+    .name = "armv7m_nvic_info",
     .version_id = 1,
     .minimum_version_id = 1,
     .fields = (VMStateField[]) {
-        VMSTATE_UINT32(systick.control, nvic_state),
-        VMSTATE_UINT32(systick.reload, nvic_state),
-        VMSTATE_INT64(systick.tick, nvic_state),
-        VMSTATE_TIMER_PTR(systick.timer, nvic_state),
+        VMSTATE_INT16(prio, VecInfo),
+        VMSTATE_UINT8(enabled, VecInfo),
+        VMSTATE_UINT8(pending, VecInfo),
+        VMSTATE_UINT8(active, VecInfo),
+        VMSTATE_UINT8(level, VecInfo),
         VMSTATE_END_OF_LIST()
     }
 };
 
+static const VMStateDescription vmstate_nvic = {
+    .name = "armv7m_nvic",
+    .version_id = 4,
+    .minimum_version_id = 4,
+    .post_load = &nvic_post_load,
+    .fields = (VMStateField[]) {
+        VMSTATE_STRUCT_ARRAY(vectors, NVICState, NVIC_MAX_VECTORS, 1,
+                             vmstate_VecInfo, VecInfo),
+        VMSTATE_UINT32(prigroup, NVICState),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static Property props_nvic[] = {
+    /* Number of external IRQ lines (so excluding the 16 internal exceptions) */
+    DEFINE_PROP_UINT32("num-irq", NVICState, num_irq, 64),
+    DEFINE_PROP_END_OF_LIST()
+};
+
 static void armv7m_nvic_reset(DeviceState *dev)
 {
-    nvic_state *s = NVIC(dev);
-    NVICClass *nc = NVIC_GET_CLASS(s);
-    nc->parent_reset(dev);
-    /* Common GIC reset resets to disabled; the NVIC doesn't have
-     * per-CPU interfaces so mark our non-existent CPU interface
-     * as enabled by default, and with a priority mask which allows
-     * all interrupts through.
+    NVICState *s = NVIC(dev);
+
+    s->vectors[ARMV7M_EXCP_NMI].enabled = 1;
+    s->vectors[ARMV7M_EXCP_HARD].enabled = 1;
+    /* MEM, BUS, and USAGE are enabled through
+     * the System Handler Control register
+     */
+    s->vectors[ARMV7M_EXCP_SVC].enabled = 1;
+    s->vectors[ARMV7M_EXCP_DEBUG].enabled = 1;
+    s->vectors[ARMV7M_EXCP_PENDSV].enabled = 1;
+    s->vectors[ARMV7M_EXCP_SYSTICK].enabled = 1;
+
+    s->vectors[ARMV7M_EXCP_RESET].prio = -3;
+    s->vectors[ARMV7M_EXCP_NMI].prio = -2;
+    s->vectors[ARMV7M_EXCP_HARD].prio = -1;
+
+    /* Strictly speaking the reset handler should be enabled.
+     * However, we don't simulate soft resets through the NVIC,
+     * and the reset vector should never be pended.
+     * So we leave it disabled to catch logic errors.
      */
-    s->gic.cpu_ctlr[0] = GICC_CTLR_EN_GRP0;
-    s->gic.priority_mask[0] = 0x100;
-    /* The NVIC as a whole is always enabled. */
-    s->gic.ctlr = 1;
-    systick_reset(s);
+
+    s->exception_prio = NVIC_NOEXC_PRIO;
+    s->vectpending = 0;
+}
+
+static void nvic_systick_trigger(void *opaque, int n, int level)
+{
+    NVICState *s = opaque;
+
+    if (level) {
+        /* SysTick just asked us to pend its exception.
+         * (This is different from an external interrupt line's
+         * behaviour.)
+         */
+        armv7m_nvic_set_pending(s, ARMV7M_EXCP_SYSTICK);
+    }
 }
 
 static void armv7m_nvic_realize(DeviceState *dev, Error **errp)
 {
-    nvic_state *s = NVIC(dev);
-    NVICClass *nc = NVIC_GET_CLASS(s);
-    Error *local_err = NULL;
+    NVICState *s = NVIC(dev);
+    SysBusDevice *systick_sbd;
+    Error *err = NULL;
 
     s->cpu = ARM_CPU(qemu_get_cpu(0));
     assert(s->cpu);
-    /* The NVIC always has only one CPU */
-    s->gic.num_cpu = 1;
-    /* Tell the common code we're an NVIC */
-    s->gic.revision = 0xffffffff;
-    s->num_irq = s->gic.num_irq;
-    nc->parent_realize(dev, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
+
+    if (s->num_irq > NVIC_MAX_IRQ) {
+        error_setg(errp, "num-irq %d exceeds NVIC maximum", s->num_irq);
+        return;
+    }
+
+    qdev_init_gpio_in(dev, set_irq_level, s->num_irq);
+
+    /* include space for internal exception vectors */
+    s->num_irq += NVIC_FIRST_IRQ;
+
+    object_property_set_bool(OBJECT(&s->systick), true, "realized", &err);
+    if (err != NULL) {
+        error_propagate(errp, err);
         return;
     }
-    gic_init_irqs_and_distributor(&s->gic);
-    /* The NVIC and system controller register area looks like this:
-     *  0..0xff : system control registers, including systick
-     *  0x100..0xcff : GIC-like registers
-     *  0xd00..0xfff : system control registers
-     * We use overlaying to put the GIC like registers
-     * over the top of the system control register region.
+    systick_sbd = SYS_BUS_DEVICE(&s->systick);
+    sysbus_connect_irq(systick_sbd, 0,
+                       qdev_get_gpio_in_named(dev, "systick-trigger", 0));
+
+    /* The NVIC and System Control Space (SCS) starts at 0xe000e000
+     * and looks like this:
+     *  0x004 - ICTR
+     *  0x010 - 0xff - systick
+     *  0x100..0x7ec - NVIC
+     *  0x7f0..0xcff - Reserved
+     *  0xd00..0xd3c - SCS registers
+     *  0xd40..0xeff - Reserved or Not implemented
+     *  0xf00 - STIR
+     *
+     * At the moment there is only one thing in the container region,
+     * but we leave it in place to allow us to pull systick out into
+     * its own device object later.
      */
     memory_region_init(&s->container, OBJECT(s), "nvic", 0x1000);
     /* The system register region goes at the bottom of the priority
@@ -544,19 +944,11 @@ static void armv7m_nvic_realize(DeviceState *dev, Error **errp)
     memory_region_init_io(&s->sysregmem, OBJECT(s), &nvic_sysreg_ops, s,
                           "nvic_sysregs", 0x1000);
     memory_region_add_subregion(&s->container, 0, &s->sysregmem);
-    /* Alias the GIC region so we can get only the section of it
-     * we need, and layer it on top of the system register region.
-     */
-    memory_region_init_alias(&s->gic_iomem_alias, OBJECT(s),
-                             "nvic-gic", &s->gic.iomem,
-                             0x100, 0xc00);
-    memory_region_add_subregion_overlap(&s->container, 0x100,
-                                        &s->gic_iomem_alias, 1);
-    /* Map the whole thing into system memory at the location required
-     * by the v7M architecture.
-     */
-    memory_region_add_subregion(get_system_memory(), 0xe000e000, &s->container);
-    s->systick.timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, systick_timer_tick, s);
+    memory_region_add_subregion_overlap(&s->container, 0x10,
+                                        sysbus_mmio_get_region(systick_sbd, 0),
+                                        1);
+
+    sysbus_init_mmio(SYS_BUS_DEVICE(dev), &s->container);
 }
 
 static void armv7m_nvic_instance_init(Object *obj)
@@ -567,36 +959,35 @@ static void armv7m_nvic_instance_init(Object *obj)
      * any user-specified property setting, so just modify the
      * value in the GICState struct.
      */
-    GICState *s = ARM_GIC_COMMON(obj);
     DeviceState *dev = DEVICE(obj);
-    nvic_state *nvic = NVIC(obj);
-    /* The ARM v7m may have anything from 0 to 496 external interrupt
-     * IRQ lines. We default to 64. Other boards may differ and should
-     * set the num-irq property appropriately.
-     */
-    s->num_irq = 64;
+    NVICState *nvic = NVIC(obj);
+    SysBusDevice *sbd = SYS_BUS_DEVICE(obj);
+
+    object_initialize(&nvic->systick, sizeof(nvic->systick), TYPE_SYSTICK);
+    qdev_set_parent_bus(DEVICE(&nvic->systick), sysbus_get_default());
+
+    sysbus_init_irq(sbd, &nvic->excpout);
     qdev_init_gpio_out_named(dev, &nvic->sysresetreq, "SYSRESETREQ", 1);
+    qdev_init_gpio_in_named(dev, nvic_systick_trigger, "systick-trigger", 1);
 }
 
 static void armv7m_nvic_class_init(ObjectClass *klass, void *data)
 {
-    NVICClass *nc = NVIC_CLASS(klass);
     DeviceClass *dc = DEVICE_CLASS(klass);
 
-    nc->parent_reset = dc->reset;
-    nc->parent_realize = dc->realize;
     dc->vmsd  = &vmstate_nvic;
+    dc->props = props_nvic;
     dc->reset = armv7m_nvic_reset;
     dc->realize = armv7m_nvic_realize;
 }
 
 static const TypeInfo armv7m_nvic_info = {
     .name          = TYPE_NVIC,
-    .parent        = TYPE_ARM_GIC_COMMON,
+    .parent        = TYPE_SYS_BUS_DEVICE,
     .instance_init = armv7m_nvic_instance_init,
-    .instance_size = sizeof(nvic_state),
+    .instance_size = sizeof(NVICState),
     .class_init    = armv7m_nvic_class_init,
-    .class_size    = sizeof(NVICClass),
+    .class_size    = sizeof(SysBusDeviceClass),
 };
 
 static void armv7m_nvic_register_types(void)
diff --git a/hw/intc/gic_internal.h b/hw/intc/gic_internal.h
index 3f311740da..7fe87b13de 100644
--- a/hw/intc/gic_internal.h
+++ b/hw/intc/gic_internal.h
@@ -25,9 +25,7 @@
 
 #define ALL_CPU_MASK ((unsigned)(((1 << GIC_NCPU) - 1)))
 
-/* The NVIC has 16 internal vectors.  However these are not exposed
-   through the normal GIC interface.  */
-#define GIC_BASE_IRQ ((s->revision == REV_NVIC) ? 32 : 0)
+#define GIC_BASE_IRQ 0
 
 #define GIC_SET_ENABLED(irq, cm) s->irq_state[irq].enabled |= (cm)
 #define GIC_CLEAR_ENABLED(irq, cm) s->irq_state[irq].enabled &= ~(cm)
@@ -75,7 +73,6 @@
 
 /* The special cases for the revision property: */
 #define REV_11MPCORE 0
-#define REV_NVIC 0xffffffff
 
 void gic_set_pending_private(GICState *s, int cpu, int irq);
 uint32_t gic_acknowledge_irq(GICState *s, int cpu, MemTxAttrs attrs);
@@ -87,7 +84,7 @@ void gic_set_priority(GICState *s, int cpu, int irq, uint8_t val,
 
 static inline bool gic_test_pending(GICState *s, int irq, int cm)
 {
-    if (s->revision == REV_NVIC || s->revision == REV_11MPCORE) {
+    if (s->revision == REV_11MPCORE) {
         return s->irq_state[irq].pending & cm;
     } else {
         /* Edge-triggered interrupts are marked pending on a rising edge, but
diff --git a/hw/intc/gicv3_internal.h b/hw/intc/gicv3_internal.h
index aeb801d133..05303a55c8 100644
--- a/hw/intc/gicv3_internal.h
+++ b/hw/intc/gicv3_internal.h
@@ -138,6 +138,7 @@
 #define ICC_CTLR_EL1_EOIMODE        (1U << 1)
 #define ICC_CTLR_EL1_PMHE           (1U << 6)
 #define ICC_CTLR_EL1_PRIBITS_SHIFT 8
+#define ICC_CTLR_EL1_PRIBITS_MASK   (7U << ICC_CTLR_EL1_PRIBITS_SHIFT)
 #define ICC_CTLR_EL1_IDBITS_SHIFT 11
 #define ICC_CTLR_EL1_SEIS           (1U << 14)
 #define ICC_CTLR_EL1_A3V            (1U << 15)
@@ -407,4 +408,6 @@ static inline void gicv3_cache_all_target_cpustates(GICv3State *s)
     }
 }
 
+void gicv3_set_gicv3state(CPUState *cpu, GICv3CPUState *s);
+
 #endif /* QEMU_ARM_GICV3_INTERNAL_H */
diff --git a/hw/intc/trace-events b/hw/intc/trace-events
index 39a538d048..729c1288f1 100644
--- a/hw/intc/trace-events
+++ b/hw/intc/trace-events
@@ -161,3 +161,18 @@ gicv3_redist_write(uint32_t cpu, uint64_t offset, uint64_t data, unsigned size,
 gicv3_redist_badwrite(uint32_t cpu, uint64_t offset, uint64_t data, unsigned size, bool secure) "GICv3 redistributor %x write: offset 0x%" PRIx64 " data 0x%" PRIx64 " size %u secure %d: error"
 gicv3_redist_set_irq(uint32_t cpu, int irq, int level) "GICv3 redistributor %x interrupt %d level changed to %d"
 gicv3_redist_send_sgi(uint32_t cpu, int irq) "GICv3 redistributor %x pending SGI %d"
+
+# hw/intc/armv7m_nvic.c
+nvic_recompute_state(int vectpending, int exception_prio) "NVIC state recomputed: vectpending %d exception_prio %d"
+nvic_set_prio(int irq, uint8_t prio) "NVIC set irq %d priority %d"
+nvic_irq_update(int vectpending, int pendprio, int exception_prio, int level) "NVIC vectpending %d pending prio %d exception_prio %d: setting irq line to %d"
+nvic_escalate_prio(int irq, int irqprio, int runprio) "NVIC escalating irq %d to HardFault: insufficient priority %d >= %d"
+nvic_escalate_disabled(int irq) "NVIC escalating irq %d to HardFault: disabled"
+nvic_set_pending(int irq, int en, int prio) "NVIC set pending irq %d (enabled: %d priority %d)"
+nvic_clear_pending(int irq, int en, int prio) "NVIC clear pending irq %d (enabled: %d priority %d)"
+nvic_set_pending_level(int irq) "NVIC set pending: irq %d higher prio than vectpending: setting irq line to 1"
+nvic_acknowledge_irq(int irq, int prio) "NVIC acknowledge IRQ: %d now active (prio %d)"
+nvic_complete_irq(int irq) "NVIC complete IRQ %d"
+nvic_set_irq_level(int irq, int level) "NVIC external irq %d level set to %d"
+nvic_sysreg_read(uint64_t addr, uint32_t value, unsigned size) "NVIC sysreg read addr 0x%" PRIx64 " data 0x%" PRIx32 " size %u"
+nvic_sysreg_write(uint64_t addr, uint32_t value, unsigned size) "NVIC sysreg write addr 0x%" PRIx64 " data 0x%" PRIx32 " size %u"
diff --git a/hw/intc/xics.c b/hw/intc/xics.c
index 095c16a300..ffc0747c7f 100644
--- a/hw/intc/xics.c
+++ b/hw/intc/xics.c
@@ -49,40 +49,41 @@ int xics_get_cpu_index_by_dt_id(int cpu_dt_id)
     return -1;
 }
 
-void xics_cpu_destroy(XICSState *xics, PowerPCCPU *cpu)
+void xics_cpu_destroy(XICSFabric *xi, PowerPCCPU *cpu)
 {
     CPUState *cs = CPU(cpu);
-    ICPState *ss = &xics->ss[cs->cpu_index];
+    ICPState *icp = xics_icp_get(xi, cs->cpu_index);
 
-    assert(cs->cpu_index < xics->nr_servers);
-    assert(cs == ss->cs);
+    assert(icp);
+    assert(cs == icp->cs);
 
-    ss->output = NULL;
-    ss->cs = NULL;
+    icp->output = NULL;
+    icp->cs = NULL;
 }
 
-void xics_cpu_setup(XICSState *xics, PowerPCCPU *cpu)
+void xics_cpu_setup(XICSFabric *xi, PowerPCCPU *cpu)
 {
     CPUState *cs = CPU(cpu);
     CPUPPCState *env = &cpu->env;
-    ICPState *ss = &xics->ss[cs->cpu_index];
-    XICSStateClass *info = XICS_COMMON_GET_CLASS(xics);
+    ICPState *icp = xics_icp_get(xi, cs->cpu_index);
+    ICPStateClass *icpc;
 
-    assert(cs->cpu_index < xics->nr_servers);
+    assert(icp);
 
-    ss->cs = cs;
+    icp->cs = cs;
 
-    if (info->cpu_setup) {
-        info->cpu_setup(xics, cpu);
+    icpc = ICP_GET_CLASS(icp);
+    if (icpc->cpu_setup) {
+        icpc->cpu_setup(icp, cpu);
     }
 
     switch (PPC_INPUT(env)) {
     case PPC_FLAGS_INPUT_POWER7:
-        ss->output = env->irq_inputs[POWER7_INPUT_INT];
+        icp->output = env->irq_inputs[POWER7_INPUT_INT];
         break;
 
     case PPC_FLAGS_INPUT_970:
-        ss->output = env->irq_inputs[PPC970_INPUT_INT];
+        icp->output = env->irq_inputs[PPC970_INPUT_INT];
         break;
 
     default:
@@ -92,185 +93,43 @@ void xics_cpu_setup(XICSState *xics, PowerPCCPU *cpu)
     }
 }
 
-static void xics_common_pic_print_info(InterruptStatsProvider *obj,
-                                       Monitor *mon)
+void icp_pic_print_info(ICPState *icp, Monitor *mon)
 {
-    XICSState *xics = XICS_COMMON(obj);
-    ICSState *ics;
-    uint32_t i;
-
-    for (i = 0; i < xics->nr_servers; i++) {
-        ICPState *icp = &xics->ss[i];
-
-        if (!icp->output) {
-            continue;
-        }
-        monitor_printf(mon, "CPU %d XIRR=%08x (%p) PP=%02x MFRR=%02x\n",
-                       i, icp->xirr, icp->xirr_owner,
-                       icp->pending_priority, icp->mfrr);
-    }
-
-    QLIST_FOREACH(ics, &xics->ics, list) {
-        monitor_printf(mon, "ICS %4x..%4x %p\n",
-                       ics->offset, ics->offset + ics->nr_irqs - 1, ics);
-
-        if (!ics->irqs) {
-            continue;
-        }
-
-        for (i = 0; i < ics->nr_irqs; i++) {
-            ICSIRQState *irq = ics->irqs + i;
-
-            if (!(irq->flags & XICS_FLAGS_IRQ_MASK)) {
-                continue;
-            }
-            monitor_printf(mon, "  %4x %s %02x %02x\n",
-                           ics->offset + i,
-                           (irq->flags & XICS_FLAGS_IRQ_LSI) ?
-                           "LSI" : "MSI",
-                           irq->priority, irq->status);
-        }
-    }
-}
-
-/*
- * XICS Common class - parent for emulated XICS and KVM-XICS
- */
-static void xics_common_reset(DeviceState *d)
-{
-    XICSState *xics = XICS_COMMON(d);
-    ICSState *ics;
-    int i;
-
-    for (i = 0; i < xics->nr_servers; i++) {
-        device_reset(DEVICE(&xics->ss[i]));
-    }
-
-    QLIST_FOREACH(ics, &xics->ics, list) {
-        device_reset(DEVICE(ics));
-    }
-}
-
-static void xics_prop_get_nr_irqs(Object *obj, Visitor *v, const char *name,
-                                  void *opaque, Error **errp)
-{
-    XICSState *xics = XICS_COMMON(obj);
-    int64_t value = xics->nr_irqs;
+    int cpu_index = icp->cs ? icp->cs->cpu_index : -1;
 
-    visit_type_int(v, name, &value, errp);
-}
-
-static void xics_prop_set_nr_irqs(Object *obj, Visitor *v, const char *name,
-                                  void *opaque, Error **errp)
-{
-    XICSState *xics = XICS_COMMON(obj);
-    XICSStateClass *info = XICS_COMMON_GET_CLASS(xics);
-    Error *error = NULL;
-    int64_t value;
-
-    visit_type_int(v, name, &value, &error);
-    if (error) {
-        error_propagate(errp, error);
+    if (!icp->output) {
         return;
     }
-    if (xics->nr_irqs) {
-        error_setg(errp, "Number of interrupts is already set to %u",
-                   xics->nr_irqs);
-        return;
-    }
-
-    assert(info->set_nr_irqs);
-    info->set_nr_irqs(xics, value, errp);
-}
-
-void xics_set_nr_servers(XICSState *xics, uint32_t nr_servers,
-                         const char *typename, Error **errp)
-{
-    int i;
-
-    xics->nr_servers = nr_servers;
-
-    xics->ss = g_malloc0(xics->nr_servers * sizeof(ICPState));
-    for (i = 0; i < xics->nr_servers; i++) {
-        char name[32];
-        ICPState *icp = &xics->ss[i];
-
-        object_initialize(icp, sizeof(*icp), typename);
-        snprintf(name, sizeof(name), "icp[%d]", i);
-        object_property_add_child(OBJECT(xics), name, OBJECT(icp), errp);
-        icp->xics = xics;
-    }
+    monitor_printf(mon, "CPU %d XIRR=%08x (%p) PP=%02x MFRR=%02x\n",
+                   cpu_index, icp->xirr, icp->xirr_owner,
+                   icp->pending_priority, icp->mfrr);
 }
 
-static void xics_prop_get_nr_servers(Object *obj, Visitor *v,
-                                     const char *name, void *opaque,
-                                     Error **errp)
+void ics_pic_print_info(ICSState *ics, Monitor *mon)
 {
-    XICSState *xics = XICS_COMMON(obj);
-    int64_t value = xics->nr_servers;
-
-    visit_type_int(v, name, &value, errp);
-}
+    uint32_t i;
 
-static void xics_prop_set_nr_servers(Object *obj, Visitor *v,
-                                     const char *name, void *opaque,
-                                     Error **errp)
-{
-    XICSState *xics = XICS_COMMON(obj);
-    XICSStateClass *xsc = XICS_COMMON_GET_CLASS(xics);
-    Error *error = NULL;
-    int64_t value;
+    monitor_printf(mon, "ICS %4x..%4x %p\n",
+                   ics->offset, ics->offset + ics->nr_irqs - 1, ics);
 
-    visit_type_int(v, name, &value, &error);
-    if (error) {
-        error_propagate(errp, error);
+    if (!ics->irqs) {
         return;
     }
-    if (xics->nr_servers) {
-        error_setg(errp, "Number of servers is already set to %u",
-                   xics->nr_servers);
-        return;
-    }
-
-    assert(xsc->set_nr_servers);
-    xsc->set_nr_servers(xics, value, errp);
-}
-
-static void xics_common_initfn(Object *obj)
-{
-    XICSState *xics = XICS_COMMON(obj);
 
-    QLIST_INIT(&xics->ics);
-    object_property_add(obj, "nr_irqs", "int",
-                        xics_prop_get_nr_irqs, xics_prop_set_nr_irqs,
-                        NULL, NULL, NULL);
-    object_property_add(obj, "nr_servers", "int",
-                        xics_prop_get_nr_servers, xics_prop_set_nr_servers,
-                        NULL, NULL, NULL);
-}
-
-static void xics_common_class_init(ObjectClass *oc, void *data)
-{
-    DeviceClass *dc = DEVICE_CLASS(oc);
-    InterruptStatsProviderClass *ic = INTERRUPT_STATS_PROVIDER_CLASS(oc);
+    for (i = 0; i < ics->nr_irqs; i++) {
+        ICSIRQState *irq = ics->irqs + i;
 
-    dc->reset = xics_common_reset;
-    ic->print_info = xics_common_pic_print_info;
+        if (!(irq->flags & XICS_FLAGS_IRQ_MASK)) {
+            continue;
+        }
+        monitor_printf(mon, "  %4x %s %02x %02x\n",
+                       ics->offset + i,
+                       (irq->flags & XICS_FLAGS_IRQ_LSI) ?
+                       "LSI" : "MSI",
+                       irq->priority, irq->status);
+    }
 }
 
-static const TypeInfo xics_common_info = {
-    .name          = TYPE_XICS_COMMON,
-    .parent        = TYPE_SYS_BUS_DEVICE,
-    .instance_size = sizeof(XICSState),
-    .class_size    = sizeof(XICSStateClass),
-    .instance_init = xics_common_initfn,
-    .class_init    = xics_common_class_init,
-    .interfaces = (InterfaceInfo[]) {
-        { TYPE_INTERRUPT_STATS_PROVIDER },
-        { }
-    },
-};
-
 /*
  * ICP: Presentation layer
  */
@@ -278,8 +137,8 @@ static const TypeInfo xics_common_info = {
 #define XISR_MASK  0x00ffffff
 #define CPPR_MASK  0xff000000
 
-#define XISR(ss)   (((ss)->xirr) & XISR_MASK)
-#define CPPR(ss)   (((ss)->xirr) >> 24)
+#define XISR(icp)   (((icp)->xirr) & XISR_MASK)
+#define CPPR(icp)   (((icp)->xirr) >> 24)
 
 static void ics_reject(ICSState *ics, uint32_t nr)
 {
@@ -290,7 +149,7 @@ static void ics_reject(ICSState *ics, uint32_t nr)
     }
 }
 
-static void ics_resend(ICSState *ics)
+void ics_resend(ICSState *ics)
 {
     ICSStateClass *k = ICS_BASE_GET_CLASS(ics);
 
@@ -308,151 +167,152 @@ static void ics_eoi(ICSState *ics, int nr)
     }
 }
 
-static void icp_check_ipi(ICPState *ss)
+static void icp_check_ipi(ICPState *icp)
 {
-    if (XISR(ss) && (ss->pending_priority <= ss->mfrr)) {
+    if (XISR(icp) && (icp->pending_priority <= icp->mfrr)) {
         return;
     }
 
-    trace_xics_icp_check_ipi(ss->cs->cpu_index, ss->mfrr);
+    trace_xics_icp_check_ipi(icp->cs->cpu_index, icp->mfrr);
 
-    if (XISR(ss) && ss->xirr_owner) {
-        ics_reject(ss->xirr_owner, XISR(ss));
+    if (XISR(icp) && icp->xirr_owner) {
+        ics_reject(icp->xirr_owner, XISR(icp));
     }
 
-    ss->xirr = (ss->xirr & ~XISR_MASK) | XICS_IPI;
-    ss->pending_priority = ss->mfrr;
-    ss->xirr_owner = NULL;
-    qemu_irq_raise(ss->output);
+    icp->xirr = (icp->xirr & ~XISR_MASK) | XICS_IPI;
+    icp->pending_priority = icp->mfrr;
+    icp->xirr_owner = NULL;
+    qemu_irq_raise(icp->output);
 }
 
-static void icp_resend(ICPState *ss)
+void icp_resend(ICPState *icp)
 {
-    ICSState *ics;
+    XICSFabric *xi = icp->xics;
+    XICSFabricClass *xic = XICS_FABRIC_GET_CLASS(xi);
 
-    if (ss->mfrr < CPPR(ss)) {
-        icp_check_ipi(ss);
-    }
-    QLIST_FOREACH(ics, &ss->xics->ics, list) {
-        ics_resend(ics);
+    if (icp->mfrr < CPPR(icp)) {
+        icp_check_ipi(icp);
     }
+
+    xic->ics_resend(xi);
 }
 
-void icp_set_cppr(ICPState *ss, uint8_t cppr)
+void icp_set_cppr(ICPState *icp, uint8_t cppr)
 {
     uint8_t old_cppr;
     uint32_t old_xisr;
 
-    old_cppr = CPPR(ss);
-    ss->xirr = (ss->xirr & ~CPPR_MASK) | (cppr << 24);
+    old_cppr = CPPR(icp);
+    icp->xirr = (icp->xirr & ~CPPR_MASK) | (cppr << 24);
 
     if (cppr < old_cppr) {
-        if (XISR(ss) && (cppr <= ss->pending_priority)) {
-            old_xisr = XISR(ss);
-            ss->xirr &= ~XISR_MASK; /* Clear XISR */
-            ss->pending_priority = 0xff;
-            qemu_irq_lower(ss->output);
-            if (ss->xirr_owner) {
-                ics_reject(ss->xirr_owner, old_xisr);
-                ss->xirr_owner = NULL;
+        if (XISR(icp) && (cppr <= icp->pending_priority)) {
+            old_xisr = XISR(icp);
+            icp->xirr &= ~XISR_MASK; /* Clear XISR */
+            icp->pending_priority = 0xff;
+            qemu_irq_lower(icp->output);
+            if (icp->xirr_owner) {
+                ics_reject(icp->xirr_owner, old_xisr);
+                icp->xirr_owner = NULL;
             }
         }
     } else {
-        if (!XISR(ss)) {
-            icp_resend(ss);
+        if (!XISR(icp)) {
+            icp_resend(icp);
         }
     }
 }
 
-void icp_set_mfrr(ICPState *ss, uint8_t mfrr)
+void icp_set_mfrr(ICPState *icp, uint8_t mfrr)
 {
-    ss->mfrr = mfrr;
-    if (mfrr < CPPR(ss)) {
-        icp_check_ipi(ss);
+    icp->mfrr = mfrr;
+    if (mfrr < CPPR(icp)) {
+        icp_check_ipi(icp);
     }
 }
 
-uint32_t icp_accept(ICPState *ss)
+uint32_t icp_accept(ICPState *icp)
 {
-    uint32_t xirr = ss->xirr;
+    uint32_t xirr = icp->xirr;
 
-    qemu_irq_lower(ss->output);
-    ss->xirr = ss->pending_priority << 24;
-    ss->pending_priority = 0xff;
-    ss->xirr_owner = NULL;
+    qemu_irq_lower(icp->output);
+    icp->xirr = icp->pending_priority << 24;
+    icp->pending_priority = 0xff;
+    icp->xirr_owner = NULL;
 
-    trace_xics_icp_accept(xirr, ss->xirr);
+    trace_xics_icp_accept(xirr, icp->xirr);
 
     return xirr;
 }
 
-uint32_t icp_ipoll(ICPState *ss, uint32_t *mfrr)
+uint32_t icp_ipoll(ICPState *icp, uint32_t *mfrr)
 {
     if (mfrr) {
-        *mfrr = ss->mfrr;
+        *mfrr = icp->mfrr;
     }
-    return ss->xirr;
+    return icp->xirr;
 }
 
-void icp_eoi(ICPState *ss, uint32_t xirr)
+void icp_eoi(ICPState *icp, uint32_t xirr)
 {
+    XICSFabric *xi = icp->xics;
+    XICSFabricClass *xic = XICS_FABRIC_GET_CLASS(xi);
     ICSState *ics;
     uint32_t irq;
 
     /* Send EOI -> ICS */
-    ss->xirr = (ss->xirr & ~CPPR_MASK) | (xirr & CPPR_MASK);
-    trace_xics_icp_eoi(ss->cs->cpu_index, xirr, ss->xirr);
+    icp->xirr = (icp->xirr & ~CPPR_MASK) | (xirr & CPPR_MASK);
+    trace_xics_icp_eoi(icp->cs->cpu_index, xirr, icp->xirr);
     irq = xirr & XISR_MASK;
-    QLIST_FOREACH(ics, &ss->xics->ics, list) {
-        if (ics_valid_irq(ics, irq)) {
-            ics_eoi(ics, irq);
-        }
+
+    ics = xic->ics_get(xi, irq);
+    if (ics) {
+        ics_eoi(ics, irq);
     }
-    if (!XISR(ss)) {
-        icp_resend(ss);
+    if (!XISR(icp)) {
+        icp_resend(icp);
     }
 }
 
 static void icp_irq(ICSState *ics, int server, int nr, uint8_t priority)
 {
-    XICSState *xics = ics->xics;
-    ICPState *ss = xics->ss + server;
+    ICPState *icp = xics_icp_get(ics->xics, server);
 
     trace_xics_icp_irq(server, nr, priority);
 
-    if ((priority >= CPPR(ss))
-        || (XISR(ss) && (ss->pending_priority <= priority))) {
+    if ((priority >= CPPR(icp))
+        || (XISR(icp) && (icp->pending_priority <= priority))) {
         ics_reject(ics, nr);
     } else {
-        if (XISR(ss) && ss->xirr_owner) {
-            ics_reject(ss->xirr_owner, XISR(ss));
-            ss->xirr_owner = NULL;
+        if (XISR(icp) && icp->xirr_owner) {
+            ics_reject(icp->xirr_owner, XISR(icp));
+            icp->xirr_owner = NULL;
         }
-        ss->xirr = (ss->xirr & ~XISR_MASK) | (nr & XISR_MASK);
-        ss->xirr_owner = ics;
-        ss->pending_priority = priority;
-        trace_xics_icp_raise(ss->xirr, ss->pending_priority);
-        qemu_irq_raise(ss->output);
+        icp->xirr = (icp->xirr & ~XISR_MASK) | (nr & XISR_MASK);
+        icp->xirr_owner = ics;
+        icp->pending_priority = priority;
+        trace_xics_icp_raise(icp->xirr, icp->pending_priority);
+        qemu_irq_raise(icp->output);
     }
 }
 
 static void icp_dispatch_pre_save(void *opaque)
 {
-    ICPState *ss = opaque;
-    ICPStateClass *info = ICP_GET_CLASS(ss);
+    ICPState *icp = opaque;
+    ICPStateClass *info = ICP_GET_CLASS(icp);
 
     if (info->pre_save) {
-        info->pre_save(ss);
+        info->pre_save(icp);
     }
 }
 
 static int icp_dispatch_post_load(void *opaque, int version_id)
 {
-    ICPState *ss = opaque;
-    ICPStateClass *info = ICP_GET_CLASS(ss);
+    ICPState *icp = opaque;
+    ICPStateClass *info = ICP_GET_CLASS(icp);
 
     if (info->post_load) {
-        return info->post_load(ss, version_id);
+        return info->post_load(icp, version_id);
     }
 
     return 0;
@@ -485,12 +345,30 @@ static void icp_reset(DeviceState *dev)
     qemu_set_irq(icp->output, 0);
 }
 
+static void icp_realize(DeviceState *dev, Error **errp)
+{
+    ICPState *icp = ICP(dev);
+    Object *obj;
+    Error *err = NULL;
+
+    obj = object_property_get_link(OBJECT(dev), "xics", &err);
+    if (!obj) {
+        error_setg(errp, "%s: required link 'xics' not found: %s",
+                   __func__, error_get_pretty(err));
+        return;
+    }
+
+    icp->xics = XICS_FABRIC(obj);
+}
+
+
 static void icp_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
 
     dc->reset = icp_reset;
     dc->vmsd = &vmstate_icp_server;
+    dc->realize = icp_realize;
 }
 
 static const TypeInfo icp_info = {
@@ -663,17 +541,6 @@ static void ics_simple_reset(DeviceState *dev)
     }
 }
 
-static int ics_simple_post_load(ICSState *ics, int version_id)
-{
-    int i;
-
-    for (i = 0; i < ics->xics->nr_servers; i++) {
-        icp_resend(&ics->xics->ss[i]);
-    }
-
-    return 0;
-}
-
 static void ics_simple_dispatch_pre_save(void *opaque)
 {
     ICSState *ics = opaque;
@@ -746,15 +613,20 @@ static void ics_simple_realize(DeviceState *dev, Error **errp)
     ics->qirqs = qemu_allocate_irqs(ics_simple_set_irq, ics, ics->nr_irqs);
 }
 
+static Property ics_simple_properties[] = {
+    DEFINE_PROP_UINT32("nr-irqs", ICSState, nr_irqs, 0),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
 static void ics_simple_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
     ICSStateClass *isc = ICS_BASE_CLASS(klass);
 
-    dc->realize = ics_simple_realize;
+    isc->realize = ics_simple_realize;
+    dc->props = ics_simple_properties;
     dc->vmsd = &vmstate_ics_simple;
     dc->reset = ics_simple_reset;
-    isc->post_load = ics_simple_post_load;
     isc->reject = ics_simple_reject;
     isc->resend = ics_simple_resend;
     isc->eoi = ics_simple_eoi;
@@ -769,38 +641,69 @@ static const TypeInfo ics_simple_info = {
     .instance_init = ics_simple_initfn,
 };
 
+static void ics_base_realize(DeviceState *dev, Error **errp)
+{
+    ICSStateClass *icsc = ICS_BASE_GET_CLASS(dev);
+    ICSState *ics = ICS_BASE(dev);
+    Object *obj;
+    Error *err = NULL;
+
+    obj = object_property_get_link(OBJECT(dev), "xics", &err);
+    if (!obj) {
+        error_setg(errp, "%s: required link 'xics' not found: %s",
+                   __func__, error_get_pretty(err));
+        return;
+    }
+    ics->xics = XICS_FABRIC(obj);
+
+
+    if (icsc->realize) {
+        icsc->realize(dev, errp);
+    }
+}
+
+static void ics_base_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+
+    dc->realize = ics_base_realize;
+}
+
 static const TypeInfo ics_base_info = {
     .name = TYPE_ICS_BASE,
     .parent = TYPE_DEVICE,
     .abstract = true,
     .instance_size = sizeof(ICSState),
+    .class_init = ics_base_class_init,
     .class_size = sizeof(ICSStateClass),
 };
 
+static const TypeInfo xics_fabric_info = {
+    .name = TYPE_XICS_FABRIC,
+    .parent = TYPE_INTERFACE,
+    .class_size = sizeof(XICSFabricClass),
+};
+
 /*
  * Exported functions
  */
-ICSState *xics_find_source(XICSState *xics, int irq)
+qemu_irq xics_get_qirq(XICSFabric *xi, int irq)
 {
-    ICSState *ics;
+    XICSFabricClass *xic = XICS_FABRIC_GET_CLASS(xi);
+    ICSState *ics = xic->ics_get(xi, irq);
 
-    QLIST_FOREACH(ics, &xics->ics, list) {
-        if (ics_valid_irq(ics, irq)) {
-            return ics;
-        }
+    if (ics) {
+        return ics->qirqs[irq - ics->offset];
     }
+
     return NULL;
 }
 
-qemu_irq xics_get_qirq(XICSState *xics, int irq)
+ICPState *xics_icp_get(XICSFabric *xi, int server)
 {
-    ICSState *ics = xics_find_source(xics, irq);
+    XICSFabricClass *xic = XICS_FABRIC_GET_CLASS(xi);
 
-    if (ics) {
-        return ics->qirqs[irq - ics->offset];
-    }
-
-    return NULL;
+    return xic->icp_get(xi, server);
 }
 
 void ics_set_irq_type(ICSState *ics, int srcno, bool lsi)
@@ -813,10 +716,10 @@ void ics_set_irq_type(ICSState *ics, int srcno, bool lsi)
 
 static void xics_register_types(void)
 {
-    type_register_static(&xics_common_info);
     type_register_static(&ics_simple_info);
     type_register_static(&ics_base_info);
     type_register_static(&icp_info);
+    type_register_static(&xics_fabric_info);
 }
 
 type_init(xics_register_types)
diff --git a/hw/intc/xics_kvm.c b/hw/intc/xics_kvm.c
index 17694eaa87..0a3daca3bb 100644
--- a/hw/intc/xics_kvm.c
+++ b/hw/intc/xics_kvm.c
@@ -40,16 +40,12 @@
 
 #include <sys/ioctl.h>
 
-typedef struct KVMXICSState {
-    XICSState parent_obj;
-
-    int kernel_xics_fd;
-} KVMXICSState;
+static int kernel_xics_fd = -1;
 
 /*
  * ICP-KVM
  */
-static void icp_get_kvm_state(ICPState *ss)
+static void icp_get_kvm_state(ICPState *icp)
 {
     uint64_t state;
     struct kvm_one_reg reg = {
@@ -59,25 +55,25 @@ static void icp_get_kvm_state(ICPState *ss)
     int ret;
 
     /* ICP for this CPU thread is not in use, exiting */
-    if (!ss->cs) {
+    if (!icp->cs) {
         return;
     }
 
-    ret = kvm_vcpu_ioctl(ss->cs, KVM_GET_ONE_REG, &reg);
+    ret = kvm_vcpu_ioctl(icp->cs, KVM_GET_ONE_REG, &reg);
     if (ret != 0) {
         error_report("Unable to retrieve KVM interrupt controller state"
-                " for CPU %ld: %s", kvm_arch_vcpu_id(ss->cs), strerror(errno));
+                " for CPU %ld: %s", kvm_arch_vcpu_id(icp->cs), strerror(errno));
         exit(1);
     }
 
-    ss->xirr = state >> KVM_REG_PPC_ICP_XISR_SHIFT;
-    ss->mfrr = (state >> KVM_REG_PPC_ICP_MFRR_SHIFT)
+    icp->xirr = state >> KVM_REG_PPC_ICP_XISR_SHIFT;
+    icp->mfrr = (state >> KVM_REG_PPC_ICP_MFRR_SHIFT)
         & KVM_REG_PPC_ICP_MFRR_MASK;
-    ss->pending_priority = (state >> KVM_REG_PPC_ICP_PPRI_SHIFT)
+    icp->pending_priority = (state >> KVM_REG_PPC_ICP_PPRI_SHIFT)
         & KVM_REG_PPC_ICP_PPRI_MASK;
 }
 
-static int icp_set_kvm_state(ICPState *ss, int version_id)
+static int icp_set_kvm_state(ICPState *icp, int version_id)
 {
     uint64_t state;
     struct kvm_one_reg reg = {
@@ -87,18 +83,18 @@ static int icp_set_kvm_state(ICPState *ss, int version_id)
     int ret;
 
     /* ICP for this CPU thread is not in use, exiting */
-    if (!ss->cs) {
+    if (!icp->cs) {
         return 0;
     }
 
-    state = ((uint64_t)ss->xirr << KVM_REG_PPC_ICP_XISR_SHIFT)
-        | ((uint64_t)ss->mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT)
-        | ((uint64_t)ss->pending_priority << KVM_REG_PPC_ICP_PPRI_SHIFT);
+    state = ((uint64_t)icp->xirr << KVM_REG_PPC_ICP_XISR_SHIFT)
+        | ((uint64_t)icp->mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT)
+        | ((uint64_t)icp->pending_priority << KVM_REG_PPC_ICP_PPRI_SHIFT);
 
-    ret = kvm_vcpu_ioctl(ss->cs, KVM_SET_ONE_REG, &reg);
+    ret = kvm_vcpu_ioctl(icp->cs, KVM_SET_ONE_REG, &reg);
     if (ret != 0) {
         error_report("Unable to restore KVM interrupt controller state (0x%"
-                PRIx64 ") for CPU %ld: %s", state, kvm_arch_vcpu_id(ss->cs),
+                PRIx64 ") for CPU %ld: %s", state, kvm_arch_vcpu_id(icp->cs),
                 strerror(errno));
         return ret;
     }
@@ -122,6 +118,34 @@ static void icp_kvm_reset(DeviceState *dev)
     icp_set_kvm_state(icp, 1);
 }
 
+static void icp_kvm_cpu_setup(ICPState *icp, PowerPCCPU *cpu)
+{
+    CPUState *cs = CPU(cpu);
+    int ret;
+
+    if (kernel_xics_fd == -1) {
+        abort();
+    }
+
+    /*
+     * If we are reusing a parked vCPU fd corresponding to the CPU
+     * which was hot-removed earlier we don't have to renable
+     * KVM_CAP_IRQ_XICS capability again.
+     */
+    if (icp->cap_irq_xics_enabled) {
+        return;
+    }
+
+    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_IRQ_XICS, 0, kernel_xics_fd,
+                              kvm_arch_vcpu_id(cs));
+    if (ret < 0) {
+        error_report("Unable to connect CPU%ld to kernel XICS: %s",
+                     kvm_arch_vcpu_id(cs), strerror(errno));
+        exit(1);
+    }
+    icp->cap_irq_xics_enabled = true;
+}
+
 static void icp_kvm_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
@@ -130,6 +154,7 @@ static void icp_kvm_class_init(ObjectClass *klass, void *data)
     dc->reset = icp_kvm_reset;
     icpc->pre_save = icp_get_kvm_state;
     icpc->post_load = icp_set_kvm_state;
+    icpc->cpu_setup = icp_kvm_cpu_setup;
 }
 
 static const TypeInfo icp_kvm_info = {
@@ -145,7 +170,6 @@ static const TypeInfo icp_kvm_info = {
  */
 static void ics_get_kvm_state(ICSState *ics)
 {
-    KVMXICSState *xicskvm = XICS_SPAPR_KVM(ics->xics);
     uint64_t state;
     struct kvm_device_attr attr = {
         .flags = 0,
@@ -160,7 +184,7 @@ static void ics_get_kvm_state(ICSState *ics)
 
         attr.attr = i + ics->offset;
 
-        ret = ioctl(xicskvm->kernel_xics_fd, KVM_GET_DEVICE_ATTR, &attr);
+        ret = ioctl(kernel_xics_fd, KVM_GET_DEVICE_ATTR, &attr);
         if (ret != 0) {
             error_report("Unable to retrieve KVM interrupt controller state"
                     " for IRQ %d: %s", i + ics->offset, strerror(errno));
@@ -204,7 +228,6 @@ static void ics_get_kvm_state(ICSState *ics)
 
 static int ics_set_kvm_state(ICSState *ics, int version_id)
 {
-    KVMXICSState *xicskvm = XICS_SPAPR_KVM(ics->xics);
     uint64_t state;
     struct kvm_device_attr attr = {
         .flags = 0,
@@ -238,7 +261,7 @@ static int ics_set_kvm_state(ICSState *ics, int version_id)
             }
         }
 
-        ret = ioctl(xicskvm->kernel_xics_fd, KVM_SET_DEVICE_ATTR, &attr);
+        ret = ioctl(kernel_xics_fd, KVM_SET_DEVICE_ATTR, &attr);
         if (ret != 0) {
             error_report("Unable to restore KVM interrupt controller state"
                     " for IRQs %d: %s", i + ics->offset, strerror(errno));
@@ -308,7 +331,7 @@ static void ics_kvm_class_init(ObjectClass *klass, void *data)
     DeviceClass *dc = DEVICE_CLASS(klass);
     ICSStateClass *icsc = ICS_BASE_CLASS(klass);
 
-    dc->realize = ics_kvm_realize;
+    icsc->realize = ics_kvm_realize;
     dc->reset = ics_kvm_reset;
     icsc->pre_save = ics_get_kvm_state;
     icsc->post_load = ics_set_kvm_state;
@@ -324,57 +347,6 @@ static const TypeInfo ics_kvm_info = {
 /*
  * XICS-KVM
  */
-static void xics_kvm_cpu_setup(XICSState *xics, PowerPCCPU *cpu)
-{
-    CPUState *cs;
-    ICPState *ss;
-    KVMXICSState *xicskvm = XICS_SPAPR_KVM(xics);
-    int ret;
-
-    cs = CPU(cpu);
-    ss = &xics->ss[cs->cpu_index];
-
-    assert(cs->cpu_index < xics->nr_servers);
-    if (xicskvm->kernel_xics_fd == -1) {
-        abort();
-    }
-
-    /*
-     * If we are reusing a parked vCPU fd corresponding to the CPU
-     * which was hot-removed earlier we don't have to renable
-     * KVM_CAP_IRQ_XICS capability again.
-     */
-    if (ss->cap_irq_xics_enabled) {
-        return;
-    }
-
-    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_IRQ_XICS, 0, xicskvm->kernel_xics_fd,
-                              kvm_arch_vcpu_id(cs));
-    if (ret < 0) {
-        error_report("Unable to connect CPU%ld to kernel XICS: %s",
-                     kvm_arch_vcpu_id(cs), strerror(errno));
-        exit(1);
-    }
-    ss->cap_irq_xics_enabled = true;
-}
-
-static void xics_kvm_set_nr_irqs(XICSState *xics, uint32_t nr_irqs,
-                                 Error **errp)
-{
-    ICSState *ics = QLIST_FIRST(&xics->ics);
-
-    /* This needs to be deprecated ... */
-    xics->nr_irqs = nr_irqs;
-    if (ics) {
-        ics->nr_irqs = nr_irqs;
-    }
-}
-
-static void xics_kvm_set_nr_servers(XICSState *xics, uint32_t nr_servers,
-                                    Error **errp)
-{
-    xics_set_nr_servers(xics, nr_servers, TYPE_KVM_ICP, errp);
-}
 
 static void rtas_dummy(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                        uint32_t token,
@@ -385,13 +357,9 @@ static void rtas_dummy(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                  __func__);
 }
 
-static void xics_kvm_realize(DeviceState *dev, Error **errp)
+int xics_kvm_init(sPAPRMachineState *spapr, Error **errp)
 {
-    KVMXICSState *xicskvm = XICS_SPAPR_KVM(dev);
-    XICSState *xics = XICS_COMMON(dev);
-    ICSState *ics;
-    int i, rc;
-    Error *error = NULL;
+    int rc;
     struct kvm_create_device xics_create_device = {
         .type = KVM_DEV_TYPE_XICS,
         .flags = 0,
@@ -439,72 +407,24 @@ static void xics_kvm_realize(DeviceState *dev, Error **errp)
         goto fail;
     }
 
-    xicskvm->kernel_xics_fd = xics_create_device.fd;
-
-    QLIST_FOREACH(ics, &xics->ics, list) {
-        object_property_set_bool(OBJECT(ics), true, "realized", &error);
-        if (error) {
-            error_propagate(errp, error);
-            goto fail;
-        }
-    }
-
-    assert(xics->nr_servers);
-    for (i = 0; i < xics->nr_servers; i++) {
-        object_property_set_bool(OBJECT(&xics->ss[i]), true, "realized",
-                                 &error);
-        if (error) {
-            error_propagate(errp, error);
-            goto fail;
-        }
-    }
+    kernel_xics_fd = xics_create_device.fd;
 
     kvm_kernel_irqchip = true;
     kvm_msi_via_irqfd_allowed = true;
     kvm_gsi_direct_mapping = true;
 
-    return;
+    return rc;
 
 fail:
     kvmppc_define_rtas_kernel_token(0, "ibm,set-xive");
     kvmppc_define_rtas_kernel_token(0, "ibm,get-xive");
     kvmppc_define_rtas_kernel_token(0, "ibm,int-on");
     kvmppc_define_rtas_kernel_token(0, "ibm,int-off");
+    return -1;
 }
 
-static void xics_kvm_initfn(Object *obj)
-{
-    XICSState *xics = XICS_COMMON(obj);
-    ICSState *ics;
-
-    ics = ICS_SIMPLE(object_new(TYPE_ICS_KVM));
-    object_property_add_child(obj, "ics", OBJECT(ics), NULL);
-    ics->xics = xics;
-    QLIST_INSERT_HEAD(&xics->ics, ics, list);
-}
-
-static void xics_kvm_class_init(ObjectClass *oc, void *data)
-{
-    DeviceClass *dc = DEVICE_CLASS(oc);
-    XICSStateClass *xsc = XICS_COMMON_CLASS(oc);
-
-    dc->realize = xics_kvm_realize;
-    xsc->cpu_setup = xics_kvm_cpu_setup;
-    xsc->set_nr_irqs = xics_kvm_set_nr_irqs;
-    xsc->set_nr_servers = xics_kvm_set_nr_servers;
-}
-
-static const TypeInfo xics_spapr_kvm_info = {
-    .name          = TYPE_XICS_SPAPR_KVM,
-    .parent        = TYPE_XICS_COMMON,
-    .instance_size = sizeof(KVMXICSState),
-    .class_init    = xics_kvm_class_init,
-    .instance_init = xics_kvm_initfn,
-};
-
 static void xics_kvm_register_types(void)
 {
-    type_register_static(&xics_spapr_kvm_info);
     type_register_static(&ics_kvm_info);
     type_register_static(&icp_kvm_info);
 }
diff --git a/hw/intc/xics_spapr.c b/hw/intc/xics_spapr.c
index 2e3f1c5e95..84d24b2837 100644
--- a/hw/intc/xics_spapr.c
+++ b/hw/intc/xics_spapr.c
@@ -44,7 +44,7 @@ static target_ulong h_cppr(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                            target_ulong opcode, target_ulong *args)
 {
     CPUState *cs = CPU(cpu);
-    ICPState *icp = &spapr->xics->ss[cs->cpu_index];
+    ICPState *icp = xics_icp_get(XICS_FABRIC(spapr), cs->cpu_index);
     target_ulong cppr = args[0];
 
     icp_set_cppr(icp, cppr);
@@ -56,12 +56,13 @@ static target_ulong h_ipi(PowerPCCPU *cpu, sPAPRMachineState *spapr,
 {
     target_ulong server = xics_get_cpu_index_by_dt_id(args[0]);
     target_ulong mfrr = args[1];
+    ICPState *icp = xics_icp_get(XICS_FABRIC(spapr), server);
 
-    if (server >= spapr->xics->nr_servers) {
+    if (!icp) {
         return H_PARAMETER;
     }
 
-    icp_set_mfrr(spapr->xics->ss + server, mfrr);
+    icp_set_mfrr(icp, mfrr);
     return H_SUCCESS;
 }
 
@@ -69,7 +70,7 @@ static target_ulong h_xirr(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                            target_ulong opcode, target_ulong *args)
 {
     CPUState *cs = CPU(cpu);
-    ICPState *icp = &spapr->xics->ss[cs->cpu_index];
+    ICPState *icp = xics_icp_get(XICS_FABRIC(spapr), cs->cpu_index);
     uint32_t xirr = icp_accept(icp);
 
     args[0] = xirr;
@@ -80,7 +81,7 @@ static target_ulong h_xirr_x(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                              target_ulong opcode, target_ulong *args)
 {
     CPUState *cs = CPU(cpu);
-    ICPState *icp = &spapr->xics->ss[cs->cpu_index];
+    ICPState *icp = xics_icp_get(XICS_FABRIC(spapr), cs->cpu_index);
     uint32_t xirr = icp_accept(icp);
 
     args[0] = xirr;
@@ -92,7 +93,7 @@ static target_ulong h_eoi(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                           target_ulong opcode, target_ulong *args)
 {
     CPUState *cs = CPU(cpu);
-    ICPState *icp = &spapr->xics->ss[cs->cpu_index];
+    ICPState *icp = xics_icp_get(XICS_FABRIC(spapr), cs->cpu_index);
     target_ulong xirr = args[0];
 
     icp_eoi(icp, xirr);
@@ -103,7 +104,7 @@ static target_ulong h_ipoll(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                             target_ulong opcode, target_ulong *args)
 {
     CPUState *cs = CPU(cpu);
-    ICPState *icp = &spapr->xics->ss[cs->cpu_index];
+    ICPState *icp = xics_icp_get(XICS_FABRIC(spapr), cs->cpu_index);
     uint32_t mfrr;
     uint32_t xirr = icp_ipoll(icp, &mfrr);
 
@@ -118,7 +119,7 @@ static void rtas_set_xive(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                           uint32_t nargs, target_ulong args,
                           uint32_t nret, target_ulong rets)
 {
-    ICSState *ics = QLIST_FIRST(&spapr->xics->ics);
+    ICSState *ics = spapr->ics;
     uint32_t nr, srcno, server, priority;
 
     if ((nargs != 3) || (nret != 1)) {
@@ -134,7 +135,7 @@ static void rtas_set_xive(PowerPCCPU *cpu, sPAPRMachineState *spapr,
     server = xics_get_cpu_index_by_dt_id(rtas_ld(args, 1));
     priority = rtas_ld(args, 2);
 
-    if (!ics_valid_irq(ics, nr) || (server >= ics->xics->nr_servers)
+    if (!ics_valid_irq(ics, nr) || !xics_icp_get(XICS_FABRIC(spapr), server)
         || (priority > 0xff)) {
         rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
         return;
@@ -151,7 +152,7 @@ static void rtas_get_xive(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                           uint32_t nargs, target_ulong args,
                           uint32_t nret, target_ulong rets)
 {
-    ICSState *ics = QLIST_FIRST(&spapr->xics->ics);
+    ICSState *ics = spapr->ics;
     uint32_t nr, srcno;
 
     if ((nargs != 1) || (nret != 3)) {
@@ -181,7 +182,7 @@ static void rtas_int_off(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                          uint32_t nargs, target_ulong args,
                          uint32_t nret, target_ulong rets)
 {
-    ICSState *ics = QLIST_FIRST(&spapr->xics->ics);
+    ICSState *ics = spapr->ics;
     uint32_t nr, srcno;
 
     if ((nargs != 1) || (nret != 1)) {
@@ -212,7 +213,7 @@ static void rtas_int_on(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                         uint32_t nargs, target_ulong args,
                         uint32_t nret, target_ulong rets)
 {
-    ICSState *ics = QLIST_FIRST(&spapr->xics->ics);
+    ICSState *ics = spapr->ics;
     uint32_t nr, srcno;
 
     if ((nargs != 1) || (nret != 1)) {
@@ -239,36 +240,8 @@ static void rtas_int_on(PowerPCCPU *cpu, sPAPRMachineState *spapr,
     rtas_st(rets, 0, RTAS_OUT_SUCCESS);
 }
 
-static void xics_spapr_set_nr_irqs(XICSState *xics, uint32_t nr_irqs,
-                                   Error **errp)
+int xics_spapr_init(sPAPRMachineState *spapr, Error **errp)
 {
-    ICSState *ics = QLIST_FIRST(&xics->ics);
-
-    /* This needs to be deprecated ... */
-    xics->nr_irqs = nr_irqs;
-    if (ics) {
-        ics->nr_irqs = nr_irqs;
-    }
-}
-
-static void xics_spapr_set_nr_servers(XICSState *xics, uint32_t nr_servers,
-                                      Error **errp)
-{
-    xics_set_nr_servers(xics, nr_servers, TYPE_ICP, errp);
-}
-
-static void xics_spapr_realize(DeviceState *dev, Error **errp)
-{
-    XICSState *xics = XICS_SPAPR(dev);
-    ICSState *ics;
-    Error *error = NULL;
-    int i;
-
-    if (!xics->nr_servers) {
-        error_setg(errp, "Number of servers needs to be greater 0");
-        return;
-    }
-
     /* Registration of global state belongs into realize */
     spapr_rtas_register(RTAS_IBM_SET_XIVE, "ibm,set-xive", rtas_set_xive);
     spapr_rtas_register(RTAS_IBM_GET_XIVE, "ibm,get-xive", rtas_get_xive);
@@ -281,55 +254,9 @@ static void xics_spapr_realize(DeviceState *dev, Error **errp)
     spapr_register_hypercall(H_XIRR_X, h_xirr_x);
     spapr_register_hypercall(H_EOI, h_eoi);
     spapr_register_hypercall(H_IPOLL, h_ipoll);
-
-    QLIST_FOREACH(ics, &xics->ics, list) {
-        object_property_set_bool(OBJECT(ics), true, "realized", &error);
-        if (error) {
-            error_propagate(errp, error);
-            return;
-        }
-    }
-
-    for (i = 0; i < xics->nr_servers; i++) {
-        object_property_set_bool(OBJECT(&xics->ss[i]), true, "realized",
-                                 &error);
-        if (error) {
-            error_propagate(errp, error);
-            return;
-        }
-    }
-}
-
-static void xics_spapr_initfn(Object *obj)
-{
-    XICSState *xics = XICS_SPAPR(obj);
-    ICSState *ics;
-
-    ics = ICS_SIMPLE(object_new(TYPE_ICS_SIMPLE));
-    object_property_add_child(obj, "ics", OBJECT(ics), NULL);
-    ics->xics = xics;
-    QLIST_INSERT_HEAD(&xics->ics, ics, list);
-}
-
-static void xics_spapr_class_init(ObjectClass *oc, void *data)
-{
-    DeviceClass *dc = DEVICE_CLASS(oc);
-    XICSStateClass *xsc = XICS_SPAPR_CLASS(oc);
-
-    dc->realize = xics_spapr_realize;
-    xsc->set_nr_irqs = xics_spapr_set_nr_irqs;
-    xsc->set_nr_servers = xics_spapr_set_nr_servers;
+    return 0;
 }
 
-static const TypeInfo xics_spapr_info = {
-    .name          = TYPE_XICS_SPAPR,
-    .parent        = TYPE_XICS_COMMON,
-    .instance_size = sizeof(XICSState),
-    .class_size = sizeof(XICSStateClass),
-    .class_init    = xics_spapr_class_init,
-    .instance_init = xics_spapr_initfn,
-};
-
 #define ICS_IRQ_FREE(ics, srcno)   \
     (!((ics)->irqs[(srcno)].flags & (XICS_FLAGS_IRQ_MASK)))
 
@@ -354,9 +281,8 @@ static int ics_find_free_block(ICSState *ics, int num, int alignnum)
     return -1;
 }
 
-int xics_spapr_alloc(XICSState *xics, int irq_hint, bool lsi, Error **errp)
+int spapr_ics_alloc(ICSState *ics, int irq_hint, bool lsi, Error **errp)
 {
-    ICSState *ics = QLIST_FIRST(&xics->ics);
     int irq;
 
     if (!ics) {
@@ -387,10 +313,9 @@ int xics_spapr_alloc(XICSState *xics, int irq_hint, bool lsi, Error **errp)
  * Allocate block of consecutive IRQs, and return the number of the first IRQ in
  * the block. If align==true, aligns the first IRQ number to num.
  */
-int xics_spapr_alloc_block(XICSState *xics, int num, bool lsi, bool align,
-                           Error **errp)
+int spapr_ics_alloc_block(ICSState *ics, int num, bool lsi,
+                          bool align, Error **errp)
 {
-    ICSState *ics = QLIST_FIRST(&xics->ics);
     int i, first = -1;
 
     if (!ics) {
@@ -440,20 +365,18 @@ static void ics_free(ICSState *ics, int srcno, int num)
     }
 }
 
-void xics_spapr_free(XICSState *xics, int irq, int num)
+void spapr_ics_free(ICSState *ics, int irq, int num)
 {
-    ICSState *ics = xics_find_source(xics, irq);
-
-    if (ics) {
+    if (ics_valid_irq(ics, irq)) {
         trace_xics_ics_free(0, irq, num);
         ics_free(ics, irq - ics->offset, num);
     }
 }
 
-void spapr_dt_xics(XICSState *xics, void *fdt, uint32_t phandle)
+void spapr_dt_xics(int nr_servers, void *fdt, uint32_t phandle)
 {
     uint32_t interrupt_server_ranges_prop[] = {
-        0, cpu_to_be32(xics->nr_servers),
+        0, cpu_to_be32(nr_servers),
     };
     int node;
 
@@ -470,10 +393,3 @@ void spapr_dt_xics(XICSState *xics, void *fdt, uint32_t phandle)
     _FDT(fdt_setprop_cell(fdt, node, "linux,phandle", phandle));
     _FDT(fdt_setprop_cell(fdt, node, "phandle", phandle));
 }
-
-static void xics_spapr_register_types(void)
-{
-    type_register_static(&xics_spapr_info);
-}
-
-type_init(xics_spapr_register_types)
diff --git a/hw/misc/Makefile.objs b/hw/misc/Makefile.objs
index 898e4ccfb1..c8b489390f 100644
--- a/hw/misc/Makefile.objs
+++ b/hw/misc/Makefile.objs
@@ -26,7 +26,7 @@ obj-$(CONFIG_IVSHMEM) += ivshmem.o
 obj-$(CONFIG_REALVIEW) += arm_sysctl.o
 obj-$(CONFIG_NSERIES) += cbus.o
 obj-$(CONFIG_ECCMEMCTL) += eccmemctl.o
-obj-$(CONFIG_EXYNOS4) += exynos4210_pmu.o
+obj-$(CONFIG_EXYNOS4) += exynos4210_pmu.o exynos4210_clk.o
 obj-$(CONFIG_IMX) += imx_ccm.o
 obj-$(CONFIG_IMX) += imx31_ccm.o
 obj-$(CONFIG_IMX) += imx25_ccm.o
@@ -42,6 +42,7 @@ obj-$(CONFIG_OMAP) += omap_sdrc.o
 obj-$(CONFIG_OMAP) += omap_tap.o
 obj-$(CONFIG_RASPI) += bcm2835_mbox.o
 obj-$(CONFIG_RASPI) += bcm2835_property.o
+obj-$(CONFIG_RASPI) += bcm2835_rng.o
 obj-$(CONFIG_SLAVIO) += slavio_misc.o
 obj-$(CONFIG_ZYNQ) += zynq_slcr.o
 obj-$(CONFIG_ZYNQ) += zynq-xadc.o
diff --git a/hw/misc/bcm2835_rng.c b/hw/misc/bcm2835_rng.c
new file mode 100644
index 0000000000..4d62143b24
--- /dev/null
+++ b/hw/misc/bcm2835_rng.c
@@ -0,0 +1,149 @@
+/*
+ * BCM2835 Random Number Generator emulation
+ *
+ * Copyright (C) 2017 Marcin Chojnacki <marcinch7@gmail.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/log.h"
+#include "qapi/error.h"
+#include "crypto/random.h"
+#include "hw/misc/bcm2835_rng.h"
+
+static uint32_t get_random_bytes(void)
+{
+    uint32_t res;
+    Error *err = NULL;
+
+    if (qcrypto_random_bytes((uint8_t *)&res, sizeof(res), &err) < 0) {
+        /* On failure we don't want to return the guest a non-random
+         * value in case they're really using it for cryptographic
+         * purposes, so the best we can do is die here.
+         * This shouldn't happen unless something's broken.
+         * In theory we could implement this device's full FIFO
+         * and interrupt semantics and then just stop filling the
+         * FIFO. That's a lot of work, though, so we assume any
+         * errors are systematic problems and trust that if we didn't
+         * fail as the guest inited then we won't fail later on
+         * mid-run.
+         */
+        error_report_err(err);
+        exit(1);
+    }
+    return res;
+}
+
+static uint64_t bcm2835_rng_read(void *opaque, hwaddr offset,
+                                 unsigned size)
+{
+    BCM2835RngState *s = (BCM2835RngState *)opaque;
+    uint32_t res = 0;
+
+    assert(size == 4);
+
+    switch (offset) {
+    case 0x0:    /* rng_ctrl */
+        res = s->rng_ctrl;
+        break;
+    case 0x4:    /* rng_status */
+        res = s->rng_status | (1 << 24);
+        break;
+    case 0x8:    /* rng_data */
+        res = get_random_bytes();
+        break;
+
+    default:
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "bcm2835_rng_read: Bad offset %x\n",
+                      (int)offset);
+        res = 0;
+        break;
+    }
+
+    return res;
+}
+
+static void bcm2835_rng_write(void *opaque, hwaddr offset,
+                              uint64_t value, unsigned size)
+{
+    BCM2835RngState *s = (BCM2835RngState *)opaque;
+
+    assert(size == 4);
+
+    switch (offset) {
+    case 0x0:    /* rng_ctrl */
+        s->rng_ctrl = value;
+        break;
+    case 0x4:    /* rng_status */
+        /* we shouldn't let the guest write to bits [31..20] */
+        s->rng_status &= ~0xFFFFF;        /* clear 20 lower bits */
+        s->rng_status |= value & 0xFFFFF; /* set them to new value */
+        break;
+
+    default:
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "bcm2835_rng_write: Bad offset %x\n",
+                      (int)offset);
+        break;
+    }
+}
+
+static const MemoryRegionOps bcm2835_rng_ops = {
+    .read = bcm2835_rng_read,
+    .write = bcm2835_rng_write,
+    .endianness = DEVICE_NATIVE_ENDIAN,
+};
+
+static const VMStateDescription vmstate_bcm2835_rng = {
+    .name = TYPE_BCM2835_RNG,
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT32(rng_ctrl, BCM2835RngState),
+        VMSTATE_UINT32(rng_status, BCM2835RngState),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static void bcm2835_rng_init(Object *obj)
+{
+    BCM2835RngState *s = BCM2835_RNG(obj);
+
+    memory_region_init_io(&s->iomem, obj, &bcm2835_rng_ops, s,
+                          TYPE_BCM2835_RNG, 0x10);
+    sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->iomem);
+}
+
+static void bcm2835_rng_reset(DeviceState *dev)
+{
+    BCM2835RngState *s = BCM2835_RNG(dev);
+
+    s->rng_ctrl = 0;
+    s->rng_status = 0;
+}
+
+static void bcm2835_rng_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+
+    dc->reset = bcm2835_rng_reset;
+    dc->vmsd = &vmstate_bcm2835_rng;
+}
+
+static TypeInfo bcm2835_rng_info = {
+    .name          = TYPE_BCM2835_RNG,
+    .parent        = TYPE_SYS_BUS_DEVICE,
+    .instance_size = sizeof(BCM2835RngState),
+    .class_init    = bcm2835_rng_class_init,
+    .instance_init = bcm2835_rng_init,
+};
+
+static void bcm2835_rng_register_types(void)
+{
+    type_register_static(&bcm2835_rng_info);
+}
+
+type_init(bcm2835_rng_register_types)
diff --git a/hw/misc/exynos4210_clk.c b/hw/misc/exynos4210_clk.c
new file mode 100644
index 0000000000..81862c0ada
--- /dev/null
+++ b/hw/misc/exynos4210_clk.c
@@ -0,0 +1,164 @@
+/*
+ *  Exynos4210 Clock Controller Emulation
+ *
+ *  Copyright (c) 2017 Krzysztof Kozlowski <krzk@kernel.org>
+ *
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "hw/sysbus.h"
+#include "qemu/log.h"
+
+#define TYPE_EXYNOS4210_CLK             "exynos4210.clk"
+#define EXYNOS4210_CLK(obj) \
+    OBJECT_CHECK(Exynos4210ClkState, (obj), TYPE_EXYNOS4210_CLK)
+
+#define CLK_PLL_LOCKED                  BIT(29)
+
+#define EXYNOS4210_CLK_REGS_MEM_SIZE    0x15104
+
+typedef struct Exynos4210Reg {
+    const char   *name; /* for debug only */
+    uint32_t     offset;
+    uint32_t     reset_value;
+} Exynos4210Reg;
+
+/* Clock controller register base: 0x10030000 */
+static const Exynos4210Reg exynos4210_clk_regs[] = {
+    {"EPLL_LOCK",                     0xc010, 0x00000fff},
+    {"VPLL_LOCK",                     0xc020, 0x00000fff},
+    {"EPLL_CON0",                     0xc110, 0x00300301 | CLK_PLL_LOCKED},
+    {"EPLL_CON1",                     0xc114, 0x00000000},
+    {"VPLL_CON0",                     0xc120, 0x00240201 | CLK_PLL_LOCKED},
+    {"VPLL_CON1",                     0xc124, 0x66010464},
+    {"APLL_LOCK",                    0x14000, 0x00000fff},
+    {"MPLL_LOCK",                    0x14004, 0x00000fff},
+    {"APLL_CON0",                    0x14100, 0x00c80601 | CLK_PLL_LOCKED},
+    {"APLL_CON1",                    0x14104, 0x0000001c},
+    {"MPLL_CON0",                    0x14108, 0x00c80601 | CLK_PLL_LOCKED},
+    {"MPLL_CON1",                    0x1410c, 0x0000001c},
+};
+
+#define EXYNOS4210_REGS_NUM       ARRAY_SIZE(exynos4210_clk_regs)
+
+typedef struct Exynos4210ClkState {
+    SysBusDevice parent_obj;
+
+    MemoryRegion iomem;
+    uint32_t reg[EXYNOS4210_REGS_NUM];
+} Exynos4210ClkState;
+
+static uint64_t exynos4210_clk_read(void *opaque, hwaddr offset,
+                                    unsigned size)
+{
+    const Exynos4210ClkState *s = (Exynos4210ClkState *)opaque;
+    const Exynos4210Reg *regs = exynos4210_clk_regs;
+    unsigned int i;
+
+    for (i = 0; i < EXYNOS4210_REGS_NUM; i++) {
+        if (regs->offset == offset) {
+            return s->reg[i];
+        }
+        regs++;
+    }
+    qemu_log_mask(LOG_GUEST_ERROR, "%s: bad read offset 0x%04x\n",
+                  __func__, (uint32_t)offset);
+    return 0;
+}
+
+static void exynos4210_clk_write(void *opaque, hwaddr offset,
+                                 uint64_t val, unsigned size)
+{
+    Exynos4210ClkState *s = (Exynos4210ClkState *)opaque;
+    const Exynos4210Reg *regs = exynos4210_clk_regs;
+    unsigned int i;
+
+    for (i = 0; i < EXYNOS4210_REGS_NUM; i++) {
+        if (regs->offset == offset) {
+            s->reg[i] = val;
+            return;
+        }
+        regs++;
+    }
+    qemu_log_mask(LOG_GUEST_ERROR, "%s: bad write offset 0x%04x\n",
+                  __func__, (uint32_t)offset);
+}
+
+static const MemoryRegionOps exynos4210_clk_ops = {
+    .read = exynos4210_clk_read,
+    .write = exynos4210_clk_write,
+    .endianness = DEVICE_NATIVE_ENDIAN,
+    .valid = {
+        .min_access_size = 4,
+        .max_access_size = 4,
+        .unaligned = false
+    }
+};
+
+static void exynos4210_clk_reset(DeviceState *dev)
+{
+    Exynos4210ClkState *s = EXYNOS4210_CLK(dev);
+    unsigned int i;
+
+    /* Set default values for registers */
+    for (i = 0; i < EXYNOS4210_REGS_NUM; i++) {
+        s->reg[i] = exynos4210_clk_regs[i].reset_value;
+    }
+}
+
+static void exynos4210_clk_init(Object *obj)
+{
+    Exynos4210ClkState *s = EXYNOS4210_CLK(obj);
+    SysBusDevice *dev = SYS_BUS_DEVICE(obj);
+
+    /* memory mapping */
+    memory_region_init_io(&s->iomem, obj, &exynos4210_clk_ops, s,
+                          TYPE_EXYNOS4210_CLK, EXYNOS4210_CLK_REGS_MEM_SIZE);
+    sysbus_init_mmio(dev, &s->iomem);
+}
+
+static const VMStateDescription exynos4210_clk_vmstate = {
+    .name = TYPE_EXYNOS4210_CLK,
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT32_ARRAY(reg, Exynos4210ClkState, EXYNOS4210_REGS_NUM),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static void exynos4210_clk_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+
+    dc->reset = exynos4210_clk_reset;
+    dc->vmsd = &exynos4210_clk_vmstate;
+}
+
+static const TypeInfo exynos4210_clk_info = {
+    .name          = TYPE_EXYNOS4210_CLK,
+    .parent        = TYPE_SYS_BUS_DEVICE,
+    .instance_size = sizeof(Exynos4210ClkState),
+    .instance_init = exynos4210_clk_init,
+    .class_init    = exynos4210_clk_class_init,
+};
+
+static void exynos4210_clk_register(void)
+{
+    qemu_log_mask(LOG_GUEST_ERROR, "Clock init\n");
+    type_register_static(&exynos4210_clk_info);
+}
+
+type_init(exynos4210_clk_register)
diff --git a/hw/net/cadence_gem.c b/hw/net/cadence_gem.c
index e99d4544a2..d4de8ad9f1 100644
--- a/hw/net/cadence_gem.c
+++ b/hw/net/cadence_gem.c
@@ -508,7 +508,7 @@ static void gem_update_int_status(CadenceGEMState *s)
 
     if ((s->num_priority_queues == 1) && s->regs[GEM_ISR]) {
         /* No priority queues, just trigger the interrupt */
-        DB_PRINT("asserting int.\n", i);
+        DB_PRINT("asserting int.\n");
         qemu_set_irq(s->irq[0], 1);
         return;
     }
diff --git a/hw/nvram/spapr_nvram.c b/hw/nvram/spapr_nvram.c
index 65ba188555..aa5d2c1f5f 100644
--- a/hw/nvram/spapr_nvram.c
+++ b/hw/nvram/spapr_nvram.c
@@ -141,9 +141,17 @@ static void rtas_nvram_store(PowerPCCPU *cpu, sPAPRMachineState *spapr,
 static void spapr_nvram_realize(VIOsPAPRDevice *dev, Error **errp)
 {
     sPAPRNVRAM *nvram = VIO_SPAPR_NVRAM(dev);
+    int ret;
 
     if (nvram->blk) {
         nvram->size = blk_getlength(nvram->blk);
+
+        ret = blk_set_perm(nvram->blk,
+                           BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE,
+                           BLK_PERM_ALL, errp);
+        if (ret < 0) {
+            return;
+        }
     } else {
         nvram->size = DEFAULT_NVRAM_SIZE;
     }
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index a563555e7d..273f1e4602 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -1530,6 +1530,34 @@ static const pci_class_desc pci_class_descriptions[] =
     { 0, NULL}
 };
 
+static void pci_for_each_device_under_bus_reverse(PCIBus *bus,
+                                                  void (*fn)(PCIBus *b,
+                                                             PCIDevice *d,
+                                                             void *opaque),
+                                                  void *opaque)
+{
+    PCIDevice *d;
+    int devfn;
+
+    for (devfn = 0; devfn < ARRAY_SIZE(bus->devices); devfn++) {
+        d = bus->devices[ARRAY_SIZE(bus->devices) - 1 - devfn];
+        if (d) {
+            fn(bus, d, opaque);
+        }
+    }
+}
+
+void pci_for_each_device_reverse(PCIBus *bus, int bus_num,
+                         void (*fn)(PCIBus *b, PCIDevice *d, void *opaque),
+                         void *opaque)
+{
+    bus = pci_find_bus_nr(bus, bus_num);
+
+    if (bus) {
+        pci_for_each_device_under_bus_reverse(bus, fn, opaque);
+    }
+}
+
 static void pci_for_each_device_under_bus(PCIBus *bus,
                                           void (*fn)(PCIBus *b, PCIDevice *d,
                                                      void *opaque),
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 87d8366c44..81c6c1c27c 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -63,6 +63,7 @@
 #include "qemu/error-report.h"
 #include "trace.h"
 #include "hw/nmi.h"
+#include "hw/intc/intc.h"
 
 #include "hw/compat.h"
 #include "qemu/cutils.h"
@@ -95,37 +96,68 @@
 
 #define HTAB_SIZE(spapr)        (1ULL << ((spapr)->htab_shift))
 
-static XICSState *try_create_xics(const char *type, int nr_servers,
-                                  int nr_irqs, Error **errp)
+static int try_create_xics(sPAPRMachineState *spapr, const char *type_ics,
+                           const char *type_icp, int nr_servers,
+                           int nr_irqs, Error **errp)
 {
-    Error *err = NULL;
-    DeviceState *dev;
+    XICSFabric *xi = XICS_FABRIC(spapr);
+    Error *err = NULL, *local_err = NULL;
+    ICSState *ics = NULL;
+    int i;
 
-    dev = qdev_create(NULL, type);
-    qdev_prop_set_uint32(dev, "nr_servers", nr_servers);
-    qdev_prop_set_uint32(dev, "nr_irqs", nr_irqs);
-    object_property_set_bool(OBJECT(dev), true, "realized", &err);
+    ics = ICS_SIMPLE(object_new(type_ics));
+    qdev_set_parent_bus(DEVICE(ics), sysbus_get_default());
+    object_property_add_child(OBJECT(spapr), "ics", OBJECT(ics), NULL);
+    object_property_set_int(OBJECT(ics), nr_irqs, "nr-irqs", &err);
+    object_property_add_const_link(OBJECT(ics), "xics", OBJECT(xi), NULL);
+    object_property_set_bool(OBJECT(ics), true, "realized", &local_err);
+    error_propagate(&err, local_err);
     if (err) {
-        error_propagate(errp, err);
-        object_unparent(OBJECT(dev));
-        return NULL;
+        goto error;
     }
-    return XICS_COMMON(dev);
+
+    spapr->icps = g_malloc0(nr_servers * sizeof(ICPState));
+    spapr->nr_servers = nr_servers;
+
+    for (i = 0; i < nr_servers; i++) {
+        ICPState *icp = &spapr->icps[i];
+
+        object_initialize(icp, sizeof(*icp), type_icp);
+        qdev_set_parent_bus(DEVICE(icp), sysbus_get_default());
+        object_property_add_child(OBJECT(spapr), "icp[*]", OBJECT(icp), NULL);
+        object_property_add_const_link(OBJECT(icp), "xics", OBJECT(xi), NULL);
+        object_property_set_bool(OBJECT(icp), true, "realized", &err);
+        if (err) {
+            goto error;
+        }
+        object_unref(OBJECT(icp));
+    }
+
+    spapr->ics = ics;
+    return 0;
+
+error:
+    error_propagate(errp, err);
+    if (ics) {
+        object_unparent(OBJECT(ics));
+    }
+    return -1;
 }
 
-static XICSState *xics_system_init(MachineState *machine,
-                                   int nr_servers, int nr_irqs, Error **errp)
+static int xics_system_init(MachineState *machine,
+                            int nr_servers, int nr_irqs, Error **errp)
 {
-    XICSState *xics = NULL;
+    int rc = -1;
 
     if (kvm_enabled()) {
         Error *err = NULL;
 
-        if (machine_kernel_irqchip_allowed(machine)) {
-            xics = try_create_xics(TYPE_XICS_SPAPR_KVM, nr_servers, nr_irqs,
-                                   &err);
+        if (machine_kernel_irqchip_allowed(machine) &&
+            !xics_kvm_init(SPAPR_MACHINE(machine), errp)) {
+            rc = try_create_xics(SPAPR_MACHINE(machine), TYPE_ICS_KVM,
+                                 TYPE_KVM_ICP, nr_servers, nr_irqs, &err);
         }
-        if (machine_kernel_irqchip_required(machine) && !xics) {
+        if (machine_kernel_irqchip_required(machine) && rc < 0) {
             error_reportf_err(err,
                               "kernel_irqchip requested but unavailable: ");
         } else {
@@ -133,11 +165,13 @@ static XICSState *xics_system_init(MachineState *machine,
         }
     }
 
-    if (!xics) {
-        xics = try_create_xics(TYPE_XICS_SPAPR, nr_servers, nr_irqs, errp);
+    if (rc < 0) {
+        xics_spapr_init(SPAPR_MACHINE(machine), errp);
+        rc = try_create_xics(SPAPR_MACHINE(machine), TYPE_ICS_SIMPLE,
+                               TYPE_ICP, nr_servers, nr_irqs, errp);
     }
 
-    return xics;
+    return rc;
 }
 
 static int spapr_fixup_cpu_smt_dt(void *fdt, int offset, PowerPCCPU *cpu,
@@ -924,7 +958,7 @@ static void *spapr_build_fdt(sPAPRMachineState *spapr,
     _FDT(fdt_setprop_cell(fdt, 0, "#size-cells", 2));
 
     /* /interrupt controller */
-    spapr_dt_xics(spapr->xics, fdt, PHANDLE_XICP);
+    spapr_dt_xics(spapr->nr_servers, fdt, PHANDLE_XICP);
 
     ret = spapr_populate_memory(spapr, fdt);
     if (ret < 0) {
@@ -1053,6 +1087,62 @@ static void close_htab_fd(sPAPRMachineState *spapr)
     spapr->htab_fd = -1;
 }
 
+static hwaddr spapr_hpt_mask(PPCVirtualHypervisor *vhyp)
+{
+    sPAPRMachineState *spapr = SPAPR_MACHINE(vhyp);
+
+    return HTAB_SIZE(spapr) / HASH_PTEG_SIZE_64 - 1;
+}
+
+static const ppc_hash_pte64_t *spapr_map_hptes(PPCVirtualHypervisor *vhyp,
+                                                hwaddr ptex, int n)
+{
+    sPAPRMachineState *spapr = SPAPR_MACHINE(vhyp);
+    hwaddr pte_offset = ptex * HASH_PTE_SIZE_64;
+
+    if (!spapr->htab) {
+        /*
+         * HTAB is controlled by KVM. Fetch into temporary buffer
+         */
+        ppc_hash_pte64_t *hptes = g_malloc(n * HASH_PTE_SIZE_64);
+        kvmppc_read_hptes(hptes, ptex, n);
+        return hptes;
+    }
+
+    /*
+     * HTAB is controlled by QEMU. Just point to the internally
+     * accessible PTEG.
+     */
+    return (const ppc_hash_pte64_t *)(spapr->htab + pte_offset);
+}
+
+static void spapr_unmap_hptes(PPCVirtualHypervisor *vhyp,
+                              const ppc_hash_pte64_t *hptes,
+                              hwaddr ptex, int n)
+{
+    sPAPRMachineState *spapr = SPAPR_MACHINE(vhyp);
+
+    if (!spapr->htab) {
+        g_free((void *)hptes);
+    }
+
+    /* Nothing to do for qemu managed HPT */
+}
+
+static void spapr_store_hpte(PPCVirtualHypervisor *vhyp, hwaddr ptex,
+                             uint64_t pte0, uint64_t pte1)
+{
+    sPAPRMachineState *spapr = SPAPR_MACHINE(vhyp);
+    hwaddr offset = ptex * HASH_PTE_SIZE_64;
+
+    if (!spapr->htab) {
+        kvmppc_write_hpte(ptex, pte0, pte1);
+    } else {
+        stq_p(spapr->htab + offset, pte0);
+        stq_p(spapr->htab + offset + HASH_PTE_SIZE_64 / 2, pte1);
+    }
+}
+
 static int spapr_hpt_shift_for_ramsize(uint64_t ramsize)
 {
     int shift;
@@ -1252,6 +1342,13 @@ static int spapr_post_load(void *opaque, int version_id)
     sPAPRMachineState *spapr = (sPAPRMachineState *)opaque;
     int err = 0;
 
+    if (!object_dynamic_cast(OBJECT(spapr->ics), TYPE_ICS_KVM)) {
+        int i;
+        for (i = 0; i < spapr->nr_servers; i++) {
+            icp_resend(&spapr->icps[i]);
+        }
+    }
+
     /* In earlier versions, there was no separate qdev for the PAPR
      * RTC, so the RTC offset was stored directly in sPAPREnvironment.
      * So when migrating from those versions, poke the incoming offset
@@ -1902,9 +1999,8 @@ static void ppc_spapr_init(MachineState *machine)
     load_limit = MIN(spapr->rma_size, RTAS_MAX_ADDR) - FW_OVERHEAD;
 
     /* Set up Interrupt Controller before we create the VCPUs */
-    spapr->xics = xics_system_init(machine,
-                                   DIV_ROUND_UP(max_cpus * smt, smp_threads),
-                                   XICS_IRQS_SPAPR, &error_fatal);
+    xics_system_init(machine, DIV_ROUND_UP(max_cpus * smt, smp_threads),
+                     XICS_IRQS_SPAPR, &error_fatal);
 
     /* Set up containers for ibm,client-set-architecture negotiated options */
     spapr->ov5 = spapr_ovec_new();
@@ -2872,6 +2968,40 @@ static void spapr_phb_placement(sPAPRMachineState *spapr, uint32_t index,
     *mmio64 = SPAPR_PCI_BASE + (index + 1) * SPAPR_PCI_MEM64_WIN_SIZE;
 }
 
+static ICSState *spapr_ics_get(XICSFabric *dev, int irq)
+{
+    sPAPRMachineState *spapr = SPAPR_MACHINE(dev);
+
+    return ics_valid_irq(spapr->ics, irq) ? spapr->ics : NULL;
+}
+
+static void spapr_ics_resend(XICSFabric *dev)
+{
+    sPAPRMachineState *spapr = SPAPR_MACHINE(dev);
+
+    ics_resend(spapr->ics);
+}
+
+static ICPState *spapr_icp_get(XICSFabric *xi, int server)
+{
+    sPAPRMachineState *spapr = SPAPR_MACHINE(xi);
+
+    return (server < spapr->nr_servers) ? &spapr->icps[server] : NULL;
+}
+
+static void spapr_pic_print_info(InterruptStatsProvider *obj,
+                                 Monitor *mon)
+{
+    sPAPRMachineState *spapr = SPAPR_MACHINE(obj);
+    int i;
+
+    for (i = 0; i < spapr->nr_servers; i++) {
+        icp_pic_print_info(&spapr->icps[i], mon);
+    }
+
+    ics_pic_print_info(spapr->ics, mon);
+}
+
 static void spapr_machine_class_init(ObjectClass *oc, void *data)
 {
     MachineClass *mc = MACHINE_CLASS(oc);
@@ -2880,6 +3010,8 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
     NMIClass *nc = NMI_CLASS(oc);
     HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc);
     PPCVirtualHypervisorClass *vhc = PPC_VIRTUAL_HYPERVISOR_CLASS(oc);
+    XICSFabricClass *xic = XICS_FABRIC_CLASS(oc);
+    InterruptStatsProviderClass *ispc = INTERRUPT_STATS_PROVIDER_CLASS(oc);
 
     mc->desc = "pSeries Logical Partition (PAPR compliant)";
 
@@ -2891,7 +3023,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
     mc->init = ppc_spapr_init;
     mc->reset = ppc_spapr_reset;
     mc->block_default_type = IF_SCSI;
-    mc->max_cpus = 255;
+    mc->max_cpus = 1024;
     mc->no_parallel = 1;
     mc->default_boot_order = "";
     mc->default_ram_size = 512 * M_BYTE;
@@ -2913,6 +3045,14 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
     nc->nmi_monitor_handler = spapr_nmi;
     smc->phb_placement = spapr_phb_placement;
     vhc->hypercall = emulate_spapr_hypercall;
+    vhc->hpt_mask = spapr_hpt_mask;
+    vhc->map_hptes = spapr_map_hptes;
+    vhc->unmap_hptes = spapr_unmap_hptes;
+    vhc->store_hpte = spapr_store_hpte;
+    xic->ics_get = spapr_ics_get;
+    xic->ics_resend = spapr_ics_resend;
+    xic->icp_get = spapr_icp_get;
+    ispc->print_info = spapr_pic_print_info;
 }
 
 static const TypeInfo spapr_machine_info = {
@@ -2929,6 +3069,8 @@ static const TypeInfo spapr_machine_info = {
         { TYPE_NMI },
         { TYPE_HOTPLUG_HANDLER },
         { TYPE_PPC_VIRTUAL_HYPERVISOR },
+        { TYPE_XICS_FABRIC },
+        { TYPE_INTERRUPT_STATS_PROVIDER },
         { }
     },
 };
diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c
index 55cd0456eb..90d682fe33 100644
--- a/hw/ppc/spapr_cpu_core.c
+++ b/hw/ppc/spapr_cpu_core.c
@@ -13,10 +13,12 @@
 #include "hw/boards.h"
 #include "qapi/error.h"
 #include "sysemu/cpus.h"
+#include "sysemu/kvm.h"
 #include "target/ppc/kvm_ppc.h"
 #include "hw/ppc/ppc.h"
 #include "target/ppc/mmu-hash64.h"
 #include "sysemu/numa.h"
+#include "qemu/error-report.h"
 
 static void spapr_cpu_reset(void *opaque)
 {
@@ -34,15 +36,26 @@ static void spapr_cpu_reset(void *opaque)
 
     env->spr[SPR_HIOR] = 0;
 
-    ppc_hash64_set_external_hpt(cpu, spapr->htab, spapr->htab_shift,
-                                &error_fatal);
+    /*
+     * This is a hack for the benefit of KVM PR - it abuses the SDR1
+     * slot in kvm_sregs to communicate the userspace address of the
+     * HPT
+     */
+    if (kvm_enabled()) {
+        env->spr[SPR_SDR1] = (target_ulong)(uintptr_t)spapr->htab
+            | (spapr->htab_shift - 18);
+        if (kvmppc_put_books_sregs(cpu) < 0) {
+            error_report("Unable to update SDR1 in KVM");
+            exit(1);
+        }
+    }
 }
 
 static void spapr_cpu_destroy(PowerPCCPU *cpu)
 {
     sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
 
-    xics_cpu_destroy(spapr->xics, cpu);
+    xics_cpu_destroy(XICS_FABRIC(spapr), cpu);
     qemu_unregister_reset(spapr_cpu_reset, cpu);
 }
 
@@ -57,8 +70,7 @@ static void spapr_cpu_init(sPAPRMachineState *spapr, PowerPCCPU *cpu,
     cpu_ppc_tb_init(env, SPAPR_TIMEBASE_FREQ);
 
     /* Enable PAPR mode in TCG or KVM */
-    cpu_ppc_set_vhyp(cpu, PPC_VIRTUAL_HYPERVISOR(spapr));
-    cpu_ppc_set_papr(cpu);
+    cpu_ppc_set_papr(cpu, PPC_VIRTUAL_HYPERVISOR(spapr));
 
     if (cpu->max_compat) {
         Error *local_err = NULL;
@@ -76,7 +88,7 @@ static void spapr_cpu_init(sPAPRMachineState *spapr, PowerPCCPU *cpu,
             cs->numa_node = i;
     }
 
-    xics_cpu_setup(spapr->xics, cpu);
+    xics_cpu_setup(XICS_FABRIC(spapr), cpu);
 
     qemu_register_reset(spapr_cpu_reset, cpu);
     spapr_cpu_reset(cpu);
diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
index f85a9c32a7..24a5758e62 100644
--- a/hw/ppc/spapr_events.c
+++ b/hw/ppc/spapr_events.c
@@ -481,7 +481,7 @@ static void spapr_powerdown_req(Notifier *n, void *opaque)
 
     rtas_event_log_queue(RTAS_LOG_TYPE_EPOW, new_epow, true);
 
-    qemu_irq_pulse(xics_get_qirq(spapr->xics,
+    qemu_irq_pulse(xics_get_qirq(XICS_FABRIC(spapr),
                                  rtas_event_log_to_irq(spapr,
                                                        RTAS_LOG_TYPE_EPOW)));
 }
@@ -574,7 +574,7 @@ static void spapr_hotplug_req_event(uint8_t hp_id, uint8_t hp_action,
 
     rtas_event_log_queue(RTAS_LOG_TYPE_HOTPLUG, new_hp, true);
 
-    qemu_irq_pulse(xics_get_qirq(spapr->xics,
+    qemu_irq_pulse(xics_get_qirq(XICS_FABRIC(spapr),
                                  rtas_event_log_to_irq(spapr,
                                                        RTAS_LOG_TYPE_HOTPLUG)));
 }
@@ -695,7 +695,7 @@ static void check_exception(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                 spapr_event_sources_get_source(spapr->event_sources, i);
 
             g_assert(source->enabled);
-            qemu_irq_pulse(xics_get_qirq(spapr->xics, source->irq));
+            qemu_irq_pulse(xics_get_qirq(XICS_FABRIC(spapr), source->irq));
         }
     }
 
@@ -752,7 +752,7 @@ void spapr_events_init(sPAPRMachineState *spapr)
     spapr->event_sources = spapr_event_sources_new();
 
     spapr_event_sources_register(spapr->event_sources, EVENT_CLASS_EPOW,
-                                 xics_spapr_alloc(spapr->xics, 0, false,
+                                 spapr_ics_alloc(spapr->ics, 0, false,
                                                   &error_fatal));
 
     /* NOTE: if machine supports modern/dedicated hotplug event source,
@@ -765,7 +765,7 @@ void spapr_events_init(sPAPRMachineState *spapr)
      */
     if (spapr->use_hotplug_event_source) {
         spapr_event_sources_register(spapr->event_sources, EVENT_CLASS_HOT_PLUG,
-                                     xics_spapr_alloc(spapr->xics, 0, false,
+                                     spapr_ics_alloc(spapr->ics, 0, false,
                                                       &error_fatal));
     }
 
diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
index 42d20e0b92..f05a90ed2c 100644
--- a/hw/ppc/spapr_hcall.c
+++ b/hw/ppc/spapr_hcall.c
@@ -47,12 +47,12 @@ static bool has_spr(PowerPCCPU *cpu, int spr)
     return cpu->env.spr_cb[spr].name != NULL;
 }
 
-static inline bool valid_pte_index(CPUPPCState *env, target_ulong pte_index)
+static inline bool valid_ptex(PowerPCCPU *cpu, target_ulong ptex)
 {
     /*
-     * hash value/pteg group index is normalized by htab_mask
+     * hash value/pteg group index is normalized by HPT mask
      */
-    if (((pte_index & ~7ULL) / HPTES_PER_GROUP) & ~env->htab_mask) {
+    if (((ptex & ~7ULL) / HPTES_PER_GROUP) & ~ppc_hash64_hpt_mask(cpu)) {
         return false;
     }
     return true;
@@ -77,15 +77,14 @@ static bool is_ram_address(sPAPRMachineState *spapr, hwaddr addr)
 static target_ulong h_enter(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                             target_ulong opcode, target_ulong *args)
 {
-    CPUPPCState *env = &cpu->env;
     target_ulong flags = args[0];
-    target_ulong pte_index = args[1];
+    target_ulong ptex = args[1];
     target_ulong pteh = args[2];
     target_ulong ptel = args[3];
     unsigned apshift;
     target_ulong raddr;
-    target_ulong index;
-    uint64_t token;
+    target_ulong slot;
+    const ppc_hash_pte64_t *hptes;
 
     apshift = ppc_hash64_hpte_page_shift_noslb(cpu, pteh, ptel);
     if (!apshift) {
@@ -116,36 +115,36 @@ static target_ulong h_enter(PowerPCCPU *cpu, sPAPRMachineState *spapr,
 
     pteh &= ~0x60ULL;
 
-    if (!valid_pte_index(env, pte_index)) {
+    if (!valid_ptex(cpu, ptex)) {
         return H_PARAMETER;
     }
 
-    index = 0;
+    slot = ptex & 7ULL;
+    ptex = ptex & ~7ULL;
+
     if (likely((flags & H_EXACT) == 0)) {
-        pte_index &= ~7ULL;
-        token = ppc_hash64_start_access(cpu, pte_index);
-        for (; index < 8; index++) {
-            if (!(ppc_hash64_load_hpte0(cpu, token, index) & HPTE64_V_VALID)) {
+        hptes = ppc_hash64_map_hptes(cpu, ptex, HPTES_PER_GROUP);
+        for (slot = 0; slot < 8; slot++) {
+            if (!(ppc_hash64_hpte0(cpu, hptes, slot) & HPTE64_V_VALID)) {
                 break;
             }
         }
-        ppc_hash64_stop_access(cpu, token);
-        if (index == 8) {
+        ppc_hash64_unmap_hptes(cpu, hptes, ptex, HPTES_PER_GROUP);
+        if (slot == 8) {
             return H_PTEG_FULL;
         }
     } else {
-        token = ppc_hash64_start_access(cpu, pte_index);
-        if (ppc_hash64_load_hpte0(cpu, token, 0) & HPTE64_V_VALID) {
-            ppc_hash64_stop_access(cpu, token);
+        hptes = ppc_hash64_map_hptes(cpu, ptex + slot, 1);
+        if (ppc_hash64_hpte0(cpu, hptes, 0) & HPTE64_V_VALID) {
+            ppc_hash64_unmap_hptes(cpu, hptes, ptex + slot, 1);
             return H_PTEG_FULL;
         }
-        ppc_hash64_stop_access(cpu, token);
+        ppc_hash64_unmap_hptes(cpu, hptes, ptex, 1);
     }
 
-    ppc_hash64_store_hpte(cpu, pte_index + index,
-                          pteh | HPTE64_V_HPTE_DIRTY, ptel);
+    ppc_hash64_store_hpte(cpu, ptex + slot, pteh | HPTE64_V_HPTE_DIRTY, ptel);
 
-    args[0] = pte_index + index;
+    args[0] = ptex + slot;
     return H_SUCCESS;
 }
 
@@ -161,18 +160,17 @@ static RemoveResult remove_hpte(PowerPCCPU *cpu, target_ulong ptex,
                                 target_ulong flags,
                                 target_ulong *vp, target_ulong *rp)
 {
-    CPUPPCState *env = &cpu->env;
-    uint64_t token;
+    const ppc_hash_pte64_t *hptes;
     target_ulong v, r;
 
-    if (!valid_pte_index(env, ptex)) {
+    if (!valid_ptex(cpu, ptex)) {
         return REMOVE_PARM;
     }
 
-    token = ppc_hash64_start_access(cpu, ptex);
-    v = ppc_hash64_load_hpte0(cpu, token, 0);
-    r = ppc_hash64_load_hpte1(cpu, token, 0);
-    ppc_hash64_stop_access(cpu, token);
+    hptes = ppc_hash64_map_hptes(cpu, ptex, 1);
+    v = ppc_hash64_hpte0(cpu, hptes, 0);
+    r = ppc_hash64_hpte1(cpu, hptes, 0);
+    ppc_hash64_unmap_hptes(cpu, hptes, ptex, 1);
 
     if ((v & HPTE64_V_VALID) == 0 ||
         ((flags & H_AVPN) && (v & ~0x7fULL) != avpn) ||
@@ -191,11 +189,11 @@ static target_ulong h_remove(PowerPCCPU *cpu, sPAPRMachineState *spapr,
 {
     CPUPPCState *env = &cpu->env;
     target_ulong flags = args[0];
-    target_ulong pte_index = args[1];
+    target_ulong ptex = args[1];
     target_ulong avpn = args[2];
     RemoveResult ret;
 
-    ret = remove_hpte(cpu, pte_index, avpn, flags,
+    ret = remove_hpte(cpu, ptex, avpn, flags,
                       &args[0], &args[1]);
 
     switch (ret) {
@@ -291,19 +289,19 @@ static target_ulong h_protect(PowerPCCPU *cpu, sPAPRMachineState *spapr,
 {
     CPUPPCState *env = &cpu->env;
     target_ulong flags = args[0];
-    target_ulong pte_index = args[1];
+    target_ulong ptex = args[1];
     target_ulong avpn = args[2];
-    uint64_t token;
+    const ppc_hash_pte64_t *hptes;
     target_ulong v, r;
 
-    if (!valid_pte_index(env, pte_index)) {
+    if (!valid_ptex(cpu, ptex)) {
         return H_PARAMETER;
     }
 
-    token = ppc_hash64_start_access(cpu, pte_index);
-    v = ppc_hash64_load_hpte0(cpu, token, 0);
-    r = ppc_hash64_load_hpte1(cpu, token, 0);
-    ppc_hash64_stop_access(cpu, token);
+    hptes = ppc_hash64_map_hptes(cpu, ptex, 1);
+    v = ppc_hash64_hpte0(cpu, hptes, 0);
+    r = ppc_hash64_hpte1(cpu, hptes, 0);
+    ppc_hash64_unmap_hptes(cpu, hptes, ptex, 1);
 
     if ((v & HPTE64_V_VALID) == 0 ||
         ((flags & H_AVPN) && (v & ~0x7fULL) != avpn)) {
@@ -315,36 +313,35 @@ static target_ulong h_protect(PowerPCCPU *cpu, sPAPRMachineState *spapr,
     r |= (flags << 55) & HPTE64_R_PP0;
     r |= (flags << 48) & HPTE64_R_KEY_HI;
     r |= flags & (HPTE64_R_PP | HPTE64_R_N | HPTE64_R_KEY_LO);
-    ppc_hash64_store_hpte(cpu, pte_index,
+    ppc_hash64_store_hpte(cpu, ptex,
                           (v & ~HPTE64_V_VALID) | HPTE64_V_HPTE_DIRTY, 0);
-    ppc_hash64_tlb_flush_hpte(cpu, pte_index, v, r);
+    ppc_hash64_tlb_flush_hpte(cpu, ptex, v, r);
     /* Flush the tlb */
     check_tlb_flush(env, true);
     /* Don't need a memory barrier, due to qemu's global lock */
-    ppc_hash64_store_hpte(cpu, pte_index, v | HPTE64_V_HPTE_DIRTY, r);
+    ppc_hash64_store_hpte(cpu, ptex, v | HPTE64_V_HPTE_DIRTY, r);
     return H_SUCCESS;
 }
 
 static target_ulong h_read(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                            target_ulong opcode, target_ulong *args)
 {
-    CPUPPCState *env = &cpu->env;
     target_ulong flags = args[0];
-    target_ulong pte_index = args[1];
+    target_ulong ptex = args[1];
     uint8_t *hpte;
     int i, ridx, n_entries = 1;
 
-    if (!valid_pte_index(env, pte_index)) {
+    if (!valid_ptex(cpu, ptex)) {
         return H_PARAMETER;
     }
 
     if (flags & H_READ_4) {
         /* Clear the two low order bits */
-        pte_index &= ~(3ULL);
+        ptex &= ~(3ULL);
         n_entries = 4;
     }
 
-    hpte = env->external_htab + (pte_index * HASH_PTE_SIZE_64);
+    hpte = spapr->htab + (ptex * HASH_PTE_SIZE_64);
 
     for (i = 0, ridx = 0; i < n_entries; i++) {
         args[ridx++] = ldq_p(hpte);
diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index fd6fc1d953..2a3499eaf8 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -43,6 +43,7 @@
 
 #include "hw/pci/pci_bridge.h"
 #include "hw/pci/pci_bus.h"
+#include "hw/pci/pci_ids.h"
 #include "hw/ppc/spapr_drc.h"
 #include "sysemu/device_tree.h"
 #include "sysemu/kvm.h"
@@ -325,7 +326,7 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPRMachineState *spapr,
             return;
         }
 
-        xics_spapr_free(spapr->xics, msi->first_irq, msi->num);
+        spapr_ics_free(spapr->ics, msi->first_irq, msi->num);
         if (msi_present(pdev)) {
             spapr_msi_setmsg(pdev, 0, false, 0, 0);
         }
@@ -363,7 +364,7 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPRMachineState *spapr,
     }
 
     /* Allocate MSIs */
-    irq = xics_spapr_alloc_block(spapr->xics, req_num, false,
+    irq = spapr_ics_alloc_block(spapr->ics, req_num, false,
                            ret_intr_type == RTAS_TYPE_MSI, &err);
     if (err) {
         error_reportf_err(err, "Can't allocate MSIs for device %x: ",
@@ -374,7 +375,7 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPRMachineState *spapr,
 
     /* Release previous MSIs */
     if (msi) {
-        xics_spapr_free(spapr->xics, msi->first_irq, msi->num);
+        spapr_ics_free(spapr->ics, msi->first_irq, msi->num);
         g_hash_table_remove(phb->msi, &config_addr);
     }
 
@@ -736,7 +737,7 @@ static void spapr_msi_write(void *opaque, hwaddr addr,
 
     trace_spapr_pci_msi_write(addr, data, irq);
 
-    qemu_irq_pulse(xics_get_qirq(spapr->xics, irq));
+    qemu_irq_pulse(xics_get_qirq(XICS_FABRIC(spapr), irq));
 }
 
 static const MemoryRegionOps spapr_msi_ops = {
@@ -946,6 +947,274 @@ static void populate_resource_props(PCIDevice *d, ResourceProps *rp)
     rp->assigned_len = assigned_idx * sizeof(ResourceFields);
 }
 
+typedef struct PCIClass PCIClass;
+typedef struct PCISubClass PCISubClass;
+typedef struct PCIIFace PCIIFace;
+
+struct PCIIFace {
+    int iface;
+    const char *name;
+};
+
+struct PCISubClass {
+    int subclass;
+    const char *name;
+    const PCIIFace *iface;
+};
+
+struct PCIClass {
+    const char *name;
+    const PCISubClass *subc;
+};
+
+static const PCISubClass undef_subclass[] = {
+    { PCI_CLASS_NOT_DEFINED_VGA, "display", NULL },
+    { 0xFF, NULL, NULL },
+};
+
+static const PCISubClass mass_subclass[] = {
+    { PCI_CLASS_STORAGE_SCSI, "scsi", NULL },
+    { PCI_CLASS_STORAGE_IDE, "ide", NULL },
+    { PCI_CLASS_STORAGE_FLOPPY, "fdc", NULL },
+    { PCI_CLASS_STORAGE_IPI, "ipi", NULL },
+    { PCI_CLASS_STORAGE_RAID, "raid", NULL },
+    { PCI_CLASS_STORAGE_ATA, "ata", NULL },
+    { PCI_CLASS_STORAGE_SATA, "sata", NULL },
+    { PCI_CLASS_STORAGE_SAS, "sas", NULL },
+    { 0xFF, NULL, NULL },
+};
+
+static const PCISubClass net_subclass[] = {
+    { PCI_CLASS_NETWORK_ETHERNET, "ethernet", NULL },
+    { PCI_CLASS_NETWORK_TOKEN_RING, "token-ring", NULL },
+    { PCI_CLASS_NETWORK_FDDI, "fddi", NULL },
+    { PCI_CLASS_NETWORK_ATM, "atm", NULL },
+    { PCI_CLASS_NETWORK_ISDN, "isdn", NULL },
+    { PCI_CLASS_NETWORK_WORLDFIP, "worldfip", NULL },
+    { PCI_CLASS_NETWORK_PICMG214, "picmg", NULL },
+    { 0xFF, NULL, NULL },
+};
+
+static const PCISubClass displ_subclass[] = {
+    { PCI_CLASS_DISPLAY_VGA, "vga", NULL },
+    { PCI_CLASS_DISPLAY_XGA, "xga", NULL },
+    { PCI_CLASS_DISPLAY_3D, "3d-controller", NULL },
+    { 0xFF, NULL, NULL },
+};
+
+static const PCISubClass media_subclass[] = {
+    { PCI_CLASS_MULTIMEDIA_VIDEO, "video", NULL },
+    { PCI_CLASS_MULTIMEDIA_AUDIO, "sound", NULL },
+    { PCI_CLASS_MULTIMEDIA_PHONE, "telephony", NULL },
+    { 0xFF, NULL, NULL },
+};
+
+static const PCISubClass mem_subclass[] = {
+    { PCI_CLASS_MEMORY_RAM, "memory", NULL },
+    { PCI_CLASS_MEMORY_FLASH, "flash", NULL },
+    { 0xFF, NULL, NULL },
+};
+
+static const PCISubClass bridg_subclass[] = {
+    { PCI_CLASS_BRIDGE_HOST, "host", NULL },
+    { PCI_CLASS_BRIDGE_ISA, "isa", NULL },
+    { PCI_CLASS_BRIDGE_EISA, "eisa", NULL },
+    { PCI_CLASS_BRIDGE_MC, "mca", NULL },
+    { PCI_CLASS_BRIDGE_PCI, "pci", NULL },
+    { PCI_CLASS_BRIDGE_PCMCIA, "pcmcia", NULL },
+    { PCI_CLASS_BRIDGE_NUBUS, "nubus", NULL },
+    { PCI_CLASS_BRIDGE_CARDBUS, "cardbus", NULL },
+    { PCI_CLASS_BRIDGE_RACEWAY, "raceway", NULL },
+    { PCI_CLASS_BRIDGE_PCI_SEMITP, "semi-transparent-pci", NULL },
+    { PCI_CLASS_BRIDGE_IB_PCI, "infiniband", NULL },
+    { 0xFF, NULL, NULL },
+};
+
+static const PCISubClass comm_subclass[] = {
+    { PCI_CLASS_COMMUNICATION_SERIAL, "serial", NULL },
+    { PCI_CLASS_COMMUNICATION_PARALLEL, "parallel", NULL },
+    { PCI_CLASS_COMMUNICATION_MULTISERIAL, "multiport-serial", NULL },
+    { PCI_CLASS_COMMUNICATION_MODEM, "modem", NULL },
+    { PCI_CLASS_COMMUNICATION_GPIB, "gpib", NULL },
+    { PCI_CLASS_COMMUNICATION_SC, "smart-card", NULL },
+    { 0xFF, NULL, NULL, },
+};
+
+static const PCIIFace pic_iface[] = {
+    { PCI_CLASS_SYSTEM_PIC_IOAPIC, "io-apic" },
+    { PCI_CLASS_SYSTEM_PIC_IOXAPIC, "io-xapic" },
+    { 0xFF, NULL },
+};
+
+static const PCISubClass sys_subclass[] = {
+    { PCI_CLASS_SYSTEM_PIC, "interrupt-controller", pic_iface },
+    { PCI_CLASS_SYSTEM_DMA, "dma-controller", NULL },
+    { PCI_CLASS_SYSTEM_TIMER, "timer", NULL },
+    { PCI_CLASS_SYSTEM_RTC, "rtc", NULL },
+    { PCI_CLASS_SYSTEM_PCI_HOTPLUG, "hot-plug-controller", NULL },
+    { PCI_CLASS_SYSTEM_SDHCI, "sd-host-controller", NULL },
+    { 0xFF, NULL, NULL },
+};
+
+static const PCISubClass inp_subclass[] = {
+    { PCI_CLASS_INPUT_KEYBOARD, "keyboard", NULL },
+    { PCI_CLASS_INPUT_PEN, "pen", NULL },
+    { PCI_CLASS_INPUT_MOUSE, "mouse", NULL },
+    { PCI_CLASS_INPUT_SCANNER, "scanner", NULL },
+    { PCI_CLASS_INPUT_GAMEPORT, "gameport", NULL },
+    { 0xFF, NULL, NULL },
+};
+
+static const PCISubClass dock_subclass[] = {
+    { PCI_CLASS_DOCKING_GENERIC, "dock", NULL },
+    { 0xFF, NULL, NULL },
+};
+
+static const PCISubClass cpu_subclass[] = {
+    { PCI_CLASS_PROCESSOR_PENTIUM, "pentium", NULL },
+    { PCI_CLASS_PROCESSOR_POWERPC, "powerpc", NULL },
+    { PCI_CLASS_PROCESSOR_MIPS, "mips", NULL },
+    { PCI_CLASS_PROCESSOR_CO, "co-processor", NULL },
+    { 0xFF, NULL, NULL },
+};
+
+static const PCIIFace usb_iface[] = {
+    { PCI_CLASS_SERIAL_USB_UHCI, "usb-uhci" },
+    { PCI_CLASS_SERIAL_USB_OHCI, "usb-ohci", },
+    { PCI_CLASS_SERIAL_USB_EHCI, "usb-ehci" },
+    { PCI_CLASS_SERIAL_USB_XHCI, "usb-xhci" },
+    { PCI_CLASS_SERIAL_USB_UNKNOWN, "usb-unknown" },
+    { PCI_CLASS_SERIAL_USB_DEVICE, "usb-device" },
+    { 0xFF, NULL },
+};
+
+static const PCISubClass ser_subclass[] = {
+    { PCI_CLASS_SERIAL_FIREWIRE, "firewire", NULL },
+    { PCI_CLASS_SERIAL_ACCESS, "access-bus", NULL },
+    { PCI_CLASS_SERIAL_SSA, "ssa", NULL },
+    { PCI_CLASS_SERIAL_USB, "usb", usb_iface },
+    { PCI_CLASS_SERIAL_FIBER, "fibre-channel", NULL },
+    { PCI_CLASS_SERIAL_SMBUS, "smb", NULL },
+    { PCI_CLASS_SERIAL_IB, "infiniband", NULL },
+    { PCI_CLASS_SERIAL_IPMI, "ipmi", NULL },
+    { PCI_CLASS_SERIAL_SERCOS, "sercos", NULL },
+    { PCI_CLASS_SERIAL_CANBUS, "canbus", NULL },
+    { 0xFF, NULL, NULL },
+};
+
+static const PCISubClass wrl_subclass[] = {
+    { PCI_CLASS_WIRELESS_IRDA, "irda", NULL },
+    { PCI_CLASS_WIRELESS_CIR, "consumer-ir", NULL },
+    { PCI_CLASS_WIRELESS_RF_CONTROLLER, "rf-controller", NULL },
+    { PCI_CLASS_WIRELESS_BLUETOOTH, "bluetooth", NULL },
+    { PCI_CLASS_WIRELESS_BROADBAND, "broadband", NULL },
+    { 0xFF, NULL, NULL },
+};
+
+static const PCISubClass sat_subclass[] = {
+    { PCI_CLASS_SATELLITE_TV, "satellite-tv", NULL },
+    { PCI_CLASS_SATELLITE_AUDIO, "satellite-audio", NULL },
+    { PCI_CLASS_SATELLITE_VOICE, "satellite-voice", NULL },
+    { PCI_CLASS_SATELLITE_DATA, "satellite-data", NULL },
+    { 0xFF, NULL, NULL },
+};
+
+static const PCISubClass crypt_subclass[] = {
+    { PCI_CLASS_CRYPT_NETWORK, "network-encryption", NULL },
+    { PCI_CLASS_CRYPT_ENTERTAINMENT,
+      "entertainment-encryption", NULL },
+    { 0xFF, NULL, NULL },
+};
+
+static const PCISubClass spc_subclass[] = {
+    { PCI_CLASS_SP_DPIO, "dpio", NULL },
+    { PCI_CLASS_SP_PERF, "counter", NULL },
+    { PCI_CLASS_SP_SYNCH, "measurement", NULL },
+    { PCI_CLASS_SP_MANAGEMENT, "management-card", NULL },
+    { 0xFF, NULL, NULL },
+};
+
+static const PCIClass pci_classes[] = {
+    { "legacy-device", undef_subclass },
+    { "mass-storage",  mass_subclass },
+    { "network", net_subclass },
+    { "display", displ_subclass, },
+    { "multimedia-device", media_subclass },
+    { "memory-controller", mem_subclass },
+    { "unknown-bridge", bridg_subclass },
+    { "communication-controller", comm_subclass},
+    { "system-peripheral", sys_subclass },
+    { "input-controller", inp_subclass },
+    { "docking-station", dock_subclass },
+    { "cpu", cpu_subclass },
+    { "serial-bus", ser_subclass },
+    { "wireless-controller", wrl_subclass },
+    { "intelligent-io", NULL },
+    { "satellite-device", sat_subclass },
+    { "encryption", crypt_subclass },
+    { "data-processing-controller", spc_subclass },
+};
+
+static const char *pci_find_device_name(uint8_t class, uint8_t subclass,
+                                        uint8_t iface)
+{
+    const PCIClass *pclass;
+    const PCISubClass *psubclass;
+    const PCIIFace *piface;
+    const char *name;
+
+    if (class >= ARRAY_SIZE(pci_classes)) {
+        return "pci";
+    }
+
+    pclass = pci_classes + class;
+    name = pclass->name;
+
+    if (pclass->subc == NULL) {
+        return name;
+    }
+
+    psubclass = pclass->subc;
+    while ((psubclass->subclass & 0xff) != 0xff) {
+        if ((psubclass->subclass & 0xff) == subclass) {
+            name = psubclass->name;
+            break;
+        }
+        psubclass++;
+    }
+
+    piface = psubclass->iface;
+    if (piface == NULL) {
+        return name;
+    }
+    while ((piface->iface & 0xff) != 0xff) {
+        if ((piface->iface & 0xff) == iface) {
+            name = piface->name;
+            break;
+        }
+        piface++;
+    }
+
+    return name;
+}
+
+static void pci_get_node_name(char *nodename, int len, PCIDevice *dev)
+{
+    int slot = PCI_SLOT(dev->devfn);
+    int func = PCI_FUNC(dev->devfn);
+    uint32_t ccode = pci_default_read_config(dev, PCI_CLASS_PROG, 3);
+    const char *name;
+
+    name = pci_find_device_name((ccode >> 16) & 0xff, (ccode >> 8) & 0xff,
+                                ccode & 0xff);
+
+    if (func != 0) {
+        snprintf(nodename, len, "%s@%x,%x", name, slot, func);
+    } else {
+        snprintf(nodename, len, "%s@%x", name, slot);
+    }
+}
+
 static uint32_t spapr_phb_get_pci_drc_index(sPAPRPHBState *phb,
                                             PCIDevice *pdev);
 
@@ -957,6 +1226,7 @@ static int spapr_populate_pci_child_dt(PCIDevice *dev, void *fdt, int offset,
     int pci_status, err;
     char *buf = NULL;
     uint32_t drc_index = spapr_phb_get_pci_drc_index(sphb, dev);
+    uint32_t ccode = pci_default_read_config(dev, PCI_CLASS_PROG, 3);
     uint32_t max_msi, max_msix;
 
     if (pci_default_read_config(dev, PCI_HEADER_TYPE, 1) ==
@@ -971,8 +1241,7 @@ static int spapr_populate_pci_child_dt(PCIDevice *dev, void *fdt, int offset,
                           pci_default_read_config(dev, PCI_DEVICE_ID, 2)));
     _FDT(fdt_setprop_cell(fdt, offset, "revision-id",
                           pci_default_read_config(dev, PCI_REVISION_ID, 1)));
-    _FDT(fdt_setprop_cell(fdt, offset, "class-code",
-                          pci_default_read_config(dev, PCI_CLASS_PROG, 3)));
+    _FDT(fdt_setprop_cell(fdt, offset, "class-code", ccode));
     if (pci_default_read_config(dev, PCI_INTERRUPT_PIN, 1)) {
         _FDT(fdt_setprop_cell(fdt, offset, "interrupts",
                  pci_default_read_config(dev, PCI_INTERRUPT_PIN, 1)));
@@ -1013,11 +1282,10 @@ static int spapr_populate_pci_child_dt(PCIDevice *dev, void *fdt, int offset,
         _FDT(fdt_setprop(fdt, offset, "udf-supported", NULL, 0));
     }
 
-    /* NOTE: this is normally generated by firmware via path/unit name,
-     * but in our case we must set it manually since it does not get
-     * processed by OF beforehand
-     */
-    _FDT(fdt_setprop_string(fdt, offset, "name", "pci"));
+    _FDT(fdt_setprop_string(fdt, offset, "name",
+                            pci_find_device_name((ccode >> 16) & 0xff,
+                                                 (ccode >> 8) & 0xff,
+                                                 ccode & 0xff)));
     buf = spapr_phb_get_loc_code(sphb, dev);
     if (!buf) {
         error_report("Failed setting the ibm,loc-code");
@@ -1061,15 +1329,9 @@ static int spapr_create_pci_child_dt(sPAPRPHBState *phb, PCIDevice *dev,
                                      void *fdt, int node_offset)
 {
     int offset, ret;
-    int slot = PCI_SLOT(dev->devfn);
-    int func = PCI_FUNC(dev->devfn);
     char nodename[FDT_NAME_MAX];
 
-    if (func != 0) {
-        snprintf(nodename, FDT_NAME_MAX, "pci@%x,%x", slot, func);
-    } else {
-        snprintf(nodename, FDT_NAME_MAX, "pci@%x", slot);
-    }
+    pci_get_node_name(nodename, FDT_NAME_MAX, dev);
     offset = fdt_add_subnode(fdt, node_offset, nodename);
     ret = spapr_populate_pci_child_dt(dev, fdt, offset, phb);
 
@@ -1485,7 +1747,7 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp)
         uint32_t irq;
         Error *local_err = NULL;
 
-        irq = xics_spapr_alloc_block(spapr->xics, 1, true, false, &local_err);
+        irq = spapr_ics_alloc_block(spapr->ics, 1, true, false, &local_err);
         if (local_err) {
             error_propagate(errp, local_err);
             error_prepend(errp, "can't allocate LSIs: ");
@@ -1782,9 +2044,9 @@ static void spapr_populate_pci_devices_dt(PCIBus *bus, PCIDevice *pdev,
     s_fdt.fdt = p->fdt;
     s_fdt.node_off = offset;
     s_fdt.sphb = p->sphb;
-    pci_for_each_device(sec_bus, pci_bus_num(sec_bus),
-                        spapr_populate_pci_devices_dt,
-                        &s_fdt);
+    pci_for_each_device_reverse(sec_bus, pci_bus_num(sec_bus),
+                                spapr_populate_pci_devices_dt,
+                                &s_fdt);
 }
 
 static void spapr_phb_pci_enumerate_bridge(PCIBus *bus, PCIDevice *pdev,
@@ -1953,9 +2215,9 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb,
     s_fdt.fdt = fdt;
     s_fdt.node_off = bus_off;
     s_fdt.sphb = phb;
-    pci_for_each_device(bus, pci_bus_num(bus),
-                        spapr_populate_pci_devices_dt,
-                        &s_fdt);
+    pci_for_each_device_reverse(bus, pci_bus_num(bus),
+                                spapr_populate_pci_devices_dt,
+                                &s_fdt);
 
     ret = spapr_drc_populate_dt(fdt, bus_off, OBJECT(phb),
                                 SPAPR_DR_CONNECTOR_TYPE_PCI);
diff --git a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c
index 8bfc5f971f..a0ee4fd265 100644
--- a/hw/ppc/spapr_vio.c
+++ b/hw/ppc/spapr_vio.c
@@ -454,7 +454,7 @@ static void spapr_vio_busdev_realize(DeviceState *qdev, Error **errp)
         dev->qdev.id = id;
     }
 
-    dev->irq = xics_spapr_alloc(spapr->xics, dev->irq, false, &local_err);
+    dev->irq = spapr_ics_alloc(spapr->ics, dev->irq, false, &local_err);
     if (local_err) {
         error_propagate(errp, local_err);
         return;
diff --git a/hw/s390x/ipl.c b/hw/s390x/ipl.c
index 2e2664f22e..7978c7d52a 100644
--- a/hw/s390x/ipl.c
+++ b/hw/s390x/ipl.c
@@ -20,6 +20,7 @@
 #include "hw/s390x/virtio-ccw.h"
 #include "hw/s390x/css.h"
 #include "ipl.h"
+#include "qemu/error-report.h"
 
 #define KERN_IMAGE_START                0x010000UL
 #define KERN_PARM_AREA                  0x010480UL
@@ -209,6 +210,7 @@ static Property s390_ipl_properties[] = {
     DEFINE_PROP_STRING("initrd", S390IPLState, initrd),
     DEFINE_PROP_STRING("cmdline", S390IPLState, cmdline),
     DEFINE_PROP_STRING("firmware", S390IPLState, firmware),
+    DEFINE_PROP_STRING("netboot_fw", S390IPLState, netboot_fw),
     DEFINE_PROP_BOOL("enforce_bios", S390IPLState, enforce_bios, false),
     DEFINE_PROP_BOOL("iplbext_migration", S390IPLState, iplbext_migration,
                      true),
@@ -226,6 +228,12 @@ static bool s390_gen_initial_iplb(S390IPLState *ipl)
                 TYPE_VIRTIO_CCW_DEVICE);
         SCSIDevice *sd = (SCSIDevice *) object_dynamic_cast(OBJECT(dev_st),
                                                             TYPE_SCSI_DEVICE);
+        VirtIONet *vn = (VirtIONet *) object_dynamic_cast(OBJECT(dev_st),
+                                                          TYPE_VIRTIO_NET);
+
+        if (vn) {
+            ipl->netboot = true;
+        }
         if (virtio_ccw_dev) {
             CcwDevice *ccw_dev = CCW_DEVICE(virtio_ccw_dev);
 
@@ -258,12 +266,86 @@ static bool s390_gen_initial_iplb(S390IPLState *ipl)
     return false;
 }
 
+static int load_netboot_image(Error **errp)
+{
+    S390IPLState *ipl = get_ipl_device();
+    char *netboot_filename;
+    MemoryRegion *sysmem =  get_system_memory();
+    MemoryRegion *mr = NULL;
+    void *ram_ptr = NULL;
+    int img_size = -1;
+
+    mr = memory_region_find(sysmem, 0, 1).mr;
+    if (!mr) {
+        error_setg(errp, "Failed to find memory region at address 0");
+        return -1;
+    }
+
+    ram_ptr = memory_region_get_ram_ptr(mr);
+    if (!ram_ptr) {
+        error_setg(errp, "No RAM found");
+        goto unref_mr;
+    }
+
+    netboot_filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, ipl->netboot_fw);
+    if (netboot_filename == NULL) {
+        error_setg(errp, "Could not find network bootloader");
+        goto unref_mr;
+    }
+
+    img_size = load_elf_ram(netboot_filename, NULL, NULL, &ipl->start_addr,
+                            NULL, NULL, 1, EM_S390, 0, 0, NULL, false);
+
+    if (img_size < 0) {
+        img_size = load_image_size(netboot_filename, ram_ptr, ram_size);
+        ipl->start_addr = KERN_IMAGE_START;
+    }
+
+    if (img_size < 0) {
+        error_setg(errp, "Failed to load network bootloader");
+    }
+
+    g_free(netboot_filename);
+
+unref_mr:
+    memory_region_unref(mr);
+    return img_size;
+}
+
+static bool is_virtio_net_device(IplParameterBlock *iplb)
+{
+    uint8_t cssid;
+    uint8_t ssid;
+    uint16_t devno;
+    uint16_t schid;
+    SubchDev *sch = NULL;
+
+    if (iplb->pbt != S390_IPL_TYPE_CCW) {
+        return false;
+    }
+
+    devno = be16_to_cpu(iplb->ccw.devno);
+    ssid = iplb->ccw.ssid & 3;
+
+    for (schid = 0; schid < MAX_SCHID; schid++) {
+        for (cssid = 0; cssid < MAX_CSSID; cssid++) {
+            sch = css_find_subch(1, cssid, ssid, schid);
+
+            if (sch && sch->devno == devno) {
+                return sch->id.cu_model == VIRTIO_ID_NET;
+            }
+        }
+    }
+    return false;
+}
+
 void s390_ipl_update_diag308(IplParameterBlock *iplb)
 {
     S390IPLState *ipl = get_ipl_device();
 
     ipl->iplb = *iplb;
     ipl->iplb_valid = true;
+    ipl->netboot = is_virtio_net_device(iplb);
 }
 
 IplParameterBlock *s390_ipl_get_iplb(void)
@@ -287,6 +369,7 @@ void s390_reipl_request(void)
 void s390_ipl_prepare_cpu(S390CPU *cpu)
 {
     S390IPLState *ipl = get_ipl_device();
+    Error *err = NULL;
 
     cpu->env.psw.addr = ipl->start_addr;
     cpu->env.psw.mask = IPL_PSW_MASK;
@@ -297,6 +380,13 @@ void s390_ipl_prepare_cpu(S390CPU *cpu)
             ipl->iplb_valid = s390_gen_initial_iplb(ipl);
         }
     }
+    if (ipl->netboot) {
+        if (load_netboot_image(&err) < 0) {
+            error_report_err(err);
+            vm_stop(RUN_STATE_INTERNAL_ERROR);
+        }
+        ipl->iplb.ccw.netboot_start_addr = ipl->start_addr;
+    }
 }
 
 static void s390_ipl_reset(DeviceState *dev)
diff --git a/hw/s390x/ipl.h b/hw/s390x/ipl.h
index c89109585a..46930e4c64 100644
--- a/hw/s390x/ipl.h
+++ b/hw/s390x/ipl.h
@@ -16,7 +16,8 @@
 #include "cpu.h"
 
 struct IplBlockCcw {
-    uint8_t  reserved0[85];
+    uint64_t netboot_start_addr;
+    uint8_t  reserved0[77];
     uint8_t  ssid;
     uint16_t devno;
     uint8_t  vm_flags;
@@ -100,12 +101,14 @@ struct S390IPLState {
     IplParameterBlock iplb;
     bool iplb_valid;
     bool reipl_requested;
+    bool netboot;
 
     /*< public >*/
     char *kernel;
     char *initrd;
     char *cmdline;
     char *firmware;
+    char *netboot_fw;
     uint8_t cssid;
     uint8_t ssid;
     uint16_t devno;
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index 4f0d62b2d8..40914fde6f 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -116,7 +116,8 @@ static void ccw_init(MachineState *machine)
     /* get a BUS */
     css_bus = virtual_css_bus_init();
     s390_init_ipl_dev(machine->kernel_filename, machine->kernel_cmdline,
-                      machine->initrd_filename, "s390-ccw.img", true);
+                      machine->initrd_filename, "s390-ccw.img",
+                      "s390-netboot.img", true);
     s390_flic_init();
 
     dev = qdev_create(NULL, TYPE_S390_PCI_HOST_BRIDGE);
diff --git a/hw/s390x/s390-virtio.c b/hw/s390x/s390-virtio.c
index 9cfb09057e..afa4148e6b 100644
--- a/hw/s390x/s390-virtio.c
+++ b/hw/s390x/s390-virtio.c
@@ -65,6 +65,7 @@ void s390_init_ipl_dev(const char *kernel_filename,
                        const char *kernel_cmdline,
                        const char *initrd_filename,
                        const char *firmware,
+                       const char *netboot_fw,
                        bool enforce_bios)
 {
     Object *new = object_new(TYPE_S390_IPL);
@@ -78,6 +79,7 @@ void s390_init_ipl_dev(const char *kernel_filename,
     }
     qdev_prop_set_string(dev, "cmdline", kernel_cmdline);
     qdev_prop_set_string(dev, "firmware", firmware);
+    qdev_prop_set_string(dev, "netboot_fw", netboot_fw);
     qdev_prop_set_bit(dev, "enforce_bios", enforce_bios);
     object_property_add_child(qdev_get_machine(), TYPE_S390_IPL,
                               new, NULL);
diff --git a/hw/s390x/s390-virtio.h b/hw/s390x/s390-virtio.h
index f588b80a6e..f2377a3e0e 100644
--- a/hw/s390x/s390-virtio.h
+++ b/hw/s390x/s390-virtio.h
@@ -24,6 +24,7 @@ void s390_init_ipl_dev(const char *kernel_filename,
                        const char *kernel_cmdline,
                        const char *initrd_filename,
                        const char *firmware,
+                       const char *netboot_fw,
                        bool enforce_bios);
 void s390_create_virtio_net(BusState *bus, const char *name);
 void s390_nmi(NMIState *n, int cpu_index, Error **errp);
diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
index bbfb5dc289..a53f058621 100644
--- a/hw/scsi/scsi-disk.c
+++ b/hw/scsi/scsi-disk.c
@@ -2240,7 +2240,7 @@ static void scsi_disk_resize_cb(void *opaque)
     }
 }
 
-static void scsi_cd_change_media_cb(void *opaque, bool load)
+static void scsi_cd_change_media_cb(void *opaque, bool load, Error **errp)
 {
     SCSIDiskState *s = opaque;
 
@@ -2328,7 +2328,13 @@ static void scsi_realize(SCSIDevice *dev, Error **errp)
             return;
         }
     }
-    blkconf_apply_backend_options(&dev->conf);
+    blkconf_apply_backend_options(&dev->conf,
+                                  blk_is_read_only(s->qdev.conf.blk),
+                                  dev->type == TYPE_DISK, &err);
+    if (err) {
+        error_propagate(errp, err);
+        return;
+    }
 
     if (s->qdev.conf.discard_granularity == -1) {
         s->qdev.conf.discard_granularity =
@@ -2380,7 +2386,7 @@ static void scsi_cd_realize(SCSIDevice *dev, Error **errp)
     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev);
 
     if (!dev->conf.blk) {
-        dev->conf.blk = blk_new();
+        dev->conf.blk = blk_new(0, BLK_PERM_ALL);
     }
 
     s->qdev.blocksize = 2048;
diff --git a/hw/sd/Makefile.objs b/hw/sd/Makefile.objs
index 31c83308f2..c2b7664264 100644
--- a/hw/sd/Makefile.objs
+++ b/hw/sd/Makefile.objs
@@ -6,3 +6,4 @@ common-obj-$(CONFIG_SDHCI) += sdhci.o
 obj-$(CONFIG_MILKYMIST) += milkymist-memcard.o
 obj-$(CONFIG_OMAP) += omap_mmc.o
 obj-$(CONFIG_PXA2XX) += pxa2xx_mmci.o
+obj-$(CONFIG_RASPI) += bcm2835_sdhost.o
diff --git a/hw/sd/bcm2835_sdhost.c b/hw/sd/bcm2835_sdhost.c
new file mode 100644
index 0000000000..f7f4e656df
--- /dev/null
+++ b/hw/sd/bcm2835_sdhost.c
@@ -0,0 +1,429 @@
+/*
+ * Raspberry Pi (BCM2835) SD Host Controller
+ *
+ * Copyright (c) 2017 Antfield SAS
+ *
+ * Authors:
+ *  Clement Deschamps <clement.deschamps@antfield.fr>
+ *  Luc Michel <luc.michel@antfield.fr>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/log.h"
+#include "sysemu/blockdev.h"
+#include "hw/sd/bcm2835_sdhost.h"
+
+#define TYPE_BCM2835_SDHOST_BUS "bcm2835-sdhost-bus"
+#define BCM2835_SDHOST_BUS(obj) \
+    OBJECT_CHECK(SDBus, (obj), TYPE_BCM2835_SDHOST_BUS)
+
+#define SDCMD  0x00 /* Command to SD card              - 16 R/W */
+#define SDARG  0x04 /* Argument to SD card             - 32 R/W */
+#define SDTOUT 0x08 /* Start value for timeout counter - 32 R/W */
+#define SDCDIV 0x0c /* Start value for clock divider   - 11 R/W */
+#define SDRSP0 0x10 /* SD card rsp (31:0)         - 32 R   */
+#define SDRSP1 0x14 /* SD card rsp (63:32)        - 32 R   */
+#define SDRSP2 0x18 /* SD card rsp (95:64)        - 32 R   */
+#define SDRSP3 0x1c /* SD card rsp (127:96)       - 32 R   */
+#define SDHSTS 0x20 /* SD host status                  - 11 R   */
+#define SDVDD  0x30 /* SD card power control           -  1 R/W */
+#define SDEDM  0x34 /* Emergency Debug Mode            - 13 R/W */
+#define SDHCFG 0x38 /* Host configuration              -  2 R/W */
+#define SDHBCT 0x3c /* Host byte count (debug)         - 32 R/W */
+#define SDDATA 0x40 /* Data to/from SD card            - 32 R/W */
+#define SDHBLC 0x50 /* Host block count (SDIO/SDHC)    -  9 R/W */
+
+#define SDCMD_NEW_FLAG                  0x8000
+#define SDCMD_FAIL_FLAG                 0x4000
+#define SDCMD_BUSYWAIT                  0x800
+#define SDCMD_NO_RESPONSE               0x400
+#define SDCMD_LONG_RESPONSE             0x200
+#define SDCMD_WRITE_CMD                 0x80
+#define SDCMD_READ_CMD                  0x40
+#define SDCMD_CMD_MASK                  0x3f
+
+#define SDCDIV_MAX_CDIV                 0x7ff
+
+#define SDHSTS_BUSY_IRPT                0x400
+#define SDHSTS_BLOCK_IRPT               0x200
+#define SDHSTS_SDIO_IRPT                0x100
+#define SDHSTS_REW_TIME_OUT             0x80
+#define SDHSTS_CMD_TIME_OUT             0x40
+#define SDHSTS_CRC16_ERROR              0x20
+#define SDHSTS_CRC7_ERROR               0x10
+#define SDHSTS_FIFO_ERROR               0x08
+/* Reserved */
+/* Reserved */
+#define SDHSTS_DATA_FLAG                0x01
+
+#define SDHCFG_BUSY_IRPT_EN     (1 << 10)
+#define SDHCFG_BLOCK_IRPT_EN    (1 << 8)
+#define SDHCFG_SDIO_IRPT_EN     (1 << 5)
+#define SDHCFG_DATA_IRPT_EN     (1 << 4)
+#define SDHCFG_SLOW_CARD        (1 << 3)
+#define SDHCFG_WIDE_EXT_BUS     (1 << 2)
+#define SDHCFG_WIDE_INT_BUS     (1 << 1)
+#define SDHCFG_REL_CMD_LINE     (1 << 0)
+
+#define SDEDM_FORCE_DATA_MODE   (1 << 19)
+#define SDEDM_CLOCK_PULSE       (1 << 20)
+#define SDEDM_BYPASS            (1 << 21)
+
+#define SDEDM_WRITE_THRESHOLD_SHIFT 9
+#define SDEDM_READ_THRESHOLD_SHIFT 14
+#define SDEDM_THRESHOLD_MASK     0x1f
+
+#define SDEDM_FSM_MASK           0xf
+#define SDEDM_FSM_IDENTMODE      0x0
+#define SDEDM_FSM_DATAMODE       0x1
+#define SDEDM_FSM_READDATA       0x2
+#define SDEDM_FSM_WRITEDATA      0x3
+#define SDEDM_FSM_READWAIT       0x4
+#define SDEDM_FSM_READCRC        0x5
+#define SDEDM_FSM_WRITECRC       0x6
+#define SDEDM_FSM_WRITEWAIT1     0x7
+#define SDEDM_FSM_POWERDOWN      0x8
+#define SDEDM_FSM_POWERUP        0x9
+#define SDEDM_FSM_WRITESTART1    0xa
+#define SDEDM_FSM_WRITESTART2    0xb
+#define SDEDM_FSM_GENPULSES      0xc
+#define SDEDM_FSM_WRITEWAIT2     0xd
+#define SDEDM_FSM_STARTPOWDOWN   0xf
+
+#define SDDATA_FIFO_WORDS        16
+
+static void bcm2835_sdhost_update_irq(BCM2835SDHostState *s)
+{
+    uint32_t irq = s->status &
+        (SDHSTS_BUSY_IRPT | SDHSTS_BLOCK_IRPT | SDHSTS_SDIO_IRPT);
+    qemu_set_irq(s->irq, !!irq);
+}
+
+static void bcm2835_sdhost_send_command(BCM2835SDHostState *s)
+{
+    SDRequest request;
+    uint8_t rsp[16];
+    int rlen;
+
+    request.cmd = s->cmd & SDCMD_CMD_MASK;
+    request.arg = s->cmdarg;
+
+    rlen = sdbus_do_command(&s->sdbus, &request, rsp);
+    if (rlen < 0) {
+        goto error;
+    }
+    if (!(s->cmd & SDCMD_NO_RESPONSE)) {
+#define RWORD(n) (((uint32_t)rsp[n] << 24) | (rsp[n + 1] << 16) \
+                  | (rsp[n + 2] << 8) | rsp[n + 3])
+        if (rlen == 0 || (rlen == 4 && (s->cmd & SDCMD_LONG_RESPONSE))) {
+            goto error;
+        }
+        if (rlen != 4 && rlen != 16) {
+            goto error;
+        }
+        if (rlen == 4) {
+            s->rsp[0] = RWORD(0);
+            s->rsp[1] = s->rsp[2] = s->rsp[3] = 0;
+        } else {
+            s->rsp[0] = RWORD(12);
+            s->rsp[1] = RWORD(8);
+            s->rsp[2] = RWORD(4);
+            s->rsp[3] = RWORD(0);
+        }
+#undef RWORD
+    }
+    return;
+
+error:
+    s->cmd |= SDCMD_FAIL_FLAG;
+    s->status |= SDHSTS_CMD_TIME_OUT;
+}
+
+static void bcm2835_sdhost_fifo_push(BCM2835SDHostState *s, uint32_t value)
+{
+    int n;
+
+    if (s->fifo_len == BCM2835_SDHOST_FIFO_LEN) {
+        /* FIFO overflow */
+        return;
+    }
+    n = (s->fifo_pos + s->fifo_len) & (BCM2835_SDHOST_FIFO_LEN - 1);
+    s->fifo_len++;
+    s->fifo[n] = value;
+}
+
+static uint32_t bcm2835_sdhost_fifo_pop(BCM2835SDHostState *s)
+{
+    uint32_t value;
+
+    if (s->fifo_len == 0) {
+        /* FIFO underflow */
+        return 0;
+    }
+    value = s->fifo[s->fifo_pos];
+    s->fifo_len--;
+    s->fifo_pos = (s->fifo_pos + 1) & (BCM2835_SDHOST_FIFO_LEN - 1);
+    return value;
+}
+
+static void bcm2835_sdhost_fifo_run(BCM2835SDHostState *s)
+{
+    uint32_t value = 0;
+    int n;
+    int is_read;
+
+    is_read = (s->cmd & SDCMD_READ_CMD) != 0;
+    if (s->datacnt != 0 && (!is_read || sdbus_data_ready(&s->sdbus))) {
+        if (is_read) {
+            n = 0;
+            while (s->datacnt && s->fifo_len < BCM2835_SDHOST_FIFO_LEN) {
+                value |= (uint32_t)sdbus_read_data(&s->sdbus) << (n * 8);
+                s->datacnt--;
+                n++;
+                if (n == 4) {
+                    bcm2835_sdhost_fifo_push(s, value);
+                    n = 0;
+                    value = 0;
+                }
+            }
+            if (n != 0) {
+                bcm2835_sdhost_fifo_push(s, value);
+            }
+        } else { /* write */
+            n = 0;
+            while (s->datacnt > 0 && (s->fifo_len > 0 || n > 0)) {
+                if (n == 0) {
+                    value = bcm2835_sdhost_fifo_pop(s);
+                    n = 4;
+                }
+                n--;
+                s->datacnt--;
+                sdbus_write_data(&s->sdbus, value & 0xff);
+                value >>= 8;
+            }
+        }
+    }
+    if (s->datacnt == 0) {
+        s->status |= SDHSTS_DATA_FLAG;
+
+        s->edm &= ~0xf;
+        s->edm |= SDEDM_FSM_DATAMODE;
+
+        if (s->config & SDHCFG_DATA_IRPT_EN) {
+            s->status |= SDHSTS_SDIO_IRPT;
+        }
+
+        if ((s->cmd & SDCMD_BUSYWAIT) && (s->config & SDHCFG_BUSY_IRPT_EN)) {
+            s->status |= SDHSTS_BUSY_IRPT;
+        }
+
+        if ((s->cmd & SDCMD_WRITE_CMD) && (s->config & SDHCFG_BLOCK_IRPT_EN)) {
+            s->status |= SDHSTS_BLOCK_IRPT;
+        }
+
+        bcm2835_sdhost_update_irq(s);
+    }
+
+    s->edm &= ~(0x1f << 4);
+    s->edm |= ((s->fifo_len & 0x1f) << 4);
+}
+
+static uint64_t bcm2835_sdhost_read(void *opaque, hwaddr offset,
+    unsigned size)
+{
+    BCM2835SDHostState *s = (BCM2835SDHostState *)opaque;
+    uint32_t res = 0;
+
+    switch (offset) {
+    case SDCMD:
+        res = s->cmd;
+        break;
+    case SDHSTS:
+        res = s->status;
+        break;
+    case SDRSP0:
+        res = s->rsp[0];
+        break;
+    case SDRSP1:
+        res = s->rsp[1];
+        break;
+    case SDRSP2:
+        res = s->rsp[2];
+        break;
+    case SDRSP3:
+        res = s->rsp[3];
+        break;
+    case SDEDM:
+        res = s->edm;
+        break;
+    case SDVDD:
+        res = s->vdd;
+        break;
+    case SDDATA:
+        res = bcm2835_sdhost_fifo_pop(s);
+        bcm2835_sdhost_fifo_run(s);
+        break;
+    case SDHBCT:
+        res = s->hbct;
+        break;
+    case SDHBLC:
+        res = s->hblc;
+        break;
+
+    default:
+        qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad offset %"HWADDR_PRIx"\n",
+                      __func__, offset);
+        res = 0;
+        break;
+    }
+
+    return res;
+}
+
+static void bcm2835_sdhost_write(void *opaque, hwaddr offset,
+    uint64_t value, unsigned size)
+{
+    BCM2835SDHostState *s = (BCM2835SDHostState *)opaque;
+
+    switch (offset) {
+    case SDCMD:
+        s->cmd = value;
+        if (value & SDCMD_NEW_FLAG) {
+            bcm2835_sdhost_send_command(s);
+            bcm2835_sdhost_fifo_run(s);
+            s->cmd &= ~SDCMD_NEW_FLAG;
+        }
+        break;
+    case SDTOUT:
+        break;
+    case SDCDIV:
+        break;
+    case SDHSTS:
+        s->status &= ~value;
+        bcm2835_sdhost_update_irq(s);
+        break;
+    case SDARG:
+        s->cmdarg = value;
+        break;
+    case SDEDM:
+        if ((value & 0xf) == 0xf) {
+            /* power down */
+            value &= ~0xf;
+        }
+        s->edm = value;
+        break;
+    case SDHCFG:
+        s->config = value;
+        bcm2835_sdhost_fifo_run(s);
+        break;
+    case SDVDD:
+        s->vdd = value;
+        break;
+    case SDDATA:
+        bcm2835_sdhost_fifo_push(s, value);
+        bcm2835_sdhost_fifo_run(s);
+        break;
+    case SDHBCT:
+        s->hbct = value;
+        break;
+    case SDHBLC:
+        s->hblc = value;
+        s->datacnt = s->hblc * s->hbct;
+        bcm2835_sdhost_fifo_run(s);
+        break;
+
+    default:
+        qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad offset %"HWADDR_PRIx"\n",
+                      __func__, offset);
+        break;
+    }
+}
+
+static const MemoryRegionOps bcm2835_sdhost_ops = {
+    .read = bcm2835_sdhost_read,
+    .write = bcm2835_sdhost_write,
+    .endianness = DEVICE_NATIVE_ENDIAN,
+};
+
+static const VMStateDescription vmstate_bcm2835_sdhost = {
+    .name = TYPE_BCM2835_SDHOST,
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT32(cmd, BCM2835SDHostState),
+        VMSTATE_UINT32(cmdarg, BCM2835SDHostState),
+        VMSTATE_UINT32(status, BCM2835SDHostState),
+        VMSTATE_UINT32_ARRAY(rsp, BCM2835SDHostState, 4),
+        VMSTATE_UINT32(config, BCM2835SDHostState),
+        VMSTATE_UINT32(edm, BCM2835SDHostState),
+        VMSTATE_UINT32(vdd, BCM2835SDHostState),
+        VMSTATE_UINT32(hbct, BCM2835SDHostState),
+        VMSTATE_UINT32(hblc, BCM2835SDHostState),
+        VMSTATE_INT32(fifo_pos, BCM2835SDHostState),
+        VMSTATE_INT32(fifo_len, BCM2835SDHostState),
+        VMSTATE_UINT32_ARRAY(fifo, BCM2835SDHostState, BCM2835_SDHOST_FIFO_LEN),
+        VMSTATE_UINT32(datacnt, BCM2835SDHostState),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static void bcm2835_sdhost_init(Object *obj)
+{
+    BCM2835SDHostState *s = BCM2835_SDHOST(obj);
+
+    qbus_create_inplace(&s->sdbus, sizeof(s->sdbus),
+                        TYPE_BCM2835_SDHOST_BUS, DEVICE(s), "sd-bus");
+
+    memory_region_init_io(&s->iomem, obj, &bcm2835_sdhost_ops, s,
+                          TYPE_BCM2835_SDHOST, 0x1000);
+    sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->iomem);
+    sysbus_init_irq(SYS_BUS_DEVICE(s), &s->irq);
+}
+
+static void bcm2835_sdhost_reset(DeviceState *dev)
+{
+    BCM2835SDHostState *s = BCM2835_SDHOST(dev);
+
+    s->cmd = 0;
+    s->cmdarg = 0;
+    s->edm = 0x0000c60f;
+    s->config = 0;
+    s->hbct = 0;
+    s->hblc = 0;
+    s->datacnt = 0;
+    s->fifo_pos = 0;
+    s->fifo_len = 0;
+}
+
+static void bcm2835_sdhost_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+
+    dc->reset = bcm2835_sdhost_reset;
+    dc->vmsd = &vmstate_bcm2835_sdhost;
+}
+
+static TypeInfo bcm2835_sdhost_info = {
+    .name          = TYPE_BCM2835_SDHOST,
+    .parent        = TYPE_SYS_BUS_DEVICE,
+    .instance_size = sizeof(BCM2835SDHostState),
+    .class_init    = bcm2835_sdhost_class_init,
+    .instance_init = bcm2835_sdhost_init,
+};
+
+static const TypeInfo bcm2835_sdhost_bus_info = {
+    .name = TYPE_BCM2835_SDHOST_BUS,
+    .parent = TYPE_SD_BUS,
+    .instance_size = sizeof(SDBus),
+};
+
+static void bcm2835_sdhost_register_types(void)
+{
+    type_register_static(&bcm2835_sdhost_info);
+    type_register_static(&bcm2835_sdhost_bus_info);
+}
+
+type_init(bcm2835_sdhost_register_types)
diff --git a/hw/sd/core.c b/hw/sd/core.c
index 14c2bdf27b..295dc44ab7 100644
--- a/hw/sd/core.c
+++ b/hw/sd/core.c
@@ -131,6 +131,33 @@ void sdbus_set_readonly(SDBus *sdbus, bool readonly)
     }
 }
 
+void sdbus_reparent_card(SDBus *from, SDBus *to)
+{
+    SDState *card = get_card(from);
+    SDCardClass *sc;
+    bool readonly;
+
+    /* We directly reparent the card object rather than implementing this
+     * as a hotpluggable connection because we don't want to expose SD cards
+     * to users as being hotpluggable, and we can get away with it in this
+     * limited use case. This could perhaps be implemented more cleanly in
+     * future by adding support to the hotplug infrastructure for "device
+     * can be hotplugged only via code, not by user".
+     */
+
+    if (!card) {
+        return;
+    }
+
+    sc = SD_CARD_GET_CLASS(card);
+    readonly = sc->get_readonly(card);
+
+    sdbus_set_inserted(from, false);
+    qdev_set_parent_bus(DEVICE(card), &to->qbus);
+    sdbus_set_inserted(to, true);
+    sdbus_set_readonly(to, readonly);
+}
+
 static const TypeInfo sd_bus_info = {
     .name = TYPE_SD_BUS,
     .parent = TYPE_BUS,
diff --git a/hw/sd/sd.c b/hw/sd/sd.c
index 8e88e8311a..ba47bff4db 100644
--- a/hw/sd/sd.c
+++ b/hw/sd/sd.c
@@ -458,7 +458,7 @@ static bool sd_get_readonly(SDState *sd)
     return sd->wp_switch;
 }
 
-static void sd_cardchange(void *opaque, bool load)
+static void sd_cardchange(void *opaque, bool load, Error **errp)
 {
     SDState *sd = opaque;
     DeviceState *dev = DEVICE(sd);
@@ -1887,6 +1887,7 @@ static void sd_instance_finalize(Object *obj)
 static void sd_realize(DeviceState *dev, Error **errp)
 {
     SDState *sd = SD_CARD(dev);
+    int ret;
 
     if (sd->blk && blk_is_read_only(sd->blk)) {
         error_setg(errp, "Cannot use read-only drive as SD card");
@@ -1894,6 +1895,11 @@ static void sd_realize(DeviceState *dev, Error **errp)
     }
 
     if (sd->blk) {
+        ret = blk_set_perm(sd->blk, BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE,
+                           BLK_PERM_ALL, errp);
+        if (ret < 0) {
+            return;
+        }
         blk_set_dev_ops(sd->blk, &sd_block_ops, sd);
     }
 }
diff --git a/hw/sd/sdhci.c b/hw/sd/sdhci.c
index da32b5f709..6d6a791ee9 100644
--- a/hw/sd/sdhci.c
+++ b/hw/sd/sdhci.c
@@ -119,6 +119,7 @@
     (SDHC_CAPAB_BASECLKFREQ << 8) | (SDHC_CAPAB_TOUNIT << 7) | \
     (SDHC_CAPAB_TOCLKFREQ))
 
+#define MASK_TRNMOD     0x0037
 #define MASKED_WRITE(reg, mask, val)  (reg = (reg & (mask)) | (val))
 
 static uint8_t sdhci_slotint(SDHCIState *s)
@@ -486,6 +487,11 @@ static void sdhci_sdma_transfer_multi_blocks(SDHCIState *s)
     uint32_t boundary_chk = 1 << (((s->blksize & 0xf000) >> 12) + 12);
     uint32_t boundary_count = boundary_chk - (s->sdmasysad % boundary_chk);
 
+    if (!(s->trnmod & SDHC_TRNS_BLK_CNT_EN) || !s->blkcnt) {
+        qemu_log_mask(LOG_UNIMP, "infinite transfer is not supported\n");
+        return;
+    }
+
     /* XXX: Some sd/mmc drivers (for example, u-boot-slp) do not account for
      * possible stop at page boundary if initial address is not page aligned,
      * allow them to work properly */
@@ -564,7 +570,6 @@ static void sdhci_sdma_transfer_multi_blocks(SDHCIState *s)
 }
 
 /* single block SDMA transfer */
-
 static void sdhci_sdma_transfer_single_block(SDHCIState *s)
 {
     int n;
@@ -583,10 +588,7 @@ static void sdhci_sdma_transfer_single_block(SDHCIState *s)
             sdbus_write_data(&s->sdbus, s->fifo_buffer[n]);
         }
     }
-
-    if (s->trnmod & SDHC_TRNS_BLK_CNT_EN) {
-        s->blkcnt--;
-    }
+    s->blkcnt--;
 
     sdhci_end_transfer(s);
 }
@@ -797,11 +799,6 @@ static void sdhci_data_transfer(void *opaque)
     if (s->trnmod & SDHC_TRNS_DMA) {
         switch (SDHC_DMA_TYPE(s->hostctl)) {
         case SDHC_CTRL_SDMA:
-            if ((s->trnmod & SDHC_TRNS_MULTI) &&
-                    (!(s->trnmod & SDHC_TRNS_BLK_CNT_EN) || s->blkcnt == 0)) {
-                break;
-            }
-
             if ((s->blkcnt == 1) || !(s->trnmod & SDHC_TRNS_MULTI)) {
                 sdhci_sdma_transfer_single_block(s);
             } else {
@@ -1022,7 +1019,11 @@ sdhci_write(void *opaque, hwaddr offset, uint64_t val, unsigned size)
         /* Writing to last byte of sdmasysad might trigger transfer */
         if (!(mask & 0xFF000000) && TRANSFERRING_DATA(s->prnsts) && s->blkcnt &&
                 s->blksize && SDHC_DMA_TYPE(s->hostctl) == SDHC_CTRL_SDMA) {
-            sdhci_sdma_transfer_multi_blocks(s);
+            if (s->trnmod & SDHC_TRNS_MULTI) {
+                sdhci_sdma_transfer_multi_blocks(s);
+            } else {
+                sdhci_sdma_transfer_single_block(s);
+            }
         }
         break;
     case SDHC_BLKSIZE:
@@ -1050,7 +1051,7 @@ sdhci_write(void *opaque, hwaddr offset, uint64_t val, unsigned size)
         if (!(s->capareg & SDHC_CAN_DO_DMA)) {
             value &= ~SDHC_TRNS_DMA;
         }
-        MASKED_WRITE(s->trnmod, mask, value);
+        MASKED_WRITE(s->trnmod, mask, value & MASK_TRNMOD);
         MASKED_WRITE(s->cmdreg, mask >> 16, value >> 16);
 
         /* Writing to the upper byte of CMDREG triggers SD command generation */
diff --git a/hw/timer/Makefile.objs b/hw/timer/Makefile.objs
index fc9966880f..dd6f27e2a3 100644
--- a/hw/timer/Makefile.objs
+++ b/hw/timer/Makefile.objs
@@ -1,5 +1,6 @@
 common-obj-$(CONFIG_ARM_TIMER) += arm_timer.o
 common-obj-$(CONFIG_ARM_MPTIMER) += arm_mptimer.o
+common-obj-$(CONFIG_ARM_V7M) += armv7m_systick.o
 common-obj-$(CONFIG_A9_GTIMER) += a9gtimer.o
 common-obj-$(CONFIG_CADENCE) += cadence_ttc.o
 common-obj-$(CONFIG_DS1338) += ds1338.o
diff --git a/hw/timer/armv7m_systick.c b/hw/timer/armv7m_systick.c
new file mode 100644
index 0000000000..df8d2804b3
--- /dev/null
+++ b/hw/timer/armv7m_systick.c
@@ -0,0 +1,240 @@
+/*
+ * ARMv7M SysTick timer
+ *
+ * Copyright (c) 2006-2007 CodeSourcery.
+ * Written by Paul Brook
+ * Copyright (c) 2017 Linaro Ltd
+ * Written by Peter Maydell
+ *
+ * This code is licensed under the GPL (version 2 or later).
+ */
+
+#include "qemu/osdep.h"
+#include "hw/timer/armv7m_systick.h"
+#include "qemu-common.h"
+#include "hw/sysbus.h"
+#include "qemu/timer.h"
+#include "qemu/log.h"
+#include "trace.h"
+
+/* qemu timers run at 1GHz.   We want something closer to 1MHz.  */
+#define SYSTICK_SCALE 1000ULL
+
+#define SYSTICK_ENABLE    (1 << 0)
+#define SYSTICK_TICKINT   (1 << 1)
+#define SYSTICK_CLKSOURCE (1 << 2)
+#define SYSTICK_COUNTFLAG (1 << 16)
+
+int system_clock_scale;
+
+/* Conversion factor from qemu timer to SysTick frequencies.  */
+static inline int64_t systick_scale(SysTickState *s)
+{
+    if (s->control & SYSTICK_CLKSOURCE) {
+        return system_clock_scale;
+    } else {
+        return 1000;
+    }
+}
+
+static void systick_reload(SysTickState *s, int reset)
+{
+    /* The Cortex-M3 Devices Generic User Guide says that "When the
+     * ENABLE bit is set to 1, the counter loads the RELOAD value from the
+     * SYST RVR register and then counts down". So, we need to check the
+     * ENABLE bit before reloading the value.
+     */
+    trace_systick_reload();
+
+    if ((s->control & SYSTICK_ENABLE) == 0) {
+        return;
+    }
+
+    if (reset) {
+        s->tick = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+    }
+    s->tick += (s->reload + 1) * systick_scale(s);
+    timer_mod(s->timer, s->tick);
+}
+
+static void systick_timer_tick(void *opaque)
+{
+    SysTickState *s = (SysTickState *)opaque;
+
+    trace_systick_timer_tick();
+
+    s->control |= SYSTICK_COUNTFLAG;
+    if (s->control & SYSTICK_TICKINT) {
+        /* Tell the NVIC to pend the SysTick exception */
+        qemu_irq_pulse(s->irq);
+    }
+    if (s->reload == 0) {
+        s->control &= ~SYSTICK_ENABLE;
+    } else {
+        systick_reload(s, 0);
+    }
+}
+
+static uint64_t systick_read(void *opaque, hwaddr addr, unsigned size)
+{
+    SysTickState *s = opaque;
+    uint32_t val;
+
+    switch (addr) {
+    case 0x0: /* SysTick Control and Status.  */
+        val = s->control;
+        s->control &= ~SYSTICK_COUNTFLAG;
+        break;
+    case 0x4: /* SysTick Reload Value.  */
+        val = s->reload;
+        break;
+    case 0x8: /* SysTick Current Value.  */
+    {
+        int64_t t;
+
+        if ((s->control & SYSTICK_ENABLE) == 0) {
+            val = 0;
+            break;
+        }
+        t = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+        if (t >= s->tick) {
+            val = 0;
+            break;
+        }
+        val = ((s->tick - (t + 1)) / systick_scale(s)) + 1;
+        /* The interrupt in triggered when the timer reaches zero.
+           However the counter is not reloaded until the next clock
+           tick.  This is a hack to return zero during the first tick.  */
+        if (val > s->reload) {
+            val = 0;
+        }
+        break;
+    }
+    case 0xc: /* SysTick Calibration Value.  */
+        val = 10000;
+        break;
+    default:
+        val = 0;
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "SysTick: Bad read offset 0x%" HWADDR_PRIx "\n", addr);
+        break;
+    }
+
+    trace_systick_read(addr, val, size);
+    return val;
+}
+
+static void systick_write(void *opaque, hwaddr addr,
+                          uint64_t value, unsigned size)
+{
+    SysTickState *s = opaque;
+
+    trace_systick_write(addr, value, size);
+
+    switch (addr) {
+    case 0x0: /* SysTick Control and Status.  */
+    {
+        uint32_t oldval = s->control;
+
+        s->control &= 0xfffffff8;
+        s->control |= value & 7;
+        if ((oldval ^ value) & SYSTICK_ENABLE) {
+            int64_t now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+            if (value & SYSTICK_ENABLE) {
+                if (s->tick) {
+                    s->tick += now;
+                    timer_mod(s->timer, s->tick);
+                } else {
+                    systick_reload(s, 1);
+                }
+            } else {
+                timer_del(s->timer);
+                s->tick -= now;
+                if (s->tick < 0) {
+                    s->tick = 0;
+                }
+            }
+        } else if ((oldval ^ value) & SYSTICK_CLKSOURCE) {
+            /* This is a hack. Force the timer to be reloaded
+               when the reference clock is changed.  */
+            systick_reload(s, 1);
+        }
+        break;
+    }
+    case 0x4: /* SysTick Reload Value.  */
+        s->reload = value;
+        break;
+    case 0x8: /* SysTick Current Value.  Writes reload the timer.  */
+        systick_reload(s, 1);
+        s->control &= ~SYSTICK_COUNTFLAG;
+        break;
+    default:
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "SysTick: Bad write offset 0x%" HWADDR_PRIx "\n", addr);
+    }
+}
+
+static const MemoryRegionOps systick_ops = {
+    .read = systick_read,
+    .write = systick_write,
+    .endianness = DEVICE_NATIVE_ENDIAN,
+    .valid.min_access_size = 4,
+    .valid.max_access_size = 4,
+};
+
+static void systick_reset(DeviceState *dev)
+{
+    SysTickState *s = SYSTICK(dev);
+
+    s->control = 0;
+    s->reload = 0;
+    s->tick = 0;
+    timer_del(s->timer);
+}
+
+static void systick_instance_init(Object *obj)
+{
+    SysBusDevice *sbd = SYS_BUS_DEVICE(obj);
+    SysTickState *s = SYSTICK(obj);
+
+    memory_region_init_io(&s->iomem, obj, &systick_ops, s, "systick", 0xe0);
+    sysbus_init_mmio(sbd, &s->iomem);
+    sysbus_init_irq(sbd, &s->irq);
+    s->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, systick_timer_tick, s);
+}
+
+static const VMStateDescription vmstate_systick = {
+    .name = "armv7m_systick",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT32(control, SysTickState),
+        VMSTATE_UINT32(reload, SysTickState),
+        VMSTATE_INT64(tick, SysTickState),
+        VMSTATE_TIMER_PTR(timer, SysTickState),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static void systick_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+
+    dc->vmsd = &vmstate_systick;
+    dc->reset = systick_reset;
+}
+
+static const TypeInfo armv7m_systick_info = {
+    .name = TYPE_SYSTICK,
+    .parent = TYPE_SYS_BUS_DEVICE,
+    .instance_init = systick_instance_init,
+    .instance_size = sizeof(SysTickState),
+    .class_init = systick_class_init,
+};
+
+static void armv7m_systick_register_types(void)
+{
+    type_register_static(&armv7m_systick_info);
+}
+
+type_init(armv7m_systick_register_types)
diff --git a/hw/timer/imx_gpt.c b/hw/timer/imx_gpt.c
index 010ccbf207..4b9b54bf2e 100644
--- a/hw/timer/imx_gpt.c
+++ b/hw/timer/imx_gpt.c
@@ -296,18 +296,23 @@ static uint64_t imx_gpt_read(void *opaque, hwaddr offset, unsigned size)
     return reg_value;
 }
 
-static void imx_gpt_reset(DeviceState *dev)
-{
-    IMXGPTState *s = IMX_GPT(dev);
 
+static void imx_gpt_reset_common(IMXGPTState *s, bool is_soft_reset)
+{
     /* stop timer */
     ptimer_stop(s->timer);
 
-    /*
-     * Soft reset doesn't touch some bits; hard reset clears them
+    /* Soft reset and hard reset differ only in their handling of the CR
+     * register -- soft reset preserves the values of some bits there.
      */
-    s->cr &= ~(GPT_CR_EN|GPT_CR_ENMOD|GPT_CR_STOPEN|GPT_CR_DOZEN|
-               GPT_CR_WAITEN|GPT_CR_DBGEN);
+    if (is_soft_reset) {
+        /* Clear all CR bits except those that are preserved by soft reset. */
+        s->cr &= GPT_CR_EN | GPT_CR_ENMOD | GPT_CR_STOPEN | GPT_CR_DOZEN |
+            GPT_CR_WAITEN | GPT_CR_DBGEN |
+            (GPT_CR_CLKSRC_MASK << GPT_CR_CLKSRC_SHIFT);
+    } else {
+        s->cr = 0;
+    }
     s->sr = 0;
     s->pr = 0;
     s->ir = 0;
@@ -333,6 +338,18 @@ static void imx_gpt_reset(DeviceState *dev)
     }
 }
 
+static void imx_gpt_soft_reset(DeviceState *dev)
+{
+    IMXGPTState *s = IMX_GPT(dev);
+    imx_gpt_reset_common(s, true);
+}
+
+static void imx_gpt_reset(DeviceState *dev)
+{
+    IMXGPTState *s = IMX_GPT(dev);
+    imx_gpt_reset_common(s, false);
+}
+
 static void imx_gpt_write(void *opaque, hwaddr offset, uint64_t value,
                           unsigned size)
 {
@@ -348,7 +365,7 @@ static void imx_gpt_write(void *opaque, hwaddr offset, uint64_t value,
         s->cr = value & ~0x7c14;
         if (s->cr & GPT_CR_SWR) { /* force reset */
             /* handle the reset */
-            imx_gpt_reset(DEVICE(s));
+            imx_gpt_soft_reset(DEVICE(s));
         } else {
             /* set our freq, as the source might have changed */
             imx_gpt_set_freq(s);
diff --git a/hw/timer/trace-events b/hw/timer/trace-events
index 3495c41c18..d17cfe6b39 100644
--- a/hw/timer/trace-events
+++ b/hw/timer/trace-events
@@ -49,3 +49,9 @@ aspeed_timer_ctrl_pulse_enable(uint8_t i, bool enable) "Timer %" PRIu8 ": %d"
 aspeed_timer_set_ctrl2(uint32_t value) "Value: 0x%" PRIx32
 aspeed_timer_set_value(int timer, int reg, uint32_t value) "Timer %d register %d: 0x%" PRIx32
 aspeed_timer_read(uint64_t offset, unsigned size, uint64_t value) "From 0x%" PRIx64 ": of size %u: 0x%" PRIx64
+
+# hw/timer/armv7m_systick.c
+systick_reload(void) "systick reload"
+systick_timer_tick(void) "systick reload"
+systick_read(uint64_t addr, uint32_t value, unsigned size) "systick read addr 0x%" PRIx64 " data 0x%" PRIx32 " size %u"
+systick_write(uint64_t addr, uint32_t value, unsigned size) "systick write addr 0x%" PRIx64 " data 0x%" PRIx32 " size %u"
diff --git a/hw/usb/bus.c b/hw/usb/bus.c
index efe4b8e1a6..24f1608b4b 100644
--- a/hw/usb/bus.c
+++ b/hw/usb/bus.c
@@ -8,7 +8,6 @@
 #include "monitor/monitor.h"
 #include "trace.h"
 #include "qemu/cutils.h"
-#include "migration/migration.h"
 
 static void usb_bus_dev_print(Monitor *mon, DeviceState *qdev, int indent);
 
@@ -688,8 +687,6 @@ USBDevice *usbdevice_create(const char *cmdline)
     const char *params;
     int len;
     USBDevice *dev;
-    ObjectClass *klass;
-    DeviceClass *dc;
 
     params = strchr(cmdline,':');
     if (params) {
@@ -724,22 +721,6 @@ USBDevice *usbdevice_create(const char *cmdline)
         return NULL;
     }
 
-    klass = object_class_by_name(f->name);
-    if (klass == NULL) {
-        error_report("Device '%s' not found", f->name);
-        return NULL;
-    }
-
-    dc = DEVICE_CLASS(klass);
-
-    if (only_migratable) {
-        if (dc->vmsd->unmigratable) {
-            error_report("Device %s is not migratable, but --only-migratable "
-                         "was specified", f->name);
-            return NULL;
-        }
-    }
-
     if (f->usbdevice_init) {
         dev = f->usbdevice_init(bus, params);
     } else {
diff --git a/hw/usb/dev-storage.c b/hw/usb/dev-storage.c
index c607f7606d..8a61ec94c8 100644
--- a/hw/usb/dev-storage.c
+++ b/hw/usb/dev-storage.c
@@ -589,6 +589,13 @@ static const struct SCSIBusInfo usb_msd_scsi_info_bot = {
     .load_request = usb_msd_load_request,
 };
 
+static void usb_msd_unrealize_storage(USBDevice *dev, Error **errp)
+{
+    MSDState *s = USB_STORAGE_DEV(dev);
+
+    object_unref(OBJECT(&s->bus));
+}
+
 static void usb_msd_realize_storage(USBDevice *dev, Error **errp)
 {
     MSDState *s = USB_STORAGE_DEV(dev);
@@ -603,7 +610,11 @@ static void usb_msd_realize_storage(USBDevice *dev, Error **errp)
 
     blkconf_serial(&s->conf, &dev->serial);
     blkconf_blocksizes(&s->conf);
-    blkconf_apply_backend_options(&s->conf);
+    blkconf_apply_backend_options(&s->conf, blk_is_read_only(blk), true, &err);
+    if (err) {
+        error_propagate(errp, err);
+        return;
+    }
 
     /*
      * Hack alert: this pretends to be a block device, but it's really
@@ -635,6 +646,13 @@ static void usb_msd_realize_storage(USBDevice *dev, Error **errp)
     s->scsi_dev = scsi_dev;
 }
 
+static void usb_msd_unrealize_bot(USBDevice *dev, Error **errp)
+{
+    MSDState *s = USB_STORAGE_DEV(dev);
+
+    object_unref(OBJECT(&s->bus));
+}
+
 static void usb_msd_realize_bot(USBDevice *dev, Error **errp)
 {
     MSDState *s = USB_STORAGE_DEV(dev);
@@ -755,6 +773,7 @@ static void usb_msd_class_initfn_storage(ObjectClass *klass, void *data)
     USBDeviceClass *uc = USB_DEVICE_CLASS(klass);
 
     uc->realize = usb_msd_realize_storage;
+    uc->unrealize = usb_msd_unrealize_storage;
     dc->props = msd_properties;
 }
 
@@ -817,6 +836,7 @@ static void usb_msd_class_initfn_bot(ObjectClass *klass, void *data)
     USBDeviceClass *uc = USB_DEVICE_CLASS(klass);
 
     uc->realize = usb_msd_realize_bot;
+    uc->unrealize = usb_msd_unrealize_bot;
     uc->attached_settable = true;
 }
 
diff --git a/hw/usb/dev-uas.c b/hw/usb/dev-uas.c
index 3b26655889..fffc424396 100644
--- a/hw/usb/dev-uas.c
+++ b/hw/usb/dev-uas.c
@@ -896,6 +896,8 @@ static void usb_uas_unrealize(USBDevice *dev, Error **errp)
     UASDevice *uas = USB_UAS(dev);
 
     qemu_bh_delete(uas->status_bh);
+
+    object_unref(OBJECT(&uas->bus));
 }
 
 static void usb_uas_realize(USBDevice *dev, Error **errp)
diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index 5ce42af9d4..b76f3f62a0 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -1153,7 +1153,7 @@ static AddressSpace *virtio_pci_get_dma_as(DeviceState *d)
     VirtIOPCIProxy *proxy = VIRTIO_PCI(d);
     PCIDevice *dev = &proxy->pci_dev;
 
-    return pci_get_address_space(dev);
+    return pci_device_iommu_address_space(dev);
 }
 
 static int virtio_pci_add_mem_cap(VirtIOPCIProxy *proxy,
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 23483c752f..efce4b343a 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -282,12 +282,17 @@ static inline void vring_set_avail_event(VirtQueue *vq, uint16_t val)
     caches = atomic_rcu_read(&vq->vring.caches);
     pa = offsetof(VRingUsed, ring[vq->vring.num]);
     virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val);
+    address_space_cache_invalidate(&caches->used, pa, sizeof(val));
 }
 
 void virtio_queue_set_notification(VirtQueue *vq, int enable)
 {
     vq->notification = enable;
 
+    if (!vq->vring.desc) {
+        return;
+    }
+
     rcu_read_lock();
     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
         vring_set_avail_event(vq, vring_avail_idx(vq));
@@ -1852,7 +1857,10 @@ void virtio_save(VirtIODevice *vdev, QEMUFile *f)
         if (k->has_variable_vring_alignment) {
             qemu_put_be32(f, vdev->vq[i].vring.align);
         }
-        /* XXX virtio-1 devices */
+        /*
+         * Save desc now, the rest of the ring addresses are saved in
+         * subsections for VIRTIO-1 devices.
+         */
         qemu_put_be64(f, vdev->vq[i].vring.desc);
         qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
         if (k->save_queue) {
@@ -1993,14 +2001,11 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
         vdev->vq[i].signalled_used_valid = false;
         vdev->vq[i].notification = true;
 
-        if (vdev->vq[i].vring.desc) {
-            /* XXX virtio-1 devices */
-            virtio_queue_update_rings(vdev, i);
-        } else if (vdev->vq[i].last_avail_idx) {
+        if (!vdev->vq[i].vring.desc && vdev->vq[i].last_avail_idx) {
             error_report("VQ %d address 0x0 "
                          "inconsistent with Host index 0x%x",
                          i, vdev->vq[i].last_avail_idx);
-                return -1;
+            return -1;
         }
         if (k->load_queue) {
             ret = k->load_queue(qbus->parent, i, f);
@@ -2061,6 +2066,19 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
     for (i = 0; i < num; i++) {
         if (vdev->vq[i].vring.desc) {
             uint16_t nheads;
+
+            /*
+             * VIRTIO-1 devices migrate desc, used, and avail ring addresses so
+             * only the region cache needs to be set up.  Legacy devices need
+             * to calculate used and avail ring addresses based on the desc
+             * address.
+             */
+            if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
+                virtio_init_region_cache(vdev, i);
+            } else {
+                virtio_queue_update_rings(vdev, i);
+            }
+
             nheads = vring_avail_idx(&vdev->vq[i]) - vdev->vq[i].last_avail_idx;
             /* Check it isn't doing strange things with descriptor numbers. */
             if (nheads > vdev->vq[i].vring.num) {
@@ -2291,7 +2309,7 @@ static bool virtio_queue_host_notifier_aio_poll(void *opaque)
     VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
     bool progress;
 
-    if (virtio_queue_empty(vq)) {
+    if (!vq->vring.desc || virtio_queue_empty(vq)) {
         return false;
     }
 
diff --git a/include/block/block.h b/include/block/block.h
index bde5ebda18..c7c4a3ac3a 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -82,6 +82,7 @@ typedef struct HDGeometry {
 } HDGeometry;
 
 #define BDRV_O_RDWR        0x0002
+#define BDRV_O_RESIZE      0x0004 /* request permission for resizing the node */
 #define BDRV_O_SNAPSHOT    0x0008 /* open the file read only and save writes in a snapshot */
 #define BDRV_O_TEMPORARY   0x0010 /* delete the file after use */
 #define BDRV_O_NOCACHE     0x0020 /* do not use the host page cache */
@@ -187,6 +188,42 @@ typedef enum BlockOpType {
     BLOCK_OP_TYPE_MAX,
 } BlockOpType;
 
+/* Block node permission constants */
+enum {
+    /**
+     * A user that has the "permission" of consistent reads is guaranteed that
+     * their view of the contents of the block device is complete and
+     * self-consistent, representing the contents of a disk at a specific
+     * point.
+     *
+     * For most block devices (including their backing files) this is true, but
+     * the property cannot be maintained in a few situations like for
+     * intermediate nodes of a commit block job.
+     */
+    BLK_PERM_CONSISTENT_READ    = 0x01,
+
+    /** This permission is required to change the visible disk contents. */
+    BLK_PERM_WRITE              = 0x02,
+
+    /**
+     * This permission (which is weaker than BLK_PERM_WRITE) is both enough and
+     * required for writes to the block node when the caller promises that
+     * the visible disk content doesn't change.
+     */
+    BLK_PERM_WRITE_UNCHANGED    = 0x04,
+
+    /** This permission is required to change the size of a block node. */
+    BLK_PERM_RESIZE             = 0x08,
+
+    /**
+     * This permission is required to change the node that this BdrvChild
+     * points to.
+     */
+    BLK_PERM_GRAPH_MOD          = 0x10,
+
+    BLK_PERM_ALL                = 0x1f,
+};
+
 /* disk I/O throttling */
 void bdrv_init(void);
 void bdrv_init_with_whitelist(void);
@@ -199,7 +236,8 @@ int bdrv_create(BlockDriver *drv, const char* filename,
                 QemuOpts *opts, Error **errp);
 int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp);
 BlockDriverState *bdrv_new(void);
-void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top);
+void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top,
+                 Error **errp);
 void bdrv_replace_in_backing_chain(BlockDriverState *old,
                                    BlockDriverState *new);
 
@@ -210,7 +248,8 @@ BdrvChild *bdrv_open_child(const char *filename,
                            BlockDriverState* parent,
                            const BdrvChildRole *child_role,
                            bool allow_none, Error **errp);
-void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd);
+void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
+                         Error **errp);
 int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
                            const char *bdref_key, Error **errp);
 BlockDriverState *bdrv_open(const char *filename, const char *reference,
@@ -484,7 +523,8 @@ void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child);
 BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
                              BlockDriverState *child_bs,
                              const char *child_name,
-                             const BdrvChildRole *child_role);
+                             const BdrvChildRole *child_role,
+                             Error **errp);
 
 bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp);
 void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason);
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 1670941da9..a57c0bfb55 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -320,6 +320,59 @@ struct BlockDriver {
     void (*bdrv_del_child)(BlockDriverState *parent, BdrvChild *child,
                            Error **errp);
 
+    /**
+     * Informs the block driver that a permission change is intended. The
+     * driver checks whether the change is permissible and may take other
+     * preparations for the change (e.g. get file system locks). This operation
+     * is always followed either by a call to either .bdrv_set_perm or
+     * .bdrv_abort_perm_update.
+     *
+     * Checks whether the requested set of cumulative permissions in @perm
+     * can be granted for accessing @bs and whether no other users are using
+     * permissions other than those given in @shared (both arguments take
+     * BLK_PERM_* bitmasks).
+     *
+     * If both conditions are met, 0 is returned. Otherwise, -errno is returned
+     * and errp is set to an error describing the conflict.
+     */
+    int (*bdrv_check_perm)(BlockDriverState *bs, uint64_t perm,
+                           uint64_t shared, Error **errp);
+
+    /**
+     * Called to inform the driver that the set of cumulative set of used
+     * permissions for @bs has changed to @perm, and the set of sharable
+     * permission to @shared. The driver can use this to propagate changes to
+     * its children (i.e. request permissions only if a parent actually needs
+     * them).
+     *
+     * This function is only invoked after bdrv_check_perm(), so block drivers
+     * may rely on preparations made in their .bdrv_check_perm implementation.
+     */
+    void (*bdrv_set_perm)(BlockDriverState *bs, uint64_t perm, uint64_t shared);
+
+    /*
+     * Called to inform the driver that after a previous bdrv_check_perm()
+     * call, the permission update is not performed and any preparations made
+     * for it (e.g. taken file locks) need to be undone.
+     *
+     * This function can be called even for nodes that never saw a
+     * bdrv_check_perm() call. It is a no-op then.
+     */
+    void (*bdrv_abort_perm_update)(BlockDriverState *bs);
+
+    /**
+     * Returns in @nperm and @nshared the permissions that the driver for @bs
+     * needs on its child @c, based on the cumulative permissions requested by
+     * the parents in @parent_perm and @parent_shared.
+     *
+     * If @c is NULL, return the permissions for attaching a new child for the
+     * given @role.
+     */
+     void (*bdrv_child_perm)(BlockDriverState *bs, BdrvChild *c,
+                             const BdrvChildRole *role,
+                             uint64_t parent_perm, uint64_t parent_shared,
+                             uint64_t *nperm, uint64_t *nshared);
+
     QLIST_ENTRY(BlockDriver) list;
 };
 
@@ -388,6 +441,10 @@ typedef struct BdrvAioNotifier {
 } BdrvAioNotifier;
 
 struct BdrvChildRole {
+    /* If true, bdrv_replace_in_backing_chain() doesn't change the node this
+     * BdrvChild points to. */
+    bool stay_at_node;
+
     void (*inherit_options)(int *child_flags, QDict *child_options,
                             int parent_flags, QDict *parent_options);
 
@@ -399,6 +456,12 @@ struct BdrvChildRole {
      * name), or NULL if the parent can't provide a better name. */
     const char* (*get_name)(BdrvChild *child);
 
+    /* Returns a malloced string that describes the parent of the child for a
+     * human reader. This could be a node-name, BlockBackend name, qdev ID or
+     * QOM path of the device owning the BlockBackend, job type and ID etc. The
+     * caller is responsible for freeing the memory. */
+    char* (*get_parent_desc)(BdrvChild *child);
+
     /*
      * If this pair of functions is implemented, the parent doesn't issue new
      * requests after returning from .drained_begin() until .drained_end() is
@@ -409,16 +472,32 @@ struct BdrvChildRole {
      */
     void (*drained_begin)(BdrvChild *child);
     void (*drained_end)(BdrvChild *child);
+
+    void (*attach)(BdrvChild *child);
+    void (*detach)(BdrvChild *child);
 };
 
 extern const BdrvChildRole child_file;
 extern const BdrvChildRole child_format;
+extern const BdrvChildRole child_backing;
 
 struct BdrvChild {
     BlockDriverState *bs;
     char *name;
     const BdrvChildRole *role;
     void *opaque;
+
+    /**
+     * Granted permissions for operating on this BdrvChild (BLK_PERM_* bitmask)
+     */
+    uint64_t perm;
+
+    /**
+     * Permissions that can still be granted to other users of @bs while this
+     * BdrvChild is still attached to it. (BLK_PERM_* bitmask)
+     */
+    uint64_t shared_perm;
+
     QLIST_ENTRY(BdrvChild) next;
     QLIST_ENTRY(BdrvChild) next_parent;
 };
@@ -701,13 +780,16 @@ void stream_start(const char *job_id, BlockDriverState *bs,
  * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
  * @on_error: The action to take upon error.
  * @backing_file_str: String to use as the backing file in @top's overlay
+ * @filter_node_name: The node name that should be assigned to the filter
+ * driver that the commit job inserts into the graph above @top. NULL means
+ * that a node name should be autogenerated.
  * @errp: Error object.
  *
  */
 void commit_start(const char *job_id, BlockDriverState *bs,
                   BlockDriverState *base, BlockDriverState *top, int64_t speed,
                   BlockdevOnError on_error, const char *backing_file_str,
-                  Error **errp);
+                  const char *filter_node_name, Error **errp);
 /**
  * commit_active_start:
  * @job_id: The id of the newly-created job, or %NULL to use the
@@ -718,6 +800,9 @@ void commit_start(const char *job_id, BlockDriverState *bs,
  *                  See @BlockJobCreateFlags
  * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
  * @on_error: The action to take upon error.
+ * @filter_node_name: The node name that should be assigned to the filter
+ * driver that the commit job inserts into the graph above @bs. NULL means that
+ * a node name should be autogenerated.
  * @cb: Completion function for the job.
  * @opaque: Opaque pointer value passed to @cb.
  * @errp: Error object.
@@ -727,8 +812,9 @@ void commit_start(const char *job_id, BlockDriverState *bs,
 void commit_active_start(const char *job_id, BlockDriverState *bs,
                          BlockDriverState *base, int creation_flags,
                          int64_t speed, BlockdevOnError on_error,
-                         BlockCompletionFunc *cb,
-                         void *opaque, Error **errp, bool auto_complete);
+                         const char *filter_node_name,
+                         BlockCompletionFunc *cb, void *opaque, Error **errp,
+                         bool auto_complete);
 /*
  * mirror_start:
  * @job_id: The id of the newly-created job, or %NULL to use the
@@ -745,6 +831,9 @@ void commit_active_start(const char *job_id, BlockDriverState *bs,
  * @on_source_error: The action to take upon error reading from the source.
  * @on_target_error: The action to take upon error writing to the target.
  * @unmap: Whether to unmap target where source sectors only contain zeroes.
+ * @filter_node_name: The node name that should be assigned to the filter
+ * driver that the mirror job inserts into the graph above @bs. NULL means that
+ * a node name should be autogenerated.
  * @errp: Error object.
  *
  * Start a mirroring operation on @bs.  Clusters that are allocated
@@ -758,7 +847,7 @@ void mirror_start(const char *job_id, BlockDriverState *bs,
                   MirrorSyncMode mode, BlockMirrorBackingMode backing_mode,
                   BlockdevOnError on_source_error,
                   BlockdevOnError on_target_error,
-                  bool unmap, Error **errp);
+                  bool unmap, const char *filter_node_name, Error **errp);
 
 /*
  * backup_job_create:
@@ -796,11 +885,36 @@ void hmp_drive_add_node(Monitor *mon, const char *optstr);
 BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
                                   const char *child_name,
                                   const BdrvChildRole *child_role,
-                                  void *opaque);
+                                  uint64_t perm, uint64_t shared_perm,
+                                  void *opaque, Error **errp);
 void bdrv_root_unref_child(BdrvChild *child);
 
+int bdrv_child_check_perm(BdrvChild *c, uint64_t perm, uint64_t shared,
+                          Error **errp);
+void bdrv_child_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared);
+void bdrv_child_abort_perm_update(BdrvChild *c);
+int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared,
+                            Error **errp);
+
+/* Default implementation for BlockDriver.bdrv_child_perm() that can be used by
+ * block filters: Forward CONSISTENT_READ, WRITE, WRITE_UNCHANGED and RESIZE to
+ * all children */
+void bdrv_filter_default_perms(BlockDriverState *bs, BdrvChild *c,
+                               const BdrvChildRole *role,
+                               uint64_t perm, uint64_t shared,
+                               uint64_t *nperm, uint64_t *nshared);
+
+/* Default implementation for BlockDriver.bdrv_child_perm() that can be used by
+ * (non-raw) image formats: Like above for bs->backing, but for bs->file it
+ * requires WRITE | RESIZE for read-write images, always requires
+ * CONSISTENT_READ and doesn't share WRITE. */
+void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c,
+                               const BdrvChildRole *role,
+                               uint64_t perm, uint64_t shared,
+                               uint64_t *nperm, uint64_t *nshared);
+
 const char *bdrv_get_parent_name(const BlockDriverState *bs);
-void blk_dev_change_media_cb(BlockBackend *blk, bool load);
+void blk_dev_change_media_cb(BlockBackend *blk, bool load, Error **errp);
 bool blk_dev_has_removable_media(BlockBackend *blk);
 bool blk_dev_has_tray(BlockBackend *blk);
 void blk_dev_eject_request(BlockBackend *blk, bool force);
diff --git a/include/block/blockjob.h b/include/block/blockjob.h
index 1acb256223..9e906f7d7e 100644
--- a/include/block/blockjob.h
+++ b/include/block/blockjob.h
@@ -169,13 +169,25 @@ BlockJob *block_job_get(const char *id);
 /**
  * block_job_add_bdrv:
  * @job: A block job
+ * @name: The name to assign to the new BdrvChild
  * @bs: A BlockDriverState that is involved in @job
+ * @perm, @shared_perm: Permissions to request on the node
  *
  * Add @bs to the list of BlockDriverState that are involved in
  * @job. This means that all operations will be blocked on @bs while
  * @job exists.
  */
-void block_job_add_bdrv(BlockJob *job, BlockDriverState *bs);
+int block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs,
+                       uint64_t perm, uint64_t shared_perm, Error **errp);
+
+/**
+ * block_job_remove_all_bdrv:
+ * @job: The block job
+ *
+ * Remove all BlockDriverStates from the list of nodes that are involved in the
+ * job. This removes the blockers added with block_job_add_bdrv().
+ */
+void block_job_remove_all_bdrv(BlockJob *job);
 
 /**
  * block_job_set_speed:
diff --git a/include/block/blockjob_int.h b/include/block/blockjob_int.h
index 82238229c6..3f86cc5acc 100644
--- a/include/block/blockjob_int.h
+++ b/include/block/blockjob_int.h
@@ -119,6 +119,7 @@ struct BlockJobDriver {
  * generated automatically.
  * @job_type: The class object for the newly-created job.
  * @bs: The block
+ * @perm, @shared_perm: Permissions to request for @bs
  * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
  * @cb: Completion function for the job.
  * @opaque: Opaque pointer value passed to @cb.
@@ -134,7 +135,8 @@ struct BlockJobDriver {
  * called from a wrapper that is specific to the job type.
  */
 void *block_job_create(const char *job_id, const BlockJobDriver *driver,
-                       BlockDriverState *bs, int64_t speed, int flags,
+                       BlockDriverState *bs, uint64_t perm,
+                       uint64_t shared_perm, int64_t speed, int flags,
                        BlockCompletionFunc *cb, void *opaque, Error **errp);
 
 /**
diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
index bd15853e51..8c305aa4fa 100644
--- a/include/exec/cpu-common.h
+++ b/include/exec/cpu-common.h
@@ -64,6 +64,7 @@ void qemu_ram_set_idstr(RAMBlock *block, const char *name, DeviceState *dev);
 void qemu_ram_unset_idstr(RAMBlock *block);
 const char *qemu_ram_get_idstr(RAMBlock *rb);
 size_t qemu_ram_pagesize(RAMBlock *block);
+size_t qemu_ram_pagesize_largest(void);
 
 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
                             int len, int is_write);
@@ -105,6 +106,7 @@ typedef int (RAMBlockIterFunc)(const char *block_name, void *host_addr,
     ram_addr_t offset, ram_addr_t length, void *opaque);
 
 int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque);
+int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length);
 
 #endif
 
diff --git a/include/glib-compat.h b/include/glib-compat.h
index 0cd24ffbe9..863c8cf73d 100644
--- a/include/glib-compat.h
+++ b/include/glib-compat.h
@@ -328,4 +328,25 @@ static inline void g_source_set_name_by_id(guint tag, const char *name)
 #define g_test_subprocess() (0)
 #endif
 
+
+#if !GLIB_CHECK_VERSION(2, 34, 0)
+static inline void
+g_test_add_data_func_full(const char *path,
+                          gpointer data,
+                          gpointer fn,
+                          gpointer data_free_func)
+{
+#if GLIB_CHECK_VERSION(2, 26, 0)
+    /* back-compat casts, remove this once we can require new-enough glib */
+    g_test_add_vtable(path, 0, data, NULL,
+                      (GTestFixtureFunc)fn, (GTestFixtureFunc) data_free_func);
+#else
+    /* back-compat casts, remove this once we can require new-enough glib */
+    g_test_add_vtable(path, 0, data, NULL,
+                      (void (*)(void)) fn, (void (*)(void)) data_free_func);
+#endif
+}
+#endif
+
+
 #endif
diff --git a/include/hw/acpi/acpi_dev_interface.h b/include/hw/acpi/acpi_dev_interface.h
index 71d3c48e7d..3c2e4e95a5 100644
--- a/include/hw/acpi/acpi_dev_interface.h
+++ b/include/hw/acpi/acpi_dev_interface.h
@@ -11,6 +11,7 @@ typedef enum {
     ACPI_CPU_HOTPLUG_STATUS = 4,
     ACPI_MEMORY_HOTPLUG_STATUS = 8,
     ACPI_NVDIMM_HOTPLUG_STATUS = 16,
+    ACPI_VMGENID_CHANGE_STATUS = 32,
 } AcpiEventStatusBits;
 
 #define TYPE_ACPI_DEVICE_IF "acpi-device-interface"
diff --git a/include/hw/acpi/aml-build.h b/include/hw/acpi/aml-build.h
index 559326cbd5..00c21f160c 100644
--- a/include/hw/acpi/aml-build.h
+++ b/include/hw/acpi/aml-build.h
@@ -210,6 +210,7 @@ struct AcpiBuildTables {
     GArray *table_data;
     GArray *rsdp;
     GArray *tcpalog;
+    GArray *vmgenid;
     BIOSLinker *linker;
 } AcpiBuildTables;
 
diff --git a/include/hw/acpi/bios-linker-loader.h b/include/hw/acpi/bios-linker-loader.h
index fa1e5d1a4e..efe17b0b9c 100644
--- a/include/hw/acpi/bios-linker-loader.h
+++ b/include/hw/acpi/bios-linker-loader.h
@@ -26,5 +26,12 @@ void bios_linker_loader_add_pointer(BIOSLinker *linker,
                                     const char *src_file,
                                     uint32_t src_offset);
 
+void bios_linker_loader_write_pointer(BIOSLinker *linker,
+                                      const char *dest_file,
+                                      uint32_t dst_patched_offset,
+                                      uint8_t dst_patched_size,
+                                      const char *src_file,
+                                      uint32_t src_offset);
+
 void bios_linker_loader_cleanup(BIOSLinker *linker);
 #endif
diff --git a/include/hw/acpi/vmgenid.h b/include/hw/acpi/vmgenid.h
new file mode 100644
index 0000000000..db7fa0e633
--- /dev/null
+++ b/include/hw/acpi/vmgenid.h
@@ -0,0 +1,35 @@
+#ifndef ACPI_VMGENID_H
+#define ACPI_VMGENID_H
+
+#include "hw/acpi/bios-linker-loader.h"
+#include "hw/qdev.h"
+#include "qemu/uuid.h"
+
+#define VMGENID_DEVICE           "vmgenid"
+#define VMGENID_GUID             "guid"
+#define VMGENID_GUID_FW_CFG_FILE      "etc/vmgenid_guid"
+#define VMGENID_ADDR_FW_CFG_FILE      "etc/vmgenid_addr"
+
+#define VMGENID_FW_CFG_SIZE      4096 /* Occupy a page of memory */
+#define VMGENID_GUID_OFFSET      40   /* allow space for
+                                       * OVMF SDT Header Probe Supressor
+                                       */
+
+#define VMGENID(obj) OBJECT_CHECK(VmGenIdState, (obj), VMGENID_DEVICE)
+
+typedef struct VmGenIdState {
+    DeviceClass parent_obj;
+    QemuUUID guid;                /* The 128-bit GUID seen by the guest */
+    uint8_t vmgenid_addr_le[8];   /* Address of the GUID (little-endian) */
+} VmGenIdState;
+
+static inline Object *find_vmgenid_dev(void)
+{
+    return object_resolve_path_type("", VMGENID_DEVICE, NULL);
+}
+
+void vmgenid_build_acpi(VmGenIdState *vms, GArray *table_data, GArray *guid,
+                        BIOSLinker *linker);
+void vmgenid_add_fw_cfg(VmGenIdState *vms, FWCfgState *s, GArray *guid);
+
+#endif
diff --git a/include/hw/arm/arm.h b/include/hw/arm/arm.h
index c175c0e999..a3f79d3379 100644
--- a/include/hw/arm/arm.h
+++ b/include/hw/arm/arm.h
@@ -26,6 +26,18 @@ typedef enum {
 /* armv7m.c */
 DeviceState *armv7m_init(MemoryRegion *system_memory, int mem_size, int num_irq,
                       const char *kernel_filename, const char *cpu_model);
+/**
+ * armv7m_load_kernel:
+ * @cpu: CPU
+ * @kernel_filename: file to load
+ * @mem_size: mem_size: maximum image size to load
+ *
+ * Load the guest image for an ARMv7M system. This must be called by
+ * any ARMv7M board, either directly or via armv7m_init(). (This is
+ * necessary to ensure that the CPU resets correctly on system reset,
+ * as well as for kernel loading.)
+ */
+void armv7m_load_kernel(ARMCPU *cpu, const char *kernel_filename, int mem_size);
 
 /*
  * struct used as a parameter of the arm_load_kernel machine init
diff --git a/include/hw/arm/armv7m.h b/include/hw/arm/armv7m.h
new file mode 100644
index 0000000000..a9b3f2ab35
--- /dev/null
+++ b/include/hw/arm/armv7m.h
@@ -0,0 +1,63 @@
+/*
+ * ARMv7M CPU object
+ *
+ * Copyright (c) 2017 Linaro Ltd
+ * Written by Peter Maydell <peter.maydell@linaro.org>
+ *
+ * This code is licensed under the GPL version 2 or later.
+ */
+
+#ifndef HW_ARM_ARMV7M_H
+#define HW_ARM_ARMV7M_H
+
+#include "hw/sysbus.h"
+#include "hw/arm/armv7m_nvic.h"
+
+#define TYPE_BITBAND "ARM,bitband-memory"
+#define BITBAND(obj) OBJECT_CHECK(BitBandState, (obj), TYPE_BITBAND)
+
+typedef struct {
+    /*< private >*/
+    SysBusDevice parent_obj;
+    /*< public >*/
+
+    AddressSpace *source_as;
+    MemoryRegion iomem;
+    uint32_t base;
+    MemoryRegion *source_memory;
+} BitBandState;
+
+#define TYPE_ARMV7M "armv7m"
+#define ARMV7M(obj) OBJECT_CHECK(ARMv7MState, (obj), TYPE_ARMV7M)
+
+#define ARMV7M_NUM_BITBANDS 2
+
+/* ARMv7M container object.
+ * + Unnamed GPIO input lines: external IRQ lines for the NVIC
+ * + Named GPIO output SYSRESETREQ: signalled for guest AIRCR.SYSRESETREQ
+ * + Property "cpu-model": CPU model to instantiate
+ * + Property "num-irq": number of external IRQ lines
+ * + Property "memory": MemoryRegion defining the physical address space
+ *   that CPU accesses see. (The NVIC, bitbanding and other CPU-internal
+ *   devices will be automatically layered on top of this view.)
+ */
+typedef struct ARMv7MState {
+    /*< private >*/
+    SysBusDevice parent_obj;
+    /*< public >*/
+    NVICState nvic;
+    BitBandState bitband[ARMV7M_NUM_BITBANDS];
+    ARMCPU *cpu;
+
+    /* MemoryRegion we pass to the CPU, with our devices layered on
+     * top of the ones the board provides in board_memory.
+     */
+    MemoryRegion container;
+
+    /* Properties */
+    char *cpu_model;
+    /* MemoryRegion the board provides to us (with its devices, RAM, etc) */
+    MemoryRegion *board_memory;
+} ARMv7MState;
+
+#endif
diff --git a/include/hw/arm/armv7m_nvic.h b/include/hw/arm/armv7m_nvic.h
new file mode 100644
index 0000000000..1d145fb75f
--- /dev/null
+++ b/include/hw/arm/armv7m_nvic.h
@@ -0,0 +1,62 @@
+/*
+ * ARMv7M NVIC object
+ *
+ * Copyright (c) 2017 Linaro Ltd
+ * Written by Peter Maydell <peter.maydell@linaro.org>
+ *
+ * This code is licensed under the GPL version 2 or later.
+ */
+
+#ifndef HW_ARM_ARMV7M_NVIC_H
+#define HW_ARM_ARMV7M_NVIC_H
+
+#include "target/arm/cpu.h"
+#include "hw/sysbus.h"
+#include "hw/timer/armv7m_systick.h"
+
+#define TYPE_NVIC "armv7m_nvic"
+
+#define NVIC(obj) \
+    OBJECT_CHECK(NVICState, (obj), TYPE_NVIC)
+
+/* Highest permitted number of exceptions (architectural limit) */
+#define NVIC_MAX_VECTORS 512
+
+typedef struct VecInfo {
+    /* Exception priorities can range from -3 to 255; only the unmodifiable
+     * priority values for RESET, NMI and HardFault can be negative.
+     */
+    int16_t prio;
+    uint8_t enabled;
+    uint8_t pending;
+    uint8_t active;
+    uint8_t level; /* exceptions <=15 never set level */
+} VecInfo;
+
+typedef struct NVICState {
+    /*< private >*/
+    SysBusDevice parent_obj;
+    /*< public >*/
+
+    ARMCPU *cpu;
+
+    VecInfo vectors[NVIC_MAX_VECTORS];
+    uint32_t prigroup;
+
+    /* vectpending and exception_prio are both cached state that can
+     * be recalculated from the vectors[] array and the prigroup field.
+     */
+    unsigned int vectpending; /* highest prio pending enabled exception */
+    int exception_prio; /* group prio of the highest prio active exception */
+
+    MemoryRegion sysregmem;
+    MemoryRegion container;
+
+    uint32_t num_irq;
+    qemu_irq excpout;
+    qemu_irq sysresetreq;
+
+    SysTickState systick;
+} NVICState;
+
+#endif
diff --git a/include/hw/arm/bcm2835_peripherals.h b/include/hw/arm/bcm2835_peripherals.h
index e12ae3721a..122b286de7 100644
--- a/include/hw/arm/bcm2835_peripherals.h
+++ b/include/hw/arm/bcm2835_peripherals.h
@@ -19,8 +19,11 @@
 #include "hw/dma/bcm2835_dma.h"
 #include "hw/intc/bcm2835_ic.h"
 #include "hw/misc/bcm2835_property.h"
+#include "hw/misc/bcm2835_rng.h"
 #include "hw/misc/bcm2835_mbox.h"
 #include "hw/sd/sdhci.h"
+#include "hw/sd/bcm2835_sdhost.h"
+#include "hw/gpio/bcm2835_gpio.h"
 
 #define TYPE_BCM2835_PERIPHERALS "bcm2835-peripherals"
 #define BCM2835_PERIPHERALS(obj) \
@@ -41,8 +44,11 @@ typedef struct BCM2835PeripheralState {
     BCM2835DMAState dma;
     BCM2835ICState ic;
     BCM2835PropertyState property;
+    BCM2835RngState rng;
     BCM2835MboxState mboxes;
     SDHCIState sdhci;
+    BCM2835SDHostState sdhost;
+    BCM2835GpioState gpio;
 } BCM2835PeripheralState;
 
 #endif /* BCM2835_PERIPHERALS_H */
diff --git a/include/hw/arm/stm32f205_soc.h b/include/hw/arm/stm32f205_soc.h
index 133214195b..e2dce1122e 100644
--- a/include/hw/arm/stm32f205_soc.h
+++ b/include/hw/arm/stm32f205_soc.h
@@ -31,6 +31,7 @@
 #include "hw/adc/stm32f2xx_adc.h"
 #include "hw/or-irq.h"
 #include "hw/ssi/stm32f2xx_spi.h"
+#include "hw/arm/armv7m.h"
 
 #define TYPE_STM32F205_SOC "stm32f205-soc"
 #define STM32F205_SOC(obj) \
@@ -51,9 +52,10 @@ typedef struct STM32F205State {
     SysBusDevice parent_obj;
     /*< public >*/
 
-    char *kernel_filename;
     char *cpu_model;
 
+    ARMv7MState armv7m;
+
     STM32F2XXSyscfgState syscfg;
     STM32F2XXUsartState usart[STM_NUM_USARTS];
     STM32F2XXTimerState timer[STM_NUM_TIMERS];
diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h
index 58ce74e0e5..33b0ff3892 100644
--- a/include/hw/arm/virt.h
+++ b/include/hw/arm/virt.h
@@ -93,6 +93,7 @@ typedef struct {
     FWCfgState *fw_cfg;
     bool secure;
     bool highmem;
+    bool its;
     bool virt;
     int32_t gic_version;
     struct arm_boot_info bootinfo;
diff --git a/include/hw/block/block.h b/include/hw/block/block.h
index df9d207d81..f3f6e8ef02 100644
--- a/include/hw/block/block.h
+++ b/include/hw/block/block.h
@@ -26,6 +26,7 @@ typedef struct BlockConf {
     /* geometry, not all devices use this */
     uint32_t cyls, heads, secs;
     OnOffAuto wce;
+    bool share_rw;
     BlockdevOnError rerror;
     BlockdevOnError werror;
 } BlockConf;
@@ -53,7 +54,9 @@ static inline unsigned int get_physical_block_exp(BlockConf *conf)
     DEFINE_PROP_UINT32("opt_io_size", _state, _conf.opt_io_size, 0),    \
     DEFINE_PROP_UINT32("discard_granularity", _state, \
                        _conf.discard_granularity, -1), \
-    DEFINE_PROP_ON_OFF_AUTO("write-cache", _state, _conf.wce, ON_OFF_AUTO_AUTO)
+    DEFINE_PROP_ON_OFF_AUTO("write-cache", _state, _conf.wce, \
+                            ON_OFF_AUTO_AUTO), \
+    DEFINE_PROP_BOOL("share-rw", _state, _conf.share_rw, false)
 
 #define DEFINE_BLOCK_CHS_PROPERTIES(_state, _conf)      \
     DEFINE_PROP_UINT32("cyls", _state, _conf.cyls, 0),  \
@@ -73,7 +76,8 @@ void blkconf_geometry(BlockConf *conf, int *trans,
                       unsigned cyls_max, unsigned heads_max, unsigned secs_max,
                       Error **errp);
 void blkconf_blocksizes(BlockConf *conf);
-void blkconf_apply_backend_options(BlockConf *conf);
+void blkconf_apply_backend_options(BlockConf *conf, bool readonly,
+                                   bool resizable, Error **errp);
 
 /* Hard disk geometry */
 
diff --git a/include/hw/elf_ops.h b/include/hw/elf_ops.h
index 25659b93be..a172a6068a 100644
--- a/include/hw/elf_ops.h
+++ b/include/hw/elf_ops.h
@@ -264,7 +264,7 @@ static int glue(load_elf, SZ)(const char *name, int fd,
                               int must_swab, uint64_t *pentry,
                               uint64_t *lowaddr, uint64_t *highaddr,
                               int elf_machine, int clear_lsb, int data_swab,
-                              AddressSpace *as)
+                              AddressSpace *as, bool load_rom)
 {
     struct elfhdr ehdr;
     struct elf_phdr *phdr = NULL, *ph;
@@ -403,10 +403,15 @@ static int glue(load_elf, SZ)(const char *name, int fd,
                 *pentry = ehdr.e_entry - ph->p_vaddr + ph->p_paddr;
             }
 
-            snprintf(label, sizeof(label), "phdr #%d: %s", i, name);
+            if (load_rom) {
+                snprintf(label, sizeof(label), "phdr #%d: %s", i, name);
 
-            /* rom_add_elf_program() seize the ownership of 'data' */
-            rom_add_elf_program(label, data, file_size, mem_size, addr, as);
+                /* rom_add_elf_program() seize the ownership of 'data' */
+                rom_add_elf_program(label, data, file_size, mem_size, addr, as);
+            } else {
+                cpu_physical_memory_write(addr, data, file_size);
+                g_free(data);
+            }
 
             total_size += mem_size;
             if (addr < low)
diff --git a/include/hw/gpio/bcm2835_gpio.h b/include/hw/gpio/bcm2835_gpio.h
new file mode 100644
index 0000000000..9f8e0c720c
--- /dev/null
+++ b/include/hw/gpio/bcm2835_gpio.h
@@ -0,0 +1,39 @@
+/*
+ * Raspberry Pi (BCM2835) GPIO Controller
+ *
+ * Copyright (c) 2017 Antfield SAS
+ *
+ * Authors:
+ *  Clement Deschamps <clement.deschamps@antfield.fr>
+ *  Luc Michel <luc.michel@antfield.fr>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef BCM2835_GPIO_H
+#define BCM2835_GPIO_H
+
+#include "hw/sd/sd.h"
+
+typedef struct BCM2835GpioState {
+    SysBusDevice parent_obj;
+
+    MemoryRegion iomem;
+
+    /* SDBus selector */
+    SDBus sdbus;
+    SDBus *sdbus_sdhci;
+    SDBus *sdbus_sdhost;
+
+    uint8_t fsel[54];
+    uint32_t lev0, lev1;
+    uint8_t sd_fsel;
+    qemu_irq out[54];
+} BCM2835GpioState;
+
+#define TYPE_BCM2835_GPIO "bcm2835_gpio"
+#define BCM2835_GPIO(obj) \
+    OBJECT_CHECK(BCM2835GpioState, (obj), TYPE_BCM2835_GPIO)
+
+#endif
diff --git a/include/hw/intc/arm_gicv3_common.h b/include/hw/intc/arm_gicv3_common.h
index 4156051d98..bccdfe17c6 100644
--- a/include/hw/intc/arm_gicv3_common.h
+++ b/include/hw/intc/arm_gicv3_common.h
@@ -172,6 +172,7 @@ struct GICv3CPUState {
     uint8_t gicr_ipriorityr[GIC_INTERNAL];
 
     /* CPU interface */
+    uint64_t icc_sre_el1;
     uint64_t icc_ctlr_el1[2];
     uint64_t icc_pmr_el1;
     uint64_t icc_bpr[3];
diff --git a/include/hw/loader.h b/include/hw/loader.h
index 40c4153e58..490c9ff8e6 100644
--- a/include/hw/loader.h
+++ b/include/hw/loader.h
@@ -65,7 +65,7 @@ int load_image_gzipped(const char *filename, hwaddr addr, uint64_t max_sz);
 #define ELF_LOAD_WRONG_ENDIAN -4
 const char *load_elf_strerror(int error);
 
-/** load_elf_as:
+/** load_elf_ram:
  * @filename: Path of ELF file
  * @translate_fn: optional function to translate load addresses
  * @translate_opaque: opaque data passed to @translate_fn
@@ -81,6 +81,7 @@ const char *load_elf_strerror(int error);
  *             words and 3 for within doublewords.
  * @as: The AddressSpace to load the ELF to. The value of address_space_memory
  *      is used if nothing is supplied here.
+ * @load_rom : Load ELF binary as ROM
  *
  * Load an ELF file's contents to the emulated system's address space.
  * Clients may optionally specify a callback to perform address
@@ -93,6 +94,16 @@ const char *load_elf_strerror(int error);
  * If @elf_machine is EM_NONE then the machine type will be read from the
  * ELF header and no checks will be carried out against the machine type.
  */
+int load_elf_ram(const char *filename,
+                 uint64_t (*translate_fn)(void *, uint64_t),
+                 void *translate_opaque, uint64_t *pentry, uint64_t *lowaddr,
+                 uint64_t *highaddr, int big_endian, int elf_machine,
+                 int clear_lsb, int data_swab, AddressSpace *as,
+                 bool load_rom);
+
+/** load_elf_as:
+ * Same as load_elf_ram(), but always loads the elf as ROM
+ */
 int load_elf_as(const char *filename,
                 uint64_t (*translate_fn)(void *, uint64_t),
                 void *translate_opaque, uint64_t *pentry, uint64_t *lowaddr,
diff --git a/include/hw/misc/bcm2835_rng.h b/include/hw/misc/bcm2835_rng.h
new file mode 100644
index 0000000000..41a531bce7
--- /dev/null
+++ b/include/hw/misc/bcm2835_rng.h
@@ -0,0 +1,27 @@
+/*
+ * BCM2835 Random Number Generator emulation
+ *
+ * Copyright (C) 2017 Marcin Chojnacki <marcinch7@gmail.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef BCM2835_RNG_H
+#define BCM2835_RNG_H
+
+#include "hw/sysbus.h"
+
+#define TYPE_BCM2835_RNG "bcm2835-rng"
+#define BCM2835_RNG(obj) \
+        OBJECT_CHECK(BCM2835RngState, (obj), TYPE_BCM2835_RNG)
+
+typedef struct {
+    SysBusDevice busdev;
+    MemoryRegion iomem;
+
+    uint32_t rng_ctrl;
+    uint32_t rng_status;
+} BCM2835RngState;
+
+#endif
diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h
index 092294ed5a..dfa76143f3 100644
--- a/include/hw/pci-host/spapr.h
+++ b/include/hw/pci-host/spapr.h
@@ -106,7 +106,7 @@ static inline qemu_irq spapr_phb_lsi_qirq(struct sPAPRPHBState *phb, int pin)
 {
     sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
 
-    return xics_get_qirq(spapr->xics, phb->lsi_table[pin].irq);
+    return xics_get_qirq(XICS_FABRIC(spapr), phb->lsi_table[pin].irq);
 }
 
 PCIHostState *spapr_create_phb(sPAPRMachineState *spapr, int index);
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index 6983f13745..9349acbfb2 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -429,6 +429,10 @@ int pci_bus_numa_node(PCIBus *bus);
 void pci_for_each_device(PCIBus *bus, int bus_num,
                          void (*fn)(PCIBus *bus, PCIDevice *d, void *opaque),
                          void *opaque);
+void pci_for_each_device_reverse(PCIBus *bus, int bus_num,
+                                 void (*fn)(PCIBus *bus, PCIDevice *d,
+                                            void *opaque),
+                                 void *opaque);
 void pci_for_each_bus_depth_first(PCIBus *bus,
                                   void *(*begin)(PCIBus *bus, void *parent_state),
                                   void (*end)(PCIBus *bus, void *state),
diff --git a/include/hw/pci/pci_ids.h b/include/hw/pci/pci_ids.h
index d77ca60a0e..d22ad8dd3b 100644
--- a/include/hw/pci/pci_ids.h
+++ b/include/hw/pci/pci_ids.h
@@ -13,41 +13,84 @@
 
 /* Device classes and subclasses */
 
-#define PCI_BASE_CLASS_STORAGE           0x01
-#define PCI_BASE_CLASS_NETWORK           0x02
+#define PCI_CLASS_NOT_DEFINED            0x0000
+#define PCI_CLASS_NOT_DEFINED_VGA        0x0001
 
+#define PCI_BASE_CLASS_STORAGE           0x01
 #define PCI_CLASS_STORAGE_SCSI           0x0100
 #define PCI_CLASS_STORAGE_IDE            0x0101
+#define PCI_CLASS_STORAGE_FLOPPY         0x0102
+#define PCI_CLASS_STORAGE_IPI            0x0103
 #define PCI_CLASS_STORAGE_RAID           0x0104
+#define PCI_CLASS_STORAGE_ATA            0x0105
 #define PCI_CLASS_STORAGE_SATA           0x0106
+#define PCI_CLASS_STORAGE_SAS            0x0107
 #define PCI_CLASS_STORAGE_EXPRESS        0x0108
 #define PCI_CLASS_STORAGE_OTHER          0x0180
 
+#define PCI_BASE_CLASS_NETWORK           0x02
 #define PCI_CLASS_NETWORK_ETHERNET       0x0200
+#define PCI_CLASS_NETWORK_TOKEN_RING     0x0201
+#define PCI_CLASS_NETWORK_FDDI           0x0202
+#define PCI_CLASS_NETWORK_ATM            0x0203
+#define PCI_CLASS_NETWORK_ISDN           0x0204
+#define PCI_CLASS_NETWORK_WORLDFIP       0x0205
+#define PCI_CLASS_NETWORK_PICMG214       0x0206
 #define PCI_CLASS_NETWORK_OTHER          0x0280
 
+#define PCI_BASE_CLASS_DISPLAY           0x03
 #define PCI_CLASS_DISPLAY_VGA            0x0300
+#define PCI_CLASS_DISPLAY_XGA            0x0301
+#define PCI_CLASS_DISPLAY_3D             0x0302
 #define PCI_CLASS_DISPLAY_OTHER          0x0380
 
+#define PCI_BASE_CLASS_MULTIMEDIA        0x04
+#define PCI_CLASS_MULTIMEDIA_VIDEO       0x0400
 #define PCI_CLASS_MULTIMEDIA_AUDIO       0x0401
+#define PCI_CLASS_MULTIMEDIA_PHONE       0x0402
+#define PCI_CLASS_MULTIMEDIA_OTHER       0x0480
 
+#define PCI_BASE_CLASS_MEMORY            0x05
 #define PCI_CLASS_MEMORY_RAM             0x0500
+#define PCI_CLASS_MEMORY_FLASH           0x0501
+#define PCI_CLASS_MEMORY_OTHER           0x0580
 
-#define PCI_CLASS_SYSTEM_SDHCI           0x0805
-#define PCI_CLASS_SYSTEM_OTHER           0x0880
-
-#define PCI_CLASS_SERIAL_USB             0x0c03
-#define PCI_CLASS_SERIAL_SMBUS           0x0c05
-
+#define PCI_BASE_CLASS_BRIDGE            0x06
 #define PCI_CLASS_BRIDGE_HOST            0x0600
 #define PCI_CLASS_BRIDGE_ISA             0x0601
+#define PCI_CLASS_BRIDGE_EISA            0x0602
+#define PCI_CLASS_BRIDGE_MC              0x0603
 #define PCI_CLASS_BRIDGE_PCI             0x0604
 #define PCI_CLASS_BRIDGE_PCI_INF_SUB     0x01
+#define PCI_CLASS_BRIDGE_PCMCIA          0x0605
+#define PCI_CLASS_BRIDGE_NUBUS           0x0606
+#define PCI_CLASS_BRIDGE_CARDBUS         0x0607
+#define PCI_CLASS_BRIDGE_RACEWAY         0x0608
+#define PCI_CLASS_BRIDGE_PCI_SEMITP      0x0609
+#define PCI_CLASS_BRIDGE_IB_PCI          0x060a
 #define PCI_CLASS_BRIDGE_OTHER           0x0680
 
+#define PCI_BASE_CLASS_COMMUNICATION     0x07
 #define PCI_CLASS_COMMUNICATION_SERIAL   0x0700
+#define PCI_CLASS_COMMUNICATION_PARALLEL 0x0701
+#define PCI_CLASS_COMMUNICATION_MULTISERIAL 0x0702
+#define PCI_CLASS_COMMUNICATION_MODEM    0x0703
+#define PCI_CLASS_COMMUNICATION_GPIB     0x0704
+#define PCI_CLASS_COMMUNICATION_SC       0x0705
 #define PCI_CLASS_COMMUNICATION_OTHER    0x0780
 
+#define PCI_BASE_CLASS_SYSTEM            0x08
+#define PCI_CLASS_SYSTEM_PIC             0x0800
+#define PCI_CLASS_SYSTEM_PIC_IOAPIC      0x080010
+#define PCI_CLASS_SYSTEM_PIC_IOXAPIC     0x080020
+#define PCI_CLASS_SYSTEM_DMA             0x0801
+#define PCI_CLASS_SYSTEM_TIMER           0x0802
+#define PCI_CLASS_SYSTEM_RTC             0x0803
+#define PCI_CLASS_SYSTEM_PCI_HOTPLUG     0x0804
+#define PCI_CLASS_SYSTEM_SDHCI           0x0805
+#define PCI_CLASS_SYSTEM_OTHER           0x0880
+
+#define PCI_BASE_CLASS_INPUT             0x09
 #define PCI_CLASS_INPUT_KEYBOARD         0x0900
 #define PCI_CLASS_INPUT_PEN              0x0901
 #define PCI_CLASS_INPUT_MOUSE            0x0902
@@ -55,8 +98,59 @@
 #define PCI_CLASS_INPUT_GAMEPORT         0x0904
 #define PCI_CLASS_INPUT_OTHER            0x0980
 
-#define PCI_CLASS_PROCESSOR_CO           0x0b40
+#define PCI_BASE_CLASS_DOCKING           0x0a
+#define PCI_CLASS_DOCKING_GENERIC        0x0a00
+#define PCI_CLASS_DOCKING_OTHER          0x0a80
+
+#define PCI_BASE_CLASS_PROCESSOR         0x0b
+#define PCI_CLASS_PROCESSOR_PENTIUM      0x0b02
 #define PCI_CLASS_PROCESSOR_POWERPC      0x0b20
+#define PCI_CLASS_PROCESSOR_MIPS         0x0b30
+#define PCI_CLASS_PROCESSOR_CO           0x0b40
+
+#define PCI_BASE_CLASS_SERIAL            0x0c
+#define PCI_CLASS_SERIAL_FIREWIRE        0x0c00
+#define PCI_CLASS_SERIAL_ACCESS          0x0c01
+#define PCI_CLASS_SERIAL_SSA             0x0c02
+#define PCI_CLASS_SERIAL_USB             0x0c03
+#define PCI_CLASS_SERIAL_USB_UHCI        0x0c0300
+#define PCI_CLASS_SERIAL_USB_OHCI        0x0c0310
+#define PCI_CLASS_SERIAL_USB_EHCI        0x0c0320
+#define PCI_CLASS_SERIAL_USB_XHCI        0x0c0330
+#define PCI_CLASS_SERIAL_USB_UNKNOWN     0x0c0380
+#define PCI_CLASS_SERIAL_USB_DEVICE      0x0c03fe
+#define PCI_CLASS_SERIAL_FIBER           0x0c04
+#define PCI_CLASS_SERIAL_SMBUS           0x0c05
+#define PCI_CLASS_SERIAL_IB              0x0c06
+#define PCI_CLASS_SERIAL_IPMI            0x0c07
+#define PCI_CLASS_SERIAL_SERCOS          0x0c08
+#define PCI_CLASS_SERIAL_CANBUS          0x0c09
+
+#define PCI_BASE_CLASS_WIRELESS          0x0d
+#define PCI_CLASS_WIRELESS_IRDA          0x0d00
+#define PCI_CLASS_WIRELESS_CIR           0x0d01
+#define PCI_CLASS_WIRELESS_RF_CONTROLLER 0x0d10
+#define PCI_CLASS_WIRELESS_BLUETOOTH     0x0d11
+#define PCI_CLASS_WIRELESS_BROADBAND     0x0d12
+#define PCI_CLASS_WIRELESS_OTHER         0x0d80
+
+#define PCI_BASE_CLASS_SATELLITE         0x0f
+#define PCI_CLASS_SATELLITE_TV           0x0f00
+#define PCI_CLASS_SATELLITE_AUDIO        0x0f01
+#define PCI_CLASS_SATELLITE_VOICE        0x0f03
+#define PCI_CLASS_SATELLITE_DATA         0x0f04
+
+#define PCI_BASE_CLASS_CRYPT             0x10
+#define PCI_CLASS_CRYPT_NETWORK          0x1000
+#define PCI_CLASS_CRYPT_ENTERTAINMENT    0x1001
+#define PCI_CLASS_CRYPT_OTHER            0x1080
+
+#define PCI_BASE_CLASS_SIGNAL_PROCESSING 0x11
+#define PCI_CLASS_SP_DPIO                0x1100
+#define PCI_CLASS_SP_PERF                0x1101
+#define PCI_CLASS_SP_SYNCH               0x1110
+#define PCI_CLASS_SP_MANAGEMENT          0x1120
+#define PCI_CLASS_SP_OTHER               0x1180
 
 #define PCI_CLASS_OTHERS                 0xff
 
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index f9b17d860a..cfd271129d 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -58,7 +58,7 @@ struct sPAPRMachineState {
     struct VIOsPAPRBus *vio_bus;
     QLIST_HEAD(, sPAPRPHBState) phbs;
     struct sPAPRNVRAM *nvram;
-    XICSState *xics;
+    ICSState *ics;
     DeviceState *rtc;
 
     void *htab;
@@ -94,6 +94,9 @@ struct sPAPRMachineState {
     /*< public >*/
     char *kvm_type;
     MemoryHotplugState hotplug_memory;
+
+    uint32_t nr_servers;
+    ICPState *icps;
 };
 
 #define H_SUCCESS         0
diff --git a/include/hw/ppc/spapr_vio.h b/include/hw/ppc/spapr_vio.h
index fc6f673ea0..2e9685a5d9 100644
--- a/include/hw/ppc/spapr_vio.h
+++ b/include/hw/ppc/spapr_vio.h
@@ -87,7 +87,7 @@ static inline qemu_irq spapr_vio_qirq(VIOsPAPRDevice *dev)
 {
     sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
 
-    return xics_get_qirq(spapr->xics, dev->irq);
+    return xics_get_qirq(XICS_FABRIC(spapr), dev->irq);
 }
 
 static inline bool spapr_vio_dma_valid(VIOsPAPRDevice *dev, uint64_t taddr,
diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h
index 3f0c31610a..1945913bf1 100644
--- a/include/hw/ppc/xics.h
+++ b/include/hw/ppc/xics.h
@@ -30,29 +30,6 @@
 
 #include "hw/sysbus.h"
 
-#define TYPE_XICS_COMMON "xics-common"
-#define XICS_COMMON(obj) OBJECT_CHECK(XICSState, (obj), TYPE_XICS_COMMON)
-
-/*
- * Retain xics as the type name to be compatible for migration. Rest all the
- * functions, class and variables are renamed as xics_spapr.
- */
-#define TYPE_XICS_SPAPR "xics"
-#define XICS_SPAPR(obj) OBJECT_CHECK(XICSState, (obj), TYPE_XICS_SPAPR)
-
-#define TYPE_XICS_SPAPR_KVM "xics-spapr-kvm"
-#define XICS_SPAPR_KVM(obj) \
-     OBJECT_CHECK(KVMXICSState, (obj), TYPE_XICS_SPAPR_KVM)
-
-#define XICS_COMMON_CLASS(klass) \
-     OBJECT_CLASS_CHECK(XICSStateClass, (klass), TYPE_XICS_COMMON)
-#define XICS_SPAPR_CLASS(klass) \
-     OBJECT_CLASS_CHECK(XICSStateClass, (klass), TYPE_XICS_SPAPR)
-#define XICS_COMMON_GET_CLASS(obj) \
-     OBJECT_GET_CLASS(XICSStateClass, (obj), TYPE_XICS_COMMON)
-#define XICS_SPAPR_GET_CLASS(obj) \
-     OBJECT_GET_CLASS(XICSStateClass, (obj), TYPE_XICS_SPAPR)
-
 #define XICS_IPI        0x2
 #define XICS_BUID       0x1
 #define XICS_IRQ_BASE   (XICS_BUID << 12)
@@ -62,31 +39,12 @@
  * (the kernel implementation supports more but we don't exploit
  *  that yet)
  */
-typedef struct XICSStateClass XICSStateClass;
-typedef struct XICSState XICSState;
 typedef struct ICPStateClass ICPStateClass;
 typedef struct ICPState ICPState;
 typedef struct ICSStateClass ICSStateClass;
 typedef struct ICSState ICSState;
 typedef struct ICSIRQState ICSIRQState;
-
-struct XICSStateClass {
-    DeviceClass parent_class;
-
-    void (*cpu_setup)(XICSState *icp, PowerPCCPU *cpu);
-    void (*set_nr_irqs)(XICSState *icp, uint32_t nr_irqs, Error **errp);
-    void (*set_nr_servers)(XICSState *icp, uint32_t nr_servers, Error **errp);
-};
-
-struct XICSState {
-    /*< private >*/
-    SysBusDevice parent_obj;
-    /*< public >*/
-    uint32_t nr_servers;
-    uint32_t nr_irqs;
-    ICPState *ss;
-    QLIST_HEAD(, ICSState) ics;
-};
+typedef struct XICSFabric XICSFabric;
 
 #define TYPE_ICP "icp"
 #define ICP(obj) OBJECT_CHECK(ICPState, (obj), TYPE_ICP)
@@ -104,6 +62,7 @@ struct ICPStateClass {
 
     void (*pre_save)(ICPState *s);
     int (*post_load)(ICPState *s, int version_id);
+    void (*cpu_setup)(ICPState *icp, PowerPCCPU *cpu);
 };
 
 struct ICPState {
@@ -118,7 +77,7 @@ struct ICPState {
     qemu_irq output;
     bool cap_irq_xics_enabled;
 
-    XICSState *xics;
+    XICSFabric *xics;
 };
 
 #define TYPE_ICS_BASE "ics-base"
@@ -139,6 +98,7 @@ struct ICPState {
 struct ICSStateClass {
     DeviceClass parent_class;
 
+    void (*realize)(DeviceState *dev, Error **errp);
     void (*pre_save)(ICSState *s);
     int (*post_load)(ICSState *s, int version_id);
     void (*reject)(ICSState *s, uint32_t irq);
@@ -154,8 +114,7 @@ struct ICSState {
     uint32_t offset;
     qemu_irq *qirqs;
     ICSIRQState *irqs;
-    XICSState *xics;
-    QLIST_ENTRY(ICSState) list;
+    XICSFabric *xics;
 };
 
 static inline bool ics_valid_irq(ICSState *ics, uint32_t nr)
@@ -180,19 +139,37 @@ struct ICSIRQState {
     uint8_t flags;
 };
 
+typedef struct XICSFabric {
+    Object parent;
+} XICSFabric;
+
+#define TYPE_XICS_FABRIC "xics-fabric"
+#define XICS_FABRIC(obj)                                     \
+    OBJECT_CHECK(XICSFabric, (obj), TYPE_XICS_FABRIC)
+#define XICS_FABRIC_CLASS(klass)                                     \
+    OBJECT_CLASS_CHECK(XICSFabricClass, (klass), TYPE_XICS_FABRIC)
+#define XICS_FABRIC_GET_CLASS(obj)                                   \
+    OBJECT_GET_CLASS(XICSFabricClass, (obj), TYPE_XICS_FABRIC)
+
+typedef struct XICSFabricClass {
+    InterfaceClass parent;
+    ICSState *(*ics_get)(XICSFabric *xi, int irq);
+    void (*ics_resend)(XICSFabric *xi);
+    ICPState *(*icp_get)(XICSFabric *xi, int server);
+} XICSFabricClass;
+
 #define XICS_IRQS_SPAPR               1024
 
-qemu_irq xics_get_qirq(XICSState *icp, int irq);
-int xics_spapr_alloc(XICSState *icp, int irq_hint, bool lsi, Error **errp);
-int xics_spapr_alloc_block(XICSState *icp, int num, bool lsi, bool align,
+int spapr_ics_alloc(ICSState *ics, int irq_hint, bool lsi, Error **errp);
+int spapr_ics_alloc_block(ICSState *ics, int num, bool lsi, bool align,
                            Error **errp);
-void xics_spapr_free(XICSState *icp, int irq, int num);
-void spapr_dt_xics(XICSState *xics, void *fdt, uint32_t phandle);
+void spapr_ics_free(ICSState *ics, int irq, int num);
+void spapr_dt_xics(int nr_servers, void *fdt, uint32_t phandle);
 
-void xics_cpu_setup(XICSState *icp, PowerPCCPU *cpu);
-void xics_cpu_destroy(XICSState *icp, PowerPCCPU *cpu);
-void xics_set_nr_servers(XICSState *xics, uint32_t nr_servers,
-                         const char *typename, Error **errp);
+qemu_irq xics_get_qirq(XICSFabric *xi, int irq);
+ICPState *xics_icp_get(XICSFabric *xi, int server);
+void xics_cpu_setup(XICSFabric *xi, PowerPCCPU *cpu);
+void xics_cpu_destroy(XICSFabric *xi, PowerPCCPU *cpu);
 
 /* Internal XICS interfaces */
 int xics_get_cpu_index_by_dt_id(int cpu_dt_id);
@@ -207,7 +184,15 @@ void ics_simple_write_xive(ICSState *ics, int nr, int server,
                            uint8_t priority, uint8_t saved_priority);
 
 void ics_set_irq_type(ICSState *ics, int srcno, bool lsi);
+void icp_pic_print_info(ICPState *icp, Monitor *mon);
+void ics_pic_print_info(ICSState *ics, Monitor *mon);
+
+void ics_resend(ICSState *ics);
+void icp_resend(ICPState *ss);
+
+typedef struct sPAPRMachineState sPAPRMachineState;
 
-ICSState *xics_find_source(XICSState *icp, int irq);
+int xics_kvm_init(sPAPRMachineState *spapr, Error **errp);
+int xics_spapr_init(sPAPRMachineState *spapr, Error **errp);
 
 #endif /* XICS_H */
diff --git a/include/hw/ptimer.h b/include/hw/ptimer.h
index 48cccbdb51..eafc3f0a86 100644
--- a/include/hw/ptimer.h
+++ b/include/hw/ptimer.h
@@ -60,6 +60,7 @@ typedef struct ptimer_state ptimer_state;
 typedef void (*ptimer_cb)(void *opaque);
 
 ptimer_state *ptimer_init(QEMUBH *bh, uint8_t policy_mask);
+void ptimer_free(ptimer_state *s);
 void ptimer_set_period(ptimer_state *s, int64_t period);
 void ptimer_set_freq(ptimer_state *s, uint32_t freq);
 uint64_t ptimer_get_limit(ptimer_state *s);
diff --git a/include/hw/sd/bcm2835_sdhost.h b/include/hw/sd/bcm2835_sdhost.h
new file mode 100644
index 0000000000..7520dd6507
--- /dev/null
+++ b/include/hw/sd/bcm2835_sdhost.h
@@ -0,0 +1,48 @@
+/*
+ * Raspberry Pi (BCM2835) SD Host Controller
+ *
+ * Copyright (c) 2017 Antfield SAS
+ *
+ * Authors:
+ *  Clement Deschamps <clement.deschamps@antfield.fr>
+ *  Luc Michel <luc.michel@antfield.fr>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef BCM2835_SDHOST_H
+#define BCM2835_SDHOST_H
+
+#include "hw/sysbus.h"
+#include "hw/sd/sd.h"
+
+#define TYPE_BCM2835_SDHOST "bcm2835-sdhost"
+#define BCM2835_SDHOST(obj) \
+        OBJECT_CHECK(BCM2835SDHostState, (obj), TYPE_BCM2835_SDHOST)
+
+#define BCM2835_SDHOST_FIFO_LEN 16
+
+typedef struct {
+    SysBusDevice busdev;
+    SDBus sdbus;
+    MemoryRegion iomem;
+
+    uint32_t cmd;
+    uint32_t cmdarg;
+    uint32_t status;
+    uint32_t rsp[4];
+    uint32_t config;
+    uint32_t edm;
+    uint32_t vdd;
+    uint32_t hbct;
+    uint32_t hblc;
+    int32_t fifo_pos;
+    int32_t fifo_len;
+    uint32_t fifo[BCM2835_SDHOST_FIFO_LEN];
+    uint32_t datacnt;
+
+    qemu_irq irq;
+} BCM2835SDHostState;
+
+#endif
diff --git a/include/hw/sd/sd.h b/include/hw/sd/sd.h
index 79909b2478..96caefe373 100644
--- a/include/hw/sd/sd.h
+++ b/include/hw/sd/sd.h
@@ -140,6 +140,17 @@ uint8_t sdbus_read_data(SDBus *sd);
 bool sdbus_data_ready(SDBus *sd);
 bool sdbus_get_inserted(SDBus *sd);
 bool sdbus_get_readonly(SDBus *sd);
+/**
+ * sdbus_reparent_card: Reparent an SD card from one controller to another
+ * @from: controller bus to remove card from
+ * @to: controller bus to move card to
+ *
+ * Reparent an SD card, effectively unplugging it from one controller
+ * and inserting it into another. This is useful for SoCs like the
+ * bcm2835 which have two SD controllers and connect a single SD card
+ * to them, selected by the guest reprogramming GPIO line routing.
+ */
+void sdbus_reparent_card(SDBus *from, SDBus *to);
 
 /* Functions to be used by SD devices to report back to qdevified controllers */
 void sdbus_set_inserted(SDBus *sd, bool inserted);
diff --git a/include/hw/timer/armv7m_systick.h b/include/hw/timer/armv7m_systick.h
new file mode 100644
index 0000000000..cca04defd8
--- /dev/null
+++ b/include/hw/timer/armv7m_systick.h
@@ -0,0 +1,34 @@
+/*
+ * ARMv7M SysTick timer
+ *
+ * Copyright (c) 2006-2007 CodeSourcery.
+ * Written by Paul Brook
+ * Copyright (c) 2017 Linaro Ltd
+ * Written by Peter Maydell
+ *
+ * This code is licensed under the GPL (version 2 or later).
+ */
+
+#ifndef HW_TIMER_ARMV7M_SYSTICK_H
+#define HW_TIMER_ARMV7M_SYSTICK_H
+
+#include "hw/sysbus.h"
+
+#define TYPE_SYSTICK "armv7m_systick"
+
+#define SYSTICK(obj) OBJECT_CHECK(SysTickState, (obj), TYPE_SYSTICK)
+
+typedef struct SysTickState {
+    /*< private >*/
+    SysBusDevice parent_obj;
+    /*< public >*/
+
+    uint32_t control;
+    uint32_t reload;
+    int64_t tick;
+    QEMUTimer *timer;
+    MemoryRegion iomem;
+    qemu_irq irq;
+} SysTickState;
+
+#endif
diff --git a/include/migration/migration.h b/include/migration/migration.h
index 1735d66512..5720c884f4 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -22,6 +22,7 @@
 #include "qapi-types.h"
 #include "exec/cpu-common.h"
 #include "qemu/coroutine_int.h"
+#include "qom/object.h"
 
 #define QEMU_VM_FILE_MAGIC           0x5145564d
 #define QEMU_VM_FILE_VERSION_COMPAT  0x00000002
@@ -92,6 +93,7 @@ struct MigrationIncomingState {
      */
     QemuEvent main_thread_load_event;
 
+    size_t         largest_page_size;
     bool           have_fault_thread;
     QemuThread     fault_thread;
     QemuSemaphore  fault_thread_sem;
@@ -107,6 +109,7 @@ struct MigrationIncomingState {
     QEMUFile *to_src_file;
     QemuMutex rp_mutex;    /* We send replies from multiple threads */
     void     *postcopy_tmp_page;
+    void     *postcopy_tmp_zero_page;
 
     QEMUBH *bh;
 
@@ -313,6 +316,8 @@ int migrate_add_blocker(Error *reason, Error **errp);
  */
 void migrate_del_blocker(Error *reason);
 
+int check_migratable(Object *obj, Error **err);
+
 bool migrate_release_ram(void);
 bool migrate_postcopy_ram(void);
 bool migrate_zero_blocks(void);
@@ -375,6 +380,7 @@ void global_state_store_running(void);
 void flush_page_queue(MigrationState *ms);
 int ram_save_queue_pages(MigrationState *ms, const char *rbname,
                          ram_addr_t start, ram_addr_t len);
+uint64_t ram_pagesize_summary(void);
 
 PostcopyState postcopy_state_get(void);
 /* Set the state and return the old state */
diff --git a/include/migration/postcopy-ram.h b/include/migration/postcopy-ram.h
index b6a7491f2d..8e036b95a2 100644
--- a/include/migration/postcopy-ram.h
+++ b/include/migration/postcopy-ram.h
@@ -35,13 +35,6 @@ int postcopy_ram_incoming_init(MigrationIncomingState *mis, size_t ram_pages);
 int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis);
 
 /*
- * Discard the contents of 'length' bytes from 'start'
- * We can assume that if we've been called postcopy_ram_hosttest returned true
- */
-int postcopy_ram_discard_range(MigrationIncomingState *mis, uint8_t *start,
-                               size_t length);
-
-/*
  * Userfault requires us to mark RAM as NOHUGEPAGE prior to discard
  * however leaving it until after precopy means that most of the precopy
  * data is still THPd
@@ -81,13 +74,15 @@ void postcopy_discard_send_finish(MigrationState *ms,
  *    to use other postcopy_ routines to allocate.
  * returns 0 on success
  */
-int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from);
+int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from,
+                        size_t pagesize);
 
 /*
  * Place a zero page at (host) atomically
  * returns 0 on success
  */
-int postcopy_place_page_zero(MigrationIncomingState *mis, void *host);
+int postcopy_place_page_zero(MigrationIncomingState *mis, void *host,
+                             size_t pagesize);
 
 /*
  * Allocate a page of memory that can be mapped at a later point in time
diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
index 63e7b02e05..f2dbf8410a 100644
--- a/include/migration/vmstate.h
+++ b/include/migration/vmstate.h
@@ -253,6 +253,10 @@ extern const VMStateInfo vmstate_info_uint16;
 extern const VMStateInfo vmstate_info_uint32;
 extern const VMStateInfo vmstate_info_uint64;
 
+/** Put this in the stream when migrating a null pointer.*/
+#define VMS_NULLPTR_MARKER (0x30U) /* '0' */
+extern const VMStateInfo vmstate_info_nullptr;
+
 extern const VMStateInfo vmstate_info_float64;
 extern const VMStateInfo vmstate_info_cpudouble;
 
diff --git a/include/qemu-common.h b/include/qemu-common.h
index 1430390eb6..d218821c14 100644
--- a/include/qemu-common.h
+++ b/include/qemu-common.h
@@ -19,7 +19,7 @@
 #include "qemu/option.h"
 
 /* Copyright string for -version arguments, About dialogs, etc */
-#define QEMU_COPYRIGHT "Copyright (c) 2003-2016 " \
+#define QEMU_COPYRIGHT "Copyright (c) 2003-2017 " \
     "Fabrice Bellard and the QEMU Project developers"
 
 /* main function, renamed */
diff --git a/include/qemu-io.h b/include/qemu-io.h
index 4d402b9b01..196fde0f3a 100644
--- a/include/qemu-io.h
+++ b/include/qemu-io.h
@@ -36,6 +36,7 @@ typedef struct cmdinfo {
     const char  *args;
     const char  *oneline;
     helpfunc_t  help;
+    uint64_t    perm;
 } cmdinfo_t;
 
 extern bool qemuio_misalign;
diff --git a/include/qemu/throttle-options.h b/include/qemu/throttle-options.h
new file mode 100644
index 0000000000..3133d1ca40
--- /dev/null
+++ b/include/qemu/throttle-options.h
@@ -0,0 +1,92 @@
+/*
+ * QEMU throttling command line options
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version.
+ *
+ * See the COPYING file in the top-level directory for details.
+ *
+ */
+#ifndef THROTTLE_OPTIONS_H
+#define THROTTLE_OPTIONS_H
+
+#define THROTTLE_OPTS \
+          { \
+            .name = "throttling.iops-total",\
+            .type = QEMU_OPT_NUMBER,\
+            .help = "limit total I/O operations per second",\
+        },{ \
+            .name = "throttling.iops-read",\
+            .type = QEMU_OPT_NUMBER,\
+            .help = "limit read operations per second",\
+        },{ \
+            .name = "throttling.iops-write",\
+            .type = QEMU_OPT_NUMBER,\
+            .help = "limit write operations per second",\
+        },{ \
+            .name = "throttling.bps-total",\
+            .type = QEMU_OPT_NUMBER,\
+            .help = "limit total bytes per second",\
+        },{ \
+            .name = "throttling.bps-read",\
+            .type = QEMU_OPT_NUMBER,\
+            .help = "limit read bytes per second",\
+        },{ \
+            .name = "throttling.bps-write",\
+            .type = QEMU_OPT_NUMBER,\
+            .help = "limit write bytes per second",\
+        },{ \
+            .name = "throttling.iops-total-max",\
+            .type = QEMU_OPT_NUMBER,\
+            .help = "I/O operations burst",\
+        },{ \
+            .name = "throttling.iops-read-max",\
+            .type = QEMU_OPT_NUMBER,\
+            .help = "I/O operations read burst",\
+        },{ \
+            .name = "throttling.iops-write-max",\
+            .type = QEMU_OPT_NUMBER,\
+            .help = "I/O operations write burst",\
+        },{ \
+            .name = "throttling.bps-total-max",\
+            .type = QEMU_OPT_NUMBER,\
+            .help = "total bytes burst",\
+        },{ \
+            .name = "throttling.bps-read-max",\
+            .type = QEMU_OPT_NUMBER,\
+            .help = "total bytes read burst",\
+        },{ \
+            .name = "throttling.bps-write-max",\
+            .type = QEMU_OPT_NUMBER,\
+            .help = "total bytes write burst",\
+        },{ \
+            .name = "throttling.iops-total-max-length",\
+            .type = QEMU_OPT_NUMBER,\
+            .help = "length of the iops-total-max burst period, in seconds",\
+        },{ \
+            .name = "throttling.iops-read-max-length",\
+            .type = QEMU_OPT_NUMBER,\
+            .help = "length of the iops-read-max burst period, in seconds",\
+        },{ \
+            .name = "throttling.iops-write-max-length",\
+            .type = QEMU_OPT_NUMBER,\
+            .help = "length of the iops-write-max burst period, in seconds",\
+        },{ \
+            .name = "throttling.bps-total-max-length",\
+            .type = QEMU_OPT_NUMBER,\
+            .help = "length of the bps-total-max burst period, in seconds",\
+        },{ \
+            .name = "throttling.bps-read-max-length",\
+            .type = QEMU_OPT_NUMBER,\
+            .help = "length of the bps-read-max burst period, in seconds",\
+        },{ \
+            .name = "throttling.bps-write-max-length",\
+            .type = QEMU_OPT_NUMBER,\
+            .help = "length of the bps-write-max burst period, in seconds",\
+        },{ \
+            .name = "throttling.iops-size",\
+            .type = QEMU_OPT_NUMBER,\
+            .help = "when limiting by iops max size of an I/O in bytes",\
+        }
+
+#endif
diff --git a/include/qemu/timer.h b/include/qemu/timer.h
index 9abed51ae8..26e628584c 100644
--- a/include/qemu/timer.h
+++ b/include/qemu/timer.h
@@ -610,7 +610,10 @@ void timer_deinit(QEMUTimer *ts);
  *
  * Free a timer (it must not be on the active list)
  */
-void timer_free(QEMUTimer *ts);
+static inline void timer_free(QEMUTimer *ts)
+{
+    g_free(ts);
+}
 
 /**
  * timer_del:
diff --git a/include/standard-headers/asm-x86/hyperv.h b/include/standard-headers/asm-x86/hyperv.h
index 47b38fb816..eca9a2ca22 100644
--- a/include/standard-headers/asm-x86/hyperv.h
+++ b/include/standard-headers/asm-x86/hyperv.h
@@ -73,6 +73,9 @@
   */
 #define HV_X64_MSR_STAT_PAGES_AVAILABLE		(1 << 8)
 
+/* Crash MSR available */
+#define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE (1 << 10)
+
 /*
  * Feature identification: EBX indicates which flags were specified at
  * partition creation. The format is the same as the partition creation
@@ -144,6 +147,11 @@
  */
 #define HV_X64_RELAXED_TIMING_RECOMMENDED	(1 << 5)
 
+/*
+ * Crash notification flag.
+ */
+#define HV_CRASH_CTL_CRASH_NOTIFY (1ULL << 63)
+
 /* MSR used to identify the guest OS. */
 #define HV_X64_MSR_GUEST_OS_ID			0x40000000
 
diff --git a/include/standard-headers/linux/input-event-codes.h b/include/standard-headers/linux/input-event-codes.h
index 5c10f7e25d..c8b3338375 100644
--- a/include/standard-headers/linux/input-event-codes.h
+++ b/include/standard-headers/linux/input-event-codes.h
@@ -640,7 +640,7 @@
  * Control a data application associated with the currently viewed channel,
  * e.g. teletext or data broadcast application (MHEG, MHP, HbbTV, etc.)
  */
-#define KEY_DATA			0x275
+#define KEY_DATA			0x277
 
 #define BTN_TRIGGER_HAPPY		0x2c0
 #define BTN_TRIGGER_HAPPY1		0x2c0
diff --git a/include/standard-headers/linux/pci_regs.h b/include/standard-headers/linux/pci_regs.h
index e5a2e68b22..634c9c44ed 100644
--- a/include/standard-headers/linux/pci_regs.h
+++ b/include/standard-headers/linux/pci_regs.h
@@ -23,6 +23,14 @@
 #define LINUX_PCI_REGS_H
 
 /*
+ * Conventional PCI and PCI-X Mode 1 devices have 256 bytes of
+ * configuration space.  PCI-X Mode 2 and PCIe devices have 4096 bytes of
+ * configuration space.
+ */
+#define PCI_CFG_SPACE_SIZE	256
+#define PCI_CFG_SPACE_EXP_SIZE	4096
+
+/*
  * Under PCI, each device has 256 bytes of configuration address space,
  * of which the first 64 bytes are standardized as follows:
  */
@@ -674,6 +682,7 @@
 #define PCI_EXT_CAP_ID_PMUX	0x1A	/* Protocol Multiplexing */
 #define PCI_EXT_CAP_ID_PASID	0x1B	/* Process Address Space ID */
 #define PCI_EXT_CAP_ID_DPC	0x1D	/* Downstream Port Containment */
+#define PCI_EXT_CAP_ID_L1SS	0x1E	/* L1 PM Substates */
 #define PCI_EXT_CAP_ID_PTM	0x1F	/* Precision Time Measurement */
 #define PCI_EXT_CAP_ID_MAX	PCI_EXT_CAP_ID_PTM
 
@@ -965,6 +974,7 @@
 #define PCI_EXP_DPC_STATUS		8	/* DPC Status */
 #define  PCI_EXP_DPC_STATUS_TRIGGER	0x01	/* Trigger Status */
 #define  PCI_EXP_DPC_STATUS_INTERRUPT	0x08	/* Interrupt Status */
+#define  PCI_EXP_DPC_RP_BUSY		0x10	/* Root Port Busy */
 
 #define PCI_EXP_DPC_SOURCE_ID		10	/* DPC Source Identifier */
 
@@ -977,4 +987,19 @@
 #define  PCI_PTM_CTRL_ENABLE		0x00000001  /* PTM enable */
 #define  PCI_PTM_CTRL_ROOT		0x00000002  /* Root select */
 
+/* L1 PM Substates */
+#define PCI_L1SS_CAP		    4	/* capability register */
+#define  PCI_L1SS_CAP_PCIPM_L1_2	 1	/* PCI PM L1.2 Support */
+#define  PCI_L1SS_CAP_PCIPM_L1_1	 2	/* PCI PM L1.1 Support */
+#define  PCI_L1SS_CAP_ASPM_L1_2		 4	/* ASPM L1.2 Support */
+#define  PCI_L1SS_CAP_ASPM_L1_1		 8	/* ASPM L1.1 Support */
+#define  PCI_L1SS_CAP_L1_PM_SS		16	/* L1 PM Substates Support */
+#define PCI_L1SS_CTL1		    8	/* Control Register 1 */
+#define  PCI_L1SS_CTL1_PCIPM_L1_2	1	/* PCI PM L1.2 Enable */
+#define  PCI_L1SS_CTL1_PCIPM_L1_1	2	/* PCI PM L1.1 Support */
+#define  PCI_L1SS_CTL1_ASPM_L1_2	4	/* ASPM L1.2 Support */
+#define  PCI_L1SS_CTL1_ASPM_L1_1	8	/* ASPM L1.1 Support */
+#define  PCI_L1SS_CTL1_L1SS_MASK	0x0000000F
+#define PCI_L1SS_CTL2		    0xC	/* Control Register 2 */
+
 #endif /* LINUX_PCI_REGS_H */
diff --git a/include/standard-headers/linux/virtio_ids.h b/include/standard-headers/linux/virtio_ids.h
index fe74e422d4..6d5c3b2d4f 100644
--- a/include/standard-headers/linux/virtio_ids.h
+++ b/include/standard-headers/linux/virtio_ids.h
@@ -43,4 +43,5 @@
 #define VIRTIO_ID_INPUT        18 /* virtio input */
 #define VIRTIO_ID_VSOCK        19 /* virtio vsock transport */
 #define VIRTIO_ID_CRYPTO       20 /* virtio crypto */
+
 #endif /* _LINUX_VIRTIO_IDS_H */
diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h
index f365a51acf..096c17fce0 100644
--- a/include/sysemu/block-backend.h
+++ b/include/sysemu/block-backend.h
@@ -34,7 +34,7 @@ typedef struct BlockDevOps {
      * changes.  Sure would be useful if it did.
      * Device models with removable media must implement this callback.
      */
-    void (*change_media_cb)(void *opaque, bool load);
+    void (*change_media_cb)(void *opaque, bool load, Error **errp);
     /*
      * Runs when an eject request is issued from the monitor, the tray
      * is closed, and the medium is locked.
@@ -84,7 +84,7 @@ typedef struct BlockBackendPublic {
     QLIST_ENTRY(BlockBackendPublic) round_robin;
 } BlockBackendPublic;
 
-BlockBackend *blk_new(void);
+BlockBackend *blk_new(uint64_t perm, uint64_t shared_perm);
 BlockBackend *blk_new_open(const char *filename, const char *reference,
                            QDict *options, int flags, Error **errp);
 int blk_get_refcnt(BlockBackend *blk);
@@ -102,9 +102,12 @@ BlockBackend *blk_by_public(BlockBackendPublic *public);
 
 BlockDriverState *blk_bs(BlockBackend *blk);
 void blk_remove_bs(BlockBackend *blk);
-void blk_insert_bs(BlockBackend *blk, BlockDriverState *bs);
+int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp);
 bool bdrv_has_blk(BlockDriverState *bs);
 bool bdrv_is_root_node(BlockDriverState *bs);
+int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm,
+                 Error **errp);
+void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm);
 
 void blk_set_allow_write_beyond_eof(BlockBackend *blk, bool allow);
 void blk_iostatus_enable(BlockBackend *blk);
diff --git a/include/sysemu/replay.h b/include/sysemu/replay.h
index 7aad20b07f..f1c0712795 100644
--- a/include/sysemu/replay.h
+++ b/include/sysemu/replay.h
@@ -152,6 +152,13 @@ void replay_unregister_net(ReplayNetState *rns);
 void replay_net_packet_event(ReplayNetState *rns, unsigned flags,
                              const struct iovec *iov, int iovcnt);
 
+/* Audio */
+
+/*! Saves/restores number of played samples of audio out operation. */
+void replay_audio_out(int *played);
+/*! Saves/restores recorded samples of audio in operation. */
+void replay_audio_in(int *recorded, void *samples, int *wpos, int size);
+
 /* VM state operations */
 
 /*! Called at the start of execution.
diff --git a/include/ui/console.h b/include/ui/console.h
index af6350e96f..ac2895ca93 100644
--- a/include/ui/console.h
+++ b/include/ui/console.h
@@ -215,10 +215,14 @@ typedef struct DisplayChangeListenerOps {
                                    QEMUGLContext ctx);
     QEMUGLContext (*dpy_gl_ctx_get_current)(DisplayChangeListener *dcl);
 
-    void (*dpy_gl_scanout)(DisplayChangeListener *dcl,
-                           uint32_t backing_id, bool backing_y_0_top,
-                           uint32_t backing_width, uint32_t backing_height,
-                           uint32_t x, uint32_t y, uint32_t w, uint32_t h);
+    void (*dpy_gl_scanout_disable)(DisplayChangeListener *dcl);
+    void (*dpy_gl_scanout_texture)(DisplayChangeListener *dcl,
+                                   uint32_t backing_id,
+                                   bool backing_y_0_top,
+                                   uint32_t backing_width,
+                                   uint32_t backing_height,
+                                   uint32_t x, uint32_t y,
+                                   uint32_t w, uint32_t h);
     void (*dpy_gl_update)(DisplayChangeListener *dcl,
                           uint32_t x, uint32_t y, uint32_t w, uint32_t h);
 
@@ -284,10 +288,11 @@ bool dpy_cursor_define_supported(QemuConsole *con);
 bool dpy_gfx_check_format(QemuConsole *con,
                           pixman_format_code_t format);
 
-void dpy_gl_scanout(QemuConsole *con,
-                    uint32_t backing_id, bool backing_y_0_top,
-                    uint32_t backing_width, uint32_t backing_height,
-                    uint32_t x, uint32_t y, uint32_t w, uint32_t h);
+void dpy_gl_scanout_disable(QemuConsole *con);
+void dpy_gl_scanout_texture(QemuConsole *con,
+                            uint32_t backing_id, bool backing_y_0_top,
+                            uint32_t backing_width, uint32_t backing_height,
+                            uint32_t x, uint32_t y, uint32_t w, uint32_t h);
 void dpy_gl_update(QemuConsole *con,
                    uint32_t x, uint32_t y, uint32_t w, uint32_t h);
 
diff --git a/include/ui/gtk.h b/include/ui/gtk.h
index 47ffddb5b4..ca9a2268de 100644
--- a/include/ui/gtk.h
+++ b/include/ui/gtk.h
@@ -103,11 +103,14 @@ void gd_egl_switch(DisplayChangeListener *dcl,
                    DisplaySurface *surface);
 QEMUGLContext gd_egl_create_context(DisplayChangeListener *dcl,
                                     QEMUGLParams *params);
-void gd_egl_scanout(DisplayChangeListener *dcl,
-                    uint32_t backing_id, bool backing_y_0_top,
-                    uint32_t backing_width, uint32_t backing_height,
-                    uint32_t x, uint32_t y,
-                    uint32_t w, uint32_t h);
+void gd_egl_scanout_disable(DisplayChangeListener *dcl);
+void gd_egl_scanout_texture(DisplayChangeListener *dcl,
+                            uint32_t backing_id,
+                            bool backing_y_0_top,
+                            uint32_t backing_width,
+                            uint32_t backing_height,
+                            uint32_t x, uint32_t y,
+                            uint32_t w, uint32_t h);
 void gd_egl_scanout_flush(DisplayChangeListener *dcl,
                           uint32_t x, uint32_t y, uint32_t w, uint32_t h);
 void gtk_egl_init(void);
@@ -126,11 +129,13 @@ QEMUGLContext gd_gl_area_create_context(DisplayChangeListener *dcl,
                                         QEMUGLParams *params);
 void gd_gl_area_destroy_context(DisplayChangeListener *dcl,
                                 QEMUGLContext ctx);
-void gd_gl_area_scanout(DisplayChangeListener *dcl,
-                        uint32_t backing_id, bool backing_y_0_top,
-                        uint32_t backing_width, uint32_t backing_height,
-                        uint32_t x, uint32_t y,
-                        uint32_t w, uint32_t h);
+void gd_gl_area_scanout_texture(DisplayChangeListener *dcl,
+                                uint32_t backing_id,
+                                bool backing_y_0_top,
+                                uint32_t backing_width,
+                                uint32_t backing_height,
+                                uint32_t x, uint32_t y,
+                                uint32_t w, uint32_t h);
 void gd_gl_area_scanout_flush(DisplayChangeListener *dcl,
                               uint32_t x, uint32_t y, uint32_t w, uint32_t h);
 void gtk_gl_area_init(void);
diff --git a/include/ui/sdl2.h b/include/ui/sdl2.h
index 683bb6af2e..aaf226c2c0 100644
--- a/include/ui/sdl2.h
+++ b/include/ui/sdl2.h
@@ -62,11 +62,14 @@ int sdl2_gl_make_context_current(DisplayChangeListener *dcl,
                                  QEMUGLContext ctx);
 QEMUGLContext sdl2_gl_get_current_context(DisplayChangeListener *dcl);
 
-void sdl2_gl_scanout(DisplayChangeListener *dcl,
-                     uint32_t backing_id, bool backing_y_0_top,
-                     uint32_t backing_width, uint32_t backing_height,
-                     uint32_t x, uint32_t y,
-                     uint32_t w, uint32_t h);
+void sdl2_gl_scanout_disable(DisplayChangeListener *dcl);
+void sdl2_gl_scanout_texture(DisplayChangeListener *dcl,
+                             uint32_t backing_id,
+                             bool backing_y_0_top,
+                             uint32_t backing_width,
+                             uint32_t backing_height,
+                             uint32_t x, uint32_t y,
+                             uint32_t w, uint32_t h);
 void sdl2_gl_scanout_flush(DisplayChangeListener *dcl,
                            uint32_t x, uint32_t y, uint32_t w, uint32_t h);
 
diff --git a/linux-headers/asm-arm/kvm.h b/linux-headers/asm-arm/kvm.h
index 2fb7859465..1101d55d2f 100644
--- a/linux-headers/asm-arm/kvm.h
+++ b/linux-headers/asm-arm/kvm.h
@@ -87,9 +87,11 @@ struct kvm_regs {
 /* Supported VGICv3 address types  */
 #define KVM_VGIC_V3_ADDR_TYPE_DIST	2
 #define KVM_VGIC_V3_ADDR_TYPE_REDIST	3
+#define KVM_VGIC_ITS_ADDR_TYPE		4
 
 #define KVM_VGIC_V3_DIST_SIZE		SZ_64K
 #define KVM_VGIC_V3_REDIST_SIZE		(2 * SZ_64K)
+#define KVM_VGIC_V3_ITS_SIZE		(2 * SZ_64K)
 
 #define KVM_ARM_VCPU_POWER_OFF		0 /* CPU is started in OFF state */
 #define KVM_ARM_VCPU_PSCI_0_2		1 /* CPU uses PSCI v0.2 */
@@ -179,10 +181,23 @@ struct kvm_arch_memory_slot {
 #define KVM_DEV_ARM_VGIC_GRP_CPU_REGS	2
 #define   KVM_DEV_ARM_VGIC_CPUID_SHIFT	32
 #define   KVM_DEV_ARM_VGIC_CPUID_MASK	(0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT)
+#define   KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT 32
+#define   KVM_DEV_ARM_VGIC_V3_MPIDR_MASK \
+			(0xffffffffULL << KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT)
 #define   KVM_DEV_ARM_VGIC_OFFSET_SHIFT	0
 #define   KVM_DEV_ARM_VGIC_OFFSET_MASK	(0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT)
+#define   KVM_DEV_ARM_VGIC_SYSREG_INSTR_MASK (0xffff)
 #define KVM_DEV_ARM_VGIC_GRP_NR_IRQS	3
 #define KVM_DEV_ARM_VGIC_GRP_CTRL       4
+#define KVM_DEV_ARM_VGIC_GRP_REDIST_REGS 5
+#define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6
+#define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO  7
+#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT	10
+#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \
+			(0x3fffffULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT)
+#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK 0x3ff
+#define VGIC_LEVEL_INFO_LINE_LEVEL	0
+
 #define   KVM_DEV_ARM_VGIC_CTRL_INIT    0
 
 /* KVM_IRQ_LINE irq field index values */
diff --git a/linux-headers/asm-arm/unistd-common.h b/linux-headers/asm-arm/unistd-common.h
new file mode 100644
index 0000000000..13a74afd02
--- /dev/null
+++ b/linux-headers/asm-arm/unistd-common.h
@@ -0,0 +1,357 @@
+#ifndef _ASM_ARM_UNISTD_COMMON_H
+#define _ASM_ARM_UNISTD_COMMON_H 1
+
+#define __NR_restart_syscall (__NR_SYSCALL_BASE + 0)
+#define __NR_exit (__NR_SYSCALL_BASE + 1)
+#define __NR_fork (__NR_SYSCALL_BASE + 2)
+#define __NR_read (__NR_SYSCALL_BASE + 3)
+#define __NR_write (__NR_SYSCALL_BASE + 4)
+#define __NR_open (__NR_SYSCALL_BASE + 5)
+#define __NR_close (__NR_SYSCALL_BASE + 6)
+#define __NR_creat (__NR_SYSCALL_BASE + 8)
+#define __NR_link (__NR_SYSCALL_BASE + 9)
+#define __NR_unlink (__NR_SYSCALL_BASE + 10)
+#define __NR_execve (__NR_SYSCALL_BASE + 11)
+#define __NR_chdir (__NR_SYSCALL_BASE + 12)
+#define __NR_mknod (__NR_SYSCALL_BASE + 14)
+#define __NR_chmod (__NR_SYSCALL_BASE + 15)
+#define __NR_lchown (__NR_SYSCALL_BASE + 16)
+#define __NR_lseek (__NR_SYSCALL_BASE + 19)
+#define __NR_getpid (__NR_SYSCALL_BASE + 20)
+#define __NR_mount (__NR_SYSCALL_BASE + 21)
+#define __NR_setuid (__NR_SYSCALL_BASE + 23)
+#define __NR_getuid (__NR_SYSCALL_BASE + 24)
+#define __NR_ptrace (__NR_SYSCALL_BASE + 26)
+#define __NR_pause (__NR_SYSCALL_BASE + 29)
+#define __NR_access (__NR_SYSCALL_BASE + 33)
+#define __NR_nice (__NR_SYSCALL_BASE + 34)
+#define __NR_sync (__NR_SYSCALL_BASE + 36)
+#define __NR_kill (__NR_SYSCALL_BASE + 37)
+#define __NR_rename (__NR_SYSCALL_BASE + 38)
+#define __NR_mkdir (__NR_SYSCALL_BASE + 39)
+#define __NR_rmdir (__NR_SYSCALL_BASE + 40)
+#define __NR_dup (__NR_SYSCALL_BASE + 41)
+#define __NR_pipe (__NR_SYSCALL_BASE + 42)
+#define __NR_times (__NR_SYSCALL_BASE + 43)
+#define __NR_brk (__NR_SYSCALL_BASE + 45)
+#define __NR_setgid (__NR_SYSCALL_BASE + 46)
+#define __NR_getgid (__NR_SYSCALL_BASE + 47)
+#define __NR_geteuid (__NR_SYSCALL_BASE + 49)
+#define __NR_getegid (__NR_SYSCALL_BASE + 50)
+#define __NR_acct (__NR_SYSCALL_BASE + 51)
+#define __NR_umount2 (__NR_SYSCALL_BASE + 52)
+#define __NR_ioctl (__NR_SYSCALL_BASE + 54)
+#define __NR_fcntl (__NR_SYSCALL_BASE + 55)
+#define __NR_setpgid (__NR_SYSCALL_BASE + 57)
+#define __NR_umask (__NR_SYSCALL_BASE + 60)
+#define __NR_chroot (__NR_SYSCALL_BASE + 61)
+#define __NR_ustat (__NR_SYSCALL_BASE + 62)
+#define __NR_dup2 (__NR_SYSCALL_BASE + 63)
+#define __NR_getppid (__NR_SYSCALL_BASE + 64)
+#define __NR_getpgrp (__NR_SYSCALL_BASE + 65)
+#define __NR_setsid (__NR_SYSCALL_BASE + 66)
+#define __NR_sigaction (__NR_SYSCALL_BASE + 67)
+#define __NR_setreuid (__NR_SYSCALL_BASE + 70)
+#define __NR_setregid (__NR_SYSCALL_BASE + 71)
+#define __NR_sigsuspend (__NR_SYSCALL_BASE + 72)
+#define __NR_sigpending (__NR_SYSCALL_BASE + 73)
+#define __NR_sethostname (__NR_SYSCALL_BASE + 74)
+#define __NR_setrlimit (__NR_SYSCALL_BASE + 75)
+#define __NR_getrusage (__NR_SYSCALL_BASE + 77)
+#define __NR_gettimeofday (__NR_SYSCALL_BASE + 78)
+#define __NR_settimeofday (__NR_SYSCALL_BASE + 79)
+#define __NR_getgroups (__NR_SYSCALL_BASE + 80)
+#define __NR_setgroups (__NR_SYSCALL_BASE + 81)
+#define __NR_symlink (__NR_SYSCALL_BASE + 83)
+#define __NR_readlink (__NR_SYSCALL_BASE + 85)
+#define __NR_uselib (__NR_SYSCALL_BASE + 86)
+#define __NR_swapon (__NR_SYSCALL_BASE + 87)
+#define __NR_reboot (__NR_SYSCALL_BASE + 88)
+#define __NR_munmap (__NR_SYSCALL_BASE + 91)
+#define __NR_truncate (__NR_SYSCALL_BASE + 92)
+#define __NR_ftruncate (__NR_SYSCALL_BASE + 93)
+#define __NR_fchmod (__NR_SYSCALL_BASE + 94)
+#define __NR_fchown (__NR_SYSCALL_BASE + 95)
+#define __NR_getpriority (__NR_SYSCALL_BASE + 96)
+#define __NR_setpriority (__NR_SYSCALL_BASE + 97)
+#define __NR_statfs (__NR_SYSCALL_BASE + 99)
+#define __NR_fstatfs (__NR_SYSCALL_BASE + 100)
+#define __NR_syslog (__NR_SYSCALL_BASE + 103)
+#define __NR_setitimer (__NR_SYSCALL_BASE + 104)
+#define __NR_getitimer (__NR_SYSCALL_BASE + 105)
+#define __NR_stat (__NR_SYSCALL_BASE + 106)
+#define __NR_lstat (__NR_SYSCALL_BASE + 107)
+#define __NR_fstat (__NR_SYSCALL_BASE + 108)
+#define __NR_vhangup (__NR_SYSCALL_BASE + 111)
+#define __NR_wait4 (__NR_SYSCALL_BASE + 114)
+#define __NR_swapoff (__NR_SYSCALL_BASE + 115)
+#define __NR_sysinfo (__NR_SYSCALL_BASE + 116)
+#define __NR_fsync (__NR_SYSCALL_BASE + 118)
+#define __NR_sigreturn (__NR_SYSCALL_BASE + 119)
+#define __NR_clone (__NR_SYSCALL_BASE + 120)
+#define __NR_setdomainname (__NR_SYSCALL_BASE + 121)
+#define __NR_uname (__NR_SYSCALL_BASE + 122)
+#define __NR_adjtimex (__NR_SYSCALL_BASE + 124)
+#define __NR_mprotect (__NR_SYSCALL_BASE + 125)
+#define __NR_sigprocmask (__NR_SYSCALL_BASE + 126)
+#define __NR_init_module (__NR_SYSCALL_BASE + 128)
+#define __NR_delete_module (__NR_SYSCALL_BASE + 129)
+#define __NR_quotactl (__NR_SYSCALL_BASE + 131)
+#define __NR_getpgid (__NR_SYSCALL_BASE + 132)
+#define __NR_fchdir (__NR_SYSCALL_BASE + 133)
+#define __NR_bdflush (__NR_SYSCALL_BASE + 134)
+#define __NR_sysfs (__NR_SYSCALL_BASE + 135)
+#define __NR_personality (__NR_SYSCALL_BASE + 136)
+#define __NR_setfsuid (__NR_SYSCALL_BASE + 138)
+#define __NR_setfsgid (__NR_SYSCALL_BASE + 139)
+#define __NR__llseek (__NR_SYSCALL_BASE + 140)
+#define __NR_getdents (__NR_SYSCALL_BASE + 141)
+#define __NR__newselect (__NR_SYSCALL_BASE + 142)
+#define __NR_flock (__NR_SYSCALL_BASE + 143)
+#define __NR_msync (__NR_SYSCALL_BASE + 144)
+#define __NR_readv (__NR_SYSCALL_BASE + 145)
+#define __NR_writev (__NR_SYSCALL_BASE + 146)
+#define __NR_getsid (__NR_SYSCALL_BASE + 147)
+#define __NR_fdatasync (__NR_SYSCALL_BASE + 148)
+#define __NR__sysctl (__NR_SYSCALL_BASE + 149)
+#define __NR_mlock (__NR_SYSCALL_BASE + 150)
+#define __NR_munlock (__NR_SYSCALL_BASE + 151)
+#define __NR_mlockall (__NR_SYSCALL_BASE + 152)
+#define __NR_munlockall (__NR_SYSCALL_BASE + 153)
+#define __NR_sched_setparam (__NR_SYSCALL_BASE + 154)
+#define __NR_sched_getparam (__NR_SYSCALL_BASE + 155)
+#define __NR_sched_setscheduler (__NR_SYSCALL_BASE + 156)
+#define __NR_sched_getscheduler (__NR_SYSCALL_BASE + 157)
+#define __NR_sched_yield (__NR_SYSCALL_BASE + 158)
+#define __NR_sched_get_priority_max (__NR_SYSCALL_BASE + 159)
+#define __NR_sched_get_priority_min (__NR_SYSCALL_BASE + 160)
+#define __NR_sched_rr_get_interval (__NR_SYSCALL_BASE + 161)
+#define __NR_nanosleep (__NR_SYSCALL_BASE + 162)
+#define __NR_mremap (__NR_SYSCALL_BASE + 163)
+#define __NR_setresuid (__NR_SYSCALL_BASE + 164)
+#define __NR_getresuid (__NR_SYSCALL_BASE + 165)
+#define __NR_poll (__NR_SYSCALL_BASE + 168)
+#define __NR_nfsservctl (__NR_SYSCALL_BASE + 169)
+#define __NR_setresgid (__NR_SYSCALL_BASE + 170)
+#define __NR_getresgid (__NR_SYSCALL_BASE + 171)
+#define __NR_prctl (__NR_SYSCALL_BASE + 172)
+#define __NR_rt_sigreturn (__NR_SYSCALL_BASE + 173)
+#define __NR_rt_sigaction (__NR_SYSCALL_BASE + 174)
+#define __NR_rt_sigprocmask (__NR_SYSCALL_BASE + 175)
+#define __NR_rt_sigpending (__NR_SYSCALL_BASE + 176)
+#define __NR_rt_sigtimedwait (__NR_SYSCALL_BASE + 177)
+#define __NR_rt_sigqueueinfo (__NR_SYSCALL_BASE + 178)
+#define __NR_rt_sigsuspend (__NR_SYSCALL_BASE + 179)
+#define __NR_pread64 (__NR_SYSCALL_BASE + 180)
+#define __NR_pwrite64 (__NR_SYSCALL_BASE + 181)
+#define __NR_chown (__NR_SYSCALL_BASE + 182)
+#define __NR_getcwd (__NR_SYSCALL_BASE + 183)
+#define __NR_capget (__NR_SYSCALL_BASE + 184)
+#define __NR_capset (__NR_SYSCALL_BASE + 185)
+#define __NR_sigaltstack (__NR_SYSCALL_BASE + 186)
+#define __NR_sendfile (__NR_SYSCALL_BASE + 187)
+#define __NR_vfork (__NR_SYSCALL_BASE + 190)
+#define __NR_ugetrlimit (__NR_SYSCALL_BASE + 191)
+#define __NR_mmap2 (__NR_SYSCALL_BASE + 192)
+#define __NR_truncate64 (__NR_SYSCALL_BASE + 193)
+#define __NR_ftruncate64 (__NR_SYSCALL_BASE + 194)
+#define __NR_stat64 (__NR_SYSCALL_BASE + 195)
+#define __NR_lstat64 (__NR_SYSCALL_BASE + 196)
+#define __NR_fstat64 (__NR_SYSCALL_BASE + 197)
+#define __NR_lchown32 (__NR_SYSCALL_BASE + 198)
+#define __NR_getuid32 (__NR_SYSCALL_BASE + 199)
+#define __NR_getgid32 (__NR_SYSCALL_BASE + 200)
+#define __NR_geteuid32 (__NR_SYSCALL_BASE + 201)
+#define __NR_getegid32 (__NR_SYSCALL_BASE + 202)
+#define __NR_setreuid32 (__NR_SYSCALL_BASE + 203)
+#define __NR_setregid32 (__NR_SYSCALL_BASE + 204)
+#define __NR_getgroups32 (__NR_SYSCALL_BASE + 205)
+#define __NR_setgroups32 (__NR_SYSCALL_BASE + 206)
+#define __NR_fchown32 (__NR_SYSCALL_BASE + 207)
+#define __NR_setresuid32 (__NR_SYSCALL_BASE + 208)
+#define __NR_getresuid32 (__NR_SYSCALL_BASE + 209)
+#define __NR_setresgid32 (__NR_SYSCALL_BASE + 210)
+#define __NR_getresgid32 (__NR_SYSCALL_BASE + 211)
+#define __NR_chown32 (__NR_SYSCALL_BASE + 212)
+#define __NR_setuid32 (__NR_SYSCALL_BASE + 213)
+#define __NR_setgid32 (__NR_SYSCALL_BASE + 214)
+#define __NR_setfsuid32 (__NR_SYSCALL_BASE + 215)
+#define __NR_setfsgid32 (__NR_SYSCALL_BASE + 216)
+#define __NR_getdents64 (__NR_SYSCALL_BASE + 217)
+#define __NR_pivot_root (__NR_SYSCALL_BASE + 218)
+#define __NR_mincore (__NR_SYSCALL_BASE + 219)
+#define __NR_madvise (__NR_SYSCALL_BASE + 220)
+#define __NR_fcntl64 (__NR_SYSCALL_BASE + 221)
+#define __NR_gettid (__NR_SYSCALL_BASE + 224)
+#define __NR_readahead (__NR_SYSCALL_BASE + 225)
+#define __NR_setxattr (__NR_SYSCALL_BASE + 226)
+#define __NR_lsetxattr (__NR_SYSCALL_BASE + 227)
+#define __NR_fsetxattr (__NR_SYSCALL_BASE + 228)
+#define __NR_getxattr (__NR_SYSCALL_BASE + 229)
+#define __NR_lgetxattr (__NR_SYSCALL_BASE + 230)
+#define __NR_fgetxattr (__NR_SYSCALL_BASE + 231)
+#define __NR_listxattr (__NR_SYSCALL_BASE + 232)
+#define __NR_llistxattr (__NR_SYSCALL_BASE + 233)
+#define __NR_flistxattr (__NR_SYSCALL_BASE + 234)
+#define __NR_removexattr (__NR_SYSCALL_BASE + 235)
+#define __NR_lremovexattr (__NR_SYSCALL_BASE + 236)
+#define __NR_fremovexattr (__NR_SYSCALL_BASE + 237)
+#define __NR_tkill (__NR_SYSCALL_BASE + 238)
+#define __NR_sendfile64 (__NR_SYSCALL_BASE + 239)
+#define __NR_futex (__NR_SYSCALL_BASE + 240)
+#define __NR_sched_setaffinity (__NR_SYSCALL_BASE + 241)
+#define __NR_sched_getaffinity (__NR_SYSCALL_BASE + 242)
+#define __NR_io_setup (__NR_SYSCALL_BASE + 243)
+#define __NR_io_destroy (__NR_SYSCALL_BASE + 244)
+#define __NR_io_getevents (__NR_SYSCALL_BASE + 245)
+#define __NR_io_submit (__NR_SYSCALL_BASE + 246)
+#define __NR_io_cancel (__NR_SYSCALL_BASE + 247)
+#define __NR_exit_group (__NR_SYSCALL_BASE + 248)
+#define __NR_lookup_dcookie (__NR_SYSCALL_BASE + 249)
+#define __NR_epoll_create (__NR_SYSCALL_BASE + 250)
+#define __NR_epoll_ctl (__NR_SYSCALL_BASE + 251)
+#define __NR_epoll_wait (__NR_SYSCALL_BASE + 252)
+#define __NR_remap_file_pages (__NR_SYSCALL_BASE + 253)
+#define __NR_set_tid_address (__NR_SYSCALL_BASE + 256)
+#define __NR_timer_create (__NR_SYSCALL_BASE + 257)
+#define __NR_timer_settime (__NR_SYSCALL_BASE + 258)
+#define __NR_timer_gettime (__NR_SYSCALL_BASE + 259)
+#define __NR_timer_getoverrun (__NR_SYSCALL_BASE + 260)
+#define __NR_timer_delete (__NR_SYSCALL_BASE + 261)
+#define __NR_clock_settime (__NR_SYSCALL_BASE + 262)
+#define __NR_clock_gettime (__NR_SYSCALL_BASE + 263)
+#define __NR_clock_getres (__NR_SYSCALL_BASE + 264)
+#define __NR_clock_nanosleep (__NR_SYSCALL_BASE + 265)
+#define __NR_statfs64 (__NR_SYSCALL_BASE + 266)
+#define __NR_fstatfs64 (__NR_SYSCALL_BASE + 267)
+#define __NR_tgkill (__NR_SYSCALL_BASE + 268)
+#define __NR_utimes (__NR_SYSCALL_BASE + 269)
+#define __NR_arm_fadvise64_64 (__NR_SYSCALL_BASE + 270)
+#define __NR_pciconfig_iobase (__NR_SYSCALL_BASE + 271)
+#define __NR_pciconfig_read (__NR_SYSCALL_BASE + 272)
+#define __NR_pciconfig_write (__NR_SYSCALL_BASE + 273)
+#define __NR_mq_open (__NR_SYSCALL_BASE + 274)
+#define __NR_mq_unlink (__NR_SYSCALL_BASE + 275)
+#define __NR_mq_timedsend (__NR_SYSCALL_BASE + 276)
+#define __NR_mq_timedreceive (__NR_SYSCALL_BASE + 277)
+#define __NR_mq_notify (__NR_SYSCALL_BASE + 278)
+#define __NR_mq_getsetattr (__NR_SYSCALL_BASE + 279)
+#define __NR_waitid (__NR_SYSCALL_BASE + 280)
+#define __NR_socket (__NR_SYSCALL_BASE + 281)
+#define __NR_bind (__NR_SYSCALL_BASE + 282)
+#define __NR_connect (__NR_SYSCALL_BASE + 283)
+#define __NR_listen (__NR_SYSCALL_BASE + 284)
+#define __NR_accept (__NR_SYSCALL_BASE + 285)
+#define __NR_getsockname (__NR_SYSCALL_BASE + 286)
+#define __NR_getpeername (__NR_SYSCALL_BASE + 287)
+#define __NR_socketpair (__NR_SYSCALL_BASE + 288)
+#define __NR_send (__NR_SYSCALL_BASE + 289)
+#define __NR_sendto (__NR_SYSCALL_BASE + 290)
+#define __NR_recv (__NR_SYSCALL_BASE + 291)
+#define __NR_recvfrom (__NR_SYSCALL_BASE + 292)
+#define __NR_shutdown (__NR_SYSCALL_BASE + 293)
+#define __NR_setsockopt (__NR_SYSCALL_BASE + 294)
+#define __NR_getsockopt (__NR_SYSCALL_BASE + 295)
+#define __NR_sendmsg (__NR_SYSCALL_BASE + 296)
+#define __NR_recvmsg (__NR_SYSCALL_BASE + 297)
+#define __NR_semop (__NR_SYSCALL_BASE + 298)
+#define __NR_semget (__NR_SYSCALL_BASE + 299)
+#define __NR_semctl (__NR_SYSCALL_BASE + 300)
+#define __NR_msgsnd (__NR_SYSCALL_BASE + 301)
+#define __NR_msgrcv (__NR_SYSCALL_BASE + 302)
+#define __NR_msgget (__NR_SYSCALL_BASE + 303)
+#define __NR_msgctl (__NR_SYSCALL_BASE + 304)
+#define __NR_shmat (__NR_SYSCALL_BASE + 305)
+#define __NR_shmdt (__NR_SYSCALL_BASE + 306)
+#define __NR_shmget (__NR_SYSCALL_BASE + 307)
+#define __NR_shmctl (__NR_SYSCALL_BASE + 308)
+#define __NR_add_key (__NR_SYSCALL_BASE + 309)
+#define __NR_request_key (__NR_SYSCALL_BASE + 310)
+#define __NR_keyctl (__NR_SYSCALL_BASE + 311)
+#define __NR_semtimedop (__NR_SYSCALL_BASE + 312)
+#define __NR_vserver (__NR_SYSCALL_BASE + 313)
+#define __NR_ioprio_set (__NR_SYSCALL_BASE + 314)
+#define __NR_ioprio_get (__NR_SYSCALL_BASE + 315)
+#define __NR_inotify_init (__NR_SYSCALL_BASE + 316)
+#define __NR_inotify_add_watch (__NR_SYSCALL_BASE + 317)
+#define __NR_inotify_rm_watch (__NR_SYSCALL_BASE + 318)
+#define __NR_mbind (__NR_SYSCALL_BASE + 319)
+#define __NR_get_mempolicy (__NR_SYSCALL_BASE + 320)
+#define __NR_set_mempolicy (__NR_SYSCALL_BASE + 321)
+#define __NR_openat (__NR_SYSCALL_BASE + 322)
+#define __NR_mkdirat (__NR_SYSCALL_BASE + 323)
+#define __NR_mknodat (__NR_SYSCALL_BASE + 324)
+#define __NR_fchownat (__NR_SYSCALL_BASE + 325)
+#define __NR_futimesat (__NR_SYSCALL_BASE + 326)
+#define __NR_fstatat64 (__NR_SYSCALL_BASE + 327)
+#define __NR_unlinkat (__NR_SYSCALL_BASE + 328)
+#define __NR_renameat (__NR_SYSCALL_BASE + 329)
+#define __NR_linkat (__NR_SYSCALL_BASE + 330)
+#define __NR_symlinkat (__NR_SYSCALL_BASE + 331)
+#define __NR_readlinkat (__NR_SYSCALL_BASE + 332)
+#define __NR_fchmodat (__NR_SYSCALL_BASE + 333)
+#define __NR_faccessat (__NR_SYSCALL_BASE + 334)
+#define __NR_pselect6 (__NR_SYSCALL_BASE + 335)
+#define __NR_ppoll (__NR_SYSCALL_BASE + 336)
+#define __NR_unshare (__NR_SYSCALL_BASE + 337)
+#define __NR_set_robust_list (__NR_SYSCALL_BASE + 338)
+#define __NR_get_robust_list (__NR_SYSCALL_BASE + 339)
+#define __NR_splice (__NR_SYSCALL_BASE + 340)
+#define __NR_arm_sync_file_range (__NR_SYSCALL_BASE + 341)
+#define __NR_tee (__NR_SYSCALL_BASE + 342)
+#define __NR_vmsplice (__NR_SYSCALL_BASE + 343)
+#define __NR_move_pages (__NR_SYSCALL_BASE + 344)
+#define __NR_getcpu (__NR_SYSCALL_BASE + 345)
+#define __NR_epoll_pwait (__NR_SYSCALL_BASE + 346)
+#define __NR_kexec_load (__NR_SYSCALL_BASE + 347)
+#define __NR_utimensat (__NR_SYSCALL_BASE + 348)
+#define __NR_signalfd (__NR_SYSCALL_BASE + 349)
+#define __NR_timerfd_create (__NR_SYSCALL_BASE + 350)
+#define __NR_eventfd (__NR_SYSCALL_BASE + 351)
+#define __NR_fallocate (__NR_SYSCALL_BASE + 352)
+#define __NR_timerfd_settime (__NR_SYSCALL_BASE + 353)
+#define __NR_timerfd_gettime (__NR_SYSCALL_BASE + 354)
+#define __NR_signalfd4 (__NR_SYSCALL_BASE + 355)
+#define __NR_eventfd2 (__NR_SYSCALL_BASE + 356)
+#define __NR_epoll_create1 (__NR_SYSCALL_BASE + 357)
+#define __NR_dup3 (__NR_SYSCALL_BASE + 358)
+#define __NR_pipe2 (__NR_SYSCALL_BASE + 359)
+#define __NR_inotify_init1 (__NR_SYSCALL_BASE + 360)
+#define __NR_preadv (__NR_SYSCALL_BASE + 361)
+#define __NR_pwritev (__NR_SYSCALL_BASE + 362)
+#define __NR_rt_tgsigqueueinfo (__NR_SYSCALL_BASE + 363)
+#define __NR_perf_event_open (__NR_SYSCALL_BASE + 364)
+#define __NR_recvmmsg (__NR_SYSCALL_BASE + 365)
+#define __NR_accept4 (__NR_SYSCALL_BASE + 366)
+#define __NR_fanotify_init (__NR_SYSCALL_BASE + 367)
+#define __NR_fanotify_mark (__NR_SYSCALL_BASE + 368)
+#define __NR_prlimit64 (__NR_SYSCALL_BASE + 369)
+#define __NR_name_to_handle_at (__NR_SYSCALL_BASE + 370)
+#define __NR_open_by_handle_at (__NR_SYSCALL_BASE + 371)
+#define __NR_clock_adjtime (__NR_SYSCALL_BASE + 372)
+#define __NR_syncfs (__NR_SYSCALL_BASE + 373)
+#define __NR_sendmmsg (__NR_SYSCALL_BASE + 374)
+#define __NR_setns (__NR_SYSCALL_BASE + 375)
+#define __NR_process_vm_readv (__NR_SYSCALL_BASE + 376)
+#define __NR_process_vm_writev (__NR_SYSCALL_BASE + 377)
+#define __NR_kcmp (__NR_SYSCALL_BASE + 378)
+#define __NR_finit_module (__NR_SYSCALL_BASE + 379)
+#define __NR_sched_setattr (__NR_SYSCALL_BASE + 380)
+#define __NR_sched_getattr (__NR_SYSCALL_BASE + 381)
+#define __NR_renameat2 (__NR_SYSCALL_BASE + 382)
+#define __NR_seccomp (__NR_SYSCALL_BASE + 383)
+#define __NR_getrandom (__NR_SYSCALL_BASE + 384)
+#define __NR_memfd_create (__NR_SYSCALL_BASE + 385)
+#define __NR_bpf (__NR_SYSCALL_BASE + 386)
+#define __NR_execveat (__NR_SYSCALL_BASE + 387)
+#define __NR_userfaultfd (__NR_SYSCALL_BASE + 388)
+#define __NR_membarrier (__NR_SYSCALL_BASE + 389)
+#define __NR_mlock2 (__NR_SYSCALL_BASE + 390)
+#define __NR_copy_file_range (__NR_SYSCALL_BASE + 391)
+#define __NR_preadv2 (__NR_SYSCALL_BASE + 392)
+#define __NR_pwritev2 (__NR_SYSCALL_BASE + 393)
+#define __NR_pkey_mprotect (__NR_SYSCALL_BASE + 394)
+#define __NR_pkey_alloc (__NR_SYSCALL_BASE + 395)
+#define __NR_pkey_free (__NR_SYSCALL_BASE + 396)
+
+#endif /* _ASM_ARM_UNISTD_COMMON_H */
diff --git a/linux-headers/asm-arm/unistd-eabi.h b/linux-headers/asm-arm/unistd-eabi.h
new file mode 100644
index 0000000000..266f1fcdfb
--- /dev/null
+++ b/linux-headers/asm-arm/unistd-eabi.h
@@ -0,0 +1,5 @@
+#ifndef _ASM_ARM_UNISTD_EABI_H
+#define _ASM_ARM_UNISTD_EABI_H 1
+
+
+#endif /* _ASM_ARM_UNISTD_EABI_H */
diff --git a/linux-headers/asm-arm/unistd-oabi.h b/linux-headers/asm-arm/unistd-oabi.h
new file mode 100644
index 0000000000..47d9afb96d
--- /dev/null
+++ b/linux-headers/asm-arm/unistd-oabi.h
@@ -0,0 +1,17 @@
+#ifndef _ASM_ARM_UNISTD_OABI_H
+#define _ASM_ARM_UNISTD_OABI_H 1
+
+#define __NR_time (__NR_SYSCALL_BASE + 13)
+#define __NR_umount (__NR_SYSCALL_BASE + 22)
+#define __NR_stime (__NR_SYSCALL_BASE + 25)
+#define __NR_alarm (__NR_SYSCALL_BASE + 27)
+#define __NR_utime (__NR_SYSCALL_BASE + 30)
+#define __NR_getrlimit (__NR_SYSCALL_BASE + 76)
+#define __NR_select (__NR_SYSCALL_BASE + 82)
+#define __NR_readdir (__NR_SYSCALL_BASE + 89)
+#define __NR_mmap (__NR_SYSCALL_BASE + 90)
+#define __NR_socketcall (__NR_SYSCALL_BASE + 102)
+#define __NR_syscall (__NR_SYSCALL_BASE + 113)
+#define __NR_ipc (__NR_SYSCALL_BASE + 117)
+
+#endif /* _ASM_ARM_UNISTD_OABI_H */
diff --git a/linux-headers/asm-arm/unistd.h b/linux-headers/asm-arm/unistd.h
index ceb5450c81..155571b874 100644
--- a/linux-headers/asm-arm/unistd.h
+++ b/linux-headers/asm-arm/unistd.h
@@ -17,409 +17,14 @@
 
 #if defined(__thumb__) || defined(__ARM_EABI__)
 #define __NR_SYSCALL_BASE	0
+#include <asm/unistd-eabi.h>
 #else
 #define __NR_SYSCALL_BASE	__NR_OABI_SYSCALL_BASE
+#include <asm/unistd-oabi.h>
 #endif
 
-/*
- * This file contains the system call numbers.
- */
-
-#define __NR_restart_syscall		(__NR_SYSCALL_BASE+  0)
-#define __NR_exit			(__NR_SYSCALL_BASE+  1)
-#define __NR_fork			(__NR_SYSCALL_BASE+  2)
-#define __NR_read			(__NR_SYSCALL_BASE+  3)
-#define __NR_write			(__NR_SYSCALL_BASE+  4)
-#define __NR_open			(__NR_SYSCALL_BASE+  5)
-#define __NR_close			(__NR_SYSCALL_BASE+  6)
-					/* 7 was sys_waitpid */
-#define __NR_creat			(__NR_SYSCALL_BASE+  8)
-#define __NR_link			(__NR_SYSCALL_BASE+  9)
-#define __NR_unlink			(__NR_SYSCALL_BASE+ 10)
-#define __NR_execve			(__NR_SYSCALL_BASE+ 11)
-#define __NR_chdir			(__NR_SYSCALL_BASE+ 12)
-#define __NR_time			(__NR_SYSCALL_BASE+ 13)
-#define __NR_mknod			(__NR_SYSCALL_BASE+ 14)
-#define __NR_chmod			(__NR_SYSCALL_BASE+ 15)
-#define __NR_lchown			(__NR_SYSCALL_BASE+ 16)
-					/* 17 was sys_break */
-					/* 18 was sys_stat */
-#define __NR_lseek			(__NR_SYSCALL_BASE+ 19)
-#define __NR_getpid			(__NR_SYSCALL_BASE+ 20)
-#define __NR_mount			(__NR_SYSCALL_BASE+ 21)
-#define __NR_umount			(__NR_SYSCALL_BASE+ 22)
-#define __NR_setuid			(__NR_SYSCALL_BASE+ 23)
-#define __NR_getuid			(__NR_SYSCALL_BASE+ 24)
-#define __NR_stime			(__NR_SYSCALL_BASE+ 25)
-#define __NR_ptrace			(__NR_SYSCALL_BASE+ 26)
-#define __NR_alarm			(__NR_SYSCALL_BASE+ 27)
-					/* 28 was sys_fstat */
-#define __NR_pause			(__NR_SYSCALL_BASE+ 29)
-#define __NR_utime			(__NR_SYSCALL_BASE+ 30)
-					/* 31 was sys_stty */
-					/* 32 was sys_gtty */
-#define __NR_access			(__NR_SYSCALL_BASE+ 33)
-#define __NR_nice			(__NR_SYSCALL_BASE+ 34)
-					/* 35 was sys_ftime */
-#define __NR_sync			(__NR_SYSCALL_BASE+ 36)
-#define __NR_kill			(__NR_SYSCALL_BASE+ 37)
-#define __NR_rename			(__NR_SYSCALL_BASE+ 38)
-#define __NR_mkdir			(__NR_SYSCALL_BASE+ 39)
-#define __NR_rmdir			(__NR_SYSCALL_BASE+ 40)
-#define __NR_dup			(__NR_SYSCALL_BASE+ 41)
-#define __NR_pipe			(__NR_SYSCALL_BASE+ 42)
-#define __NR_times			(__NR_SYSCALL_BASE+ 43)
-					/* 44 was sys_prof */
-#define __NR_brk			(__NR_SYSCALL_BASE+ 45)
-#define __NR_setgid			(__NR_SYSCALL_BASE+ 46)
-#define __NR_getgid			(__NR_SYSCALL_BASE+ 47)
-					/* 48 was sys_signal */
-#define __NR_geteuid			(__NR_SYSCALL_BASE+ 49)
-#define __NR_getegid			(__NR_SYSCALL_BASE+ 50)
-#define __NR_acct			(__NR_SYSCALL_BASE+ 51)
-#define __NR_umount2			(__NR_SYSCALL_BASE+ 52)
-					/* 53 was sys_lock */
-#define __NR_ioctl			(__NR_SYSCALL_BASE+ 54)
-#define __NR_fcntl			(__NR_SYSCALL_BASE+ 55)
-					/* 56 was sys_mpx */
-#define __NR_setpgid			(__NR_SYSCALL_BASE+ 57)
-					/* 58 was sys_ulimit */
-					/* 59 was sys_olduname */
-#define __NR_umask			(__NR_SYSCALL_BASE+ 60)
-#define __NR_chroot			(__NR_SYSCALL_BASE+ 61)
-#define __NR_ustat			(__NR_SYSCALL_BASE+ 62)
-#define __NR_dup2			(__NR_SYSCALL_BASE+ 63)
-#define __NR_getppid			(__NR_SYSCALL_BASE+ 64)
-#define __NR_getpgrp			(__NR_SYSCALL_BASE+ 65)
-#define __NR_setsid			(__NR_SYSCALL_BASE+ 66)
-#define __NR_sigaction			(__NR_SYSCALL_BASE+ 67)
-					/* 68 was sys_sgetmask */
-					/* 69 was sys_ssetmask */
-#define __NR_setreuid			(__NR_SYSCALL_BASE+ 70)
-#define __NR_setregid			(__NR_SYSCALL_BASE+ 71)
-#define __NR_sigsuspend			(__NR_SYSCALL_BASE+ 72)
-#define __NR_sigpending			(__NR_SYSCALL_BASE+ 73)
-#define __NR_sethostname		(__NR_SYSCALL_BASE+ 74)
-#define __NR_setrlimit			(__NR_SYSCALL_BASE+ 75)
-#define __NR_getrlimit			(__NR_SYSCALL_BASE+ 76)	/* Back compat 2GB limited rlimit */
-#define __NR_getrusage			(__NR_SYSCALL_BASE+ 77)
-#define __NR_gettimeofday		(__NR_SYSCALL_BASE+ 78)
-#define __NR_settimeofday		(__NR_SYSCALL_BASE+ 79)
-#define __NR_getgroups			(__NR_SYSCALL_BASE+ 80)
-#define __NR_setgroups			(__NR_SYSCALL_BASE+ 81)
-#define __NR_select			(__NR_SYSCALL_BASE+ 82)
-#define __NR_symlink			(__NR_SYSCALL_BASE+ 83)
-					/* 84 was sys_lstat */
-#define __NR_readlink			(__NR_SYSCALL_BASE+ 85)
-#define __NR_uselib			(__NR_SYSCALL_BASE+ 86)
-#define __NR_swapon			(__NR_SYSCALL_BASE+ 87)
-#define __NR_reboot			(__NR_SYSCALL_BASE+ 88)
-#define __NR_readdir			(__NR_SYSCALL_BASE+ 89)
-#define __NR_mmap			(__NR_SYSCALL_BASE+ 90)
-#define __NR_munmap			(__NR_SYSCALL_BASE+ 91)
-#define __NR_truncate			(__NR_SYSCALL_BASE+ 92)
-#define __NR_ftruncate			(__NR_SYSCALL_BASE+ 93)
-#define __NR_fchmod			(__NR_SYSCALL_BASE+ 94)
-#define __NR_fchown			(__NR_SYSCALL_BASE+ 95)
-#define __NR_getpriority		(__NR_SYSCALL_BASE+ 96)
-#define __NR_setpriority		(__NR_SYSCALL_BASE+ 97)
-					/* 98 was sys_profil */
-#define __NR_statfs			(__NR_SYSCALL_BASE+ 99)
-#define __NR_fstatfs			(__NR_SYSCALL_BASE+100)
-					/* 101 was sys_ioperm */
-#define __NR_socketcall			(__NR_SYSCALL_BASE+102)
-#define __NR_syslog			(__NR_SYSCALL_BASE+103)
-#define __NR_setitimer			(__NR_SYSCALL_BASE+104)
-#define __NR_getitimer			(__NR_SYSCALL_BASE+105)
-#define __NR_stat			(__NR_SYSCALL_BASE+106)
-#define __NR_lstat			(__NR_SYSCALL_BASE+107)
-#define __NR_fstat			(__NR_SYSCALL_BASE+108)
-					/* 109 was sys_uname */
-					/* 110 was sys_iopl */
-#define __NR_vhangup			(__NR_SYSCALL_BASE+111)
-					/* 112 was sys_idle */
-#define __NR_syscall			(__NR_SYSCALL_BASE+113) /* syscall to call a syscall! */
-#define __NR_wait4			(__NR_SYSCALL_BASE+114)
-#define __NR_swapoff			(__NR_SYSCALL_BASE+115)
-#define __NR_sysinfo			(__NR_SYSCALL_BASE+116)
-#define __NR_ipc			(__NR_SYSCALL_BASE+117)
-#define __NR_fsync			(__NR_SYSCALL_BASE+118)
-#define __NR_sigreturn			(__NR_SYSCALL_BASE+119)
-#define __NR_clone			(__NR_SYSCALL_BASE+120)
-#define __NR_setdomainname		(__NR_SYSCALL_BASE+121)
-#define __NR_uname			(__NR_SYSCALL_BASE+122)
-					/* 123 was sys_modify_ldt */
-#define __NR_adjtimex			(__NR_SYSCALL_BASE+124)
-#define __NR_mprotect			(__NR_SYSCALL_BASE+125)
-#define __NR_sigprocmask		(__NR_SYSCALL_BASE+126)
-					/* 127 was sys_create_module */
-#define __NR_init_module		(__NR_SYSCALL_BASE+128)
-#define __NR_delete_module		(__NR_SYSCALL_BASE+129)
-					/* 130 was sys_get_kernel_syms */
-#define __NR_quotactl			(__NR_SYSCALL_BASE+131)
-#define __NR_getpgid			(__NR_SYSCALL_BASE+132)
-#define __NR_fchdir			(__NR_SYSCALL_BASE+133)
-#define __NR_bdflush			(__NR_SYSCALL_BASE+134)
-#define __NR_sysfs			(__NR_SYSCALL_BASE+135)
-#define __NR_personality		(__NR_SYSCALL_BASE+136)
-					/* 137 was sys_afs_syscall */
-#define __NR_setfsuid			(__NR_SYSCALL_BASE+138)
-#define __NR_setfsgid			(__NR_SYSCALL_BASE+139)
-#define __NR__llseek			(__NR_SYSCALL_BASE+140)
-#define __NR_getdents			(__NR_SYSCALL_BASE+141)
-#define __NR__newselect			(__NR_SYSCALL_BASE+142)
-#define __NR_flock			(__NR_SYSCALL_BASE+143)
-#define __NR_msync			(__NR_SYSCALL_BASE+144)
-#define __NR_readv			(__NR_SYSCALL_BASE+145)
-#define __NR_writev			(__NR_SYSCALL_BASE+146)
-#define __NR_getsid			(__NR_SYSCALL_BASE+147)
-#define __NR_fdatasync			(__NR_SYSCALL_BASE+148)
-#define __NR__sysctl			(__NR_SYSCALL_BASE+149)
-#define __NR_mlock			(__NR_SYSCALL_BASE+150)
-#define __NR_munlock			(__NR_SYSCALL_BASE+151)
-#define __NR_mlockall			(__NR_SYSCALL_BASE+152)
-#define __NR_munlockall			(__NR_SYSCALL_BASE+153)
-#define __NR_sched_setparam		(__NR_SYSCALL_BASE+154)
-#define __NR_sched_getparam		(__NR_SYSCALL_BASE+155)
-#define __NR_sched_setscheduler		(__NR_SYSCALL_BASE+156)
-#define __NR_sched_getscheduler		(__NR_SYSCALL_BASE+157)
-#define __NR_sched_yield		(__NR_SYSCALL_BASE+158)
-#define __NR_sched_get_priority_max	(__NR_SYSCALL_BASE+159)
-#define __NR_sched_get_priority_min	(__NR_SYSCALL_BASE+160)
-#define __NR_sched_rr_get_interval	(__NR_SYSCALL_BASE+161)
-#define __NR_nanosleep			(__NR_SYSCALL_BASE+162)
-#define __NR_mremap			(__NR_SYSCALL_BASE+163)
-#define __NR_setresuid			(__NR_SYSCALL_BASE+164)
-#define __NR_getresuid			(__NR_SYSCALL_BASE+165)
-					/* 166 was sys_vm86 */
-					/* 167 was sys_query_module */
-#define __NR_poll			(__NR_SYSCALL_BASE+168)
-#define __NR_nfsservctl			(__NR_SYSCALL_BASE+169)
-#define __NR_setresgid			(__NR_SYSCALL_BASE+170)
-#define __NR_getresgid			(__NR_SYSCALL_BASE+171)
-#define __NR_prctl			(__NR_SYSCALL_BASE+172)
-#define __NR_rt_sigreturn		(__NR_SYSCALL_BASE+173)
-#define __NR_rt_sigaction		(__NR_SYSCALL_BASE+174)
-#define __NR_rt_sigprocmask		(__NR_SYSCALL_BASE+175)
-#define __NR_rt_sigpending		(__NR_SYSCALL_BASE+176)
-#define __NR_rt_sigtimedwait		(__NR_SYSCALL_BASE+177)
-#define __NR_rt_sigqueueinfo		(__NR_SYSCALL_BASE+178)
-#define __NR_rt_sigsuspend		(__NR_SYSCALL_BASE+179)
-#define __NR_pread64			(__NR_SYSCALL_BASE+180)
-#define __NR_pwrite64			(__NR_SYSCALL_BASE+181)
-#define __NR_chown			(__NR_SYSCALL_BASE+182)
-#define __NR_getcwd			(__NR_SYSCALL_BASE+183)
-#define __NR_capget			(__NR_SYSCALL_BASE+184)
-#define __NR_capset			(__NR_SYSCALL_BASE+185)
-#define __NR_sigaltstack		(__NR_SYSCALL_BASE+186)
-#define __NR_sendfile			(__NR_SYSCALL_BASE+187)
-					/* 188 reserved */
-					/* 189 reserved */
-#define __NR_vfork			(__NR_SYSCALL_BASE+190)
-#define __NR_ugetrlimit			(__NR_SYSCALL_BASE+191)	/* SuS compliant getrlimit */
-#define __NR_mmap2			(__NR_SYSCALL_BASE+192)
-#define __NR_truncate64			(__NR_SYSCALL_BASE+193)
-#define __NR_ftruncate64		(__NR_SYSCALL_BASE+194)
-#define __NR_stat64			(__NR_SYSCALL_BASE+195)
-#define __NR_lstat64			(__NR_SYSCALL_BASE+196)
-#define __NR_fstat64			(__NR_SYSCALL_BASE+197)
-#define __NR_lchown32			(__NR_SYSCALL_BASE+198)
-#define __NR_getuid32			(__NR_SYSCALL_BASE+199)
-#define __NR_getgid32			(__NR_SYSCALL_BASE+200)
-#define __NR_geteuid32			(__NR_SYSCALL_BASE+201)
-#define __NR_getegid32			(__NR_SYSCALL_BASE+202)
-#define __NR_setreuid32			(__NR_SYSCALL_BASE+203)
-#define __NR_setregid32			(__NR_SYSCALL_BASE+204)
-#define __NR_getgroups32		(__NR_SYSCALL_BASE+205)
-#define __NR_setgroups32		(__NR_SYSCALL_BASE+206)
-#define __NR_fchown32			(__NR_SYSCALL_BASE+207)
-#define __NR_setresuid32		(__NR_SYSCALL_BASE+208)
-#define __NR_getresuid32		(__NR_SYSCALL_BASE+209)
-#define __NR_setresgid32		(__NR_SYSCALL_BASE+210)
-#define __NR_getresgid32		(__NR_SYSCALL_BASE+211)
-#define __NR_chown32			(__NR_SYSCALL_BASE+212)
-#define __NR_setuid32			(__NR_SYSCALL_BASE+213)
-#define __NR_setgid32			(__NR_SYSCALL_BASE+214)
-#define __NR_setfsuid32			(__NR_SYSCALL_BASE+215)
-#define __NR_setfsgid32			(__NR_SYSCALL_BASE+216)
-#define __NR_getdents64			(__NR_SYSCALL_BASE+217)
-#define __NR_pivot_root			(__NR_SYSCALL_BASE+218)
-#define __NR_mincore			(__NR_SYSCALL_BASE+219)
-#define __NR_madvise			(__NR_SYSCALL_BASE+220)
-#define __NR_fcntl64			(__NR_SYSCALL_BASE+221)
-					/* 222 for tux */
-					/* 223 is unused */
-#define __NR_gettid			(__NR_SYSCALL_BASE+224)
-#define __NR_readahead			(__NR_SYSCALL_BASE+225)
-#define __NR_setxattr			(__NR_SYSCALL_BASE+226)
-#define __NR_lsetxattr			(__NR_SYSCALL_BASE+227)
-#define __NR_fsetxattr			(__NR_SYSCALL_BASE+228)
-#define __NR_getxattr			(__NR_SYSCALL_BASE+229)
-#define __NR_lgetxattr			(__NR_SYSCALL_BASE+230)
-#define __NR_fgetxattr			(__NR_SYSCALL_BASE+231)
-#define __NR_listxattr			(__NR_SYSCALL_BASE+232)
-#define __NR_llistxattr			(__NR_SYSCALL_BASE+233)
-#define __NR_flistxattr			(__NR_SYSCALL_BASE+234)
-#define __NR_removexattr		(__NR_SYSCALL_BASE+235)
-#define __NR_lremovexattr		(__NR_SYSCALL_BASE+236)
-#define __NR_fremovexattr		(__NR_SYSCALL_BASE+237)
-#define __NR_tkill			(__NR_SYSCALL_BASE+238)
-#define __NR_sendfile64			(__NR_SYSCALL_BASE+239)
-#define __NR_futex			(__NR_SYSCALL_BASE+240)
-#define __NR_sched_setaffinity		(__NR_SYSCALL_BASE+241)
-#define __NR_sched_getaffinity		(__NR_SYSCALL_BASE+242)
-#define __NR_io_setup			(__NR_SYSCALL_BASE+243)
-#define __NR_io_destroy			(__NR_SYSCALL_BASE+244)
-#define __NR_io_getevents		(__NR_SYSCALL_BASE+245)
-#define __NR_io_submit			(__NR_SYSCALL_BASE+246)
-#define __NR_io_cancel			(__NR_SYSCALL_BASE+247)
-#define __NR_exit_group			(__NR_SYSCALL_BASE+248)
-#define __NR_lookup_dcookie		(__NR_SYSCALL_BASE+249)
-#define __NR_epoll_create		(__NR_SYSCALL_BASE+250)
-#define __NR_epoll_ctl			(__NR_SYSCALL_BASE+251)
-#define __NR_epoll_wait			(__NR_SYSCALL_BASE+252)
-#define __NR_remap_file_pages		(__NR_SYSCALL_BASE+253)
-					/* 254 for set_thread_area */
-					/* 255 for get_thread_area */
-#define __NR_set_tid_address		(__NR_SYSCALL_BASE+256)
-#define __NR_timer_create		(__NR_SYSCALL_BASE+257)
-#define __NR_timer_settime		(__NR_SYSCALL_BASE+258)
-#define __NR_timer_gettime		(__NR_SYSCALL_BASE+259)
-#define __NR_timer_getoverrun		(__NR_SYSCALL_BASE+260)
-#define __NR_timer_delete		(__NR_SYSCALL_BASE+261)
-#define __NR_clock_settime		(__NR_SYSCALL_BASE+262)
-#define __NR_clock_gettime		(__NR_SYSCALL_BASE+263)
-#define __NR_clock_getres		(__NR_SYSCALL_BASE+264)
-#define __NR_clock_nanosleep		(__NR_SYSCALL_BASE+265)
-#define __NR_statfs64			(__NR_SYSCALL_BASE+266)
-#define __NR_fstatfs64			(__NR_SYSCALL_BASE+267)
-#define __NR_tgkill			(__NR_SYSCALL_BASE+268)
-#define __NR_utimes			(__NR_SYSCALL_BASE+269)
-#define __NR_arm_fadvise64_64		(__NR_SYSCALL_BASE+270)
-#define __NR_pciconfig_iobase		(__NR_SYSCALL_BASE+271)
-#define __NR_pciconfig_read		(__NR_SYSCALL_BASE+272)
-#define __NR_pciconfig_write		(__NR_SYSCALL_BASE+273)
-#define __NR_mq_open			(__NR_SYSCALL_BASE+274)
-#define __NR_mq_unlink			(__NR_SYSCALL_BASE+275)
-#define __NR_mq_timedsend		(__NR_SYSCALL_BASE+276)
-#define __NR_mq_timedreceive		(__NR_SYSCALL_BASE+277)
-#define __NR_mq_notify			(__NR_SYSCALL_BASE+278)
-#define __NR_mq_getsetattr		(__NR_SYSCALL_BASE+279)
-#define __NR_waitid			(__NR_SYSCALL_BASE+280)
-#define __NR_socket			(__NR_SYSCALL_BASE+281)
-#define __NR_bind			(__NR_SYSCALL_BASE+282)
-#define __NR_connect			(__NR_SYSCALL_BASE+283)
-#define __NR_listen			(__NR_SYSCALL_BASE+284)
-#define __NR_accept			(__NR_SYSCALL_BASE+285)
-#define __NR_getsockname		(__NR_SYSCALL_BASE+286)
-#define __NR_getpeername		(__NR_SYSCALL_BASE+287)
-#define __NR_socketpair			(__NR_SYSCALL_BASE+288)
-#define __NR_send			(__NR_SYSCALL_BASE+289)
-#define __NR_sendto			(__NR_SYSCALL_BASE+290)
-#define __NR_recv			(__NR_SYSCALL_BASE+291)
-#define __NR_recvfrom			(__NR_SYSCALL_BASE+292)
-#define __NR_shutdown			(__NR_SYSCALL_BASE+293)
-#define __NR_setsockopt			(__NR_SYSCALL_BASE+294)
-#define __NR_getsockopt			(__NR_SYSCALL_BASE+295)
-#define __NR_sendmsg			(__NR_SYSCALL_BASE+296)
-#define __NR_recvmsg			(__NR_SYSCALL_BASE+297)
-#define __NR_semop			(__NR_SYSCALL_BASE+298)
-#define __NR_semget			(__NR_SYSCALL_BASE+299)
-#define __NR_semctl			(__NR_SYSCALL_BASE+300)
-#define __NR_msgsnd			(__NR_SYSCALL_BASE+301)
-#define __NR_msgrcv			(__NR_SYSCALL_BASE+302)
-#define __NR_msgget			(__NR_SYSCALL_BASE+303)
-#define __NR_msgctl			(__NR_SYSCALL_BASE+304)
-#define __NR_shmat			(__NR_SYSCALL_BASE+305)
-#define __NR_shmdt			(__NR_SYSCALL_BASE+306)
-#define __NR_shmget			(__NR_SYSCALL_BASE+307)
-#define __NR_shmctl			(__NR_SYSCALL_BASE+308)
-#define __NR_add_key			(__NR_SYSCALL_BASE+309)
-#define __NR_request_key		(__NR_SYSCALL_BASE+310)
-#define __NR_keyctl			(__NR_SYSCALL_BASE+311)
-#define __NR_semtimedop			(__NR_SYSCALL_BASE+312)
-#define __NR_vserver			(__NR_SYSCALL_BASE+313)
-#define __NR_ioprio_set			(__NR_SYSCALL_BASE+314)
-#define __NR_ioprio_get			(__NR_SYSCALL_BASE+315)
-#define __NR_inotify_init		(__NR_SYSCALL_BASE+316)
-#define __NR_inotify_add_watch		(__NR_SYSCALL_BASE+317)
-#define __NR_inotify_rm_watch		(__NR_SYSCALL_BASE+318)
-#define __NR_mbind			(__NR_SYSCALL_BASE+319)
-#define __NR_get_mempolicy		(__NR_SYSCALL_BASE+320)
-#define __NR_set_mempolicy		(__NR_SYSCALL_BASE+321)
-#define __NR_openat			(__NR_SYSCALL_BASE+322)
-#define __NR_mkdirat			(__NR_SYSCALL_BASE+323)
-#define __NR_mknodat			(__NR_SYSCALL_BASE+324)
-#define __NR_fchownat			(__NR_SYSCALL_BASE+325)
-#define __NR_futimesat			(__NR_SYSCALL_BASE+326)
-#define __NR_fstatat64			(__NR_SYSCALL_BASE+327)
-#define __NR_unlinkat			(__NR_SYSCALL_BASE+328)
-#define __NR_renameat			(__NR_SYSCALL_BASE+329)
-#define __NR_linkat			(__NR_SYSCALL_BASE+330)
-#define __NR_symlinkat			(__NR_SYSCALL_BASE+331)
-#define __NR_readlinkat			(__NR_SYSCALL_BASE+332)
-#define __NR_fchmodat			(__NR_SYSCALL_BASE+333)
-#define __NR_faccessat			(__NR_SYSCALL_BASE+334)
-#define __NR_pselect6			(__NR_SYSCALL_BASE+335)
-#define __NR_ppoll			(__NR_SYSCALL_BASE+336)
-#define __NR_unshare			(__NR_SYSCALL_BASE+337)
-#define __NR_set_robust_list		(__NR_SYSCALL_BASE+338)
-#define __NR_get_robust_list		(__NR_SYSCALL_BASE+339)
-#define __NR_splice			(__NR_SYSCALL_BASE+340)
-#define __NR_arm_sync_file_range	(__NR_SYSCALL_BASE+341)
+#include <asm/unistd-common.h>
 #define __NR_sync_file_range2		__NR_arm_sync_file_range
-#define __NR_tee			(__NR_SYSCALL_BASE+342)
-#define __NR_vmsplice			(__NR_SYSCALL_BASE+343)
-#define __NR_move_pages			(__NR_SYSCALL_BASE+344)
-#define __NR_getcpu			(__NR_SYSCALL_BASE+345)
-#define __NR_epoll_pwait		(__NR_SYSCALL_BASE+346)
-#define __NR_kexec_load			(__NR_SYSCALL_BASE+347)
-#define __NR_utimensat			(__NR_SYSCALL_BASE+348)
-#define __NR_signalfd			(__NR_SYSCALL_BASE+349)
-#define __NR_timerfd_create		(__NR_SYSCALL_BASE+350)
-#define __NR_eventfd			(__NR_SYSCALL_BASE+351)
-#define __NR_fallocate			(__NR_SYSCALL_BASE+352)
-#define __NR_timerfd_settime		(__NR_SYSCALL_BASE+353)
-#define __NR_timerfd_gettime		(__NR_SYSCALL_BASE+354)
-#define __NR_signalfd4			(__NR_SYSCALL_BASE+355)
-#define __NR_eventfd2			(__NR_SYSCALL_BASE+356)
-#define __NR_epoll_create1		(__NR_SYSCALL_BASE+357)
-#define __NR_dup3			(__NR_SYSCALL_BASE+358)
-#define __NR_pipe2			(__NR_SYSCALL_BASE+359)
-#define __NR_inotify_init1		(__NR_SYSCALL_BASE+360)
-#define __NR_preadv			(__NR_SYSCALL_BASE+361)
-#define __NR_pwritev			(__NR_SYSCALL_BASE+362)
-#define __NR_rt_tgsigqueueinfo		(__NR_SYSCALL_BASE+363)
-#define __NR_perf_event_open		(__NR_SYSCALL_BASE+364)
-#define __NR_recvmmsg			(__NR_SYSCALL_BASE+365)
-#define __NR_accept4			(__NR_SYSCALL_BASE+366)
-#define __NR_fanotify_init		(__NR_SYSCALL_BASE+367)
-#define __NR_fanotify_mark		(__NR_SYSCALL_BASE+368)
-#define __NR_prlimit64			(__NR_SYSCALL_BASE+369)
-#define __NR_name_to_handle_at		(__NR_SYSCALL_BASE+370)
-#define __NR_open_by_handle_at		(__NR_SYSCALL_BASE+371)
-#define __NR_clock_adjtime		(__NR_SYSCALL_BASE+372)
-#define __NR_syncfs			(__NR_SYSCALL_BASE+373)
-#define __NR_sendmmsg			(__NR_SYSCALL_BASE+374)
-#define __NR_setns			(__NR_SYSCALL_BASE+375)
-#define __NR_process_vm_readv		(__NR_SYSCALL_BASE+376)
-#define __NR_process_vm_writev		(__NR_SYSCALL_BASE+377)
-#define __NR_kcmp			(__NR_SYSCALL_BASE+378)
-#define __NR_finit_module		(__NR_SYSCALL_BASE+379)
-#define __NR_sched_setattr		(__NR_SYSCALL_BASE+380)
-#define __NR_sched_getattr		(__NR_SYSCALL_BASE+381)
-#define __NR_renameat2			(__NR_SYSCALL_BASE+382)
-#define __NR_seccomp			(__NR_SYSCALL_BASE+383)
-#define __NR_getrandom			(__NR_SYSCALL_BASE+384)
-#define __NR_memfd_create		(__NR_SYSCALL_BASE+385)
-#define __NR_bpf			(__NR_SYSCALL_BASE+386)
-#define __NR_execveat			(__NR_SYSCALL_BASE+387)
-#define __NR_userfaultfd		(__NR_SYSCALL_BASE+388)
-#define __NR_membarrier			(__NR_SYSCALL_BASE+389)
-#define __NR_mlock2			(__NR_SYSCALL_BASE+390)
-#define __NR_copy_file_range		(__NR_SYSCALL_BASE+391)
-#define __NR_preadv2			(__NR_SYSCALL_BASE+392)
-#define __NR_pwritev2			(__NR_SYSCALL_BASE+393)
 
 /*
  * The following SWIs are ARM private.
@@ -431,22 +36,4 @@
 #define __ARM_NR_usr32			(__ARM_NR_BASE+4)
 #define __ARM_NR_set_tls		(__ARM_NR_BASE+5)
 
-/*
- * The following syscalls are obsolete and no longer available for EABI.
- */
-#if defined(__ARM_EABI__)
-#undef __NR_time
-#undef __NR_umount
-#undef __NR_stime
-#undef __NR_alarm
-#undef __NR_utime
-#undef __NR_getrlimit
-#undef __NR_select
-#undef __NR_readdir
-#undef __NR_mmap
-#undef __NR_socketcall
-#undef __NR_syscall
-#undef __NR_ipc
-#endif
-
 #endif /* __ASM_ARM_UNISTD_H */
diff --git a/linux-headers/asm-arm64/kvm.h b/linux-headers/asm-arm64/kvm.h
index fd5a2761a5..651ec30040 100644
--- a/linux-headers/asm-arm64/kvm.h
+++ b/linux-headers/asm-arm64/kvm.h
@@ -201,10 +201,23 @@ struct kvm_arch_memory_slot {
 #define KVM_DEV_ARM_VGIC_GRP_CPU_REGS	2
 #define   KVM_DEV_ARM_VGIC_CPUID_SHIFT	32
 #define   KVM_DEV_ARM_VGIC_CPUID_MASK	(0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT)
+#define   KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT 32
+#define   KVM_DEV_ARM_VGIC_V3_MPIDR_MASK \
+			(0xffffffffULL << KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT)
 #define   KVM_DEV_ARM_VGIC_OFFSET_SHIFT	0
 #define   KVM_DEV_ARM_VGIC_OFFSET_MASK	(0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT)
+#define   KVM_DEV_ARM_VGIC_SYSREG_INSTR_MASK (0xffff)
 #define KVM_DEV_ARM_VGIC_GRP_NR_IRQS	3
 #define KVM_DEV_ARM_VGIC_GRP_CTRL	4
+#define KVM_DEV_ARM_VGIC_GRP_REDIST_REGS 5
+#define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6
+#define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO  7
+#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT	10
+#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \
+			(0x3fffffULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT)
+#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK	0x3ff
+#define VGIC_LEVEL_INFO_LINE_LEVEL	0
+
 #define   KVM_DEV_ARM_VGIC_CTRL_INIT	0
 
 /* Device Control API on vcpu fd */
diff --git a/linux-headers/asm-powerpc/kvm.h b/linux-headers/asm-powerpc/kvm.h
index c93cf35ce3..4edbe4bb0e 100644
--- a/linux-headers/asm-powerpc/kvm.h
+++ b/linux-headers/asm-powerpc/kvm.h
@@ -413,6 +413,26 @@ struct kvm_get_htab_header {
 	__u16	n_invalid;
 };
 
+/* For KVM_PPC_CONFIGURE_V3_MMU */
+struct kvm_ppc_mmuv3_cfg {
+	__u64	flags;
+	__u64	process_table;	/* second doubleword of partition table entry */
+};
+
+/* Flag values for KVM_PPC_CONFIGURE_V3_MMU */
+#define KVM_PPC_MMUV3_RADIX	1	/* 1 = radix mode, 0 = HPT */
+#define KVM_PPC_MMUV3_GTSE	2	/* global translation shootdown enb. */
+
+/* For KVM_PPC_GET_RMMU_INFO */
+struct kvm_ppc_rmmu_info {
+	struct kvm_ppc_radix_geom {
+		__u8	page_shift;
+		__u8	level_bits[4];
+		__u8	pad[3];
+	}	geometries[8];
+	__u32	ap_encodings[8];
+};
+
 /* Per-vcpu XICS interrupt controller state */
 #define KVM_REG_PPC_ICP_STATE	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8c)
 
@@ -573,6 +593,10 @@ struct kvm_get_htab_header {
 #define KVM_REG_PPC_SPRG9	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xba)
 #define KVM_REG_PPC_DBSR	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xbb)
 
+/* POWER9 registers */
+#define KVM_REG_PPC_TIDR	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbc)
+#define KVM_REG_PPC_PSSCR	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbd)
+
 /* Transactional Memory checkpointed state:
  * This is all GPRs, all VSX regs and a subset of SPRs
  */
@@ -596,6 +620,7 @@ struct kvm_get_htab_header {
 #define KVM_REG_PPC_TM_VSCR	(KVM_REG_PPC_TM | KVM_REG_SIZE_U32 | 0x67)
 #define KVM_REG_PPC_TM_DSCR	(KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x68)
 #define KVM_REG_PPC_TM_TAR	(KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x69)
+#define KVM_REG_PPC_TM_XER	(KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x6a)
 
 /* PPC64 eXternal Interrupt Controller Specification */
 #define KVM_DEV_XICS_GRP_SOURCES	1	/* 64-bit source attributes */
@@ -608,5 +633,7 @@ struct kvm_get_htab_header {
 #define  KVM_XICS_LEVEL_SENSITIVE	(1ULL << 40)
 #define  KVM_XICS_MASKED		(1ULL << 41)
 #define  KVM_XICS_PENDING		(1ULL << 42)
+#define  KVM_XICS_PRESENTED		(1ULL << 43)
+#define  KVM_XICS_QUEUED		(1ULL << 44)
 
 #endif /* __LINUX_KVM_POWERPC_H */
diff --git a/linux-headers/asm-powerpc/unistd.h b/linux-headers/asm-powerpc/unistd.h
index 1e66eba4c6..598043c7b6 100644
--- a/linux-headers/asm-powerpc/unistd.h
+++ b/linux-headers/asm-powerpc/unistd.h
@@ -392,5 +392,6 @@
 #define __NR_copy_file_range	379
 #define __NR_preadv2		380
 #define __NR_pwritev2		381
+#define __NR_kexec_file_load	382
 
 #endif /* _ASM_POWERPC_UNISTD_H_ */
diff --git a/linux-headers/asm-x86/kvm_para.h b/linux-headers/asm-x86/kvm_para.h
index e41c5c1a28..3a5397988e 100644
--- a/linux-headers/asm-x86/kvm_para.h
+++ b/linux-headers/asm-x86/kvm_para.h
@@ -45,7 +45,18 @@ struct kvm_steal_time {
 	__u64 steal;
 	__u32 version;
 	__u32 flags;
-	__u32 pad[12];
+	__u8  preempted;
+	__u8  u8_pad[3];
+	__u32 pad[11];
+};
+
+#define KVM_CLOCK_PAIRING_WALLCLOCK 0
+struct kvm_clock_pairing {
+	__s64 sec;
+	__s64 nsec;
+	__u64 tsc;
+	__u32 flags;
+	__u32 pad[9];
 };
 
 #define KVM_STEAL_ALIGNMENT_BITS 5
diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h
index bb0ed71223..4e082a81b4 100644
--- a/linux-headers/linux/kvm.h
+++ b/linux-headers/linux/kvm.h
@@ -218,7 +218,8 @@ struct kvm_hyperv_exit {
 struct kvm_run {
 	/* in */
 	__u8 request_interrupt_window;
-	__u8 padding1[7];
+	__u8 immediate_exit;
+	__u8 padding1[6];
 
 	/* out */
 	__u32 exit_reason;
@@ -651,6 +652,9 @@ struct kvm_enable_cap {
 };
 
 /* for KVM_PPC_GET_PVINFO */
+
+#define KVM_PPC_PVINFO_FLAGS_EV_IDLE   (1<<0)
+
 struct kvm_ppc_pvinfo {
 	/* out */
 	__u32 flags;
@@ -682,7 +686,12 @@ struct kvm_ppc_smmu_info {
 	struct kvm_ppc_one_seg_page_size sps[KVM_PPC_PAGE_SIZES_MAX_SZ];
 };
 
-#define KVM_PPC_PVINFO_FLAGS_EV_IDLE   (1<<0)
+/* for KVM_PPC_RESIZE_HPT_{PREPARE,COMMIT} */
+struct kvm_ppc_resize_hpt {
+	__u64 flags;
+	__u32 shift;
+	__u32 pad;
+};
 
 #define KVMIO 0xAE
 
@@ -870,6 +879,10 @@ struct kvm_ppc_smmu_info {
 #define KVM_CAP_S390_USER_INSTR0 130
 #define KVM_CAP_MSI_DEVID 131
 #define KVM_CAP_PPC_HTM 132
+#define KVM_CAP_SPAPR_RESIZE_HPT 133
+#define KVM_CAP_PPC_MMU_RADIX 134
+#define KVM_CAP_PPC_MMU_HASH_V3 135
+#define KVM_CAP_IMMEDIATE_EXIT 136
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -1186,6 +1199,13 @@ struct kvm_s390_ucas_mapping {
 #define KVM_ARM_SET_DEVICE_ADDR	  _IOW(KVMIO,  0xab, struct kvm_arm_device_addr)
 /* Available with KVM_CAP_PPC_RTAS */
 #define KVM_PPC_RTAS_DEFINE_TOKEN _IOW(KVMIO,  0xac, struct kvm_rtas_token_args)
+/* Available with KVM_CAP_SPAPR_RESIZE_HPT */
+#define KVM_PPC_RESIZE_HPT_PREPARE _IOR(KVMIO, 0xad, struct kvm_ppc_resize_hpt)
+#define KVM_PPC_RESIZE_HPT_COMMIT  _IOR(KVMIO, 0xae, struct kvm_ppc_resize_hpt)
+/* Available with KVM_CAP_PPC_RADIX_MMU or KVM_CAP_PPC_HASH_MMU_V3 */
+#define KVM_PPC_CONFIGURE_V3_MMU  _IOW(KVMIO,  0xaf, struct kvm_ppc_mmuv3_cfg)
+/* Available with KVM_CAP_PPC_RADIX_MMU */
+#define KVM_PPC_GET_RMMU_INFO	  _IOW(KVMIO,  0xb0, struct kvm_ppc_rmmu_info)
 
 /* ioctl for vm fd */
 #define KVM_CREATE_DEVICE	  _IOWR(KVMIO,  0xe0, struct kvm_create_device)
diff --git a/linux-headers/linux/kvm_para.h b/linux-headers/linux/kvm_para.h
index e61661edf3..15b24ff6cf 100644
--- a/linux-headers/linux/kvm_para.h
+++ b/linux-headers/linux/kvm_para.h
@@ -14,6 +14,7 @@
 #define KVM_EFAULT		EFAULT
 #define KVM_E2BIG		E2BIG
 #define KVM_EPERM		EPERM
+#define KVM_EOPNOTSUPP		95
 
 #define KVM_HC_VAPIC_POLL_IRQ		1
 #define KVM_HC_MMU_OP			2
@@ -23,6 +24,7 @@
 #define KVM_HC_MIPS_GET_CLOCK_FREQ	6
 #define KVM_HC_MIPS_EXIT_VM		7
 #define KVM_HC_MIPS_CONSOLE_OUTPUT	8
+#define KVM_HC_CLOCK_PAIRING		9
 
 /*
  * hypercalls use architecture specific
diff --git a/linux-headers/linux/userfaultfd.h b/linux-headers/linux/userfaultfd.h
index 19e8453249..2ed5dc3775 100644
--- a/linux-headers/linux/userfaultfd.h
+++ b/linux-headers/linux/userfaultfd.h
@@ -11,13 +11,18 @@
 
 #include <linux/types.h>
 
-#define UFFD_API ((__u64)0xAA)
 /*
- * After implementing the respective features it will become:
- * #define UFFD_API_FEATURES (UFFD_FEATURE_PAGEFAULT_FLAG_WP | \
- *			      UFFD_FEATURE_EVENT_FORK)
+ * If the UFFDIO_API is upgraded someday, the UFFDIO_UNREGISTER and
+ * UFFDIO_WAKE ioctls should be defined as _IOW and not as _IOR.  In
+ * userfaultfd.h we assumed the kernel was reading (instead _IOC_READ
+ * means the userland is reading).
  */
-#define UFFD_API_FEATURES (0)
+#define UFFD_API ((__u64)0xAA)
+#define UFFD_API_FEATURES (UFFD_FEATURE_EVENT_FORK |		\
+			   UFFD_FEATURE_EVENT_REMAP |		\
+			   UFFD_FEATURE_EVENT_MADVDONTNEED |	\
+			   UFFD_FEATURE_MISSING_HUGETLBFS |	\
+			   UFFD_FEATURE_MISSING_SHMEM)
 #define UFFD_API_IOCTLS				\
 	((__u64)1 << _UFFDIO_REGISTER |		\
 	 (__u64)1 << _UFFDIO_UNREGISTER |	\
@@ -26,6 +31,9 @@
 	((__u64)1 << _UFFDIO_WAKE |		\
 	 (__u64)1 << _UFFDIO_COPY |		\
 	 (__u64)1 << _UFFDIO_ZEROPAGE)
+#define UFFD_API_RANGE_IOCTLS_BASIC		\
+	((__u64)1 << _UFFDIO_WAKE |		\
+	 (__u64)1 << _UFFDIO_COPY)
 
 /*
  * Valid ioctl command number range with this API is from 0x00 to
@@ -72,6 +80,21 @@ struct uffd_msg {
 		} pagefault;
 
 		struct {
+			__u32	ufd;
+		} fork;
+
+		struct {
+			__u64	from;
+			__u64	to;
+			__u64	len;
+		} remap;
+
+		struct {
+			__u64	start;
+			__u64	end;
+		} madv_dn;
+
+		struct {
 			/* unused reserved fields */
 			__u64	reserved1;
 			__u64	reserved2;
@@ -84,9 +107,9 @@ struct uffd_msg {
  * Start at 0x12 and not at 0 to be more strict against bugs.
  */
 #define UFFD_EVENT_PAGEFAULT	0x12
-#if 0 /* not available yet */
 #define UFFD_EVENT_FORK		0x13
-#endif
+#define UFFD_EVENT_REMAP	0x14
+#define UFFD_EVENT_MADVDONTNEED	0x15
 
 /* flags for UFFD_EVENT_PAGEFAULT */
 #define UFFD_PAGEFAULT_FLAG_WRITE	(1<<0)	/* If this was a write fault */
@@ -104,11 +127,37 @@ struct uffdio_api {
 	 * Note: UFFD_EVENT_PAGEFAULT and UFFD_PAGEFAULT_FLAG_WRITE
 	 * are to be considered implicitly always enabled in all kernels as
 	 * long as the uffdio_api.api requested matches UFFD_API.
+	 *
+	 * UFFD_FEATURE_MISSING_HUGETLBFS means an UFFDIO_REGISTER
+	 * with UFFDIO_REGISTER_MODE_MISSING mode will succeed on
+	 * hugetlbfs virtual memory ranges. Adding or not adding
+	 * UFFD_FEATURE_MISSING_HUGETLBFS to uffdio_api.features has
+	 * no real functional effect after UFFDIO_API returns, but
+	 * it's only useful for an initial feature set probe at
+	 * UFFDIO_API time. There are two ways to use it:
+	 *
+	 * 1) by adding UFFD_FEATURE_MISSING_HUGETLBFS to the
+	 *    uffdio_api.features before calling UFFDIO_API, an error
+	 *    will be returned by UFFDIO_API on a kernel without
+	 *    hugetlbfs missing support
+	 *
+	 * 2) the UFFD_FEATURE_MISSING_HUGETLBFS can not be added in
+	 *    uffdio_api.features and instead it will be set by the
+	 *    kernel in the uffdio_api.features if the kernel supports
+	 *    it, so userland can later check if the feature flag is
+	 *    present in uffdio_api.features after UFFDIO_API
+	 *    succeeded.
+	 *
+	 * UFFD_FEATURE_MISSING_SHMEM works the same as
+	 * UFFD_FEATURE_MISSING_HUGETLBFS, but it applies to shmem
+	 * (i.e. tmpfs and other shmem based APIs).
 	 */
-#if 0 /* not available yet */
 #define UFFD_FEATURE_PAGEFAULT_FLAG_WP		(1<<0)
 #define UFFD_FEATURE_EVENT_FORK			(1<<1)
-#endif
+#define UFFD_FEATURE_EVENT_REMAP		(1<<2)
+#define UFFD_FEATURE_EVENT_MADVDONTNEED		(1<<3)
+#define UFFD_FEATURE_MISSING_HUGETLBFS		(1<<4)
+#define UFFD_FEATURE_MISSING_SHMEM		(1<<5)
 	__u64 features;
 
 	__u64 ioctls;
diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h
index 759b850a3e..531cb2eda9 100644
--- a/linux-headers/linux/vfio.h
+++ b/linux-headers/linux/vfio.h
@@ -203,6 +203,16 @@ struct vfio_device_info {
 };
 #define VFIO_DEVICE_GET_INFO		_IO(VFIO_TYPE, VFIO_BASE + 7)
 
+/*
+ * Vendor driver using Mediated device framework should provide device_api
+ * attribute in supported type attribute groups. Device API string should be one
+ * of the following corresponding to device flags in vfio_device_info structure.
+ */
+
+#define VFIO_DEVICE_API_PCI_STRING		"vfio-pci"
+#define VFIO_DEVICE_API_PLATFORM_STRING		"vfio-platform"
+#define VFIO_DEVICE_API_AMBA_STRING		"vfio-amba"
+
 /**
  * VFIO_DEVICE_GET_REGION_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 8,
  *				       struct vfio_region_info)
diff --git a/linux-user/main.c b/linux-user/main.c
index 9645122aa6..10a3bb3a12 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -574,6 +574,7 @@ void cpu_loop(CPUARMState *env)
         switch(trapnr) {
         case EXCP_UDEF:
         case EXCP_NOCP:
+        case EXCP_INVSTATE:
             {
                 TaskState *ts = cs->opaque;
                 uint32_t opcode;
diff --git a/linux-user/signal.c b/linux-user/signal.c
index 8209539555..a67db04e1a 100644
--- a/linux-user/signal.c
+++ b/linux-user/signal.c
@@ -254,7 +254,7 @@ int do_sigprocmask(int how, const sigset_t *set, sigset_t *oldset)
 }
 
 #if !defined(TARGET_OPENRISC) && !defined(TARGET_UNICORE32) && \
-    !defined(TARGET_X86_64) && !defined(TARGET_NIOS2)
+    !defined(TARGET_NIOS2)
 /* Just set the guest's signal mask to the specified value; the
  * caller is assumed to have called block_signals() already.
  */
@@ -512,7 +512,7 @@ void signal_init(void)
     }
 }
 
-#if !(defined(TARGET_X86_64) || defined(TARGET_UNICORE32))
+#ifndef TARGET_UNICORE32
 /* Force a synchronously taken signal. The kernel force_sig() function
  * also forces the signal to "not blocked, not ignored", but for QEMU
  * that work is done in process_pending_signals().
@@ -819,9 +819,8 @@ int do_sigaction(int sig, const struct target_sigaction *act,
     return ret;
 }
 
-#if defined(TARGET_I386) && TARGET_ABI_BITS == 32
-
-/* from the Linux kernel */
+#if defined(TARGET_I386)
+/* from the Linux kernel - /arch/x86/include/uapi/asm/sigcontext.h */
 
 struct target_fpreg {
     uint16_t significand[4];
@@ -835,58 +834,120 @@ struct target_fpxreg {
 };
 
 struct target_xmmreg {
-    abi_ulong element[4];
+    uint32_t element[4];
 };
 
-struct target_fpstate {
+struct target_fpstate_32 {
     /* Regular FPU environment */
-    abi_ulong cw;
-    abi_ulong sw;
-    abi_ulong tag;
-    abi_ulong ipoff;
-    abi_ulong cssel;
-    abi_ulong dataoff;
-    abi_ulong datasel;
-    struct target_fpreg _st[8];
+    uint32_t cw;
+    uint32_t sw;
+    uint32_t tag;
+    uint32_t ipoff;
+    uint32_t cssel;
+    uint32_t dataoff;
+    uint32_t datasel;
+    struct target_fpreg st[8];
     uint16_t  status;
     uint16_t  magic;          /* 0xffff = regular FPU data only */
 
     /* FXSR FPU environment */
-    abi_ulong _fxsr_env[6];   /* FXSR FPU env is ignored */
-    abi_ulong mxcsr;
-    abi_ulong reserved;
-    struct target_fpxreg _fxsr_st[8]; /* FXSR FPU reg data is ignored */
-    struct target_xmmreg _xmm[8];
-    abi_ulong padding[56];
+    uint32_t _fxsr_env[6];   /* FXSR FPU env is ignored */
+    uint32_t mxcsr;
+    uint32_t reserved;
+    struct target_fpxreg fxsr_st[8]; /* FXSR FPU reg data is ignored */
+    struct target_xmmreg xmm[8];
+    uint32_t padding[56];
 };
 
-#define X86_FXSR_MAGIC		0x0000
+struct target_fpstate_64 {
+    /* FXSAVE format */
+    uint16_t cw;
+    uint16_t sw;
+    uint16_t twd;
+    uint16_t fop;
+    uint64_t rip;
+    uint64_t rdp;
+    uint32_t mxcsr;
+    uint32_t mxcsr_mask;
+    uint32_t st_space[32];
+    uint32_t xmm_space[64];
+    uint32_t reserved[24];
+};
 
-struct target_sigcontext {
+#ifndef TARGET_X86_64
+# define target_fpstate target_fpstate_32
+#else
+# define target_fpstate target_fpstate_64
+#endif
+
+struct target_sigcontext_32 {
     uint16_t gs, __gsh;
     uint16_t fs, __fsh;
     uint16_t es, __esh;
     uint16_t ds, __dsh;
-    abi_ulong edi;
-    abi_ulong esi;
-    abi_ulong ebp;
-    abi_ulong esp;
-    abi_ulong ebx;
-    abi_ulong edx;
-    abi_ulong ecx;
-    abi_ulong eax;
-    abi_ulong trapno;
-    abi_ulong err;
-    abi_ulong eip;
+    uint32_t edi;
+    uint32_t esi;
+    uint32_t ebp;
+    uint32_t esp;
+    uint32_t ebx;
+    uint32_t edx;
+    uint32_t ecx;
+    uint32_t eax;
+    uint32_t trapno;
+    uint32_t err;
+    uint32_t eip;
     uint16_t cs, __csh;
-    abi_ulong eflags;
-    abi_ulong esp_at_signal;
+    uint32_t eflags;
+    uint32_t esp_at_signal;
     uint16_t ss, __ssh;
-    abi_ulong fpstate; /* pointer */
-    abi_ulong oldmask;
-    abi_ulong cr2;
+    uint32_t fpstate; /* pointer */
+    uint32_t oldmask;
+    uint32_t cr2;
+};
+
+struct target_sigcontext_64 {
+    uint64_t r8;
+    uint64_t r9;
+    uint64_t r10;
+    uint64_t r11;
+    uint64_t r12;
+    uint64_t r13;
+    uint64_t r14;
+    uint64_t r15;
+
+    uint64_t rdi;
+    uint64_t rsi;
+    uint64_t rbp;
+    uint64_t rbx;
+    uint64_t rdx;
+    uint64_t rax;
+    uint64_t rcx;
+    uint64_t rsp;
+    uint64_t rip;
+
+    uint64_t eflags;
+
+    uint16_t cs;
+    uint16_t gs;
+    uint16_t fs;
+    uint16_t ss;
+
+    uint64_t err;
+    uint64_t trapno;
+    uint64_t oldmask;
+    uint64_t cr2;
+
+    uint64_t fpstate; /* pointer */
+    uint64_t padding[8];
 };
 
+#ifndef TARGET_X86_64
+# define target_sigcontext target_sigcontext_32
+#else
+# define target_sigcontext target_sigcontext_64
+#endif
+
+/* see Linux/include/uapi/asm-generic/ucontext.h */
 struct target_ucontext {
     abi_ulong         tuc_flags;
     abi_ulong         tuc_link;
@@ -895,8 +956,8 @@ struct target_ucontext {
     target_sigset_t   tuc_sigmask;  /* mask last for extensibility */
 };
 
-struct sigframe
-{
+#ifndef TARGET_X86_64
+struct sigframe {
     abi_ulong pretcode;
     int sig;
     struct target_sigcontext sc;
@@ -905,8 +966,7 @@ struct sigframe
     char retcode[8];
 };
 
-struct rt_sigframe
-{
+struct rt_sigframe {
     abi_ulong pretcode;
     int sig;
     abi_ulong pinfo;
@@ -917,6 +977,17 @@ struct rt_sigframe
     char retcode[8];
 };
 
+#else
+
+struct rt_sigframe {
+    abi_ulong pretcode;
+    struct target_ucontext uc;
+    struct target_siginfo info;
+    struct target_fpstate fpstate;
+};
+
+#endif
+
 /*
  * Set up a signal frame.
  */
@@ -927,6 +998,7 @@ static void setup_sigcontext(struct target_sigcontext *sc,
         abi_ulong fpstate_addr)
 {
     CPUState *cs = CPU(x86_env_get_cpu(env));
+#ifndef TARGET_X86_64
     uint16_t magic;
 
     /* already locked in setup_frame() */
@@ -959,6 +1031,44 @@ static void setup_sigcontext(struct target_sigcontext *sc,
     /* non-iBCS2 extensions.. */
     __put_user(mask, &sc->oldmask);
     __put_user(env->cr[2], &sc->cr2);
+#else
+    __put_user(env->regs[R_EDI], &sc->rdi);
+    __put_user(env->regs[R_ESI], &sc->rsi);
+    __put_user(env->regs[R_EBP], &sc->rbp);
+    __put_user(env->regs[R_ESP], &sc->rsp);
+    __put_user(env->regs[R_EBX], &sc->rbx);
+    __put_user(env->regs[R_EDX], &sc->rdx);
+    __put_user(env->regs[R_ECX], &sc->rcx);
+    __put_user(env->regs[R_EAX], &sc->rax);
+
+    __put_user(env->regs[8], &sc->r8);
+    __put_user(env->regs[9], &sc->r9);
+    __put_user(env->regs[10], &sc->r10);
+    __put_user(env->regs[11], &sc->r11);
+    __put_user(env->regs[12], &sc->r12);
+    __put_user(env->regs[13], &sc->r13);
+    __put_user(env->regs[14], &sc->r14);
+    __put_user(env->regs[15], &sc->r15);
+
+    __put_user(cs->exception_index, &sc->trapno);
+    __put_user(env->error_code, &sc->err);
+    __put_user(env->eip, &sc->rip);
+
+    __put_user(env->eflags, &sc->eflags);
+    __put_user(env->segs[R_CS].selector, &sc->cs);
+    __put_user((uint16_t)0, &sc->gs);
+    __put_user((uint16_t)0, &sc->fs);
+    __put_user(env->segs[R_SS].selector, &sc->ss);
+
+    __put_user(mask, &sc->oldmask);
+    __put_user(env->cr[2], &sc->cr2);
+
+    /* fpstate_addr must be 16 byte aligned for fxsave */
+    assert(!(fpstate_addr & 0xf));
+
+    cpu_x86_fxsave(env, fpstate_addr);
+    __put_user(fpstate_addr, &sc->fpstate);
+#endif
 }
 
 /*
@@ -972,23 +1082,34 @@ get_sigframe(struct target_sigaction *ka, CPUX86State *env, size_t frame_size)
 
     /* Default to using normal stack */
     esp = env->regs[R_ESP];
+#ifdef TARGET_X86_64
+    esp -= 128; /* this is the redzone */
+#endif
+
     /* This is the X/Open sanctioned signal stack switching.  */
     if (ka->sa_flags & TARGET_SA_ONSTACK) {
         if (sas_ss_flags(esp) == 0) {
             esp = target_sigaltstack_used.ss_sp + target_sigaltstack_used.ss_size;
         }
     } else {
-
+#ifndef TARGET_X86_64
         /* This is the legacy signal stack switching. */
         if ((env->segs[R_SS].selector & 0xffff) != __USER_DS &&
                 !(ka->sa_flags & TARGET_SA_RESTORER) &&
                 ka->sa_restorer) {
             esp = (unsigned long) ka->sa_restorer;
         }
+#endif
     }
+
+#ifndef TARGET_X86_64
     return (esp - frame_size) & -8ul;
+#else
+    return ((esp - frame_size) & (~15ul)) - 8;
+#endif
 }
 
+#ifndef TARGET_X86_64
 /* compare linux/arch/i386/kernel/signal.c:setup_frame() */
 static void setup_frame(int sig, struct target_sigaction *ka,
                         target_sigset_t *set, CPUX86State *env)
@@ -1029,7 +1150,6 @@ static void setup_frame(int sig, struct target_sigaction *ka,
         __put_user(val16, (uint16_t *)(frame->retcode+6));
     }
 
-
     /* Set up registers for signal handler */
     env->regs[R_ESP] = frame_addr;
     env->eip = ka->_sa_handler;
@@ -1047,13 +1167,17 @@ static void setup_frame(int sig, struct target_sigaction *ka,
 give_sigsegv:
     force_sigsegv(sig);
 }
+#endif
 
-/* compare linux/arch/i386/kernel/signal.c:setup_rt_frame() */
+/* compare linux/arch/x86/kernel/signal.c:setup_rt_frame() */
 static void setup_rt_frame(int sig, struct target_sigaction *ka,
                            target_siginfo_t *info,
                            target_sigset_t *set, CPUX86State *env)
 {
-    abi_ulong frame_addr, addr;
+    abi_ulong frame_addr;
+#ifndef TARGET_X86_64
+    abi_ulong addr;
+#endif
     struct rt_sigframe *frame;
     int i;
 
@@ -1063,12 +1187,17 @@ static void setup_rt_frame(int sig, struct target_sigaction *ka,
     if (!lock_user_struct(VERIFY_WRITE, frame, frame_addr, 0))
         goto give_sigsegv;
 
+    /* These fields are only in rt_sigframe on 32 bit */
+#ifndef TARGET_X86_64
     __put_user(sig, &frame->sig);
     addr = frame_addr + offsetof(struct rt_sigframe, info);
     __put_user(addr, &frame->pinfo);
     addr = frame_addr + offsetof(struct rt_sigframe, uc);
     __put_user(addr, &frame->puc);
-    tswap_siginfo(&frame->info, info);
+#endif
+    if (ka->sa_flags & TARGET_SA_SIGINFO) {
+        tswap_siginfo(&frame->info, info);
+    }
 
     /* Create the ucontext.  */
     __put_user(0, &frame->uc.tuc_flags);
@@ -1087,6 +1216,7 @@ static void setup_rt_frame(int sig, struct target_sigaction *ka,
 
     /* Set up to return from userspace.  If provided, use a stub
        already in userspace.  */
+#ifndef TARGET_X86_64
     if (ka->sa_flags & TARGET_SA_RESTORER) {
         __put_user(ka->sa_restorer, &frame->pretcode);
     } else {
@@ -1099,15 +1229,31 @@ static void setup_rt_frame(int sig, struct target_sigaction *ka,
         val16 = 0x80cd;
         __put_user(val16, (uint16_t *)(frame->retcode+5));
     }
+#else
+    /* XXX: Would be slightly better to return -EFAULT here if test fails
+       assert(ka->sa_flags & TARGET_SA_RESTORER); */
+    __put_user(ka->sa_restorer, &frame->pretcode);
+#endif
 
     /* Set up registers for signal handler */
     env->regs[R_ESP] = frame_addr;
     env->eip = ka->_sa_handler;
 
+#ifndef TARGET_X86_64
+    env->regs[R_EAX] = sig;
+    env->regs[R_EDX] = (unsigned long)&frame->info;
+    env->regs[R_ECX] = (unsigned long)&frame->uc;
+#else
+    env->regs[R_EAX] = 0;
+    env->regs[R_EDI] = sig;
+    env->regs[R_ESI] = (unsigned long)&frame->info;
+    env->regs[R_EDX] = (unsigned long)&frame->uc;
+#endif
+
     cpu_x86_load_seg(env, R_DS, __USER_DS);
     cpu_x86_load_seg(env, R_ES, __USER_DS);
-    cpu_x86_load_seg(env, R_SS, __USER_DS);
     cpu_x86_load_seg(env, R_CS, __USER_CS);
+    cpu_x86_load_seg(env, R_SS, __USER_DS);
     env->eflags &= ~TF_MASK;
 
     unlock_user_struct(frame, frame_addr, 1);
@@ -1125,6 +1271,7 @@ restore_sigcontext(CPUX86State *env, struct target_sigcontext *sc)
     abi_ulong fpstate_addr;
     unsigned int tmpflags;
 
+#ifndef TARGET_X86_64
     cpu_x86_load_seg(env, R_GS, tswap16(sc->gs));
     cpu_x86_load_seg(env, R_FS, tswap16(sc->fs));
     cpu_x86_load_seg(env, R_ES, tswap16(sc->es));
@@ -1138,7 +1285,29 @@ restore_sigcontext(CPUX86State *env, struct target_sigcontext *sc)
     env->regs[R_EDX] = tswapl(sc->edx);
     env->regs[R_ECX] = tswapl(sc->ecx);
     env->regs[R_EAX] = tswapl(sc->eax);
+
     env->eip = tswapl(sc->eip);
+#else
+    env->regs[8] = tswapl(sc->r8);
+    env->regs[9] = tswapl(sc->r9);
+    env->regs[10] = tswapl(sc->r10);
+    env->regs[11] = tswapl(sc->r11);
+    env->regs[12] = tswapl(sc->r12);
+    env->regs[13] = tswapl(sc->r13);
+    env->regs[14] = tswapl(sc->r14);
+    env->regs[15] = tswapl(sc->r15);
+
+    env->regs[R_EDI] = tswapl(sc->rdi);
+    env->regs[R_ESI] = tswapl(sc->rsi);
+    env->regs[R_EBP] = tswapl(sc->rbp);
+    env->regs[R_EBX] = tswapl(sc->rbx);
+    env->regs[R_EDX] = tswapl(sc->rdx);
+    env->regs[R_EAX] = tswapl(sc->rax);
+    env->regs[R_ECX] = tswapl(sc->rcx);
+    env->regs[R_ESP] = tswapl(sc->rsp);
+
+    env->eip = tswapl(sc->rip);
+#endif
 
     cpu_x86_load_seg(env, R_CS, lduw_p(&sc->cs) | 3);
     cpu_x86_load_seg(env, R_SS, lduw_p(&sc->ss) | 3);
@@ -1152,7 +1321,11 @@ restore_sigcontext(CPUX86State *env, struct target_sigcontext *sc)
         if (!access_ok(VERIFY_READ, fpstate_addr,
                        sizeof(struct target_fpstate)))
             goto badframe;
+#ifndef TARGET_X86_64
         cpu_x86_frstor(env, fpstate_addr, 1);
+#else
+        cpu_x86_fxrstor(env, fpstate_addr);
+#endif
     }
 
     return err;
@@ -1160,6 +1333,8 @@ badframe:
     return 1;
 }
 
+/* Note: there is no sigreturn on x86_64, there is only rt_sigreturn */
+#ifndef TARGET_X86_64
 long do_sigreturn(CPUX86State *env)
 {
     struct sigframe *frame;
@@ -1191,6 +1366,7 @@ badframe:
     force_sig(TARGET_SIGSEGV);
     return -TARGET_QEMU_ESIGRETURN;
 }
+#endif
 
 long do_rt_sigreturn(CPUX86State *env)
 {
@@ -1198,7 +1374,7 @@ long do_rt_sigreturn(CPUX86State *env)
     struct rt_sigframe *frame;
     sigset_t set;
 
-    frame_addr = env->regs[R_ESP] - 4;
+    frame_addr = env->regs[R_ESP] - sizeof(abi_ulong);
     trace_user_do_rt_sigreturn(env, frame_addr);
     if (!lock_user_struct(VERIFY_READ, frame, frame_addr, 1))
         goto badframe;
@@ -5500,6 +5676,7 @@ static inline int target_rt_setup_ucontext(struct target_ucontext *uc,
                                            CPUM68KState *env)
 {
     target_greg_t *gregs = uc->tuc_mcontext.gregs;
+    uint32_t sr = cpu_m68k_get_ccr(env);
 
     __put_user(TARGET_MCONTEXT_VERSION, &uc->tuc_mcontext.version);
     __put_user(env->dregs[0], &gregs[0]);
@@ -5519,7 +5696,7 @@ static inline int target_rt_setup_ucontext(struct target_ucontext *uc,
     __put_user(env->aregs[6], &gregs[14]);
     __put_user(env->aregs[7], &gregs[15]);
     __put_user(env->pc, &gregs[16]);
-    __put_user(env->sr, &gregs[17]);
+    __put_user(sr, &gregs[17]);
 
     return 0;
 }
@@ -5553,7 +5730,7 @@ static inline int target_rt_restore_ucontext(CPUM68KState *env,
     __get_user(env->aregs[7], &gregs[15]);
     __get_user(env->pc, &gregs[16]);
     __get_user(temp, &gregs[17]);
-    env->sr = (env->sr & 0xff00) | (temp & 0xff);
+    cpu_m68k_set_ccr(env, temp);
 
     return 0;
 
@@ -5674,14 +5851,13 @@ long do_rt_sigreturn(CPUM68KState *env)
 {
     struct target_rt_sigframe *frame;
     abi_ulong frame_addr = env->aregs[7] - 4;
-    target_sigset_t target_set;
     sigset_t set;
 
     trace_user_do_rt_sigreturn(env, frame_addr);
     if (!lock_user_struct(VERIFY_READ, frame, frame_addr, 1))
         goto badframe;
 
-    target_to_host_sigset_internal(&set, &target_set);
+    target_to_host_sigset(&set, &frame->uc.tuc_sigmask);
     set_sigmask(&set);
 
     /* restore registers */
@@ -6418,7 +6594,7 @@ static void handle_pending_signal(CPUArchState *cpu_env, int sig,
 #if defined(TARGET_ABI_MIPSN32) || defined(TARGET_ABI_MIPSN64) \
         || defined(TARGET_OPENRISC) || defined(TARGET_TILEGX) \
         || defined(TARGET_PPC64) || defined(TARGET_HPPA) \
-        || defined(TARGET_NIOS2)
+        || defined(TARGET_NIOS2) || defined(TARGET_X86_64)
         /* These targets do not have traditional signals.  */
         setup_rt_frame(sig, sa, &k->info, &target_old_set, cpu_env);
 #else
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index f569f827fc..cec8428589 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -57,6 +57,8 @@ int __clone2(int (*fn)(void *), void *child_stack_base,
 #include <netinet/tcp.h>
 #include <linux/wireless.h>
 #include <linux/icmp.h>
+#include <linux/icmpv6.h>
+#include <linux/errqueue.h>
 #include "qemu-common.h"
 #ifdef CONFIG_TIMERFD
 #include <sys/timerfd.h>
@@ -1634,6 +1636,11 @@ static inline abi_long host_to_target_sockaddr(abi_ulong target_addr,
         struct sockaddr_ll *target_ll = (struct sockaddr_ll *)target_saddr;
         target_ll->sll_ifindex = tswap32(target_ll->sll_ifindex);
         target_ll->sll_hatype = tswap16(target_ll->sll_hatype);
+    } else if (addr->sa_family == AF_INET6 &&
+               len >= sizeof(struct target_sockaddr_in6)) {
+        struct target_sockaddr_in6 *target_in6 =
+               (struct target_sockaddr_in6 *)target_saddr;
+        target_in6->sin6_scope_id = tswap16(target_in6->sin6_scope_id);
     }
     unlock_user(target_saddr, target_addr, len);
 
@@ -1839,6 +1846,78 @@ static inline abi_long host_to_target_cmsg(struct target_msghdr *target_msgh,
             }
             break;
 
+        case SOL_IP:
+            switch (cmsg->cmsg_type) {
+            case IP_TTL:
+            {
+                uint32_t *v = (uint32_t *)data;
+                uint32_t *t_int = (uint32_t *)target_data;
+
+                __put_user(*v, t_int);
+                break;
+            }
+            case IP_RECVERR:
+            {
+                struct errhdr_t {
+                   struct sock_extended_err ee;
+                   struct sockaddr_in offender;
+                };
+                struct errhdr_t *errh = (struct errhdr_t *)data;
+                struct errhdr_t *target_errh =
+                    (struct errhdr_t *)target_data;
+
+                __put_user(errh->ee.ee_errno, &target_errh->ee.ee_errno);
+                __put_user(errh->ee.ee_origin, &target_errh->ee.ee_origin);
+                __put_user(errh->ee.ee_type,  &target_errh->ee.ee_type);
+                __put_user(errh->ee.ee_code, &target_errh->ee.ee_code);
+                __put_user(errh->ee.ee_pad, &target_errh->ee.ee_pad);
+                __put_user(errh->ee.ee_info, &target_errh->ee.ee_info);
+                __put_user(errh->ee.ee_data, &target_errh->ee.ee_data);
+                host_to_target_sockaddr((unsigned long) &target_errh->offender,
+                    (void *) &errh->offender, sizeof(errh->offender));
+                break;
+            }
+            default:
+                goto unimplemented;
+            }
+            break;
+
+        case SOL_IPV6:
+            switch (cmsg->cmsg_type) {
+            case IPV6_HOPLIMIT:
+            {
+                uint32_t *v = (uint32_t *)data;
+                uint32_t *t_int = (uint32_t *)target_data;
+
+                __put_user(*v, t_int);
+                break;
+            }
+            case IPV6_RECVERR:
+            {
+                struct errhdr6_t {
+                   struct sock_extended_err ee;
+                   struct sockaddr_in6 offender;
+                };
+                struct errhdr6_t *errh = (struct errhdr6_t *)data;
+                struct errhdr6_t *target_errh =
+                    (struct errhdr6_t *)target_data;
+
+                __put_user(errh->ee.ee_errno, &target_errh->ee.ee_errno);
+                __put_user(errh->ee.ee_origin, &target_errh->ee.ee_origin);
+                __put_user(errh->ee.ee_type,  &target_errh->ee.ee_type);
+                __put_user(errh->ee.ee_code, &target_errh->ee.ee_code);
+                __put_user(errh->ee.ee_pad, &target_errh->ee.ee_pad);
+                __put_user(errh->ee.ee_info, &target_errh->ee.ee_info);
+                __put_user(errh->ee.ee_data, &target_errh->ee.ee_data);
+                host_to_target_sockaddr((unsigned long) &target_errh->offender,
+                    (void *) &errh->offender, sizeof(errh->offender));
+                break;
+            }
+            default:
+                goto unimplemented;
+            }
+            break;
+
         default:
         unimplemented:
             gemu_log("Unsupported ancillary data: %d/%d\n",
@@ -2768,6 +2847,7 @@ static abi_long do_setsockopt(int sockfd, int level, int optname,
         case IP_PKTINFO:
         case IP_MTU_DISCOVER:
         case IP_RECVERR:
+        case IP_RECVTTL:
         case IP_RECVTOS:
 #ifdef IP_FREEBIND
         case IP_FREEBIND:
@@ -2817,6 +2897,11 @@ static abi_long do_setsockopt(int sockfd, int level, int optname,
         case IPV6_MTU:
         case IPV6_V6ONLY:
         case IPV6_RECVPKTINFO:
+        case IPV6_UNICAST_HOPS:
+        case IPV6_RECVERR:
+        case IPV6_RECVHOPLIMIT:
+        case IPV6_2292HOPLIMIT:
+        case IPV6_CHECKSUM:
             val = 0;
             if (optlen < sizeof(uint32_t)) {
                 return -TARGET_EINVAL;
@@ -2827,6 +2912,50 @@ static abi_long do_setsockopt(int sockfd, int level, int optname,
             ret = get_errno(setsockopt(sockfd, level, optname,
                                        &val, sizeof(val)));
             break;
+        case IPV6_PKTINFO:
+        {
+            struct in6_pktinfo pki;
+
+            if (optlen < sizeof(pki)) {
+                return -TARGET_EINVAL;
+            }
+
+            if (copy_from_user(&pki, optval_addr, sizeof(pki))) {
+                return -TARGET_EFAULT;
+            }
+
+            pki.ipi6_ifindex = tswap32(pki.ipi6_ifindex);
+
+            ret = get_errno(setsockopt(sockfd, level, optname,
+                                       &pki, sizeof(pki)));
+            break;
+        }
+        default:
+            goto unimplemented;
+        }
+        break;
+    case SOL_ICMPV6:
+        switch (optname) {
+        case ICMPV6_FILTER:
+        {
+            struct icmp6_filter icmp6f;
+
+            if (optlen > sizeof(icmp6f)) {
+                optlen = sizeof(icmp6f);
+            }
+
+            if (copy_from_user(&icmp6f, optval_addr, optlen)) {
+                return -TARGET_EFAULT;
+            }
+
+            for (val = 0; val < 8; val++) {
+                icmp6f.data[val] = tswap32(icmp6f.data[val]);
+            }
+
+            ret = get_errno(setsockopt(sockfd, level, optname,
+                                       &icmp6f, optlen));
+            break;
+        }
         default:
             goto unimplemented;
         }
@@ -2834,7 +2963,8 @@ static abi_long do_setsockopt(int sockfd, int level, int optname,
     case SOL_RAW:
         switch (optname) {
         case ICMP_FILTER:
-            /* struct icmp_filter takes an u32 value */
+        case IPV6_CHECKSUM:
+            /* those take an u32 value */
             if (optlen < sizeof(uint32_t)) {
                 return -TARGET_EINVAL;
             }
@@ -7680,7 +7810,7 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
         break;
 #ifdef TARGET_NR_fork
     case TARGET_NR_fork:
-        ret = get_errno(do_fork(cpu_env, SIGCHLD, 0, 0, 0, 0));
+        ret = get_errno(do_fork(cpu_env, TARGET_SIGCHLD, 0, 0, 0, 0));
         break;
 #endif
 #ifdef TARGET_NR_waitpid
@@ -10490,7 +10620,8 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
 #endif
 #ifdef TARGET_NR_vfork
     case TARGET_NR_vfork:
-        ret = get_errno(do_fork(cpu_env, CLONE_VFORK | CLONE_VM | SIGCHLD,
+        ret = get_errno(do_fork(cpu_env,
+                        CLONE_VFORK | CLONE_VM | TARGET_SIGCHLD,
                         0, 0, 0, 0));
         break;
 #endif
@@ -11063,11 +11194,16 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
     case TARGET_NR_mincore:
         {
             void *a;
+            ret = -TARGET_ENOMEM;
+            a = lock_user(VERIFY_READ, arg1, arg2, 0);
+            if (!a) {
+                goto fail;
+            }
             ret = -TARGET_EFAULT;
-            if (!(a = lock_user(VERIFY_READ, arg1,arg2, 0)))
-                goto efault;
-            if (!(p = lock_user_string(arg3)))
+            p = lock_user_string(arg3);
+            if (!p) {
                 goto mincore_fail;
+            }
             ret = get_errno(mincore(a, arg2, p));
             unlock_user(p, arg3, ret);
             mincore_fail:
diff --git a/linux-user/syscall_defs.h b/linux-user/syscall_defs.h
index 72ca5b11d6..40c5027e93 100644
--- a/linux-user/syscall_defs.h
+++ b/linux-user/syscall_defs.h
@@ -164,6 +164,14 @@ struct target_sockaddr_in {
                 sizeof(struct target_in_addr)];
 };
 
+struct target_sockaddr_in6 {
+    uint16_t sin6_family;
+    uint16_t sin6_port; /* big endian */
+    uint32_t sin6_flowinfo; /* big endian */
+    struct in6_addr sin6_addr; /* IPv6 address, big endian */
+    uint32_t sin6_scope_id;
+};
+
 struct target_sock_filter {
     abi_ushort code;
     uint8_t jt;
diff --git a/migration/block.c b/migration/block.c
index ebc10e628d..1941bc2402 100644
--- a/migration/block.c
+++ b/migration/block.c
@@ -379,7 +379,7 @@ static void unset_dirty_tracking(void)
     }
 }
 
-static void init_blk_migration(QEMUFile *f)
+static int init_blk_migration(QEMUFile *f)
 {
     BlockDriverState *bs;
     BlkMigDevState *bmds;
@@ -390,6 +390,8 @@ static void init_blk_migration(QEMUFile *f)
         BlkMigDevState *bmds;
         BlockDriverState *bs;
     } *bmds_bs;
+    Error *local_err = NULL;
+    int ret;
 
     block_mig_state.submitted = 0;
     block_mig_state.read_done = 0;
@@ -411,11 +413,12 @@ static void init_blk_migration(QEMUFile *f)
 
         sectors = bdrv_nb_sectors(bs);
         if (sectors <= 0) {
+            ret = sectors;
             goto out;
         }
 
         bmds = g_new0(BlkMigDevState, 1);
-        bmds->blk = blk_new();
+        bmds->blk = blk_new(BLK_PERM_CONSISTENT_READ, BLK_PERM_ALL);
         bmds->blk_name = g_strdup(bdrv_get_device_name(bs));
         bmds->bulk_completed = 0;
         bmds->total_sectors = sectors;
@@ -445,7 +448,11 @@ static void init_blk_migration(QEMUFile *f)
         BlockDriverState *bs = bmds_bs[i].bs;
 
         if (bmds) {
-            blk_insert_bs(bmds->blk, bs);
+            ret = blk_insert_bs(bmds->blk, bs, &local_err);
+            if (ret < 0) {
+                error_report_err(local_err);
+                goto out;
+            }
 
             alloc_aio_bitmap(bmds);
             error_setg(&bmds->blocker, "block device is in use by migration");
@@ -453,8 +460,10 @@ static void init_blk_migration(QEMUFile *f)
         }
     }
 
+    ret = 0;
 out:
     g_free(bmds_bs);
+    return ret;
 }
 
 /* Called with no lock taken.  */
@@ -705,7 +714,11 @@ static int block_save_setup(QEMUFile *f, void *opaque)
             block_mig_state.submitted, block_mig_state.transferred);
 
     qemu_mutex_lock_iothread();
-    init_blk_migration(f);
+    ret = init_blk_migration(f);
+    if (ret < 0) {
+        qemu_mutex_unlock_iothread();
+        return ret;
+    }
 
     /* start track dirty blocks */
     ret = set_dirty_tracking();
diff --git a/migration/colo.c b/migration/colo.c
index 712308ed5e..c19eb3f073 100644
--- a/migration/colo.c
+++ b/migration/colo.c
@@ -19,6 +19,8 @@
 #include "qemu/error-report.h"
 #include "qapi/error.h"
 #include "migration/failover.h"
+#include "replication.h"
+#include "qmp-commands.h"
 
 static bool vmstate_loading;
 
@@ -147,6 +149,53 @@ void colo_do_failover(MigrationState *s)
     }
 }
 
+void qmp_xen_set_replication(bool enable, bool primary,
+                             bool has_failover, bool failover,
+                             Error **errp)
+{
+    ReplicationMode mode = primary ?
+                           REPLICATION_MODE_PRIMARY :
+                           REPLICATION_MODE_SECONDARY;
+
+    if (has_failover && enable) {
+        error_setg(errp, "Parameter 'failover' is only for"
+                   " stopping replication");
+        return;
+    }
+
+    if (enable) {
+        replication_start_all(mode, errp);
+    } else {
+        if (!has_failover) {
+            failover = NULL;
+        }
+        replication_stop_all(failover, failover ? NULL : errp);
+    }
+}
+
+ReplicationStatus *qmp_query_xen_replication_status(Error **errp)
+{
+    Error *err = NULL;
+    ReplicationStatus *s = g_new0(ReplicationStatus, 1);
+
+    replication_get_error_all(&err);
+    if (err) {
+        s->error = true;
+        s->has_desc = true;
+        s->desc = g_strdup(error_get_pretty(err));
+    } else {
+        s->error = false;
+    }
+
+    error_free(err);
+    return s;
+}
+
+void qmp_xen_colo_do_checkpoint(Error **errp)
+{
+    replication_do_checkpoint_all(errp);
+}
+
 static void colo_send_message(QEMUFile *f, COLOMessage msg,
                               Error **errp)
 {
diff --git a/migration/migration.c b/migration/migration.c
index c6ae69d371..3dab6845b1 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -49,6 +49,10 @@
  * for sending the last part */
 #define DEFAULT_MIGRATE_SET_DOWNTIME 300
 
+/* Maximum migrate downtime set to 2000 seconds */
+#define MAX_MIGRATE_DOWNTIME_SECONDS 2000
+#define MAX_MIGRATE_DOWNTIME (MAX_MIGRATE_DOWNTIME_SECONDS * 1000)
+
 /* Default compression thread count */
 #define DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT 8
 /* Default decompression thread count, usually decompression is at
@@ -383,6 +387,7 @@ static void process_incoming_migration_co(void *opaque)
     int ret;
 
     mis->from_src_file = f;
+    mis->largest_page_size = qemu_ram_pagesize_largest();
     postcopy_state_set(POSTCOPY_INCOMING_NONE);
     migrate_set_state(&mis->state, MIGRATION_STATUS_NONE,
                       MIGRATION_STATUS_ACTIVE);
@@ -843,10 +848,11 @@ void qmp_migrate_set_parameters(MigrationParameters *params, Error **errp)
         return;
     }
     if (params->has_downtime_limit &&
-        (params->downtime_limit < 0 || params->downtime_limit > 2000000)) {
-        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
-                   "downtime_limit",
-                   "an integer in the range of 0 to 2000000 milliseconds");
+        (params->downtime_limit < 0 ||
+         params->downtime_limit > MAX_MIGRATE_DOWNTIME)) {
+        error_setg(errp, "Parameter 'downtime_limit' expects an integer in "
+                         "the range of 0 to %d milliseconds",
+                         MAX_MIGRATE_DOWNTIME);
         return;
     }
     if (params->has_x_checkpoint_delay && (params->x_checkpoint_delay < 0)) {
@@ -1145,6 +1151,21 @@ void migrate_del_blocker(Error *reason)
     migration_blockers = g_slist_remove(migration_blockers, reason);
 }
 
+int check_migratable(Object *obj, Error **err)
+{
+    DeviceClass *dc = DEVICE_GET_CLASS(obj);
+    if (only_migratable && dc->vmsd) {
+        if (dc->vmsd->unmigratable) {
+            error_setg(err, "Device %s is not migratable, but "
+                       "--only-migratable was specified",
+                       object_get_typename(obj));
+            return -1;
+        }
+    }
+
+    return 0;
+}
+
 void qmp_migrate_incoming(const char *uri, Error **errp)
 {
     Error *local_err = NULL;
@@ -1289,6 +1310,13 @@ void qmp_migrate_set_speed(int64_t value, Error **errp)
 
 void qmp_migrate_set_downtime(double value, Error **errp)
 {
+    if (value < 0 || value > MAX_MIGRATE_DOWNTIME_SECONDS) {
+        error_setg(errp, "Parameter 'downtime_limit' expects an integer in "
+                         "the range of 0 to %d seconds",
+                         MAX_MIGRATE_DOWNTIME_SECONDS);
+        return;
+    }
+
     value *= 1000; /* Convert to milliseconds */
     value = MAX(0, MIN(INT64_MAX, value));
 
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index a40dddbaf6..effbeb64fb 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -81,25 +81,18 @@ static bool ufd_version_check(int ufd)
         return false;
     }
 
-    return true;
-}
-
-/*
- * Check for things that postcopy won't support; returns 0 if the block
- * is fine.
- */
-static int check_range(const char *block_name, void *host_addr,
-                      ram_addr_t offset, ram_addr_t length, void *opaque)
-{
-    RAMBlock *rb = qemu_ram_block_by_name(block_name);
-
-    if (qemu_ram_pagesize(rb) > getpagesize()) {
-        error_report("Postcopy doesn't support large page sizes yet (%s)",
-                     block_name);
-        return -E2BIG;
+    if (getpagesize() != ram_pagesize_summary()) {
+        bool have_hp = false;
+        /* We've got a huge page */
+#ifdef UFFD_FEATURE_MISSING_HUGETLBFS
+        have_hp = api_struct.features & UFFD_FEATURE_MISSING_HUGETLBFS;
+#endif
+        if (!have_hp) {
+            error_report("Userfault on this host does not support huge pages");
+            return false;
+        }
     }
-
-    return 0;
+    return true;
 }
 
 /*
@@ -122,12 +115,6 @@ bool postcopy_ram_supported_by_host(void)
         goto out;
     }
 
-    /* Check for anything about the RAMBlocks we don't support */
-    if (qemu_ram_foreach_block(check_range, NULL)) {
-        /* check_range will have printed its own error */
-        goto out;
-    }
-
     ufd = syscall(__NR_userfaultfd, O_CLOEXEC);
     if (ufd == -1) {
         error_report("%s: userfaultfd not available: %s", __func__,
@@ -200,27 +187,6 @@ out:
     return ret;
 }
 
-/**
- * postcopy_ram_discard_range: Discard a range of memory.
- * We can assume that if we've been called postcopy_ram_hosttest returned true.
- *
- * @mis: Current incoming migration state.
- * @start, @length: range of memory to discard.
- *
- * returns: 0 on success.
- */
-int postcopy_ram_discard_range(MigrationIncomingState *mis, uint8_t *start,
-                               size_t length)
-{
-    trace_postcopy_ram_discard_range(start, length);
-    if (madvise(start, length, MADV_DONTNEED)) {
-        error_report("%s MADV_DONTNEED: %s", __func__, strerror(errno));
-        return -1;
-    }
-
-    return 0;
-}
-
 /*
  * Setup an area of RAM so that it *can* be used for postcopy later; this
  * must be done right at the start prior to pre-copy.
@@ -239,7 +205,7 @@ static int init_range(const char *block_name, void *host_addr,
      * - we're going to get the copy from the source anyway.
      * (Precopy will just overwrite this data, so doesn't need the discard)
      */
-    if (postcopy_ram_discard_range(mis, host_addr, length)) {
+    if (ram_discard_range(mis, block_name, 0, length)) {
         return -1;
     }
 
@@ -342,9 +308,13 @@ int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis)
     migrate_send_rp_shut(mis, qemu_file_get_error(mis->from_src_file) != 0);
 
     if (mis->postcopy_tmp_page) {
-        munmap(mis->postcopy_tmp_page, getpagesize());
+        munmap(mis->postcopy_tmp_page, mis->largest_page_size);
         mis->postcopy_tmp_page = NULL;
     }
+    if (mis->postcopy_tmp_zero_page) {
+        munmap(mis->postcopy_tmp_zero_page, mis->largest_page_size);
+        mis->postcopy_tmp_zero_page = NULL;
+    }
     trace_postcopy_ram_incoming_cleanup_exit();
     return 0;
 }
@@ -408,6 +378,10 @@ static int ram_block_enable_notify(const char *block_name, void *host_addr,
         error_report("%s userfault register: %s", __func__, strerror(errno));
         return -1;
     }
+    if (!(reg_struct.ioctls & ((__u64)1 << _UFFDIO_COPY))) {
+        error_report("%s userfault: Region doesn't support COPY", __func__);
+        return -1;
+    }
 
     return 0;
 }
@@ -420,7 +394,6 @@ static void *postcopy_ram_fault_thread(void *opaque)
     MigrationIncomingState *mis = opaque;
     struct uffd_msg msg;
     int ret;
-    size_t hostpagesize = getpagesize();
     RAMBlock *rb = NULL;
     RAMBlock *last_rb = NULL; /* last RAMBlock we sent part of */
 
@@ -487,7 +460,7 @@ static void *postcopy_ram_fault_thread(void *opaque)
             break;
         }
 
-        rb_offset &= ~(hostpagesize - 1);
+        rb_offset &= ~(qemu_ram_pagesize(rb) - 1);
         trace_postcopy_ram_fault_thread_request(msg.arg.pagefault.address,
                                                 qemu_ram_get_idstr(rb),
                                                 rb_offset);
@@ -499,11 +472,11 @@ static void *postcopy_ram_fault_thread(void *opaque)
         if (rb != last_rb) {
             last_rb = rb;
             migrate_send_rp_req_pages(mis, qemu_ram_get_idstr(rb),
-                                     rb_offset, hostpagesize);
+                                     rb_offset, qemu_ram_pagesize(rb));
         } else {
             /* Save some space */
             migrate_send_rp_req_pages(mis, NULL,
-                                     rb_offset, hostpagesize);
+                                     rb_offset, qemu_ram_pagesize(rb));
         }
     }
     trace_postcopy_ram_fault_thread_exit();
@@ -564,13 +537,14 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis)
  * Place a host page (from) at (host) atomically
  * returns 0 on success
  */
-int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from)
+int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from,
+                        size_t pagesize)
 {
     struct uffdio_copy copy_struct;
 
     copy_struct.dst = (uint64_t)(uintptr_t)host;
     copy_struct.src = (uint64_t)(uintptr_t)from;
-    copy_struct.len = getpagesize();
+    copy_struct.len = pagesize;
     copy_struct.mode = 0;
 
     /* copy also acks to the kernel waking the stalled thread up
@@ -580,8 +554,8 @@ int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from)
      */
     if (ioctl(mis->userfault_fd, UFFDIO_COPY, &copy_struct)) {
         int e = errno;
-        error_report("%s: %s copy host: %p from: %p",
-                     __func__, strerror(e), host, from);
+        error_report("%s: %s copy host: %p from: %p (size: %zd)",
+                     __func__, strerror(e), host, from, pagesize);
 
         return -e;
     }
@@ -594,23 +568,44 @@ int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from)
  * Place a zero page at (host) atomically
  * returns 0 on success
  */
-int postcopy_place_page_zero(MigrationIncomingState *mis, void *host)
+int postcopy_place_page_zero(MigrationIncomingState *mis, void *host,
+                             size_t pagesize)
 {
-    struct uffdio_zeropage zero_struct;
+    trace_postcopy_place_page_zero(host);
 
-    zero_struct.range.start = (uint64_t)(uintptr_t)host;
-    zero_struct.range.len = getpagesize();
-    zero_struct.mode = 0;
+    if (pagesize == getpagesize()) {
+        struct uffdio_zeropage zero_struct;
+        zero_struct.range.start = (uint64_t)(uintptr_t)host;
+        zero_struct.range.len = getpagesize();
+        zero_struct.mode = 0;
 
-    if (ioctl(mis->userfault_fd, UFFDIO_ZEROPAGE, &zero_struct)) {
-        int e = errno;
-        error_report("%s: %s zero host: %p",
-                     __func__, strerror(e), host);
+        if (ioctl(mis->userfault_fd, UFFDIO_ZEROPAGE, &zero_struct)) {
+            int e = errno;
+            error_report("%s: %s zero host: %p",
+                         __func__, strerror(e), host);
 
-        return -e;
+            return -e;
+        }
+    } else {
+        /* The kernel can't use UFFDIO_ZEROPAGE for hugepages */
+        if (!mis->postcopy_tmp_zero_page) {
+            mis->postcopy_tmp_zero_page = mmap(NULL, mis->largest_page_size,
+                                               PROT_READ | PROT_WRITE,
+                                               MAP_PRIVATE | MAP_ANONYMOUS,
+                                               -1, 0);
+            if (mis->postcopy_tmp_zero_page == MAP_FAILED) {
+                int e = errno;
+                mis->postcopy_tmp_zero_page = NULL;
+                error_report("%s: %s mapping large zero page",
+                             __func__, strerror(e));
+                return -e;
+            }
+            memset(mis->postcopy_tmp_zero_page, '\0', mis->largest_page_size);
+        }
+        return postcopy_place_page(mis, host, mis->postcopy_tmp_zero_page,
+                                   pagesize);
     }
 
-    trace_postcopy_place_page_zero(host);
     return 0;
 }
 
@@ -625,7 +620,7 @@ int postcopy_place_page_zero(MigrationIncomingState *mis, void *host)
 void *postcopy_get_tmp_page(MigrationIncomingState *mis)
 {
     if (!mis->postcopy_tmp_page) {
-        mis->postcopy_tmp_page = mmap(NULL, getpagesize(),
+        mis->postcopy_tmp_page = mmap(NULL, mis->largest_page_size,
                              PROT_READ | PROT_WRITE, MAP_PRIVATE |
                              MAP_ANONYMOUS, -1, 0);
         if (mis->postcopy_tmp_page == MAP_FAILED) {
@@ -658,13 +653,6 @@ int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis)
     return -1;
 }
 
-int postcopy_ram_discard_range(MigrationIncomingState *mis, uint8_t *start,
-                               size_t length)
-{
-    assert(0);
-    return -1;
-}
-
 int postcopy_ram_prepare_discard(MigrationIncomingState *mis)
 {
     assert(0);
@@ -677,13 +665,15 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis)
     return -1;
 }
 
-int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from)
+int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from,
+                        size_t pagesize)
 {
     assert(0);
     return -1;
 }
 
-int postcopy_place_page_zero(MigrationIncomingState *mis, void *host)
+int postcopy_place_page_zero(MigrationIncomingState *mis, void *host,
+                        size_t pagesize)
 {
     assert(0);
     return -1;
diff --git a/migration/ram.c b/migration/ram.c
index f289fcddd5..719425b9b8 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -600,6 +600,23 @@ static void migration_bitmap_sync_init(void)
     iterations_prev = 0;
 }
 
+/* Returns a summary bitmap of the page sizes of all RAMBlocks;
+ * for VMs with just normal pages this is equivalent to the
+ * host page size.  If it's got some huge pages then it's the OR
+ * of all the different page sizes.
+ */
+uint64_t ram_pagesize_summary(void)
+{
+    RAMBlock *block;
+    uint64_t summary = 0;
+
+    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
+        summary |= block->page_size;
+    }
+
+    return summary;
+}
+
 static void migration_bitmap_sync(void)
 {
     RAMBlock *block;
@@ -1285,6 +1302,8 @@ static int ram_save_target_page(MigrationState *ms, QEMUFile *f,
  *                     offset to point into the middle of a host page
  *                     in which case the remainder of the hostpage is sent.
  *                     Only dirty target pages are sent.
+ *                     Note that the host page size may be a huge page for this
+ *                     block.
  *
  * Returns: Number of pages written.
  *
@@ -1303,6 +1322,8 @@ static int ram_save_host_page(MigrationState *ms, QEMUFile *f,
                               ram_addr_t dirty_ram_abs)
 {
     int tmppages, pages = 0;
+    size_t pagesize = qemu_ram_pagesize(pss->block);
+
     do {
         tmppages = ram_save_target_page(ms, f, pss, last_stage,
                                         bytes_transferred, dirty_ram_abs);
@@ -1313,7 +1334,7 @@ static int ram_save_host_page(MigrationState *ms, QEMUFile *f,
         pages += tmppages;
         pss->offset += TARGET_PAGE_SIZE;
         dirty_ram_abs += TARGET_PAGE_SIZE;
-    } while (pss->offset & (qemu_host_page_size - 1));
+    } while (pss->offset & (pagesize - 1));
 
     /* The offset we leave with is the last one we looked at */
     pss->offset -= TARGET_PAGE_SIZE;
@@ -1655,12 +1676,17 @@ static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass,
 {
     unsigned long *bitmap;
     unsigned long *unsentmap;
-    unsigned int host_ratio = qemu_host_page_size / TARGET_PAGE_SIZE;
+    unsigned int host_ratio = block->page_size / TARGET_PAGE_SIZE;
     unsigned long first = block->offset >> TARGET_PAGE_BITS;
     unsigned long len = block->used_length >> TARGET_PAGE_BITS;
     unsigned long last = first + (len - 1);
     unsigned long run_start;
 
+    if (block->page_size == TARGET_PAGE_SIZE) {
+        /* Easy case - TPS==HPS for a non-huge page RAMBlock */
+        return;
+    }
+
     bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
     unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap;
 
@@ -1764,7 +1790,8 @@ static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass,
  * Utility for the outgoing postcopy code.
  *
  * Discard any partially sent host-page size chunks, mark any partially
- * dirty host-page size chunks as all dirty.
+ * dirty host-page size chunks as all dirty.  In this case the host-page
+ * is the host-page for the particular RAMBlock, i.e. it might be a huge page
  *
  * Returns: 0 on success
  */
@@ -1772,11 +1799,6 @@ static int postcopy_chunk_hostpages(MigrationState *ms)
 {
     struct RAMBlock *block;
 
-    if (qemu_host_page_size == TARGET_PAGE_SIZE) {
-        /* Easy case - TPS==HPS - nothing to be done */
-        return 0;
-    }
-
     /* Easiest way to make sure we don't resume in the middle of a host-page */
     last_seen_block = NULL;
     last_sent_block = NULL;
@@ -1832,7 +1854,7 @@ int ram_postcopy_send_discard_bitmap(MigrationState *ms)
         return -EINVAL;
     }
 
-    /* Deal with TPS != HPS */
+    /* Deal with TPS != HPS and huge pages */
     ret = postcopy_chunk_hostpages(ms);
     if (ret) {
         rcu_read_unlock();
@@ -1872,6 +1894,8 @@ int ram_discard_range(MigrationIncomingState *mis,
 {
     int ret = -1;
 
+    trace_ram_discard_range(block_name, start, length);
+
     rcu_read_lock();
     RAMBlock *rb = qemu_ram_block_by_name(block_name);
 
@@ -1881,27 +1905,7 @@ int ram_discard_range(MigrationIncomingState *mis,
         goto err;
     }
 
-    uint8_t *host_startaddr = rb->host + start;
-
-    if ((uintptr_t)host_startaddr & (qemu_host_page_size - 1)) {
-        error_report("ram_discard_range: Unaligned start address: %p",
-                     host_startaddr);
-        goto err;
-    }
-
-    if ((start + length) <= rb->used_length) {
-        uint8_t *host_endaddr = host_startaddr + length;
-        if ((uintptr_t)host_endaddr & (qemu_host_page_size - 1)) {
-            error_report("ram_discard_range: Unaligned end address: %p",
-                         host_endaddr);
-            goto err;
-        }
-        ret = postcopy_ram_discard_range(mis, host_startaddr, length);
-    } else {
-        error_report("ram_discard_range: Overrun block '%s' (%" PRIu64
-                     "/%zx/" RAM_ADDR_FMT")",
-                     block_name, start, length, rb->used_length);
-    }
+    ret = ram_block_discard_range(rb, start, length);
 
 err:
     rcu_read_unlock();
@@ -2010,6 +2014,9 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
         qemu_put_byte(f, strlen(block->idstr));
         qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
         qemu_put_be64(f, block->used_length);
+        if (migrate_postcopy_ram() && block->page_size != qemu_host_page_size) {
+            qemu_put_be64(f, block->page_size);
+        }
     }
 
     rcu_read_unlock();
@@ -2387,7 +2394,7 @@ static int ram_load_postcopy(QEMUFile *f)
 {
     int flags = 0, ret = 0;
     bool place_needed = false;
-    bool matching_page_sizes = qemu_host_page_size == TARGET_PAGE_SIZE;
+    bool matching_page_sizes = false;
     MigrationIncomingState *mis = migration_incoming_get_current();
     /* Temporary page that is later 'placed' */
     void *postcopy_host_page = postcopy_get_tmp_page(mis);
@@ -2399,6 +2406,7 @@ static int ram_load_postcopy(QEMUFile *f)
         void *host = NULL;
         void *page_buffer = NULL;
         void *place_source = NULL;
+        RAMBlock *block = NULL;
         uint8_t ch;
 
         addr = qemu_get_be64(f);
@@ -2408,7 +2416,7 @@ static int ram_load_postcopy(QEMUFile *f)
         trace_ram_load_postcopy_loop((uint64_t)addr, flags);
         place_needed = false;
         if (flags & (RAM_SAVE_FLAG_COMPRESS | RAM_SAVE_FLAG_PAGE)) {
-            RAMBlock *block = ram_block_from_stream(f, flags);
+            block = ram_block_from_stream(f, flags);
 
             host = host_from_ram_block_offset(block, addr);
             if (!host) {
@@ -2416,8 +2424,11 @@ static int ram_load_postcopy(QEMUFile *f)
                 ret = -EINVAL;
                 break;
             }
+            matching_page_sizes = block->page_size == TARGET_PAGE_SIZE;
             /*
-             * Postcopy requires that we place whole host pages atomically.
+             * Postcopy requires that we place whole host pages atomically;
+             * these may be huge pages for RAMBlocks that are backed by
+             * hugetlbfs.
              * To make it atomic, the data is read into a temporary page
              * that's moved into place later.
              * The migration protocol uses,  possibly smaller, target-pages
@@ -2425,9 +2436,9 @@ static int ram_load_postcopy(QEMUFile *f)
              * of a host page in order.
              */
             page_buffer = postcopy_host_page +
-                          ((uintptr_t)host & ~qemu_host_page_mask);
+                          ((uintptr_t)host & (block->page_size - 1));
             /* If all TP are zero then we can optimise the place */
-            if (!((uintptr_t)host & ~qemu_host_page_mask)) {
+            if (!((uintptr_t)host & (block->page_size - 1))) {
                 all_zero = true;
             } else {
                 /* not the 1st TP within the HP */
@@ -2445,7 +2456,7 @@ static int ram_load_postcopy(QEMUFile *f)
              * page
              */
             place_needed = (((uintptr_t)host + TARGET_PAGE_SIZE) &
-                                     ~qemu_host_page_mask) == 0;
+                                     (block->page_size - 1)) == 0;
             place_source = postcopy_host_page;
         }
         last_host = host;
@@ -2483,14 +2494,14 @@ static int ram_load_postcopy(QEMUFile *f)
 
         if (place_needed) {
             /* This gets called at the last target page in the host page */
+            void *place_dest = host + TARGET_PAGE_SIZE - block->page_size;
+
             if (all_zero) {
-                ret = postcopy_place_page_zero(mis,
-                                               host + TARGET_PAGE_SIZE -
-                                               qemu_host_page_size);
+                ret = postcopy_place_page_zero(mis, place_dest,
+                                               block->page_size);
             } else {
-                ret = postcopy_place_page(mis, host + TARGET_PAGE_SIZE -
-                                               qemu_host_page_size,
-                                               place_source);
+                ret = postcopy_place_page(mis, place_dest,
+                                          place_source, block->page_size);
             }
         }
         if (!ret) {
@@ -2511,6 +2522,8 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
      * be atomic
      */
     bool postcopy_running = postcopy_state_get() >= POSTCOPY_INCOMING_LISTENING;
+    /* ADVISE is earlier, it shows the source has the postcopy capability on */
+    bool postcopy_advised = postcopy_state_get() >= POSTCOPY_INCOMING_ADVISE;
 
     seq_iter++;
 
@@ -2575,6 +2588,18 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
                             error_report_err(local_err);
                         }
                     }
+                    /* For postcopy we need to check hugepage sizes match */
+                    if (postcopy_advised &&
+                        block->page_size != qemu_host_page_size) {
+                        uint64_t remote_page_size = qemu_get_be64(f);
+                        if (remote_page_size != block->page_size) {
+                            error_report("Mismatched RAM page size %s "
+                                         "(local) %zd != %" PRId64,
+                                         id, block->page_size,
+                                         remote_page_size);
+                            ret = -EINVAL;
+                        }
+                    }
                     ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG,
                                           block->idstr);
                 } else {
diff --git a/migration/savevm.c b/migration/savevm.c
index 5ecd264134..3b19a4a274 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -688,6 +688,7 @@ int vmstate_register_with_alias_id(DeviceState *dev, int instance_id,
 
                 return -1;
             }
+            g_free(id);
 
             se->compat = g_new0(CompatEntry, 1);
             pstrcpy(se->compat->idstr, sizeof(se->compat->idstr), vmsd->name);
@@ -869,7 +870,7 @@ int qemu_savevm_send_packaged(QEMUFile *f, const uint8_t *buf, size_t len)
 void qemu_savevm_send_postcopy_advise(QEMUFile *f)
 {
     uint64_t tmp[2];
-    tmp[0] = cpu_to_be64(getpagesize());
+    tmp[0] = cpu_to_be64(ram_pagesize_summary());
     tmp[1] = cpu_to_be64(1ul << qemu_target_page_bits());
 
     trace_qemu_savevm_send_postcopy_advise();
@@ -1276,6 +1277,11 @@ done:
         status = MIGRATION_STATUS_COMPLETED;
     }
     migrate_set_state(&ms->state, MIGRATION_STATUS_SETUP, status);
+
+    /* f is outer parameter, it should not stay in global migration state after
+     * this function finished */
+    ms->to_dst_file = NULL;
+
     return ret;
 }
 
@@ -1346,7 +1352,7 @@ static int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis);
 static int loadvm_postcopy_handle_advise(MigrationIncomingState *mis)
 {
     PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_ADVISE);
-    uint64_t remote_hps, remote_tps;
+    uint64_t remote_pagesize_summary, local_pagesize_summary, remote_tps;
 
     trace_loadvm_postcopy_handle_advise();
     if (ps != POSTCOPY_INCOMING_NONE) {
@@ -1359,17 +1365,27 @@ static int loadvm_postcopy_handle_advise(MigrationIncomingState *mis)
         return -1;
     }
 
-    remote_hps = qemu_get_be64(mis->from_src_file);
-    if (remote_hps != getpagesize())  {
+    remote_pagesize_summary = qemu_get_be64(mis->from_src_file);
+    local_pagesize_summary = ram_pagesize_summary();
+
+    if (remote_pagesize_summary != local_pagesize_summary)  {
         /*
-         * Some combinations of mismatch are probably possible but it gets
-         * a bit more complicated.  In particular we need to place whole
-         * host pages on the dest at once, and we need to ensure that we
-         * handle dirtying to make sure we never end up sending part of
-         * a hostpage on it's own.
+         * This detects two potential causes of mismatch:
+         *   a) A mismatch in host page sizes
+         *      Some combinations of mismatch are probably possible but it gets
+         *      a bit more complicated.  In particular we need to place whole
+         *      host pages on the dest at once, and we need to ensure that we
+         *      handle dirtying to make sure we never end up sending part of
+         *      a hostpage on it's own.
+         *   b) The use of different huge page sizes on source/destination
+         *      a more fine grain test is performed during RAM block migration
+         *      but this test here causes a nice early clear failure, and
+         *      also fails when passed to an older qemu that doesn't
+         *      do huge pages.
          */
-        error_report("Postcopy needs matching host page sizes (s=%d d=%d)",
-                     (int)remote_hps, getpagesize());
+        error_report("Postcopy needs matching RAM page sizes (s=%" PRIx64
+                                                             " d=%" PRIx64 ")",
+                     remote_pagesize_summary, local_pagesize_summary);
         return -1;
     }
 
diff --git a/migration/trace-events b/migration/trace-events
index fa660e35b1..7372ce2a51 100644
--- a/migration/trace-events
+++ b/migration/trace-events
@@ -68,6 +68,7 @@ get_queued_page_not_dirty(const char *block_name, uint64_t tmp_offset, uint64_t
 migration_bitmap_sync_start(void) ""
 migration_bitmap_sync_end(uint64_t dirty_pages) "dirty_pages %" PRIu64
 migration_throttle(void) ""
+ram_discard_range(const char *rbname, uint64_t start, size_t len) "%s: start: %" PRIx64 " %zx"
 ram_load_postcopy_loop(uint64_t addr, int flags) "@%" PRIx64 " %x"
 ram_postcopy_send_discard_bitmap(void) ""
 ram_save_queue_pages(const char *rbname, size_t start, size_t len) "%s: start: %zx len: %zx"
@@ -176,7 +177,6 @@ rdma_start_outgoing_migration_after_rdma_source_init(void) ""
 # migration/postcopy-ram.c
 postcopy_discard_send_finish(const char *ramblock, int nwords, int ncmds) "%s mask words sent=%d in %d commands"
 postcopy_discard_send_range(const char *ramblock, unsigned long start, unsigned long length) "%s:%lx/%lx"
-postcopy_ram_discard_range(void *start, size_t length) "%p,+%zx"
 postcopy_cleanup_range(const char *ramblock, void *host_addr, size_t offset, size_t length) "%s: %p offset=%zx length=%zx"
 postcopy_init_range(const char *ramblock, void *host_addr, size_t offset, size_t length) "%s: %p offset=%zx length=%zx"
 postcopy_nhp_range(const char *ramblock, void *host_addr, size_t offset, size_t length) "%s: %p offset=%zx length=%zx"
diff --git a/migration/vmstate.c b/migration/vmstate.c
index b4d8ae982a..78b3cd48e7 100644
--- a/migration/vmstate.c
+++ b/migration/vmstate.c
@@ -52,29 +52,15 @@ static int vmstate_size(void *opaque, VMStateField *field)
     return size;
 }
 
-static void *vmstate_base_addr(void *opaque, VMStateField *field, bool alloc)
+static void vmstate_handle_alloc(void *ptr, VMStateField *field, void *opaque)
 {
-    void *base_addr = opaque + field->offset;
-
-    if (field->flags & VMS_POINTER) {
-        if (alloc && (field->flags & VMS_ALLOC)) {
-            gsize size = 0;
-            if (field->flags & VMS_VBUFFER) {
-                size = vmstate_size(opaque, field);
-            } else {
-                int n_elems = vmstate_n_elems(opaque, field);
-                if (n_elems) {
-                    size = n_elems * field->size;
-                }
-            }
-            if (size) {
-                *(void **)base_addr = g_malloc(size);
-            }
+    if (field->flags & VMS_POINTER && field->flags & VMS_ALLOC) {
+        gsize size = vmstate_size(opaque, field);
+        size *= vmstate_n_elems(opaque, field);
+        if (size) {
+            *(void **)ptr = g_malloc(size);
         }
-        base_addr = *(void **)base_addr;
     }
-
-    return base_addr;
 }
 
 int vmstate_load_state(QEMUFile *f, const VMStateDescription *vmsd,
@@ -116,21 +102,30 @@ int vmstate_load_state(QEMUFile *f, const VMStateDescription *vmsd,
              field->field_exists(opaque, version_id)) ||
             (!field->field_exists &&
              field->version_id <= version_id)) {
-            void *base_addr = vmstate_base_addr(opaque, field, true);
+            void *first_elem = opaque + field->offset;
             int i, n_elems = vmstate_n_elems(opaque, field);
             int size = vmstate_size(opaque, field);
 
+            vmstate_handle_alloc(first_elem, field, opaque);
+            if (field->flags & VMS_POINTER) {
+                first_elem = *(void **)first_elem;
+                assert(first_elem  || !n_elems);
+            }
             for (i = 0; i < n_elems; i++) {
-                void *addr = base_addr + size * i;
+                void *curr_elem = first_elem + size * i;
 
                 if (field->flags & VMS_ARRAY_OF_POINTER) {
-                    addr = *(void **)addr;
+                    curr_elem = *(void **)curr_elem;
                 }
-                if (field->flags & VMS_STRUCT) {
-                    ret = vmstate_load_state(f, field->vmsd, addr,
+                if (!curr_elem) {
+                    /* if null pointer check placeholder and do not follow */
+                    assert(field->flags & VMS_ARRAY_OF_POINTER);
+                    ret = vmstate_info_nullptr.get(f, curr_elem, size, NULL);
+                } else if (field->flags & VMS_STRUCT) {
+                    ret = vmstate_load_state(f, field->vmsd, curr_elem,
                                              field->vmsd->version_id);
                 } else {
-                   ret = field->info->get(f, addr, size, field);
+                    ret = field->info->get(f, curr_elem, size, field);
                 }
                 if (ret >= 0) {
                     ret = qemu_file_get_error(f);
@@ -321,26 +316,34 @@ void vmstate_save_state(QEMUFile *f, const VMStateDescription *vmsd,
     while (field->name) {
         if (!field->field_exists ||
             field->field_exists(opaque, vmsd->version_id)) {
-            void *base_addr = vmstate_base_addr(opaque, field, false);
+            void *first_elem = opaque + field->offset;
             int i, n_elems = vmstate_n_elems(opaque, field);
             int size = vmstate_size(opaque, field);
             int64_t old_offset, written_bytes;
             QJSON *vmdesc_loop = vmdesc;
 
             trace_vmstate_save_state_loop(vmsd->name, field->name, n_elems);
+            if (field->flags & VMS_POINTER) {
+                first_elem = *(void **)first_elem;
+                assert(first_elem  || !n_elems);
+            }
             for (i = 0; i < n_elems; i++) {
-                void *addr = base_addr + size * i;
+                void *curr_elem = first_elem + size * i;
 
                 vmsd_desc_field_start(vmsd, vmdesc_loop, field, i, n_elems);
                 old_offset = qemu_ftell_fast(f);
-
                 if (field->flags & VMS_ARRAY_OF_POINTER) {
-                    addr = *(void **)addr;
+                    assert(curr_elem);
+                    curr_elem = *(void **)curr_elem;
                 }
-                if (field->flags & VMS_STRUCT) {
-                    vmstate_save_state(f, field->vmsd, addr, vmdesc_loop);
+                if (!curr_elem) {
+                    /* if null pointer write placeholder and do not follow */
+                    assert(field->flags & VMS_ARRAY_OF_POINTER);
+                    vmstate_info_nullptr.put(f, curr_elem, size, NULL, NULL);
+                } else if (field->flags & VMS_STRUCT) {
+                    vmstate_save_state(f, field->vmsd, curr_elem, vmdesc_loop);
                 } else {
-                    field->info->put(f, addr, size, field, vmdesc_loop);
+                    field->info->put(f, curr_elem, size, field, vmdesc_loop);
                 }
 
                 written_bytes = qemu_ftell_fast(f) - old_offset;
@@ -752,6 +755,34 @@ const VMStateInfo vmstate_info_uint64 = {
     .put  = put_uint64,
 };
 
+static int get_nullptr(QEMUFile *f, void *pv, size_t size, VMStateField *field)
+
+{
+    if (qemu_get_byte(f) == VMS_NULLPTR_MARKER) {
+        return  0;
+    }
+    error_report("vmstate: get_nullptr expected VMS_NULLPTR_MARKER");
+    return -EINVAL;
+}
+
+static int put_nullptr(QEMUFile *f, void *pv, size_t size,
+                        VMStateField *field, QJSON *vmdesc)
+
+{
+    if (pv == NULL) {
+        qemu_put_byte(f, VMS_NULLPTR_MARKER);
+        return 0;
+    }
+    error_report("vmstate: put_nullptr must be called with pv == NULL");
+    return -EINVAL;
+}
+
+const VMStateInfo vmstate_info_nullptr = {
+    .name = "uint64",
+    .get  = get_nullptr,
+    .put  = put_nullptr,
+};
+
 /* 64 bit unsigned int. See that the received value is the same than the one
    in the field */
 
diff --git a/monitor.c b/monitor.c
index f8f4a07cfb..b68944d93c 100644
--- a/monitor.c
+++ b/monitor.c
@@ -984,8 +984,10 @@ static void qmp_unregister_commands_hack(void)
 #ifndef TARGET_ARM
     qmp_unregister_command("query-gic-capabilities");
 #endif
-#if !defined(TARGET_S390X)
+#if !defined(TARGET_S390X) && !defined(TARGET_I386)
     qmp_unregister_command("query-cpu-model-expansion");
+#endif
+#if !defined(TARGET_S390X)
     qmp_unregister_command("query-cpu-model-baseline");
     qmp_unregister_command("query-cpu-model-comparison");
 #endif
diff --git a/nbd/server.c b/nbd/server.c
index ac92fa0727..924a1fe2db 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -891,9 +891,21 @@ NBDExport *nbd_export_new(BlockDriverState *bs, off_t dev_offset, off_t size,
 {
     BlockBackend *blk;
     NBDExport *exp = g_malloc0(sizeof(NBDExport));
+    uint64_t perm;
+    int ret;
 
-    blk = blk_new();
-    blk_insert_bs(blk, bs);
+    /* Don't allow resize while the NBD server is running, otherwise we don't
+     * care what happens with the node. */
+    perm = BLK_PERM_CONSISTENT_READ;
+    if ((nbdflags & NBD_FLAG_READ_ONLY) == 0) {
+        perm |= BLK_PERM_WRITE;
+    }
+    blk = blk_new(perm, BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED |
+                        BLK_PERM_WRITE | BLK_PERM_GRAPH_MOD);
+    ret = blk_insert_bs(blk, bs, errp);
+    if (ret < 0) {
+        goto fail;
+    }
     blk_set_enable_write_cache(blk, !writethrough);
 
     exp->refcount = 1;
diff --git a/net/vhost-user.c b/net/vhost-user.c
index 77b8110f8c..e7e63408a1 100644
--- a/net/vhost-user.c
+++ b/net/vhost-user.c
@@ -190,7 +190,35 @@ static gboolean net_vhost_user_watch(GIOChannel *chan, GIOCondition cond,
 
     qemu_chr_fe_disconnect(&s->chr);
 
-    return FALSE;
+    return TRUE;
+}
+
+static void net_vhost_user_event(void *opaque, int event);
+
+static void chr_closed_bh(void *opaque)
+{
+    const char *name = opaque;
+    NetClientState *ncs[MAX_QUEUE_NUM];
+    VhostUserState *s;
+    Error *err = NULL;
+    int queues;
+
+    queues = qemu_find_net_clients_except(name, ncs,
+                                          NET_CLIENT_DRIVER_NIC,
+                                          MAX_QUEUE_NUM);
+    assert(queues < MAX_QUEUE_NUM);
+
+    s = DO_UPCAST(VhostUserState, nc, ncs[0]);
+
+    qmp_set_link(name, false, &err);
+    vhost_user_stop(queues, ncs);
+
+    qemu_chr_fe_set_handlers(&s->chr, NULL, NULL, net_vhost_user_event,
+                             opaque, NULL, true);
+
+    if (err) {
+        error_report_err(err);
+    }
 }
 
 static void net_vhost_user_event(void *opaque, int event)
@@ -212,20 +240,31 @@ static void net_vhost_user_event(void *opaque, int event)
     trace_vhost_user_event(chr->label, event);
     switch (event) {
     case CHR_EVENT_OPENED:
-        s->watch = qemu_chr_fe_add_watch(&s->chr, G_IO_HUP,
-                                         net_vhost_user_watch, s);
         if (vhost_user_start(queues, ncs, &s->chr) < 0) {
             qemu_chr_fe_disconnect(&s->chr);
             return;
         }
+        s->watch = qemu_chr_fe_add_watch(&s->chr, G_IO_HUP,
+                                         net_vhost_user_watch, s);
         qmp_set_link(name, true, &err);
         s->started = true;
         break;
     case CHR_EVENT_CLOSED:
-        qmp_set_link(name, false, &err);
-        vhost_user_stop(queues, ncs);
-        g_source_remove(s->watch);
-        s->watch = 0;
+        /* a close event may happen during a read/write, but vhost
+         * code assumes the vhost_dev remains setup, so delay the
+         * stop & clear to idle.
+         * FIXME: better handle failure in vhost code, remove bh
+         */
+        if (s->watch) {
+            AioContext *ctx = qemu_get_current_aio_context();
+
+            g_source_remove(s->watch);
+            s->watch = 0;
+            qemu_chr_fe_set_handlers(&s->chr, NULL, NULL, NULL,
+                                     NULL, NULL, false);
+
+            aio_bh_schedule_oneshot(ctx, chr_closed_bh, opaque);
+        }
         break;
     }
 
diff --git a/pc-bios/bios-256k.bin b/pc-bios/bios-256k.bin
index 229b5af986..18666c9f2f 100644
--- a/pc-bios/bios-256k.bin
+++ b/pc-bios/bios-256k.bin
Binary files differdiff --git a/pc-bios/bios.bin b/pc-bios/bios.bin
index 9a9b0f0106..a394411fe5 100644
--- a/pc-bios/bios.bin
+++ b/pc-bios/bios.bin
Binary files differdiff --git a/pc-bios/openbios-ppc b/pc-bios/openbios-ppc
index 95f1167261..4869c9dcf3 100644
--- a/pc-bios/openbios-ppc
+++ b/pc-bios/openbios-ppc
Binary files differdiff --git a/pc-bios/openbios-sparc32 b/pc-bios/openbios-sparc32
index 675968ea62..aada55e094 100644
--- a/pc-bios/openbios-sparc32
+++ b/pc-bios/openbios-sparc32
Binary files differdiff --git a/pc-bios/openbios-sparc64 b/pc-bios/openbios-sparc64
index d4b95326fe..cf466f6a4c 100644
--- a/pc-bios/openbios-sparc64
+++ b/pc-bios/openbios-sparc64
Binary files differdiff --git a/pc-bios/s390-ccw.img b/pc-bios/s390-ccw.img
index cf05bf0be2..2a4adfa654 100644
--- a/pc-bios/s390-ccw.img
+++ b/pc-bios/s390-ccw.img
Binary files differdiff --git a/pc-bios/s390-ccw/bootmap.c b/pc-bios/s390-ccw/bootmap.c
index 611102e3ef..b21c877b53 100644
--- a/pc-bios/s390-ccw/bootmap.c
+++ b/pc-bios/s390-ccw/bootmap.c
@@ -724,11 +724,17 @@ static void zipl_load_vscsi(void)
 
 void zipl_load(void)
 {
-    if (virtio_get_device()->is_cdrom) {
+    VDev *vdev = virtio_get_device();
+
+    if (vdev->is_cdrom) {
         ipl_iso_el_torito();
         panic("\n! Cannot IPL this ISO image !\n");
     }
 
+    if (virtio_get_device_type() == VIRTIO_ID_NET) {
+        jump_to_IPL_code(vdev->netboot_start_addr);
+    }
+
     ipl_scsi();
 
     switch (virtio_get_device_type()) {
diff --git a/pc-bios/s390-ccw/iplb.h b/pc-bios/s390-ccw/iplb.h
index 86abc56a90..890aed9ece 100644
--- a/pc-bios/s390-ccw/iplb.h
+++ b/pc-bios/s390-ccw/iplb.h
@@ -13,7 +13,8 @@
 #define IPLB_H
 
 struct IplBlockCcw {
-    uint8_t  reserved0[85];
+    uint64_t netboot_start_addr;
+    uint8_t  reserved0[77];
     uint8_t  ssid;
     uint16_t devno;
     uint8_t  vm_flags;
diff --git a/pc-bios/s390-ccw/main.c b/pc-bios/s390-ccw/main.c
index 345b848752..0946766d86 100644
--- a/pc-bios/s390-ccw/main.c
+++ b/pc-bios/s390-ccw/main.c
@@ -53,6 +53,12 @@ static bool find_dev(Schib *schib, int dev_no)
         if (!virtio_is_supported(blk_schid)) {
             continue;
         }
+        /* Skip net devices since no IPLB is created and therefore no
+         * no network bootloader has been loaded
+         */
+        if (virtio_get_device_type() == VIRTIO_ID_NET && dev_no < 0) {
+            continue;
+        }
         if ((dev_no < 0) || (schib->pmcw.dev == dev_no)) {
             return true;
         }
@@ -67,6 +73,7 @@ static void virtio_setup(void)
     int ssid;
     bool found = false;
     uint16_t dev_no;
+    VDev *vdev = virtio_get_device();
 
     /*
      * We unconditionally enable mss support. In every sane configuration,
@@ -85,9 +92,6 @@ static void virtio_setup(void)
             found = find_dev(&schib, dev_no);
             break;
         case S390_IPL_TYPE_QEMU_SCSI:
-        {
-            VDev *vdev = virtio_get_device();
-
             vdev->scsi_device_selected = true;
             vdev->selected_scsi_device.channel = iplb.scsi.channel;
             vdev->selected_scsi_device.target = iplb.scsi.target;
@@ -95,7 +99,6 @@ static void virtio_setup(void)
             blk_schid.ssid = iplb.scsi.ssid & 0x3;
             found = find_dev(&schib, iplb.scsi.devno);
             break;
-        }
         default:
             panic("List-directed IPL not supported yet!\n");
         }
@@ -111,9 +114,14 @@ static void virtio_setup(void)
 
     IPL_assert(found, "No virtio device found");
 
-    virtio_setup_device(blk_schid);
+    if (virtio_get_device_type() == VIRTIO_ID_NET) {
+        sclp_print("Network boot device detected\n");
+        vdev->netboot_start_addr = iplb.ccw.netboot_start_addr;
+    } else {
+        virtio_setup_device(blk_schid);
 
-    IPL_assert(virtio_ipl_disk_is_valid(), "No valid IPL device detected");
+        IPL_assert(virtio_ipl_disk_is_valid(), "No valid IPL device detected");
+    }
 }
 
 int main(void)
diff --git a/pc-bios/s390-ccw/virtio.c b/pc-bios/s390-ccw/virtio.c
index b333734955..6ee93d56db 100644
--- a/pc-bios/s390-ccw/virtio.c
+++ b/pc-bios/s390-ccw/virtio.c
@@ -585,6 +585,7 @@ bool virtio_is_supported(SubChannelId schid)
         switch (vdev.senseid.cu_model) {
         case VIRTIO_ID_BLOCK:
         case VIRTIO_ID_SCSI:
+        case VIRTIO_ID_NET:
             return true;
         }
     }
diff --git a/pc-bios/s390-ccw/virtio.h b/pc-bios/s390-ccw/virtio.h
index eb35ea5faf..3388a423e5 100644
--- a/pc-bios/s390-ccw/virtio.h
+++ b/pc-bios/s390-ccw/virtio.h
@@ -276,6 +276,7 @@ struct VDev {
     uint8_t scsi_dev_heads;
     bool scsi_device_selected;
     ScsiDevice selected_scsi_device;
+    uint64_t netboot_start_addr;
 };
 typedef struct VDev VDev;
 
diff --git a/pc-bios/vgabios-cirrus.bin b/pc-bios/vgabios-cirrus.bin
index 9dadce2345..e6c42bd3c3 100644
--- a/pc-bios/vgabios-cirrus.bin
+++ b/pc-bios/vgabios-cirrus.bin
Binary files differdiff --git a/pc-bios/vgabios-qxl.bin b/pc-bios/vgabios-qxl.bin
index a89725c81c..915eba7c81 100644
--- a/pc-bios/vgabios-qxl.bin
+++ b/pc-bios/vgabios-qxl.bin
Binary files differdiff --git a/pc-bios/vgabios-stdvga.bin b/pc-bios/vgabios-stdvga.bin
index ea041412a2..40eca8c6d1 100644
--- a/pc-bios/vgabios-stdvga.bin
+++ b/pc-bios/vgabios-stdvga.bin
Binary files differdiff --git a/pc-bios/vgabios-virtio.bin b/pc-bios/vgabios-virtio.bin
index 71e22fc868..8b3abfa003 100644
--- a/pc-bios/vgabios-virtio.bin
+++ b/pc-bios/vgabios-virtio.bin
Binary files differdiff --git a/pc-bios/vgabios-vmware.bin b/pc-bios/vgabios-vmware.bin
index ad239cbfe8..6a90b945bd 100644
--- a/pc-bios/vgabios-vmware.bin
+++ b/pc-bios/vgabios-vmware.bin
Binary files differdiff --git a/pc-bios/vgabios.bin b/pc-bios/vgabios.bin
index 9947c2c26f..d3ed89d94b 100644
--- a/pc-bios/vgabios.bin
+++ b/pc-bios/vgabios.bin
Binary files differdiff --git a/qapi-schema.json b/qapi-schema.json
index 150ee98e9e..fb39d1dc11 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -4274,6 +4274,15 @@
 #        migration-safe, but allows tooling to get an insight and work with
 #        model details.
 #
+# Note: When a non-migration-safe CPU model is expanded in static mode, some
+# features enabled by the CPU model may be omitted, because they can't be
+# implemented by a static CPU model definition (e.g. cache info passthrough and
+# PMU passthrough in x86). If you need an accurate representation of the
+# features enabled by a non-migration-safe CPU model, use @full. If you need a
+# static representation that will keep ABI compatibility even when changing QEMU
+# version or machine-type, use @static (but keep in mind that some features may
+# be omitted).
+#
 # Since: 2.8.0
 ##
 { 'enum': 'CpuModelExpansionType',
@@ -5990,6 +5999,79 @@
 { 'command': 'xen-load-devices-state', 'data': {'filename': 'str'} }
 
 ##
+# @xen-set-replication:
+#
+# Enable or disable replication.
+#
+# @enable: true to enable, false to disable.
+#
+# @primary: true for primary or false for secondary.
+#
+# @failover: #optional true to do failover, false to stop. but cannot be
+#            specified if 'enable' is true. default value is false.
+#
+# Returns: nothing.
+#
+# Example:
+#
+# -> { "execute": "xen-set-replication",
+#      "arguments": {"enable": true, "primary": false} }
+# <- { "return": {} }
+#
+# Since: 2.9
+##
+{ 'command': 'xen-set-replication',
+  'data': { 'enable': 'bool', 'primary': 'bool', '*failover' : 'bool' } }
+
+##
+# @ReplicationStatus:
+#
+# The result format for 'query-xen-replication-status'.
+#
+# @error: true if an error happened, false if replication is normal.
+#
+# @desc: #optional the human readable error description string, when
+#        @error is 'true'.
+#
+# Since: 2.9
+##
+{ 'struct': 'ReplicationStatus',
+  'data': { 'error': 'bool', '*desc': 'str' } }
+
+##
+# @query-xen-replication-status:
+#
+# Query replication status while the vm is running.
+#
+# Returns: A @ReplicationResult object showing the status.
+#
+# Example:
+#
+# -> { "execute": "query-xen-replication-status" }
+# <- { "return": { "error": false } }
+#
+# Since: 2.9
+##
+{ 'command': 'query-xen-replication-status',
+  'returns': 'ReplicationStatus' }
+
+##
+# @xen-colo-do-checkpoint:
+#
+# Xen uses this command to notify replication to trigger a checkpoint.
+#
+# Returns: nothing.
+#
+# Example:
+#
+# -> { "execute": "xen-colo-do-checkpoint" }
+# <- { "return": {} }
+#
+# Since: 2.9
+##
+{ 'command': 'xen-colo-do-checkpoint' }
+
+##
 # @GICCapability:
 #
 # The struct describes capability for a specific GIC (Generic
@@ -6115,3 +6197,23 @@
 #
 ##
 { 'command': 'query-hotpluggable-cpus', 'returns': ['HotpluggableCPU'] }
+
+##
+# @GuidInfo:
+#
+# GUID information.
+#
+# @guid: the globally unique identifier
+#
+# Since: 2.9
+##
+{ 'struct': 'GuidInfo', 'data': {'guid': 'str'} }
+
+##
+# @query-vm-generation-id:
+#
+# Show Virtual Machine Generation ID
+#
+# Since 2.9
+##
+{ 'command': 'query-vm-generation-id', 'returns': 'GuidInfo' }
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 5f82d35fab..bc0ccd615c 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -1304,6 +1304,11 @@
 #
 # @speed:  #optional the maximum speed, in bytes per second
 #
+# @filter-node-name: #optional the node name that should be assigned to the
+#                    filter driver that the commit job inserts into the graph
+#                    above @top. If this option is not given, a node name is
+#                    autogenerated. (Since: 2.9)
+#
 # Returns: Nothing on success
 #          If commit or stream is already active on this device, DeviceInUse
 #          If @device does not exist, DeviceNotFound
@@ -1323,7 +1328,8 @@
 ##
 { 'command': 'block-commit',
   'data': { '*job-id': 'str', 'device': 'str', '*base': 'str', '*top': 'str',
-            '*backing-file': 'str', '*speed': 'int' } }
+            '*backing-file': 'str', '*speed': 'int',
+            '*filter-node-name': 'str' } }
 
 ##
 # @drive-backup:
@@ -1671,6 +1677,11 @@
 #                   default 'report' (no limitations, since this applies to
 #                   a different block device than @device).
 #
+# @filter-node-name: #optional the node name that should be assigned to the
+#                    filter driver that the mirror job inserts into the graph
+#                    above @device. If this option is not given, a node name is
+#                    autogenerated. (Since: 2.9)
+#
 # Returns: nothing on success.
 #
 # Since: 2.6
@@ -1690,7 +1701,8 @@
             'sync': 'MirrorSyncMode',
             '*speed': 'int', '*granularity': 'uint32',
             '*buf-size': 'int', '*on-source-error': 'BlockdevOnError',
-            '*on-target-error': 'BlockdevOnError' } }
+            '*on-target-error': 'BlockdevOnError',
+            '*filter-node-name': 'str' } }
 
 ##
 # @block_set_io_throttle:
@@ -2111,6 +2123,7 @@
 # @replication: Since 2.8
 # @ssh: Since 2.8
 # @iscsi: Since 2.9
+# @rbd: Since 2.9
 #
 # Since: 2.0
 ##
@@ -2119,7 +2132,7 @@
             'dmg', 'file', 'ftp', 'ftps', 'gluster', 'host_cdrom',
             'host_device', 'http', 'https', 'iscsi', 'luks', 'nbd', 'nfs',
             'null-aio', 'null-co', 'parallels', 'qcow', 'qcow2', 'qed',
-            'quorum', 'raw', 'replication', 'ssh', 'vdi', 'vhdx', 'vmdk',
+            'quorum', 'raw', 'rbd', 'replication', 'ssh', 'vdi', 'vhdx', 'vmdk',
             'vpc', 'vvfat' ] }
 
 ##
@@ -2625,29 +2638,29 @@
 ##
 # @BlockdevOptionsIscsi:
 #
-# @transport        The iscsi transport type
+# @transport:       The iscsi transport type
 #
-# @portal           The address of the iscsi portal
+# @portal:          The address of the iscsi portal
 #
-# @target           The target iqn name
+# @target:          The target iqn name
 #
-# @lun              #optional LUN to connect to. Defaults to 0.
+# @lun:             #optional LUN to connect to. Defaults to 0.
 #
-# @user             #optional User name to log in with. If omitted, no CHAP
+# @user:            #optional User name to log in with. If omitted, no CHAP
 #                   authentication is performed.
 #
-# @password-secret  #optional The ID of a QCryptoSecret object providing
+# @password-secret: #optional The ID of a QCryptoSecret object providing
 #                   the password for the login. This option is required if
 #                   @user is specified.
 #
-# @initiator-name   #optional The iqn name we want to identify to the target
+# @initiator-name:  #optional The iqn name we want to identify to the target
 #                   as. If this option is not specified, an initiator name is
 #                   generated automatically.
 #
-# @header-digest    #optional The desired header digest. Defaults to
+# @header-digest:   #optional The desired header digest. Defaults to
 #                   none-crc32c.
 #
-# @timeout          #optional Timeout in seconds after which a request will
+# @timeout:         #optional Timeout in seconds after which a request will
 #                   timeout. 0 means no timeout and is the default.
 #
 # Driver specific block device options for iscsi
@@ -2665,6 +2678,63 @@
             '*header-digest': 'IscsiHeaderDigest',
             '*timeout': 'int' } }
 
+
+##
+# @RbdAuthSupport:
+#
+# An enumeration of RBD auth support
+#
+# Since: 2.9
+##
+{ 'enum': 'RbdAuthSupport',
+  'data': [ 'cephx', 'none' ] }
+
+
+##
+# @RbdAuthMethod:
+#
+# An enumeration of rados auth_supported types
+#
+# Since: 2.9
+##
+{ 'struct': 'RbdAuthMethod',
+  'data': { 'auth': 'RbdAuthSupport' } }
+
+##
+# @BlockdevOptionsRbd:
+#
+# @pool:               Ceph pool name.
+#
+# @image:              Image name in the Ceph pool.
+#
+# @conf:               #optional path to Ceph configuration file.  Values
+#                      in the configuration file will be overridden by
+#                      options specified via QAPI.
+#
+# @snapshot:           #optional Ceph snapshot name.
+#
+# @user:               #optional Ceph id name.
+#
+# @server:             #optional Monitor host address and port.  This maps
+#                      to the "mon_host" Ceph option.
+#
+# @auth-supported:     #optional Authentication supported.
+#
+# @password-secret:    #optional The ID of a QCryptoSecret object providing
+#                      the password for the login.
+#
+# Since: 2.9
+##
+{ 'struct': 'BlockdevOptionsRbd',
+  'data': { 'pool': 'str',
+            'image': 'str',
+            '*conf': 'str',
+            '*snapshot': 'str',
+            '*user': 'str',
+            '*server': ['InetSocketAddress'],
+            '*auth-supported': ['RbdAuthMethod'],
+            '*password-secret': 'str' } }
+
 ##
 # @ReplicationMode:
 #
@@ -2863,7 +2933,7 @@
       'qed':        'BlockdevOptionsGenericCOWFormat',
       'quorum':     'BlockdevOptionsQuorum',
       'raw':        'BlockdevOptionsRaw',
-# TODO rbd: Wait for structured options
+      'rbd':        'BlockdevOptionsRbd',
       'replication':'BlockdevOptionsReplication',
 # TODO sheepdog: Wait for structured options
       'ssh':        'BlockdevOptionsSsh',
diff --git a/qdev-monitor.c b/qdev-monitor.c
index 549f45f066..5f2fcdfc45 100644
--- a/qdev-monitor.c
+++ b/qdev-monitor.c
@@ -29,7 +29,6 @@
 #include "qemu/error-report.h"
 #include "qemu/help_option.h"
 #include "sysemu/block-backend.h"
-#include "migration/migration.h"
 
 /*
  * Aliases were a bad idea from the start.  Let's keep them
@@ -579,14 +578,6 @@ DeviceState *qdev_device_add(QemuOpts *opts, Error **errp)
         return NULL;
     }
 
-    if (only_migratable) {
-        if (dc->vmsd->unmigratable) {
-            error_setg(errp, "Device %s is not migratable, but "
-                       "--only-migratable was specified", driver);
-            return NULL;
-        }
-    }
-
     /* find bus */
     path = qemu_opt_get(opts, "bus");
     if (path != NULL) {
diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx
index f054599a91..9c9702cc62 100644
--- a/qemu-img-cmds.hx
+++ b/qemu-img-cmds.hx
@@ -40,9 +40,9 @@ STEXI
 ETEXI
 
 DEF("convert", img_convert,
-    "convert [--object objectdef] [--image-opts] [-c] [-p] [-q] [-n] [-f fmt] [-t cache] [-T src_cache] [-O output_fmt] [-o options] [-s snapshot_id_or_name] [-l snapshot_param] [-S sparse_size] filename [filename2 [...]] output_filename")
+    "convert [--object objectdef] [--image-opts] [-c] [-p] [-q] [-n] [-f fmt] [-t cache] [-T src_cache] [-O output_fmt] [-o options] [-s snapshot_id_or_name] [-l snapshot_param] [-S sparse_size] [-m num_coroutines] [-W] filename [filename2 [...]] output_filename")
 STEXI
-@item convert [--object @var{objectdef}] [--image-opts] [-c] [-p] [-q] [-n] [-f @var{fmt}] [-t @var{cache}] [-T @var{src_cache}] [-O @var{output_fmt}] [-o @var{options}] [-s @var{snapshot_id_or_name}] [-l @var{snapshot_param}] [-S @var{sparse_size}] @var{filename} [@var{filename2} [...]] @var{output_filename}
+@item convert [--object @var{objectdef}] [--image-opts] [-c] [-p] [-q] [-n] [-f @var{fmt}] [-t @var{cache}] [-T @var{src_cache}] [-O @var{output_fmt}] [-o @var{options}] [-s @var{snapshot_id_or_name}] [-l @var{snapshot_param}] [-S @var{sparse_size}] [-m @var{num_coroutines}] [-W] @var{filename} [@var{filename2} [...]] @var{output_filename}
 ETEXI
 
 DEF("dd", img_dd,
diff --git a/qemu-img.c b/qemu-img.c
index df3aefd35a..98b836b030 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -156,6 +156,11 @@ static void QEMU_NORETURN help(void)
            "       kinds of errors, with a higher risk of choosing the wrong fix or\n"
            "       hiding corruption that has already occurred.\n"
            "\n"
+           "Parameters to convert subcommand:\n"
+           "  '-m' specifies how many coroutines work in parallel during the convert\n"
+           "       process (defaults to 8)\n"
+           "  '-W' allow to write to the target out of order rather than sequential\n"
+           "\n"
            "Parameters to snapshot subcommand:\n"
            "  'snapshot' is the name of the snapshot to create, apply or delete\n"
            "  '-a' applies a snapshot (revert disk to saved state)\n"
@@ -809,6 +814,8 @@ static void run_block_job(BlockJob *job, Error **errp)
 {
     AioContext *aio_context = blk_get_aio_context(job->blk);
 
+    /* FIXME In error cases, the job simply goes away and we access a dangling
+     * pointer below. */
     aio_context_acquire(aio_context);
     do {
         aio_poll(aio_context, true);
@@ -830,6 +837,7 @@ static int img_commit(int argc, char **argv)
     const char *filename, *fmt, *cache, *base;
     BlockBackend *blk;
     BlockDriverState *bs, *base_bs;
+    BlockJob *job;
     bool progress = false, quiet = false, drop = false;
     bool writethrough;
     Error *local_err = NULL;
@@ -950,8 +958,8 @@ static int img_commit(int argc, char **argv)
     aio_context = bdrv_get_aio_context(bs);
     aio_context_acquire(aio_context);
     commit_active_start("commit", bs, base_bs, BLOCK_JOB_DEFAULT, 0,
-                        BLOCKDEV_ON_ERROR_REPORT, common_block_job_cb, &cbi,
-                        &local_err, false);
+                        BLOCKDEV_ON_ERROR_REPORT, NULL, common_block_job_cb,
+                        &cbi, &local_err, false);
     aio_context_release(aio_context);
     if (local_err) {
         goto done;
@@ -965,7 +973,8 @@ static int img_commit(int argc, char **argv)
         bdrv_ref(bs);
     }
 
-    run_block_job(bs->job, &local_err);
+    job = block_job_get("commit");
+    run_block_job(job, &local_err);
     if (local_err) {
         goto unref_backing;
     }
@@ -1462,48 +1471,61 @@ enum ImgConvertBlockStatus {
     BLK_BACKING_FILE,
 };
 
+#define MAX_COROUTINES 16
+
 typedef struct ImgConvertState {
     BlockBackend **src;
     int64_t *src_sectors;
-    int src_cur, src_num;
-    int64_t src_cur_offset;
+    int src_num;
     int64_t total_sectors;
     int64_t allocated_sectors;
+    int64_t allocated_done;
+    int64_t sector_num;
+    int64_t wr_offs;
     enum ImgConvertBlockStatus status;
     int64_t sector_next_status;
     BlockBackend *target;
     bool has_zero_init;
     bool compressed;
     bool target_has_backing;
+    bool wr_in_order;
     int min_sparse;
     size_t cluster_sectors;
     size_t buf_sectors;
+    int num_coroutines;
+    int running_coroutines;
+    Coroutine *co[MAX_COROUTINES];
+    int64_t wait_sector_num[MAX_COROUTINES];
+    CoMutex lock;
+    int ret;
 } ImgConvertState;
 
-static void convert_select_part(ImgConvertState *s, int64_t sector_num)
+static void convert_select_part(ImgConvertState *s, int64_t sector_num,
+                                int *src_cur, int64_t *src_cur_offset)
 {
-    assert(sector_num >= s->src_cur_offset);
-    while (sector_num - s->src_cur_offset >= s->src_sectors[s->src_cur]) {
-        s->src_cur_offset += s->src_sectors[s->src_cur];
-        s->src_cur++;
-        assert(s->src_cur < s->src_num);
+    *src_cur = 0;
+    *src_cur_offset = 0;
+    while (sector_num - *src_cur_offset >= s->src_sectors[*src_cur]) {
+        *src_cur_offset += s->src_sectors[*src_cur];
+        (*src_cur)++;
+        assert(*src_cur < s->src_num);
     }
 }
 
 static int convert_iteration_sectors(ImgConvertState *s, int64_t sector_num)
 {
-    int64_t ret;
-    int n;
+    int64_t ret, src_cur_offset;
+    int n, src_cur;
 
-    convert_select_part(s, sector_num);
+    convert_select_part(s, sector_num, &src_cur, &src_cur_offset);
 
     assert(s->total_sectors > sector_num);
     n = MIN(s->total_sectors - sector_num, BDRV_REQUEST_MAX_SECTORS);
 
     if (s->sector_next_status <= sector_num) {
         BlockDriverState *file;
-        ret = bdrv_get_block_status(blk_bs(s->src[s->src_cur]),
-                                    sector_num - s->src_cur_offset,
+        ret = bdrv_get_block_status(blk_bs(s->src[src_cur]),
+                                    sector_num - src_cur_offset,
                                     n, &n, &file);
         if (ret < 0) {
             return ret;
@@ -1519,8 +1541,8 @@ static int convert_iteration_sectors(ImgConvertState *s, int64_t sector_num)
             /* Check block status of the backing file chain to avoid
              * needlessly reading zeroes and limiting the iteration to the
              * buffer size */
-            ret = bdrv_get_block_status_above(blk_bs(s->src[s->src_cur]), NULL,
-                                              sector_num - s->src_cur_offset,
+            ret = bdrv_get_block_status_above(blk_bs(s->src[src_cur]), NULL,
+                                              sector_num - src_cur_offset,
                                               n, &n, &file);
             if (ret < 0) {
                 return ret;
@@ -1558,28 +1580,34 @@ static int convert_iteration_sectors(ImgConvertState *s, int64_t sector_num)
     return n;
 }
 
-static int convert_read(ImgConvertState *s, int64_t sector_num, int nb_sectors,
-                        uint8_t *buf)
+static int coroutine_fn convert_co_read(ImgConvertState *s, int64_t sector_num,
+                                        int nb_sectors, uint8_t *buf)
 {
-    int n;
-    int ret;
+    int n, ret;
+    QEMUIOVector qiov;
+    struct iovec iov;
 
     assert(nb_sectors <= s->buf_sectors);
     while (nb_sectors > 0) {
         BlockBackend *blk;
-        int64_t bs_sectors;
+        int src_cur;
+        int64_t bs_sectors, src_cur_offset;
 
         /* In the case of compression with multiple source files, we can get a
          * nb_sectors that spreads into the next part. So we must be able to
          * read across multiple BDSes for one convert_read() call. */
-        convert_select_part(s, sector_num);
-        blk = s->src[s->src_cur];
-        bs_sectors = s->src_sectors[s->src_cur];
-
-        n = MIN(nb_sectors, bs_sectors - (sector_num - s->src_cur_offset));
-        ret = blk_pread(blk,
-                        (sector_num - s->src_cur_offset) << BDRV_SECTOR_BITS,
-                        buf, n << BDRV_SECTOR_BITS);
+        convert_select_part(s, sector_num, &src_cur, &src_cur_offset);
+        blk = s->src[src_cur];
+        bs_sectors = s->src_sectors[src_cur];
+
+        n = MIN(nb_sectors, bs_sectors - (sector_num - src_cur_offset));
+        iov.iov_base = buf;
+        iov.iov_len = n << BDRV_SECTOR_BITS;
+        qemu_iovec_init_external(&qiov, &iov, 1);
+
+        ret = blk_co_preadv(
+                blk, (sector_num - src_cur_offset) << BDRV_SECTOR_BITS,
+                n << BDRV_SECTOR_BITS, &qiov, 0);
         if (ret < 0) {
             return ret;
         }
@@ -1592,15 +1620,18 @@ static int convert_read(ImgConvertState *s, int64_t sector_num, int nb_sectors,
     return 0;
 }
 
-static int convert_write(ImgConvertState *s, int64_t sector_num, int nb_sectors,
-                         const uint8_t *buf)
+
+static int coroutine_fn convert_co_write(ImgConvertState *s, int64_t sector_num,
+                                         int nb_sectors, uint8_t *buf,
+                                         enum ImgConvertBlockStatus status)
 {
     int ret;
+    QEMUIOVector qiov;
+    struct iovec iov;
 
     while (nb_sectors > 0) {
         int n = nb_sectors;
-
-        switch (s->status) {
+        switch (status) {
         case BLK_BACKING_FILE:
             /* If we have a backing file, leave clusters unallocated that are
              * unallocated in the source image, so that the backing file is
@@ -1621,9 +1652,13 @@ static int convert_write(ImgConvertState *s, int64_t sector_num, int nb_sectors,
                     break;
                 }
 
-                ret = blk_pwrite_compressed(s->target,
-                                            sector_num << BDRV_SECTOR_BITS,
-                                            buf, n << BDRV_SECTOR_BITS);
+                iov.iov_base = buf;
+                iov.iov_len = n << BDRV_SECTOR_BITS;
+                qemu_iovec_init_external(&qiov, &iov, 1);
+
+                ret = blk_co_pwritev(s->target, sector_num << BDRV_SECTOR_BITS,
+                                     n << BDRV_SECTOR_BITS, &qiov,
+                                     BDRV_REQ_WRITE_COMPRESSED);
                 if (ret < 0) {
                     return ret;
                 }
@@ -1636,8 +1671,12 @@ static int convert_write(ImgConvertState *s, int64_t sector_num, int nb_sectors,
             if (!s->min_sparse ||
                 is_allocated_sectors_min(buf, n, &n, s->min_sparse))
             {
-                ret = blk_pwrite(s->target, sector_num << BDRV_SECTOR_BITS,
-                                 buf, n << BDRV_SECTOR_BITS, 0);
+                iov.iov_base = buf;
+                iov.iov_len = n << BDRV_SECTOR_BITS;
+                qemu_iovec_init_external(&qiov, &iov, 1);
+
+                ret = blk_co_pwritev(s->target, sector_num << BDRV_SECTOR_BITS,
+                                     n << BDRV_SECTOR_BITS, &qiov, 0);
                 if (ret < 0) {
                     return ret;
                 }
@@ -1649,8 +1688,9 @@ static int convert_write(ImgConvertState *s, int64_t sector_num, int nb_sectors,
             if (s->has_zero_init) {
                 break;
             }
-            ret = blk_pwrite_zeroes(s->target, sector_num << BDRV_SECTOR_BITS,
-                                    n << BDRV_SECTOR_BITS, 0);
+            ret = blk_co_pwrite_zeroes(s->target,
+                                       sector_num << BDRV_SECTOR_BITS,
+                                       n << BDRV_SECTOR_BITS, 0);
             if (ret < 0) {
                 return ret;
             }
@@ -1665,12 +1705,122 @@ static int convert_write(ImgConvertState *s, int64_t sector_num, int nb_sectors,
     return 0;
 }
 
-static int convert_do_copy(ImgConvertState *s)
+static void coroutine_fn convert_co_do_copy(void *opaque)
 {
+    ImgConvertState *s = opaque;
     uint8_t *buf = NULL;
-    int64_t sector_num, allocated_done;
-    int ret;
-    int n;
+    int ret, i;
+    int index = -1;
+
+    for (i = 0; i < s->num_coroutines; i++) {
+        if (s->co[i] == qemu_coroutine_self()) {
+            index = i;
+            break;
+        }
+    }
+    assert(index >= 0);
+
+    s->running_coroutines++;
+    buf = blk_blockalign(s->target, s->buf_sectors * BDRV_SECTOR_SIZE);
+
+    while (1) {
+        int n;
+        int64_t sector_num;
+        enum ImgConvertBlockStatus status;
+
+        qemu_co_mutex_lock(&s->lock);
+        if (s->ret != -EINPROGRESS || s->sector_num >= s->total_sectors) {
+            qemu_co_mutex_unlock(&s->lock);
+            goto out;
+        }
+        n = convert_iteration_sectors(s, s->sector_num);
+        if (n < 0) {
+            qemu_co_mutex_unlock(&s->lock);
+            s->ret = n;
+            goto out;
+        }
+        /* save current sector and allocation status to local variables */
+        sector_num = s->sector_num;
+        status = s->status;
+        if (!s->min_sparse && s->status == BLK_ZERO) {
+            n = MIN(n, s->buf_sectors);
+        }
+        /* increment global sector counter so that other coroutines can
+         * already continue reading beyond this request */
+        s->sector_num += n;
+        qemu_co_mutex_unlock(&s->lock);
+
+        if (status == BLK_DATA || (!s->min_sparse && status == BLK_ZERO)) {
+            s->allocated_done += n;
+            qemu_progress_print(100.0 * s->allocated_done /
+                                        s->allocated_sectors, 0);
+        }
+
+        if (status == BLK_DATA) {
+            ret = convert_co_read(s, sector_num, n, buf);
+            if (ret < 0) {
+                error_report("error while reading sector %" PRId64
+                             ": %s", sector_num, strerror(-ret));
+                s->ret = ret;
+                goto out;
+            }
+        } else if (!s->min_sparse && status == BLK_ZERO) {
+            status = BLK_DATA;
+            memset(buf, 0x00, n * BDRV_SECTOR_SIZE);
+        }
+
+        if (s->wr_in_order) {
+            /* keep writes in order */
+            while (s->wr_offs != sector_num) {
+                if (s->ret != -EINPROGRESS) {
+                    goto out;
+                }
+                s->wait_sector_num[index] = sector_num;
+                qemu_coroutine_yield();
+            }
+            s->wait_sector_num[index] = -1;
+        }
+
+        ret = convert_co_write(s, sector_num, n, buf, status);
+        if (ret < 0) {
+            error_report("error while writing sector %" PRId64
+                         ": %s", sector_num, strerror(-ret));
+            s->ret = ret;
+            goto out;
+        }
+
+        if (s->wr_in_order) {
+            /* reenter the coroutine that might have waited
+             * for this write to complete */
+            s->wr_offs = sector_num + n;
+            for (i = 0; i < s->num_coroutines; i++) {
+                if (s->co[i] && s->wait_sector_num[i] == s->wr_offs) {
+                    /*
+                     * A -> B -> A cannot occur because A has
+                     * s->wait_sector_num[i] == -1 during A -> B.  Therefore
+                     * B will never enter A during this time window.
+                     */
+                    qemu_coroutine_enter(s->co[i]);
+                    break;
+                }
+            }
+        }
+    }
+
+out:
+    qemu_vfree(buf);
+    s->co[index] = NULL;
+    s->running_coroutines--;
+    if (!s->running_coroutines && s->ret == -EINPROGRESS) {
+        /* the convert job finished successfully */
+        s->ret = 0;
+    }
+}
+
+static int convert_do_copy(ImgConvertState *s)
+{
+    int ret, i, n;
+    int64_t sector_num = 0;
 
     /* Check whether we have zero initialisation or can get it efficiently */
     s->has_zero_init = s->min_sparse && !s->target_has_backing
@@ -1691,21 +1841,15 @@ static int convert_do_copy(ImgConvertState *s)
     if (s->compressed) {
         if (s->cluster_sectors <= 0 || s->cluster_sectors > s->buf_sectors) {
             error_report("invalid cluster size");
-            ret = -EINVAL;
-            goto fail;
+            return -EINVAL;
         }
         s->buf_sectors = s->cluster_sectors;
     }
-    buf = blk_blockalign(s->target, s->buf_sectors * BDRV_SECTOR_SIZE);
 
-    /* Calculate allocated sectors for progress */
-    s->allocated_sectors = 0;
-    sector_num = 0;
     while (sector_num < s->total_sectors) {
         n = convert_iteration_sectors(s, sector_num);
         if (n < 0) {
-            ret = n;
-            goto fail;
+            return n;
         }
         if (s->status == BLK_DATA || (!s->min_sparse && s->status == BLK_ZERO))
         {
@@ -1715,61 +1859,29 @@ static int convert_do_copy(ImgConvertState *s)
     }
 
     /* Do the copy */
-    s->src_cur = 0;
-    s->src_cur_offset = 0;
     s->sector_next_status = 0;
+    s->ret = -EINPROGRESS;
 
-    sector_num = 0;
-    allocated_done = 0;
-
-    while (sector_num < s->total_sectors) {
-        n = convert_iteration_sectors(s, sector_num);
-        if (n < 0) {
-            ret = n;
-            goto fail;
-        }
-        if (s->status == BLK_DATA || (!s->min_sparse && s->status == BLK_ZERO))
-        {
-            allocated_done += n;
-            qemu_progress_print(100.0 * allocated_done / s->allocated_sectors,
-                                0);
-        }
-
-        if (s->status == BLK_DATA) {
-            ret = convert_read(s, sector_num, n, buf);
-            if (ret < 0) {
-                error_report("error while reading sector %" PRId64
-                             ": %s", sector_num, strerror(-ret));
-                goto fail;
-            }
-        } else if (!s->min_sparse && s->status == BLK_ZERO) {
-            n = MIN(n, s->buf_sectors);
-            memset(buf, 0, n * BDRV_SECTOR_SIZE);
-            s->status = BLK_DATA;
-        }
-
-        ret = convert_write(s, sector_num, n, buf);
-        if (ret < 0) {
-            error_report("error while writing sector %" PRId64
-                         ": %s", sector_num, strerror(-ret));
-            goto fail;
-        }
+    qemu_co_mutex_init(&s->lock);
+    for (i = 0; i < s->num_coroutines; i++) {
+        s->co[i] = qemu_coroutine_create(convert_co_do_copy, s);
+        s->wait_sector_num[i] = -1;
+        qemu_coroutine_enter(s->co[i]);
+    }
 
-        sector_num += n;
+    while (s->ret == -EINPROGRESS) {
+        main_loop_wait(false);
     }
 
-    if (s->compressed) {
+    if (s->compressed && !s->ret) {
         /* signal EOF to align */
         ret = blk_pwrite_compressed(s->target, 0, NULL, 0);
         if (ret < 0) {
-            goto fail;
+            return ret;
         }
     }
 
-    ret = 0;
-fail:
-    qemu_vfree(buf);
-    return ret;
+    return s->ret;
 }
 
 static int img_convert(int argc, char **argv)
@@ -1797,6 +1909,8 @@ static int img_convert(int argc, char **argv)
     QemuOpts *sn_opts = NULL;
     ImgConvertState state;
     bool image_opts = false;
+    bool wr_in_order = true;
+    long num_coroutines = 8;
 
     fmt = NULL;
     out_fmt = "raw";
@@ -1812,7 +1926,7 @@ static int img_convert(int argc, char **argv)
             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
             {0, 0, 0, 0}
         };
-        c = getopt_long(argc, argv, "hf:O:B:ce6o:s:l:S:pt:T:qn",
+        c = getopt_long(argc, argv, "hf:O:B:ce6o:s:l:S:pt:T:qnm:W",
                         long_options, NULL);
         if (c == -1) {
             break;
@@ -1904,6 +2018,18 @@ static int img_convert(int argc, char **argv)
         case 'n':
             skip_create = 1;
             break;
+        case 'm':
+            if (qemu_strtol(optarg, NULL, 0, &num_coroutines) ||
+                num_coroutines < 1 || num_coroutines > MAX_COROUTINES) {
+                error_report("Invalid number of coroutines. Allowed number of"
+                             " coroutines is between 1 and %d", MAX_COROUTINES);
+                ret = -1;
+                goto fail_getopt;
+            }
+            break;
+        case 'W':
+            wr_in_order = false;
+            break;
         case OPTION_OBJECT:
             opts = qemu_opts_parse_noisily(&qemu_object_opts,
                                            optarg, true);
@@ -1923,6 +2049,12 @@ static int img_convert(int argc, char **argv)
         goto fail_getopt;
     }
 
+    if (!wr_in_order && compress) {
+        error_report("Out of order write and compress are mutually exclusive");
+        ret = -1;
+        goto fail_getopt;
+    }
+
     /* Initialize before goto out */
     if (quiet) {
         progress = 0;
@@ -2163,6 +2295,8 @@ static int img_convert(int argc, char **argv)
         .min_sparse         = min_sparse,
         .cluster_sectors    = cluster_sectors,
         .buf_sectors        = bufsectors,
+        .wr_in_order        = wr_in_order,
+        .num_coroutines     = num_coroutines,
     };
     ret = convert_do_copy(&state);
 
@@ -3289,7 +3423,7 @@ static int img_resize(int argc, char **argv)
     qemu_opts_del(param);
 
     blk = img_open(image_opts, filename, fmt,
-                   BDRV_O_RDWR, false, quiet);
+                   BDRV_O_RDWR | BDRV_O_RESIZE, false, quiet);
     if (!blk) {
         ret = -1;
         goto out;
diff --git a/qemu-img.texi b/qemu-img.texi
index 174aae38b7..c81db3e81c 100644
--- a/qemu-img.texi
+++ b/qemu-img.texi
@@ -137,6 +137,12 @@ Parameters to convert subcommand:
 
 @item -n
 Skip the creation of the target volume
+@item -m
+Number of parallel coroutines for the convert process
+@item -W
+Allow out-of-order writes to the destination. This option improves performance,
+but is only recommended for preallocated devices like host devices or other
+raw block devices.
 @end table
 
 Parameters to dd subcommand:
@@ -296,7 +302,7 @@ Error on reading data
 
 @end table
 
-@item convert [-c] [-p] [-n] [-f @var{fmt}] [-t @var{cache}] [-T @var{src_cache}] [-O @var{output_fmt}] [-o @var{options}] [-s @var{snapshot_id_or_name}] [-l @var{snapshot_param}] [-S @var{sparse_size}] @var{filename} [@var{filename2} [...]] @var{output_filename}
+@item convert [-c] [-p] [-n] [-f @var{fmt}] [-t @var{cache}] [-T @var{src_cache}] [-O @var{output_fmt}] [-o @var{options}] [-s @var{snapshot_id_or_name}] [-l @var{snapshot_param}] [-m @var{num_coroutines}] [-W] [-S @var{sparse_size}] @var{filename} [@var{filename2} [...]] @var{output_filename}
 
 Convert the disk image @var{filename} or a snapshot @var{snapshot_param}(@var{snapshot_id_or_name} is deprecated)
 to disk image @var{output_filename} using format @var{output_fmt}. It can be optionally compressed (@code{-c}
@@ -326,6 +332,14 @@ skipped. This is useful for formats such as @code{rbd} if the target
 volume has already been created with site specific options that cannot
 be supplied through qemu-img.
 
+Out of order writes can be enabled with @code{-W} to improve performance.
+This is only recommended for preallocated devices like host devices or other
+raw block devices. Out of order write does not work in combination with
+creating compressed images.
+
+@var{num_coroutines} specifies how many coroutines work in parallel during
+the convert process (defaults to 8).
+
 @item dd [-f @var{fmt}] [-O @var{output_fmt}] [bs=@var{block_size}] [count=@var{blocks}] [skip=@var{blocks}] if=@var{input} of=@var{output}
 
 Dd copies from @var{input} file to @var{output} file converting it from
diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c
index 7ac1576d4c..2c48f9ce1a 100644
--- a/qemu-io-cmds.c
+++ b/qemu-io-cmds.c
@@ -83,6 +83,29 @@ static int command(BlockBackend *blk, const cmdinfo_t *ct, int argc,
         }
         return 0;
     }
+
+    /* Request additional permissions if necessary for this command. The caller
+     * is responsible for restoring the original permissions afterwards if this
+     * is what it wants. */
+    if (ct->perm && blk_is_available(blk)) {
+        uint64_t orig_perm, orig_shared_perm;
+        blk_get_perm(blk, &orig_perm, &orig_shared_perm);
+
+        if (ct->perm & ~orig_perm) {
+            uint64_t new_perm;
+            Error *local_err = NULL;
+            int ret;
+
+            new_perm = orig_perm | ct->perm;
+
+            ret = blk_set_perm(blk, new_perm, orig_shared_perm, &local_err);
+            if (ret < 0) {
+                error_report_err(local_err);
+                return 0;
+            }
+        }
+    }
+
     optind = 0;
     return ct->cfunc(blk, argc, argv);
 }
@@ -918,6 +941,7 @@ static const cmdinfo_t write_cmd = {
     .name       = "write",
     .altname    = "w",
     .cfunc      = write_f,
+    .perm       = BLK_PERM_WRITE,
     .argmin     = 2,
     .argmax     = -1,
     .args       = "[-bcCfquz] [-P pattern] off len",
@@ -1093,6 +1117,7 @@ static int writev_f(BlockBackend *blk, int argc, char **argv);
 static const cmdinfo_t writev_cmd = {
     .name       = "writev",
     .cfunc      = writev_f,
+    .perm       = BLK_PERM_WRITE,
     .argmin     = 2,
     .argmax     = -1,
     .args       = "[-Cfq] [-P pattern] off len [len..]",
@@ -1392,6 +1417,7 @@ static int aio_write_f(BlockBackend *blk, int argc, char **argv);
 static const cmdinfo_t aio_write_cmd = {
     .name       = "aio_write",
     .cfunc      = aio_write_f,
+    .perm       = BLK_PERM_WRITE,
     .argmin     = 2,
     .argmax     = -1,
     .args       = "[-Cfiquz] [-P pattern] off len [len..]",
@@ -1556,6 +1582,7 @@ static const cmdinfo_t truncate_cmd = {
     .name       = "truncate",
     .altname    = "t",
     .cfunc      = truncate_f,
+    .perm       = BLK_PERM_WRITE | BLK_PERM_RESIZE,
     .argmin     = 1,
     .argmax     = 1,
     .args       = "off",
@@ -1653,6 +1680,7 @@ static const cmdinfo_t discard_cmd = {
     .name       = "discard",
     .altname    = "d",
     .cfunc      = discard_f,
+    .perm       = BLK_PERM_WRITE,
     .argmin     = 2,
     .argmax     = -1,
     .args       = "[-Cq] off len",
diff --git a/qemu-options.hx b/qemu-options.hx
index bf458f83c3..c85f77d1d8 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -744,7 +744,12 @@ ETEXI
 
 DEF("fsdev", HAS_ARG, QEMU_OPTION_fsdev,
     "-fsdev fsdriver,id=id[,path=path,][security_model={mapped-xattr|mapped-file|passthrough|none}]\n"
-    " [,writeout=immediate][,readonly][,socket=socket|sock_fd=sock_fd]\n",
+    " [,writeout=immediate][,readonly][,socket=socket|sock_fd=sock_fd]\n"
+    " [[,throttling.bps-total=b]|[[,throttling.bps-read=r][,throttling.bps-write=w]]]\n"
+    " [[,throttling.iops-total=i]|[[,throttling.iops-read=r][,throttling.iops-write=w]]]\n"
+    " [[,throttling.bps-total-max=bm]|[[,throttling.bps-read-max=rm][,throttling.bps-write-max=wm]]]\n"
+    " [[,throttling.iops-total-max=im]|[[,throttling.iops-read-max=irm][,throttling.iops-write-max=iwm]]]\n"
+    " [[,throttling.iops-size=is]]\n",
     QEMU_ARCH_ALL)
 
 STEXI
@@ -2146,7 +2151,7 @@ Example:
 @example
 qemu -m 512 -object memory-backend-file,id=mem,size=512M,mem-path=/hugetlbfs,share=on \
      -numa node,memdev=mem \
-     -chardev socket,path=/path/to/socket \
+     -chardev socket,id=chr0,path=/path/to/socket \
      -netdev type=vhost-user,id=net0,chardev=chr0 \
      -device virtio-net-pci,netdev=net0
 @end example
diff --git a/qtest.c b/qtest.c
index a6858272eb..5aa6636ca8 100644
--- a/qtest.c
+++ b/qtest.c
@@ -240,6 +240,7 @@ static void GCC_FMT_ATTR(2, 3) qtest_sendf(CharBackend *chr,
     va_start(ap, fmt);
     buffer = g_strdup_vprintf(fmt, ap);
     qtest_send(chr, buffer);
+    g_free(buffer);
     va_end(ap);
 }
 
diff --git a/replay/Makefile.objs b/replay/Makefile.objs
index b2afd4030a..cee6539a23 100644
--- a/replay/Makefile.objs
+++ b/replay/Makefile.objs
@@ -6,3 +6,4 @@ common-obj-y += replay-input.o
 common-obj-y += replay-char.o
 common-obj-y += replay-snapshot.o
 common-obj-y += replay-net.o
+common-obj-y += replay-audio.o
\ No newline at end of file
diff --git a/replay/replay-audio.c b/replay/replay-audio.c
new file mode 100644
index 0000000000..3d837434d4
--- /dev/null
+++ b/replay/replay-audio.c
@@ -0,0 +1,79 @@
+/*
+ * replay-audio.c
+ *
+ * Copyright (c) 2010-2017 Institute for System Programming
+ *                         of the Russian Academy of Sciences.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/error-report.h"
+#include "sysemu/replay.h"
+#include "replay-internal.h"
+#include "sysemu/sysemu.h"
+#include "audio/audio.h"
+
+void replay_audio_out(int *played)
+{
+    if (replay_mode == REPLAY_MODE_RECORD) {
+        replay_save_instructions();
+        replay_mutex_lock();
+        replay_put_event(EVENT_AUDIO_OUT);
+        replay_put_dword(*played);
+        replay_mutex_unlock();
+    } else if (replay_mode == REPLAY_MODE_PLAY) {
+        replay_account_executed_instructions();
+        replay_mutex_lock();
+        if (replay_next_event_is(EVENT_AUDIO_OUT)) {
+            *played = replay_get_dword();
+            replay_finish_event();
+            replay_mutex_unlock();
+        } else {
+            replay_mutex_unlock();
+            error_report("Missing audio out event in the replay log");
+            abort();
+        }
+    }
+}
+
+void replay_audio_in(int *recorded, void *samples, int *wpos, int size)
+{
+    int pos;
+    uint64_t left, right;
+    if (replay_mode == REPLAY_MODE_RECORD) {
+        replay_save_instructions();
+        replay_mutex_lock();
+        replay_put_event(EVENT_AUDIO_IN);
+        replay_put_dword(*recorded);
+        replay_put_dword(*wpos);
+        for (pos = (*wpos - *recorded + size) % size ; pos != *wpos
+             ; pos = (pos + 1) % size) {
+            audio_sample_to_uint64(samples, pos, &left, &right);
+            replay_put_qword(left);
+            replay_put_qword(right);
+        }
+        replay_mutex_unlock();
+    } else if (replay_mode == REPLAY_MODE_PLAY) {
+        replay_account_executed_instructions();
+        replay_mutex_lock();
+        if (replay_next_event_is(EVENT_AUDIO_IN)) {
+            *recorded = replay_get_dword();
+            *wpos = replay_get_dword();
+            for (pos = (*wpos - *recorded + size) % size ; pos != *wpos
+                 ; pos = (pos + 1) % size) {
+                left = replay_get_qword();
+                right = replay_get_qword();
+                audio_sample_from_uint64(samples, pos, left, right);
+            }
+            replay_finish_event();
+            replay_mutex_unlock();
+        } else {
+            replay_mutex_unlock();
+            error_report("Missing audio in event in the replay log");
+            abort();
+        }
+    }
+}
diff --git a/replay/replay-internal.h b/replay/replay-internal.h
index c26d0795f2..ed66ed803c 100644
--- a/replay/replay-internal.h
+++ b/replay/replay-internal.h
@@ -29,6 +29,10 @@ enum ReplayEvents {
     /* for character device read all event */
     EVENT_CHAR_READ_ALL,
     EVENT_CHAR_READ_ALL_ERROR,
+    /* for audio out event */
+    EVENT_AUDIO_OUT,
+    /* for audio in event */
+    EVENT_AUDIO_IN,
     /* for clock read/writes */
     /* some of greater codes are reserved for clocks */
     EVENT_CLOCK,
diff --git a/roms/openbios b/roms/openbios
-Subproject ef8a14e8afb47635c9c5f7524a52c3251827e29
+Subproject 0cd97cc904e71fbb461112f6756934ec6af890b
diff --git a/roms/seabios b/roms/seabios
-Subproject 8891697e3f7d84355420573efd98e94f1473676
+Subproject 5f4c7b13cdf9c450eb55645f4362ea58fa61b79
diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh
index 72cf1fbf0a..6a370a8669 100755
--- a/scripts/update-linux-headers.sh
+++ b/scripts/update-linux-headers.sh
@@ -75,7 +75,13 @@ for arch in $ARCHLIST; do
         continue
     fi
 
-    make -C "$linux" INSTALL_HDR_PATH="$tmpdir" SRCARCH=$arch headers_install
+    if [ "$arch" = x86 ]; then
+        arch_var=SRCARCH
+    else
+        arch_var=ARCH
+    fi
+
+    make -C "$linux" INSTALL_HDR_PATH="$tmpdir" $arch_var=$arch headers_install
 
     rm -rf "$output/linux-headers/asm-$arch"
     mkdir -p "$output/linux-headers/asm-$arch"
@@ -92,6 +98,11 @@ for arch in $ARCHLIST; do
         cp_portable "$tmpdir/include/asm/kvm_virtio.h" "$output/include/standard-headers/asm-s390/"
         cp_portable "$tmpdir/include/asm/virtio-ccw.h" "$output/include/standard-headers/asm-s390/"
     fi
+    if [ $arch = arm ]; then
+        cp "$tmpdir/include/asm/unistd-eabi.h" "$output/linux-headers/asm-arm/"
+        cp "$tmpdir/include/asm/unistd-oabi.h" "$output/linux-headers/asm-arm/"
+        cp "$tmpdir/include/asm/unistd-common.h" "$output/linux-headers/asm-arm/"
+    fi
     if [ $arch = x86 ]; then
         cp_portable "$tmpdir/include/asm/hyperv.h" "$output/include/standard-headers/asm-x86/"
         cp "$tmpdir/include/asm/unistd_32.h" "$output/linux-headers/asm-x86/"
diff --git a/scripts/vmstate-static-checker.py b/scripts/vmstate-static-checker.py
index 14a27e7f6a..bcef7ee28e 100755
--- a/scripts/vmstate-static-checker.py
+++ b/scripts/vmstate-static-checker.py
@@ -85,6 +85,11 @@ def check_fields_match(name, s_field, d_field):
         'xio3130-express-upstream-port': ['br.dev', 'parent_obj.parent_obj',
                                           'br.dev.exp.aer_log',
                                           'parent_obj.parent_obj.exp.aer_log'],
+        'spapr_pci': ['dma_liobn[0]', 'mig_liobn',
+                      'mem_win_addr', 'mig_mem_win_addr',
+                      'mem_win_size', 'mig_mem_win_size',
+                      'io_win_addr', 'mig_io_win_addr',
+                      'io_win_size', 'mig_io_win_size'],
     }
 
     if not name in changed_names:
diff --git a/stubs/Makefile.objs b/stubs/Makefile.objs
index aa6050f406..224f04ba69 100644
--- a/stubs/Makefile.objs
+++ b/stubs/Makefile.objs
@@ -36,3 +36,4 @@ stub-obj-y += qmp_pc_dimm_device_list.o
 stub-obj-y += target-monitor-defs.o
 stub-obj-y += target-get-monitor-def.o
 stub-obj-y += pc_madt_cpu_entry.o
+stub-obj-y += vmgenid.o
diff --git a/stubs/vmgenid.c b/stubs/vmgenid.c
new file mode 100644
index 0000000000..c64eb7a16e
--- /dev/null
+++ b/stubs/vmgenid.c
@@ -0,0 +1,9 @@
+#include "qemu/osdep.h"
+#include "qmp-commands.h"
+#include "qapi/qmp/qerror.h"
+
+GuidInfo *qmp_query_vm_generation_id(Error **errp)
+{
+    error_setg(errp, QERR_UNSUPPORTED);
+    return NULL;
+}
diff --git a/stubs/vmstate.c b/stubs/vmstate.c
index bbe158fe3b..6d52f29bb2 100644
--- a/stubs/vmstate.c
+++ b/stubs/vmstate.c
@@ -1,6 +1,7 @@
 #include "qemu/osdep.h"
 #include "qemu-common.h"
 #include "migration/vmstate.h"
+#include "migration/migration.h"
 
 const VMStateDescription vmstate_dummy = {};
 
@@ -19,3 +20,8 @@ void vmstate_unregister(DeviceState *dev,
                         void *opaque)
 {
 }
+
+int check_migratable(Object *obj, Error **err)
+{
+    return 0;
+}
diff --git a/target/alpha/cpu.h b/target/alpha/cpu.h
index b08d1601d1..691ac00c0b 100644
--- a/target/alpha/cpu.h
+++ b/target/alpha/cpu.h
@@ -28,6 +28,9 @@
 
 #define CPUArchState struct CPUAlphaState
 
+/* Alpha processors have a weak memory model */
+#define TCG_GUEST_DEFAULT_MO      (0)
+
 #include "exec/cpu-defs.h"
 
 #include "fpu/softfloat.h"
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index f7157dc0e5..04b062cb7e 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -338,13 +338,6 @@ static bool arm_v7m_cpu_exec_interrupt(CPUState *cs, int interrupt_request)
     CPUARMState *env = &cpu->env;
     bool ret = false;
 
-
-    if (interrupt_request & CPU_INTERRUPT_FIQ
-        && !(env->daif & PSTATE_F)) {
-        cs->exception_index = EXCP_FIQ;
-        cc->do_interrupt(cs);
-        ret = true;
-    }
     /* ARMv7-M interrupt return works by loading a magic value
      * into the PC.  On real hardware the load causes the
      * return to occur.  The qemu implementation performs the
@@ -354,9 +347,16 @@ static bool arm_v7m_cpu_exec_interrupt(CPUState *cs, int interrupt_request)
      * the stack if an interrupt occurred at the wrong time.
      * We avoid this by disabling interrupts when
      * pc contains a magic address.
+     *
+     * ARMv7-M interrupt masking works differently than -A or -R.
+     * There is no FIQ/IRQ distinction. Instead of I and F bits
+     * masking FIQ and IRQ interrupts, an exception is taken only
+     * if it is higher priority than the current execution priority
+     * (which depends on state like BASEPRI, FAULTMASK and the
+     * currently active exception).
      */
     if (interrupt_request & CPU_INTERRUPT_HARD
-        && !(env->daif & PSTATE_I)
+        && (armv7m_nvic_can_take_pending_exception(env->nvic))
         && (env->regs[15] < 0xfffffff0)) {
         cs->exception_index = EXCP_IRQ;
         cc->do_interrupt(cs);
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 38a8e00908..25ceaabb5d 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -57,6 +57,7 @@
 #define EXCP_VFIQ           15
 #define EXCP_SEMIHOST       16   /* semihosting call */
 #define EXCP_NOCP           17   /* v7M NOCP UsageFault */
+#define EXCP_INVSTATE       18   /* v7M INVSTATE UsageFault */
 
 #define ARMV7M_EXCP_RESET   1
 #define ARMV7M_EXCP_NMI     2
@@ -520,6 +521,8 @@ typedef struct CPUARMState {
 
     void *nvic;
     const struct arm_boot_info *boot_info;
+    /* Store GICv3CPUState to access from this struct */
+    void *gicv3state;
 } CPUARMState;
 
 /**
@@ -1356,9 +1359,27 @@ uint32_t arm_phys_excp_target_el(CPUState *cs, uint32_t excp_idx,
                                  uint32_t cur_el, bool secure);
 
 /* Interface between CPU and Interrupt controller.  */
+#ifndef CONFIG_USER_ONLY
+bool armv7m_nvic_can_take_pending_exception(void *opaque);
+#else
+static inline bool armv7m_nvic_can_take_pending_exception(void *opaque)
+{
+    return true;
+}
+#endif
 void armv7m_nvic_set_pending(void *opaque, int irq);
-int armv7m_nvic_acknowledge_irq(void *opaque);
-void armv7m_nvic_complete_irq(void *opaque, int irq);
+void armv7m_nvic_acknowledge_irq(void *opaque);
+/**
+ * armv7m_nvic_complete_irq: complete specified interrupt or exception
+ * @opaque: the NVIC
+ * @irq: the exception number to complete
+ *
+ * Returns: -1 if the irq was not active
+ *           1 if completing this irq brought us back to base (no active irqs)
+ *           0 if there is still an irq active after this one was completed
+ * (Ignoring -1, this is the same as the RETTOBASE value before completion.)
+ */
+int armv7m_nvic_complete_irq(void *opaque, int irq);
 
 /* Interface for defining coprocessor registers.
  * Registers are defined in tables of arm_cp_reginfo structs
diff --git a/target/arm/helper.c b/target/arm/helper.c
index bcedb4a808..3f4211b572 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -6002,22 +6002,165 @@ static void switch_v7m_sp(CPUARMState *env, bool new_spsel)
     }
 }
 
-static void do_v7m_exception_exit(CPUARMState *env)
+static uint32_t arm_v7m_load_vector(ARMCPU *cpu)
 {
+    CPUState *cs = CPU(cpu);
+    CPUARMState *env = &cpu->env;
+    MemTxResult result;
+    hwaddr vec = env->v7m.vecbase + env->v7m.exception * 4;
+    uint32_t addr;
+
+    addr = address_space_ldl(cs->as, vec,
+                             MEMTXATTRS_UNSPECIFIED, &result);
+    if (result != MEMTX_OK) {
+        /* Architecturally this should cause a HardFault setting HSFR.VECTTBL,
+         * which would then be immediately followed by our failing to load
+         * the entry vector for that HardFault, which is a Lockup case.
+         * Since we don't model Lockup, we just report this guest error
+         * via cpu_abort().
+         */
+        cpu_abort(cs, "Failed to read from exception vector table "
+                  "entry %08x\n", (unsigned)vec);
+    }
+    return addr;
+}
+
+static void v7m_exception_taken(ARMCPU *cpu, uint32_t lr)
+{
+    /* Do the "take the exception" parts of exception entry,
+     * but not the pushing of state to the stack. This is
+     * similar to the pseudocode ExceptionTaken() function.
+     */
+    CPUARMState *env = &cpu->env;
+    uint32_t addr;
+
+    armv7m_nvic_acknowledge_irq(env->nvic);
+    switch_v7m_sp(env, 0);
+    /* Clear IT bits */
+    env->condexec_bits = 0;
+    env->regs[14] = lr;
+    addr = arm_v7m_load_vector(cpu);
+    env->regs[15] = addr & 0xfffffffe;
+    env->thumb = addr & 1;
+}
+
+static void v7m_push_stack(ARMCPU *cpu)
+{
+    /* Do the "set up stack frame" part of exception entry,
+     * similar to pseudocode PushStack().
+     */
+    CPUARMState *env = &cpu->env;
+    uint32_t xpsr = xpsr_read(env);
+
+    /* Align stack pointer if the guest wants that */
+    if ((env->regs[13] & 4) && (env->v7m.ccr & R_V7M_CCR_STKALIGN_MASK)) {
+        env->regs[13] -= 4;
+        xpsr |= 0x200;
+    }
+    /* Switch to the handler mode.  */
+    v7m_push(env, xpsr);
+    v7m_push(env, env->regs[15]);
+    v7m_push(env, env->regs[14]);
+    v7m_push(env, env->regs[12]);
+    v7m_push(env, env->regs[3]);
+    v7m_push(env, env->regs[2]);
+    v7m_push(env, env->regs[1]);
+    v7m_push(env, env->regs[0]);
+}
+
+static void do_v7m_exception_exit(ARMCPU *cpu)
+{
+    CPUARMState *env = &cpu->env;
     uint32_t type;
     uint32_t xpsr;
-
+    bool ufault = false;
+    bool return_to_sp_process = false;
+    bool return_to_handler = false;
+    bool rettobase = false;
+
+    /* We can only get here from an EXCP_EXCEPTION_EXIT, and
+     * arm_v7m_do_unassigned_access() enforces the architectural rule
+     * that jumps to magic addresses don't have magic behaviour unless
+     * we're in Handler mode (compare pseudocode BXWritePC()).
+     */
+    assert(env->v7m.exception != 0);
+
+    /* In the spec pseudocode ExceptionReturn() is called directly
+     * from BXWritePC() and gets the full target PC value including
+     * bit zero. In QEMU's implementation we treat it as a normal
+     * jump-to-register (which is then caught later on), and so split
+     * the target value up between env->regs[15] and env->thumb in
+     * gen_bx(). Reconstitute it.
+     */
     type = env->regs[15];
+    if (env->thumb) {
+        type |= 1;
+    }
+
+    qemu_log_mask(CPU_LOG_INT, "Exception return: magic PC %" PRIx32
+                  " previous exception %d\n",
+                  type, env->v7m.exception);
+
+    if (extract32(type, 5, 23) != extract32(-1, 5, 23)) {
+        qemu_log_mask(LOG_GUEST_ERROR, "M profile: zero high bits in exception "
+                      "exit PC value 0x%" PRIx32 " are UNPREDICTABLE\n", type);
+    }
+
     if (env->v7m.exception != ARMV7M_EXCP_NMI) {
         /* Auto-clear FAULTMASK on return from other than NMI */
         env->daif &= ~PSTATE_F;
     }
-    if (env->v7m.exception != 0) {
-        armv7m_nvic_complete_irq(env->nvic, env->v7m.exception);
+
+    switch (armv7m_nvic_complete_irq(env->nvic, env->v7m.exception)) {
+    case -1:
+        /* attempt to exit an exception that isn't active */
+        ufault = true;
+        break;
+    case 0:
+        /* still an irq active now */
+        break;
+    case 1:
+        /* we returned to base exception level, no nesting.
+         * (In the pseudocode this is written using "NestedActivation != 1"
+         * where we have 'rettobase == false'.)
+         */
+        rettobase = true;
+        break;
+    default:
+        g_assert_not_reached();
+    }
+
+    switch (type & 0xf) {
+    case 1: /* Return to Handler */
+        return_to_handler = true;
+        break;
+    case 13: /* Return to Thread using Process stack */
+        return_to_sp_process = true;
+        /* fall through */
+    case 9: /* Return to Thread using Main stack */
+        if (!rettobase &&
+            !(env->v7m.ccr & R_V7M_CCR_NONBASETHRDENA_MASK)) {
+            ufault = true;
+        }
+        break;
+    default:
+        ufault = true;
+    }
+
+    if (ufault) {
+        /* Bad exception return: instead of popping the exception
+         * stack, directly take a usage fault on the current stack.
+         */
+        env->v7m.cfsr |= R_V7M_CFSR_INVPC_MASK;
+        armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_USAGE);
+        v7m_exception_taken(cpu, type | 0xf0000000);
+        qemu_log_mask(CPU_LOG_INT, "...taking UsageFault on existing "
+                      "stackframe: failed exception return integrity check\n");
+        return;
     }
 
     /* Switch to the target stack.  */
-    switch_v7m_sp(env, (type & 4) != 0);
+    switch_v7m_sp(env, return_to_sp_process);
     /* Pop registers.  */
     env->regs[0] = v7m_pop(env);
     env->regs[1] = v7m_pop(env);
@@ -6041,11 +6184,24 @@ static void do_v7m_exception_exit(CPUARMState *env)
     /* Undo stack alignment.  */
     if (xpsr & 0x200)
         env->regs[13] |= 4;
-    /* ??? The exception return type specifies Thread/Handler mode.  However
-       this is also implied by the xPSR value. Not sure what to do
-       if there is a mismatch.  */
-    /* ??? Likewise for mismatches between the CONTROL register and the stack
-       pointer.  */
+
+    /* The restored xPSR exception field will be zero if we're
+     * resuming in Thread mode. If that doesn't match what the
+     * exception return type specified then this is a UsageFault.
+     */
+    if (return_to_handler == (env->v7m.exception == 0)) {
+        /* Take an INVPC UsageFault by pushing the stack again. */
+        armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_USAGE);
+        env->v7m.cfsr |= R_V7M_CFSR_INVPC_MASK;
+        v7m_push_stack(cpu);
+        v7m_exception_taken(cpu, type | 0xf0000000);
+        qemu_log_mask(CPU_LOG_INT, "...taking UsageFault on new stackframe: "
+                      "failed exception return integrity check\n");
+        return;
+    }
+
+    /* Otherwise, we have a successful exception exit. */
+    qemu_log_mask(CPU_LOG_INT, "...successful exception return\n");
 }
 
 static void arm_log_exception(int idx)
@@ -6063,37 +6219,11 @@ static void arm_log_exception(int idx)
     }
 }
 
-static uint32_t arm_v7m_load_vector(ARMCPU *cpu)
-
-{
-    CPUState *cs = CPU(cpu);
-    CPUARMState *env = &cpu->env;
-    MemTxResult result;
-    hwaddr vec = env->v7m.vecbase + env->v7m.exception * 4;
-    uint32_t addr;
-
-    addr = address_space_ldl(cs->as, vec,
-                             MEMTXATTRS_UNSPECIFIED, &result);
-    if (result != MEMTX_OK) {
-        /* Architecturally this should cause a HardFault setting HSFR.VECTTBL,
-         * which would then be immediately followed by our failing to load
-         * the entry vector for that HardFault, which is a Lockup case.
-         * Since we don't model Lockup, we just report this guest error
-         * via cpu_abort().
-         */
-        cpu_abort(cs, "Failed to read from exception vector table "
-                  "entry %08x\n", (unsigned)vec);
-    }
-    return addr;
-}
-
 void arm_v7m_cpu_do_interrupt(CPUState *cs)
 {
     ARMCPU *cpu = ARM_CPU(cs);
     CPUARMState *env = &cpu->env;
-    uint32_t xpsr = xpsr_read(env);
     uint32_t lr;
-    uint32_t addr;
 
     arm_log_exception(cs->exception_index);
 
@@ -6106,28 +6236,30 @@ void arm_v7m_cpu_do_interrupt(CPUState *cs)
 
     /* For exceptions we just mark as pending on the NVIC, and let that
        handle it.  */
-    /* TODO: Need to escalate if the current priority is higher than the
-       one we're raising.  */
     switch (cs->exception_index) {
     case EXCP_UDEF:
         armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_USAGE);
         env->v7m.cfsr |= R_V7M_CFSR_UNDEFINSTR_MASK;
-        return;
+        break;
     case EXCP_NOCP:
         armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_USAGE);
         env->v7m.cfsr |= R_V7M_CFSR_NOCP_MASK;
-        return;
+        break;
+    case EXCP_INVSTATE:
+        armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_USAGE);
+        env->v7m.cfsr |= R_V7M_CFSR_INVSTATE_MASK;
+        break;
     case EXCP_SWI:
         /* The PC already points to the next instruction.  */
         armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_SVC);
-        return;
+        break;
     case EXCP_PREFETCH_ABORT:
     case EXCP_DATA_ABORT:
         /* TODO: if we implemented the MPU registers, this is where we
          * should set the MMFAR, etc from exception.fsr and exception.vaddress.
          */
         armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_MEM);
-        return;
+        break;
     case EXCP_BKPT:
         if (semihosting_enabled()) {
             int nr;
@@ -6142,39 +6274,20 @@ void arm_v7m_cpu_do_interrupt(CPUState *cs)
             }
         }
         armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_DEBUG);
-        return;
+        break;
     case EXCP_IRQ:
-        env->v7m.exception = armv7m_nvic_acknowledge_irq(env->nvic);
         break;
     case EXCP_EXCEPTION_EXIT:
-        do_v7m_exception_exit(env);
+        do_v7m_exception_exit(cpu);
         return;
     default:
         cpu_abort(cs, "Unhandled exception 0x%x\n", cs->exception_index);
         return; /* Never happens.  Keep compiler happy.  */
     }
 
-    /* Align stack pointer if the guest wants that */
-    if ((env->regs[13] & 4) && (env->v7m.ccr & R_V7M_CCR_STKALIGN_MASK)) {
-        env->regs[13] -= 4;
-        xpsr |= 0x200;
-    }
-    /* Switch to the handler mode.  */
-    v7m_push(env, xpsr);
-    v7m_push(env, env->regs[15]);
-    v7m_push(env, env->regs[14]);
-    v7m_push(env, env->regs[12]);
-    v7m_push(env, env->regs[3]);
-    v7m_push(env, env->regs[2]);
-    v7m_push(env, env->regs[1]);
-    v7m_push(env, env->regs[0]);
-    switch_v7m_sp(env, 0);
-    /* Clear IT bits */
-    env->condexec_bits = 0;
-    env->regs[14] = lr;
-    addr = arm_v7m_load_vector(cpu);
-    env->regs[15] = addr & 0xfffffffe;
-    env->thumb = addr & 1;
+    v7m_push_stack(cpu);
+    v7m_exception_taken(cpu, lr);
+    qemu_log_mask(CPU_LOG_INT, "... as %d\n", env->v7m.exception);
 }
 
 /* Function used to synchronize QEMU's AArch64 register set with AArch32
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index e15eae6d41..24de30d92c 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -10933,6 +10933,10 @@ static void disas_crypto_aes(DisasContext *s, uint32_t insn)
         return;
     }
 
+    if (!fp_access_check(s)) {
+        return;
+    }
+
     /* Note that we convert the Vx register indexes into the
      * index within the vfp.regs[] array, so we can share the
      * helper with the AArch32 instructions.
@@ -10997,6 +11001,10 @@ static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn)
         return;
     }
 
+    if (!fp_access_check(s)) {
+        return;
+    }
+
     tcg_rd_regno = tcg_const_i32(rd << 1);
     tcg_rn_regno = tcg_const_i32(rn << 1);
     tcg_rm_regno = tcg_const_i32(rm << 1);
@@ -11060,6 +11068,10 @@ static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn)
         return;
     }
 
+    if (!fp_access_check(s)) {
+        return;
+    }
+
     tcg_rd_regno = tcg_const_i32(rd << 1);
     tcg_rn_regno = tcg_const_i32(rn << 1);
 
diff --git a/target/arm/translate.c b/target/arm/translate.c
index abc1f77ee4..b859f10755 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -7990,9 +7990,13 @@ static void disas_arm_insn(DisasContext *s, unsigned int insn)
     TCGv_i32 addr;
     TCGv_i64 tmp64;
 
-    /* M variants do not implement ARM mode.  */
+    /* M variants do not implement ARM mode; this must raise the INVSTATE
+     * UsageFault exception.
+     */
     if (arm_dc_feature(s, ARM_FEATURE_M)) {
-        goto illegal_op;
+        gen_exception_insn(s, 4, EXCP_INVSTATE, syn_uncategorized(),
+                           default_exception_el(s));
+        return;
     }
     cond = insn >> 28;
     if (cond == 0xf){
diff --git a/target/i386/cpu-qom.h b/target/i386/cpu-qom.h
index 8cd607e9a2..c2205e6077 100644
--- a/target/i386/cpu-qom.h
+++ b/target/i386/cpu-qom.h
@@ -48,7 +48,9 @@ typedef struct X86CPUDefinition X86CPUDefinition;
  * X86CPUClass:
  * @cpu_def: CPU model definition
  * @kvm_required: Whether CPU model requires KVM to be enabled.
+ * @ordering: Ordering on the "-cpu help" CPU model list.
  * @migration_safe: See CpuDefinitionInfo::migration_safe
+ * @static_model: See CpuDefinitionInfo::static
  * @parent_realize: The parent class' realize handler.
  * @parent_reset: The parent class' reset handler.
  *
@@ -59,11 +61,15 @@ typedef struct X86CPUClass {
     CPUClass parent_class;
     /*< public >*/
 
-    /* Should be eventually replaced by subclass-specific property defaults. */
+    /* CPU definition, automatically loaded by instance_init if not NULL.
+     * Should be eventually replaced by subclass-specific property defaults.
+     */
     X86CPUDefinition *cpu_def;
 
     bool kvm_required;
+    int ordering;
     bool migration_safe;
+    bool static_model;
 
     /* Optional description of CPU model.
      * If unavailable, cpu_def->model_id is used */
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index b6f157dca3..89421c893b 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -29,10 +29,16 @@
 #include "qemu/option.h"
 #include "qemu/config-file.h"
 #include "qapi/qmp/qerror.h"
+#include "qapi/qmp/qstring.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/qmp/qbool.h"
+#include "qapi/qmp/qint.h"
+#include "qapi/qmp/qfloat.h"
 
 #include "qapi-types.h"
 #include "qapi-visit.h"
 #include "qapi/visitor.h"
+#include "qom/qom-qobject.h"
 #include "sysemu/arch_init.h"
 
 #if defined(CONFIG_KVM)
@@ -1503,15 +1509,15 @@ void x86_cpu_change_kvm_default(const char *prop, const char *value)
 static uint32_t x86_cpu_get_supported_feature_word(FeatureWord w,
                                                    bool migratable_only);
 
-#ifdef CONFIG_KVM
-
 static bool lmce_supported(void)
 {
-    uint64_t mce_cap;
+    uint64_t mce_cap = 0;
 
+#ifdef CONFIG_KVM
     if (kvm_ioctl(kvm_state, KVM_X86_GET_MCE_CAP_SUPPORTED, &mce_cap) < 0) {
         return false;
     }
+#endif
 
     return !!(mce_cap & MCG_LMCE_P);
 }
@@ -1531,51 +1537,28 @@ static int cpu_x86_fill_model_id(char *str)
     return 0;
 }
 
-static X86CPUDefinition host_cpudef;
-
-static Property host_x86_cpu_properties[] = {
+static Property max_x86_cpu_properties[] = {
     DEFINE_PROP_BOOL("migratable", X86CPU, migratable, true),
     DEFINE_PROP_BOOL("host-cache-info", X86CPU, cache_info_passthrough, false),
     DEFINE_PROP_END_OF_LIST()
 };
 
-/* class_init for the "host" CPU model
- *
- * This function may be called before KVM is initialized.
- */
-static void host_x86_cpu_class_init(ObjectClass *oc, void *data)
+static void max_x86_cpu_class_init(ObjectClass *oc, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(oc);
     X86CPUClass *xcc = X86_CPU_CLASS(oc);
-    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;
-
-    xcc->kvm_required = true;
-
-    host_cpuid(0x0, 0, &eax, &ebx, &ecx, &edx);
-    x86_cpu_vendor_words2str(host_cpudef.vendor, ebx, edx, ecx);
-
-    host_cpuid(0x1, 0, &eax, &ebx, &ecx, &edx);
-    host_cpudef.family = ((eax >> 8) & 0x0F) + ((eax >> 20) & 0xFF);
-    host_cpudef.model = ((eax >> 4) & 0x0F) | ((eax & 0xF0000) >> 12);
-    host_cpudef.stepping = eax & 0x0F;
 
-    cpu_x86_fill_model_id(host_cpudef.model_id);
+    xcc->ordering = 9;
 
-    xcc->cpu_def = &host_cpudef;
     xcc->model_description =
-        "KVM processor with all supported host features "
-        "(only available in KVM mode)";
-
-    /* level, xlevel, xlevel2, and the feature words are initialized on
-     * instance_init, because they require KVM to be initialized.
-     */
+        "Enables all features supported by the accelerator in the current host";
 
-    dc->props = host_x86_cpu_properties;
-    /* Reason: host_x86_cpu_initfn() dies when !kvm_enabled() */
-    dc->cannot_destroy_with_object_finalize_yet = true;
+    dc->props = max_x86_cpu_properties;
 }
 
-static void host_x86_cpu_initfn(Object *obj)
+static void x86_cpu_load_def(X86CPU *cpu, X86CPUDefinition *def, Error **errp);
+
+static void max_x86_cpu_initfn(Object *obj)
 {
     X86CPU *cpu = X86_CPU(obj);
     CPUX86State *env = &cpu->env;
@@ -1584,10 +1567,24 @@ static void host_x86_cpu_initfn(Object *obj)
     /* We can't fill the features array here because we don't know yet if
      * "migratable" is true or false.
      */
-    cpu->host_features = true;
+    cpu->max_features = true;
 
-    /* If KVM is disabled, x86_cpu_realizefn() will report an error later */
     if (kvm_enabled()) {
+        X86CPUDefinition host_cpudef = { };
+        uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;
+
+        host_cpuid(0x0, 0, &eax, &ebx, &ecx, &edx);
+        x86_cpu_vendor_words2str(host_cpudef.vendor, ebx, edx, ecx);
+
+        host_cpuid(0x1, 0, &eax, &ebx, &ecx, &edx);
+        host_cpudef.family = ((eax >> 8) & 0x0F) + ((eax >> 20) & 0xFF);
+        host_cpudef.model = ((eax >> 4) & 0x0F) | ((eax & 0xF0000) >> 12);
+        host_cpudef.stepping = eax & 0x0F;
+
+        cpu_x86_fill_model_id(host_cpudef.model_id);
+
+        x86_cpu_load_def(cpu, &host_cpudef, &error_abort);
+
         env->cpuid_min_level =
             kvm_arch_get_supported_cpuid(s, 0x0, 0, R_EAX);
         env->cpuid_min_xlevel =
@@ -1598,15 +1595,44 @@ static void host_x86_cpu_initfn(Object *obj)
         if (lmce_supported()) {
             object_property_set_bool(OBJECT(cpu), true, "lmce", &error_abort);
         }
+    } else {
+        object_property_set_str(OBJECT(cpu), CPUID_VENDOR_AMD,
+                                "vendor", &error_abort);
+        object_property_set_int(OBJECT(cpu), 6, "family", &error_abort);
+        object_property_set_int(OBJECT(cpu), 6, "model", &error_abort);
+        object_property_set_int(OBJECT(cpu), 3, "stepping", &error_abort);
+        object_property_set_str(OBJECT(cpu),
+                                "QEMU TCG CPU version " QEMU_HW_VERSION,
+                                "model-id", &error_abort);
     }
 
     object_property_set_bool(OBJECT(cpu), true, "pmu", &error_abort);
 }
 
+static const TypeInfo max_x86_cpu_type_info = {
+    .name = X86_CPU_TYPE_NAME("max"),
+    .parent = TYPE_X86_CPU,
+    .instance_init = max_x86_cpu_initfn,
+    .class_init = max_x86_cpu_class_init,
+};
+
+#ifdef CONFIG_KVM
+
+static void host_x86_cpu_class_init(ObjectClass *oc, void *data)
+{
+    X86CPUClass *xcc = X86_CPU_CLASS(oc);
+
+    xcc->kvm_required = true;
+    xcc->ordering = 8;
+
+    xcc->model_description =
+        "KVM processor with all supported host features "
+        "(only available in KVM mode)";
+}
+
 static const TypeInfo host_x86_cpu_type_info = {
     .name = X86_CPU_TYPE_NAME("host"),
-    .parent = TYPE_X86_CPU,
-    .instance_init = host_x86_cpu_initfn,
+    .parent = X86_CPU_TYPE_NAME("max"),
     .class_init = host_x86_cpu_class_init,
 };
 
@@ -2060,7 +2086,7 @@ static void x86_cpu_parse_featurestr(const char *typename, char *features,
     }
 }
 
-static void x86_cpu_load_features(X86CPU *cpu, Error **errp);
+static void x86_cpu_expand_features(X86CPU *cpu, Error **errp);
 static int x86_cpu_filter_features(X86CPU *cpu);
 
 /* Check for missing features that may prevent the CPU class from
@@ -2083,9 +2109,9 @@ static void x86_cpu_class_check_missing_features(X86CPUClass *xcc,
 
     xc = X86_CPU(object_new(object_class_get_name(OBJECT_CLASS(xcc))));
 
-    x86_cpu_load_features(xc, &err);
+    x86_cpu_expand_features(xc, &err);
     if (err) {
-        /* Errors at x86_cpu_load_features should never happen,
+        /* Errors at x86_cpu_expand_features should never happen,
          * but in case it does, just report the model as not
          * runnable at all using the "type" property.
          */
@@ -2128,7 +2154,7 @@ static void listflags(FILE *f, fprintf_function print, const char **featureset)
     }
 }
 
-/* Sort alphabetically by type name, listing kvm_required models last. */
+/* Sort alphabetically by type name, respecting X86CPUClass::ordering. */
 static gint x86_cpu_list_compare(gconstpointer a, gconstpointer b)
 {
     ObjectClass *class_a = (ObjectClass *)a;
@@ -2137,9 +2163,8 @@ static gint x86_cpu_list_compare(gconstpointer a, gconstpointer b)
     X86CPUClass *cc_b = X86_CPU_CLASS(class_b);
     const char *name_a, *name_b;
 
-    if (cc_a->kvm_required != cc_b->kvm_required) {
-        /* kvm_required items go last */
-        return cc_a->kvm_required ? 1 : -1;
+    if (cc_a->ordering != cc_b->ordering) {
+        return cc_a->ordering - cc_b->ordering;
     } else {
         name_a = object_class_get_name(class_a);
         name_b = object_class_get_name(class_b);
@@ -2161,7 +2186,7 @@ static void x86_cpu_list_entry(gpointer data, gpointer user_data)
     CPUListState *s = user_data;
     char *name = x86_cpu_class_get_model_name(cc);
     const char *desc = cc->model_description;
-    if (!desc) {
+    if (!desc && cc->cpu_def) {
         desc = cc->cpu_def->model_id;
     }
 
@@ -2210,6 +2235,7 @@ static void x86_cpu_definition_entry(gpointer data, gpointer user_data)
     info->q_typename = g_strdup(object_class_get_name(oc));
     info->migration_safe = cc->migration_safe;
     info->has_migration_safe = true;
+    info->q_static = cc->static_model;
 
     entry = g_malloc0(sizeof(*entry));
     entry->value = info;
@@ -2247,31 +2273,6 @@ static uint32_t x86_cpu_get_supported_feature_word(FeatureWord w,
     return r;
 }
 
-/*
- * Filters CPU feature words based on host availability of each feature.
- *
- * Returns: 0 if all flags are supported by the host, non-zero otherwise.
- */
-static int x86_cpu_filter_features(X86CPU *cpu)
-{
-    CPUX86State *env = &cpu->env;
-    FeatureWord w;
-    int rv = 0;
-
-    for (w = 0; w < FEATURE_WORDS; w++) {
-        uint32_t host_feat =
-            x86_cpu_get_supported_feature_word(w, false);
-        uint32_t requested_features = env->features[w];
-        env->features[w] &= host_feat;
-        cpu->filtered_features[w] = requested_features & ~env->features[w];
-        if (cpu->filtered_features[w]) {
-            rv = 1;
-        }
-    }
-
-    return rv;
-}
-
 static void x86_cpu_report_filtered_features(X86CPU *cpu)
 {
     FeatureWord w;
@@ -2293,7 +2294,7 @@ static void x86_cpu_apply_props(X86CPU *cpu, PropValue *props)
     }
 }
 
-/* Load data from X86CPUDefinition
+/* Load data from X86CPUDefinition into a X86CPU object
  */
 static void x86_cpu_load_def(X86CPU *cpu, X86CPUDefinition *def, Error **errp)
 {
@@ -2302,6 +2303,11 @@ static void x86_cpu_load_def(X86CPU *cpu, X86CPUDefinition *def, Error **errp)
     char host_vendor[CPUID_VENDOR_SZ + 1];
     FeatureWord w;
 
+    /*NOTE: any property set by this function should be returned by
+     * x86_cpu_static_props(), so static expansion of
+     * query-cpu-model-expansion is always complete.
+     */
+
     /* CPU models only set _minimum_ values for level/xlevel: */
     object_property_set_int(OBJECT(cpu), def->level, "min-level", errp);
     object_property_set_int(OBJECT(cpu), def->xlevel, "min-xlevel", errp);
@@ -2346,6 +2352,212 @@ static void x86_cpu_load_def(X86CPU *cpu, X86CPUDefinition *def, Error **errp)
 
 }
 
+/* Return a QDict containing keys for all properties that can be included
+ * in static expansion of CPU models. All properties set by x86_cpu_load_def()
+ * must be included in the dictionary.
+ */
+static QDict *x86_cpu_static_props(void)
+{
+    FeatureWord w;
+    int i;
+    static const char *props[] = {
+        "min-level",
+        "min-xlevel",
+        "family",
+        "model",
+        "stepping",
+        "model-id",
+        "vendor",
+        "lmce",
+        NULL,
+    };
+    static QDict *d;
+
+    if (d) {
+        return d;
+    }
+
+    d = qdict_new();
+    for (i = 0; props[i]; i++) {
+        qdict_put_obj(d, props[i], qnull());
+    }
+
+    for (w = 0; w < FEATURE_WORDS; w++) {
+        FeatureWordInfo *fi = &feature_word_info[w];
+        int bit;
+        for (bit = 0; bit < 32; bit++) {
+            if (!fi->feat_names[bit]) {
+                continue;
+            }
+            qdict_put_obj(d, fi->feat_names[bit], qnull());
+        }
+    }
+
+    return d;
+}
+
+/* Add an entry to @props dict, with the value for property. */
+static void x86_cpu_expand_prop(X86CPU *cpu, QDict *props, const char *prop)
+{
+    QObject *value = object_property_get_qobject(OBJECT(cpu), prop,
+                                                 &error_abort);
+
+    qdict_put_obj(props, prop, value);
+}
+
+/* Convert CPU model data from X86CPU object to a property dictionary
+ * that can recreate exactly the same CPU model.
+ */
+static void x86_cpu_to_dict(X86CPU *cpu, QDict *props)
+{
+    QDict *sprops = x86_cpu_static_props();
+    const QDictEntry *e;
+
+    for (e = qdict_first(sprops); e; e = qdict_next(sprops, e)) {
+        const char *prop = qdict_entry_key(e);
+        x86_cpu_expand_prop(cpu, props, prop);
+    }
+}
+
+/* Convert CPU model data from X86CPU object to a property dictionary
+ * that can recreate exactly the same CPU model, including every
+ * writeable QOM property.
+ */
+static void x86_cpu_to_dict_full(X86CPU *cpu, QDict *props)
+{
+    ObjectPropertyIterator iter;
+    ObjectProperty *prop;
+
+    object_property_iter_init(&iter, OBJECT(cpu));
+    while ((prop = object_property_iter_next(&iter))) {
+        /* skip read-only or write-only properties */
+        if (!prop->get || !prop->set) {
+            continue;
+        }
+
+        /* "hotplugged" is the only property that is configurable
+         * on the command-line but will be set differently on CPUs
+         * created using "-cpu ... -smp ..." and by CPUs created
+         * on the fly by x86_cpu_from_model() for querying. Skip it.
+         */
+        if (!strcmp(prop->name, "hotplugged")) {
+            continue;
+        }
+        x86_cpu_expand_prop(cpu, props, prop->name);
+    }
+}
+
+static void object_apply_props(Object *obj, QDict *props, Error **errp)
+{
+    const QDictEntry *prop;
+    Error *err = NULL;
+
+    for (prop = qdict_first(props); prop; prop = qdict_next(props, prop)) {
+        object_property_set_qobject(obj, qdict_entry_value(prop),
+                                         qdict_entry_key(prop), &err);
+        if (err) {
+            break;
+        }
+    }
+
+    error_propagate(errp, err);
+}
+
+/* Create X86CPU object according to model+props specification */
+static X86CPU *x86_cpu_from_model(const char *model, QDict *props, Error **errp)
+{
+    X86CPU *xc = NULL;
+    X86CPUClass *xcc;
+    Error *err = NULL;
+
+    xcc = X86_CPU_CLASS(cpu_class_by_name(TYPE_X86_CPU, model));
+    if (xcc == NULL) {
+        error_setg(&err, "CPU model '%s' not found", model);
+        goto out;
+    }
+
+    xc = X86_CPU(object_new(object_class_get_name(OBJECT_CLASS(xcc))));
+    if (props) {
+        object_apply_props(OBJECT(xc), props, &err);
+        if (err) {
+            goto out;
+        }
+    }
+
+    x86_cpu_expand_features(xc, &err);
+    if (err) {
+        goto out;
+    }
+
+out:
+    if (err) {
+        error_propagate(errp, err);
+        object_unref(OBJECT(xc));
+        xc = NULL;
+    }
+    return xc;
+}
+
+CpuModelExpansionInfo *
+arch_query_cpu_model_expansion(CpuModelExpansionType type,
+                                                      CpuModelInfo *model,
+                                                      Error **errp)
+{
+    X86CPU *xc = NULL;
+    Error *err = NULL;
+    CpuModelExpansionInfo *ret = g_new0(CpuModelExpansionInfo, 1);
+    QDict *props = NULL;
+    const char *base_name;
+
+    xc = x86_cpu_from_model(model->name,
+                            model->has_props ?
+                                qobject_to_qdict(model->props) :
+                                NULL, &err);
+    if (err) {
+        goto out;
+    }
+
+    props = qdict_new();
+
+    switch (type) {
+    case CPU_MODEL_EXPANSION_TYPE_STATIC:
+        /* Static expansion will be based on "base" only */
+        base_name = "base";
+        x86_cpu_to_dict(xc, props);
+    break;
+    case CPU_MODEL_EXPANSION_TYPE_FULL:
+        /* As we don't return every single property, full expansion needs
+         * to keep the original model name+props, and add extra
+         * properties on top of that.
+         */
+        base_name = model->name;
+        x86_cpu_to_dict_full(xc, props);
+    break;
+    default:
+        error_setg(&err, "Unsupportted expansion type");
+        goto out;
+    }
+
+    if (!props) {
+        props = qdict_new();
+    }
+    x86_cpu_to_dict(xc, props);
+
+    ret->model = g_new0(CpuModelInfo, 1);
+    ret->model->name = g_strdup(base_name);
+    ret->model->props = QOBJECT(props);
+    ret->model->has_props = true;
+
+out:
+    object_unref(OBJECT(xc));
+    if (err) {
+        error_propagate(errp, err);
+        qapi_free_CpuModelExpansionInfo(ret);
+        ret = NULL;
+    }
+    return ret;
+}
+
 X86CPU *cpu_x86_init(const char *cpu_model)
 {
     return X86_CPU(cpu_generic_init(TYPE_X86_CPU, cpu_model));
@@ -3095,20 +3307,59 @@ static void x86_cpu_enable_xsave_components(X86CPU *cpu)
     env->features[FEAT_XSAVE_COMP_HI] = mask >> 32;
 }
 
-/* Load CPUID data based on configured features */
-static void x86_cpu_load_features(X86CPU *cpu, Error **errp)
+/***** Steps involved on loading and filtering CPUID data
+ *
+ * When initializing and realizing a CPU object, the steps
+ * involved in setting up CPUID data are:
+ *
+ * 1) Loading CPU model definition (X86CPUDefinition). This is
+ *    implemented by x86_cpu_load_def() and should be completely
+ *    transparent, as it is done automatically by instance_init.
+ *    No code should need to look at X86CPUDefinition structs
+ *    outside instance_init.
+ *
+ * 2) CPU expansion. This is done by realize before CPUID
+ *    filtering, and will make sure host/accelerator data is
+ *    loaded for CPU models that depend on host capabilities
+ *    (e.g. "host"). Done by x86_cpu_expand_features().
+ *
+ * 3) CPUID filtering. This initializes extra data related to
+ *    CPUID, and checks if the host supports all capabilities
+ *    required by the CPU. Runnability of a CPU model is
+ *    determined at this step. Done by x86_cpu_filter_features().
+ *
+ * Some operations don't require all steps to be performed.
+ * More precisely:
+ *
+ * - CPU instance creation (instance_init) will run only CPU
+ *   model loading. CPU expansion can't run at instance_init-time
+ *   because host/accelerator data may be not available yet.
+ * - CPU realization will perform both CPU model expansion and CPUID
+ *   filtering, and return an error in case one of them fails.
+ * - query-cpu-definitions needs to run all 3 steps. It needs
+ *   to run CPUID filtering, as the 'unavailable-features'
+ *   field is set based on the filtering results.
+ * - The query-cpu-model-expansion QMP command only needs to run
+ *   CPU model loading and CPU expansion. It should not filter
+ *   any CPUID data based on host capabilities.
+ */
+
+/* Expand CPU configuration data, based on configured features
+ * and host/accelerator capabilities when appropriate.
+ */
+static void x86_cpu_expand_features(X86CPU *cpu, Error **errp)
 {
     CPUX86State *env = &cpu->env;
     FeatureWord w;
     GList *l;
     Error *local_err = NULL;
 
-    /*TODO: cpu->host_features incorrectly overwrites features
+    /*TODO: cpu->max_features incorrectly overwrites features
      * set using "feat=on|off". Once we fix this, we can convert
      * plus_features & minus_features to global properties
      * inside x86_cpu_parse_featurestr() too.
      */
-    if (cpu->host_features) {
+    if (cpu->max_features) {
         for (w = 0; w < FEATURE_WORDS; w++) {
             env->features[w] =
                 x86_cpu_get_supported_feature_word(w, cpu->migratable);
@@ -3173,6 +3424,32 @@ out:
     }
 }
 
+/*
+ * Finishes initialization of CPUID data, filters CPU feature
+ * words based on host availability of each feature.
+ *
+ * Returns: 0 if all flags are supported by the host, non-zero otherwise.
+ */
+static int x86_cpu_filter_features(X86CPU *cpu)
+{
+    CPUX86State *env = &cpu->env;
+    FeatureWord w;
+    int rv = 0;
+
+    for (w = 0; w < FEATURE_WORDS; w++) {
+        uint32_t host_feat =
+            x86_cpu_get_supported_feature_word(w, false);
+        uint32_t requested_features = env->features[w];
+        env->features[w] &= host_feat;
+        cpu->filtered_features[w] = requested_features & ~env->features[w];
+        if (cpu->filtered_features[w]) {
+            rv = 1;
+        }
+    }
+
+    return rv;
+}
+
 #define IS_INTEL_CPU(env) ((env)->cpuid_vendor1 == CPUID_VENDOR_INTEL_1 && \
                            (env)->cpuid_vendor2 == CPUID_VENDOR_INTEL_2 && \
                            (env)->cpuid_vendor3 == CPUID_VENDOR_INTEL_3)
@@ -3200,7 +3477,7 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp)
         return;
     }
 
-    x86_cpu_load_features(cpu, &local_err);
+    x86_cpu_expand_features(cpu, &local_err);
     if (local_err) {
         goto out;
     }
@@ -3619,7 +3896,9 @@ static void x86_cpu_initfn(Object *obj)
     object_property_add_alias(obj, "sse4_1", obj, "sse4.1", &error_abort);
     object_property_add_alias(obj, "sse4_2", obj, "sse4.2", &error_abort);
 
-    x86_cpu_load_def(cpu, xcc->cpu_def, &error_abort);
+    if (xcc->cpu_def) {
+        x86_cpu_load_def(cpu, xcc->cpu_def, &error_abort);
+    }
 }
 
 static int64_t x86_cpu_get_arch_id(CPUState *cs)
@@ -3774,6 +4053,24 @@ static const TypeInfo x86_cpu_type_info = {
     .class_init = x86_cpu_common_class_init,
 };
 
+
+/* "base" CPU model, used by query-cpu-model-expansion */
+static void x86_cpu_base_class_init(ObjectClass *oc, void *data)
+{
+    X86CPUClass *xcc = X86_CPU_CLASS(oc);
+
+    xcc->static_model = true;
+    xcc->migration_safe = true;
+    xcc->model_description = "base CPU model type with no features enabled";
+    xcc->ordering = 8;
+}
+
+static const TypeInfo x86_base_cpu_type_info = {
+        .name = X86_CPU_TYPE_NAME("base"),
+        .parent = TYPE_X86_CPU,
+        .class_init = x86_cpu_base_class_init,
+};
+
 static void x86_cpu_register_types(void)
 {
     int i;
@@ -3782,6 +4079,8 @@ static void x86_cpu_register_types(void)
     for (i = 0; i < ARRAY_SIZE(builtin_x86_defs); i++) {
         x86_register_cpudef_type(&builtin_x86_defs[i]);
     }
+    type_register_static(&max_x86_cpu_type_info);
+    type_register_static(&x86_base_cpu_type_info);
 #ifdef CONFIG_KVM
     type_register_static(&host_x86_cpu_type_info);
 #endif
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 8df124f332..12a39d590f 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -1211,7 +1211,7 @@ struct X86CPU {
     bool enforce_cpuid;
     bool expose_kvm;
     bool migratable;
-    bool host_features;
+    bool max_features; /* Enable all supported features automatically */
     uint32_t apic_id;
 
     /* Enables publishing of TSC increment and Local APIC bus frequencies to
@@ -1417,6 +1417,8 @@ floatx80 cpu_set_fp80(uint64_t mant, uint16_t upper);
 void cpu_x86_load_seg(CPUX86State *s, int seg_reg, int selector);
 void cpu_x86_fsave(CPUX86State *s, target_ulong ptr, int data32);
 void cpu_x86_frstor(CPUX86State *s, target_ulong ptr, int data32);
+void cpu_x86_fxsave(CPUX86State *s, target_ulong ptr);
+void cpu_x86_fxrstor(CPUX86State *s, target_ulong ptr);
 
 /* you can call this signal handler from your SIGBUS and SIGSEGV
    signal handlers to inform the virtual CPU of exceptions. non zero
diff --git a/target/i386/fpu_helper.c b/target/i386/fpu_helper.c
index 66474ad98e..69ea33a5c2 100644
--- a/target/i386/fpu_helper.c
+++ b/target/i386/fpu_helper.c
@@ -1377,6 +1377,18 @@ void helper_fxrstor(CPUX86State *env, target_ulong ptr)
     }
 }
 
+#if defined(CONFIG_USER_ONLY)
+void cpu_x86_fxsave(CPUX86State *env, target_ulong ptr)
+{
+    helper_fxsave(env, ptr);
+}
+
+void cpu_x86_fxrstor(CPUX86State *env, target_ulong ptr)
+{
+    helper_fxrstor(env, ptr);
+}
+#endif
+
 void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
 {
     uintptr_t ra = GETPC();
diff --git a/target/ppc/Makefile.objs b/target/ppc/Makefile.objs
index a8c7a30cde..0057b319c0 100644
--- a/target/ppc/Makefile.objs
+++ b/target/ppc/Makefile.objs
@@ -1,8 +1,9 @@
 obj-y += cpu-models.o
+obj-y += cpu.o
 obj-y += translate.o
 ifeq ($(CONFIG_SOFTMMU),y)
-obj-y += machine.o mmu_helper.o mmu-hash32.o monitor.o
-obj-$(TARGET_PPC64) += mmu-hash64.o arch_dump.o compat.o
+obj-y += machine.o mmu_helper.o mmu-hash32.o monitor.o arch_dump.o
+obj-$(TARGET_PPC64) += mmu-hash64.o compat.o
 endif
 obj-$(CONFIG_KVM) += kvm.o
 obj-$(call lnot,$(CONFIG_KVM)) += kvm-stub.o
diff --git a/target/ppc/arch_dump.c b/target/ppc/arch_dump.c
index 40282a1f50..28d9cc7d79 100644
--- a/target/ppc/arch_dump.c
+++ b/target/ppc/arch_dump.c
@@ -1,5 +1,5 @@
 /*
- * writing ELF notes for ppc64 arch
+ * writing ELF notes for ppc{64,} arch
  *
  *
  * Copyright IBM, Corp. 2013
@@ -19,36 +19,48 @@
 #include "sysemu/dump.h"
 #include "sysemu/kvm.h"
 
-struct PPC64UserRegStruct {
-    uint64_t gpr[32];
-    uint64_t nip;
-    uint64_t msr;
-    uint64_t orig_gpr3;
-    uint64_t ctr;
-    uint64_t link;
-    uint64_t xer;
-    uint64_t ccr;
-    uint64_t softe;
-    uint64_t trap;
-    uint64_t dar;
-    uint64_t dsisr;
-    uint64_t result;
+#ifdef TARGET_PPC64
+#define ELFCLASS ELFCLASS64
+#define cpu_to_dump_reg cpu_to_dump64
+typedef uint64_t reg_t;
+typedef Elf64_Nhdr Elf_Nhdr;
+#else
+#define ELFCLASS ELFCLASS32
+#define cpu_to_dump_reg cpu_to_dump32
+typedef uint32_t reg_t;
+typedef Elf32_Nhdr Elf_Nhdr;
+#endif /* TARGET_PPC64 */
+
+struct PPCUserRegStruct {
+    reg_t gpr[32];
+    reg_t nip;
+    reg_t msr;
+    reg_t orig_gpr3;
+    reg_t ctr;
+    reg_t link;
+    reg_t xer;
+    reg_t ccr;
+    reg_t softe;
+    reg_t trap;
+    reg_t dar;
+    reg_t dsisr;
+    reg_t result;
 } QEMU_PACKED;
 
-struct PPC64ElfPrstatus {
+struct PPCElfPrstatus {
     char pad1[112];
-    struct PPC64UserRegStruct pr_reg;
-    uint64_t pad2[4];
+    struct PPCUserRegStruct pr_reg;
+    reg_t pad2[4];
 } QEMU_PACKED;
 
 
-struct PPC64ElfFpregset {
+struct PPCElfFpregset {
     uint64_t fpr[32];
-    uint64_t fpscr;
+    reg_t fpscr;
 }  QEMU_PACKED;
 
 
-struct PPC64ElfVmxregset {
+struct PPCElfVmxregset {
     ppc_avr_t avr[32];
     ppc_avr_t vscr;
     union {
@@ -57,26 +69,26 @@ struct PPC64ElfVmxregset {
     } vrsave;
 }  QEMU_PACKED;
 
-struct PPC64ElfVsxregset {
+struct PPCElfVsxregset {
     uint64_t vsr[32];
 }  QEMU_PACKED;
 
-struct PPC64ElfSperegset {
+struct PPCElfSperegset {
     uint32_t evr[32];
     uint64_t spe_acc;
     uint32_t spe_fscr;
 }  QEMU_PACKED;
 
 typedef struct noteStruct {
-    Elf64_Nhdr hdr;
+    Elf_Nhdr hdr;
     char name[5];
     char pad3[3];
     union {
-        struct PPC64ElfPrstatus  prstatus;
-        struct PPC64ElfFpregset  fpregset;
-        struct PPC64ElfVmxregset vmxregset;
-        struct PPC64ElfVsxregset vsxregset;
-        struct PPC64ElfSperegset speregset;
+        struct PPCElfPrstatus  prstatus;
+        struct PPCElfFpregset  fpregset;
+        struct PPCElfVmxregset vmxregset;
+        struct PPCElfVsxregset vsxregset;
+        struct PPCElfSperegset speregset;
     } contents;
 } QEMU_PACKED Note;
 
@@ -85,12 +97,12 @@ typedef struct NoteFuncArg {
     DumpState *state;
 } NoteFuncArg;
 
-static void ppc64_write_elf64_prstatus(NoteFuncArg *arg, PowerPCCPU *cpu)
+static void ppc_write_elf_prstatus(NoteFuncArg *arg, PowerPCCPU *cpu)
 {
     int i;
-    uint64_t cr;
-    struct PPC64ElfPrstatus *prstatus;
-    struct PPC64UserRegStruct *reg;
+    reg_t cr;
+    struct PPCElfPrstatus *prstatus;
+    struct PPCUserRegStruct *reg;
     Note *note = &arg->note;
     DumpState *s = arg->state;
 
@@ -101,25 +113,25 @@ static void ppc64_write_elf64_prstatus(NoteFuncArg *arg, PowerPCCPU *cpu)
     reg = &prstatus->pr_reg;
 
     for (i = 0; i < 32; i++) {
-        reg->gpr[i] = cpu_to_dump64(s, cpu->env.gpr[i]);
+        reg->gpr[i] = cpu_to_dump_reg(s, cpu->env.gpr[i]);
     }
-    reg->nip = cpu_to_dump64(s, cpu->env.nip);
-    reg->msr = cpu_to_dump64(s, cpu->env.msr);
-    reg->ctr = cpu_to_dump64(s, cpu->env.ctr);
-    reg->link = cpu_to_dump64(s, cpu->env.lr);
-    reg->xer = cpu_to_dump64(s, cpu_read_xer(&cpu->env));
+    reg->nip = cpu_to_dump_reg(s, cpu->env.nip);
+    reg->msr = cpu_to_dump_reg(s, cpu->env.msr);
+    reg->ctr = cpu_to_dump_reg(s, cpu->env.ctr);
+    reg->link = cpu_to_dump_reg(s, cpu->env.lr);
+    reg->xer = cpu_to_dump_reg(s, cpu_read_xer(&cpu->env));
 
     cr = 0;
     for (i = 0; i < 8; i++) {
         cr |= (cpu->env.crf[i] & 15) << (4 * (7 - i));
     }
-    reg->ccr = cpu_to_dump64(s, cr);
+    reg->ccr = cpu_to_dump_reg(s, cr);
 }
 
-static void ppc64_write_elf64_fpregset(NoteFuncArg *arg, PowerPCCPU *cpu)
+static void ppc_write_elf_fpregset(NoteFuncArg *arg, PowerPCCPU *cpu)
 {
     int i;
-    struct PPC64ElfFpregset  *fpregset;
+    struct PPCElfFpregset  *fpregset;
     Note *note = &arg->note;
     DumpState *s = arg->state;
 
@@ -131,13 +143,13 @@ static void ppc64_write_elf64_fpregset(NoteFuncArg *arg, PowerPCCPU *cpu)
     for (i = 0; i < 32; i++) {
         fpregset->fpr[i] = cpu_to_dump64(s, cpu->env.fpr[i]);
     }
-    fpregset->fpscr = cpu_to_dump64(s, cpu->env.fpscr);
+    fpregset->fpscr = cpu_to_dump_reg(s, cpu->env.fpscr);
 }
 
-static void ppc64_write_elf64_vmxregset(NoteFuncArg *arg, PowerPCCPU *cpu)
+static void ppc_write_elf_vmxregset(NoteFuncArg *arg, PowerPCCPU *cpu)
 {
     int i;
-    struct PPC64ElfVmxregset *vmxregset;
+    struct PPCElfVmxregset *vmxregset;
     Note *note = &arg->note;
     DumpState *s = arg->state;
 
@@ -164,10 +176,11 @@ static void ppc64_write_elf64_vmxregset(NoteFuncArg *arg, PowerPCCPU *cpu)
     }
     vmxregset->vscr.u32[3] = cpu_to_dump32(s, cpu->env.vscr);
 }
-static void ppc64_write_elf64_vsxregset(NoteFuncArg *arg, PowerPCCPU *cpu)
+
+static void ppc_write_elf_vsxregset(NoteFuncArg *arg, PowerPCCPU *cpu)
 {
     int i;
-    struct PPC64ElfVsxregset *vsxregset;
+    struct PPCElfVsxregset *vsxregset;
     Note *note = &arg->note;
     DumpState *s = arg->state;
 
@@ -179,9 +192,10 @@ static void ppc64_write_elf64_vsxregset(NoteFuncArg *arg, PowerPCCPU *cpu)
         vsxregset->vsr[i] = cpu_to_dump64(s, cpu->env.vsr[i]);
     }
 }
-static void ppc64_write_elf64_speregset(NoteFuncArg *arg, PowerPCCPU *cpu)
+
+static void ppc_write_elf_speregset(NoteFuncArg *arg, PowerPCCPU *cpu)
 {
-    struct PPC64ElfSperegset *speregset;
+    struct PPCElfSperegset *speregset;
     Note *note = &arg->note;
     DumpState *s = arg->state;
 
@@ -197,11 +211,11 @@ static const struct NoteFuncDescStruct {
     int contents_size;
     void (*note_contents_func)(NoteFuncArg *arg, PowerPCCPU *cpu);
 } note_func[] = {
-    {sizeof(((Note *)0)->contents.prstatus),  ppc64_write_elf64_prstatus},
-    {sizeof(((Note *)0)->contents.fpregset),  ppc64_write_elf64_fpregset},
-    {sizeof(((Note *)0)->contents.vmxregset), ppc64_write_elf64_vmxregset},
-    {sizeof(((Note *)0)->contents.vsxregset), ppc64_write_elf64_vsxregset},
-    {sizeof(((Note *)0)->contents.speregset), ppc64_write_elf64_speregset},
+    {sizeof(((Note *)0)->contents.prstatus),  ppc_write_elf_prstatus},
+    {sizeof(((Note *)0)->contents.fpregset),  ppc_write_elf_fpregset},
+    {sizeof(((Note *)0)->contents.vmxregset), ppc_write_elf_vmxregset},
+    {sizeof(((Note *)0)->contents.vsxregset), ppc_write_elf_vsxregset},
+    {sizeof(((Note *)0)->contents.speregset), ppc_write_elf_speregset},
     { 0, NULL}
 };
 
@@ -213,8 +227,9 @@ int cpu_get_dump_info(ArchDumpInfo *info,
     PowerPCCPU *cpu = POWERPC_CPU(first_cpu);
     PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
 
-    info->d_machine = EM_PPC64;
-    info->d_class = ELFCLASS64;
+    info->d_machine = PPC_ELF_MACHINE;
+    info->d_class = ELFCLASS;
+
     if ((*pcc->interrupts_big_endian)(cpu)) {
         info->d_endian = ELFDATA2MSB;
     } else {
@@ -236,25 +251,19 @@ ssize_t cpu_get_note_size(int class, int machine, int nr_cpus)
     int note_head_size;
     const NoteFuncDesc *nf;
 
-    if (class != ELFCLASS64) {
-        return -1;
-    }
-    assert(machine == EM_PPC64);
-
-    note_head_size = sizeof(Elf64_Nhdr);
-
+    note_head_size = sizeof(Elf_Nhdr);
     for (nf = note_func; nf->note_contents_func; nf++) {
         elf_note_size = elf_note_size + note_head_size + name_size +
-                        nf->contents_size;
+            nf->contents_size;
     }
 
     return (elf_note_size) * nr_cpus;
 }
 
-static int ppc64_write_all_elf64_notes(const char *note_name,
-                                       WriteCoreDumpFunction f,
-                                       PowerPCCPU *cpu, int id,
-                                       void *opaque)
+static int ppc_write_all_elf_notes(const char *note_name,
+                                   WriteCoreDumpFunction f,
+                                   PowerPCCPU *cpu, int id,
+                                   void *opaque)
 {
     NoteFuncArg arg = { .state = opaque };
     int ret = -1;
@@ -282,5 +291,12 @@ int ppc64_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs,
                                int cpuid, void *opaque)
 {
     PowerPCCPU *cpu = POWERPC_CPU(cs);
-    return ppc64_write_all_elf64_notes("CORE", f, cpu, cpuid, opaque);
+    return ppc_write_all_elf_notes("CORE", f, cpu, cpuid, opaque);
+}
+
+int ppc32_cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState *cs,
+                               int cpuid, void *opaque)
+{
+    PowerPCCPU *cpu = POWERPC_CPU(cs);
+    return ppc_write_all_elf_notes("CORE", f, cpu, cpuid, opaque);
 }
diff --git a/target/ppc/cpu.c b/target/ppc/cpu.c
new file mode 100644
index 0000000000..28011668e7
--- /dev/null
+++ b/target/ppc/cpu.c
@@ -0,0 +1,47 @@
+/*
+ *  PowerPC CPU routines for qemu.
+ *
+ * Copyright (c) 2017 Nikunj A Dadhania, IBM Corporation.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "cpu.h"
+#include "cpu-models.h"
+
+target_ulong cpu_read_xer(CPUPPCState *env)
+{
+    if (is_isa300(env)) {
+        return env->xer | (env->so << XER_SO) |
+            (env->ov << XER_OV) | (env->ca << XER_CA) |
+            (env->ov32 << XER_OV32) | (env->ca32 << XER_CA32);
+    }
+
+    return env->xer | (env->so << XER_SO) | (env->ov << XER_OV) |
+        (env->ca << XER_CA);
+}
+
+void cpu_write_xer(CPUPPCState *env, target_ulong xer)
+{
+    env->so = (xer >> XER_SO) & 1;
+    env->ov = (xer >> XER_OV) & 1;
+    env->ca = (xer >> XER_CA) & 1;
+    /* write all the flags, while reading back check of isa300 */
+    env->ov32 = (xer >> XER_OV32) & 1;
+    env->ca32 = (xer >> XER_CA32) & 1;
+    env->xer = xer & ~((1ul << XER_SO) |
+                       (1ul << XER_OV) | (1ul << XER_CA) |
+                       (1ul << XER_OV32) | (1ul << XER_CA32));
+}
diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index 425e79d52d..d33c17e646 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -223,11 +223,12 @@ enum {
 typedef struct opc_handler_t opc_handler_t;
 
 /*****************************************************************************/
-/* Types used to describe some PowerPC registers */
+/* Types used to describe some PowerPC registers etc. */
 typedef struct DisasContext DisasContext;
 typedef struct ppc_spr_t ppc_spr_t;
 typedef union ppc_avr_t ppc_avr_t;
 typedef union ppc_tlb_t ppc_tlb_t;
+typedef struct ppc_hash_pte64 ppc_hash_pte64_t;
 
 /* SPR access micro-ops generations callbacks */
 struct ppc_spr_t {
@@ -305,14 +306,6 @@ union ppc_tlb_t {
 #define TLB_MAS                3
 #endif
 
-#define SDR_32_HTABORG         0xFFFF0000UL
-#define SDR_32_HTABMASK        0x000001FFUL
-
-#if defined(TARGET_PPC64)
-#define SDR_64_HTABORG         0xFFFFFFFFFFFC0000ULL
-#define SDR_64_HTABSIZE        0x000000000000001FULL
-#endif /* defined(TARGET_PPC64 */
-
 typedef struct ppc_slb_t ppc_slb_t;
 struct ppc_slb_t {
     uint64_t esid;
@@ -965,6 +958,8 @@ struct CPUPPCState {
     target_ulong so;
     target_ulong ov;
     target_ulong ca;
+    target_ulong ov32;
+    target_ulong ca32;
     /* Reservation address */
     target_ulong reserve_addr;
     /* Reservation value */
@@ -1005,12 +1000,7 @@ struct CPUPPCState {
     /* tcg TLB needs flush (deferred slb inval instruction typically) */
 #endif
     /* segment registers */
-    hwaddr htab_base;
-    /* mask used to normalize hash value to PTEG index */
-    hwaddr htab_mask;
     target_ulong sr[32];
-    /* externally stored hash table */
-    uint8_t *external_htab;
     /* BATs */
     uint32_t nb_BATs;
     target_ulong DBAT[2][8];
@@ -1218,6 +1208,14 @@ struct PPCVirtualHypervisor {
 struct PPCVirtualHypervisorClass {
     InterfaceClass parent;
     void (*hypercall)(PPCVirtualHypervisor *vhyp, PowerPCCPU *cpu);
+    hwaddr (*hpt_mask)(PPCVirtualHypervisor *vhyp);
+    const ppc_hash_pte64_t *(*map_hptes)(PPCVirtualHypervisor *vhyp,
+                                         hwaddr ptex, int n);
+    void (*unmap_hptes)(PPCVirtualHypervisor *vhyp,
+                        const ppc_hash_pte64_t *hptes,
+                        hwaddr ptex, int n);
+    void (*store_hpte)(PPCVirtualHypervisor *vhyp, hwaddr ptex,
+                       uint64_t pte0, uint64_t pte1);
 };
 
 #define TYPE_PPC_VIRTUAL_HYPERVISOR "ppc-virtual-hypervisor"
@@ -1243,6 +1241,8 @@ int ppc_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg);
 int ppc_cpu_gdb_write_register_apple(CPUState *cpu, uint8_t *buf, int reg);
 int ppc64_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs,
                                int cpuid, void *opaque);
+int ppc32_cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState *cs,
+                               int cpuid, void *opaque);
 #ifndef CONFIG_USER_ONLY
 void ppc_cpu_do_system_reset(CPUState *cs);
 extern const struct VMStateDescription vmstate_ppc_cpu;
@@ -1300,8 +1300,7 @@ void store_booke_tcr (CPUPPCState *env, target_ulong val);
 void store_booke_tsr (CPUPPCState *env, target_ulong val);
 void ppc_tlb_invalidate_all (CPUPPCState *env);
 void ppc_tlb_invalidate_one (CPUPPCState *env, target_ulong addr);
-void cpu_ppc_set_vhyp(PowerPCCPU *cpu, PPCVirtualHypervisor *vhyp);
-void cpu_ppc_set_papr(PowerPCCPU *cpu);
+void cpu_ppc_set_papr(PowerPCCPU *cpu, PPCVirtualHypervisor *vhyp);
 #endif
 #endif
 
@@ -1372,11 +1371,15 @@ int ppc_compat_max_threads(PowerPCCPU *cpu);
 #define XER_SO  31
 #define XER_OV  30
 #define XER_CA  29
+#define XER_OV32  19
+#define XER_CA32  18
 #define XER_CMP  8
 #define XER_BC   0
 #define xer_so  (env->so)
 #define xer_ov  (env->ov)
 #define xer_ca  (env->ca)
+#define xer_ov32  (env->ov)
+#define xer_ca32  (env->ca)
 #define xer_cmp ((env->xer >> XER_CMP) & 0xFF)
 #define xer_bc  ((env->xer >> XER_BC)  & 0x7F)
 
@@ -2343,18 +2346,9 @@ enum {
 
 /*****************************************************************************/
 
-static inline target_ulong cpu_read_xer(CPUPPCState *env)
-{
-    return env->xer | (env->so << XER_SO) | (env->ov << XER_OV) | (env->ca << XER_CA);
-}
-
-static inline void cpu_write_xer(CPUPPCState *env, target_ulong xer)
-{
-    env->so = (xer >> XER_SO) & 1;
-    env->ov = (xer >> XER_OV) & 1;
-    env->ca = (xer >> XER_CA) & 1;
-    env->xer = xer & ~((1u << XER_SO) | (1u << XER_OV) | (1u << XER_CA));
-}
+#define is_isa300(ctx) (!!(ctx->insns_flags2 & PPC2_ISA300))
+target_ulong cpu_read_xer(CPUPPCState *env);
+void cpu_write_xer(CPUPPCState *env, target_ulong xer);
 
 static inline void cpu_get_tb_cpu_state(CPUPPCState *env, target_ulong *pc,
                                         target_ulong *cs_base, uint32_t *flags)
diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c
index dd0a8929b3..da4e1a62c9 100644
--- a/target/ppc/int_helper.c
+++ b/target/ppc/int_helper.c
@@ -28,6 +28,15 @@
 /*****************************************************************************/
 /* Fixed point operations helpers */
 
+static inline void helper_update_ov_legacy(CPUPPCState *env, int ov)
+{
+    if (unlikely(ov)) {
+        env->so = env->ov = 1;
+    } else {
+        env->ov = 0;
+    }
+}
+
 target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
                            uint32_t oe)
 {
@@ -49,11 +58,7 @@ target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
     }
 
     if (oe) {
-        if (unlikely(overflow)) {
-            env->so = env->ov = 1;
-        } else {
-            env->ov = 0;
-        }
+        helper_update_ov_legacy(env, overflow);
     }
 
     return (target_ulong)rt;
@@ -81,11 +86,7 @@ target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
     }
 
     if (oe) {
-        if (unlikely(overflow)) {
-            env->so = env->ov = 1;
-        } else {
-            env->ov = 0;
-        }
+        helper_update_ov_legacy(env, overflow);
     }
 
     return (target_ulong)rt;
@@ -105,11 +106,7 @@ uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
     }
 
     if (oe) {
-        if (unlikely(overflow)) {
-            env->so = env->ov = 1;
-        } else {
-            env->ov = 0;
-        }
+        helper_update_ov_legacy(env, overflow);
     }
 
     return rt;
@@ -127,12 +124,7 @@ uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
     }
 
     if (oe) {
-
-        if (unlikely(overflow)) {
-            env->so = env->ov = 1;
-        } else {
-            env->ov = 0;
-        }
+        helper_update_ov_legacy(env, overflow);
     }
 
     return rt;
diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
index 52bbea514a..acc40ece65 100644
--- a/target/ppc/kvm.c
+++ b/target/ppc/kvm.c
@@ -1251,7 +1251,7 @@ static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
         return ret;
     }
 
-    if (!env->external_htab) {
+    if (!cpu->vhyp) {
         ppc_store_sdr1(env, sregs.u.s.sdr1);
     }
 
@@ -2596,89 +2596,85 @@ void kvm_arch_init_irq_routing(KVMState *s)
 {
 }
 
-struct kvm_get_htab_buf {
-    struct kvm_get_htab_header header;
-    /*
-     * We require one extra byte for read
-     */
-    target_ulong hpte[(HPTES_PER_GROUP * 2) + 1];
-};
-
-uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu, target_ulong pte_index)
+void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n)
 {
-    int htab_fd;
-    struct kvm_get_htab_fd ghf;
-    struct kvm_get_htab_buf  *hpte_buf;
+    struct kvm_get_htab_fd ghf = {
+        .flags = 0,
+        .start_index = ptex,
+    };
+    int fd, rc;
+    int i;
 
-    ghf.flags = 0;
-    ghf.start_index = pte_index;
-    htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
-    if (htab_fd < 0) {
-        goto error_out;
+    fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
+    if (fd < 0) {
+        hw_error("kvmppc_read_hptes: Unable to open HPT fd");
     }
 
-    hpte_buf = g_malloc0(sizeof(*hpte_buf));
-    /*
-     * Read the hpte group
-     */
-    if (read(htab_fd, hpte_buf, sizeof(*hpte_buf)) < 0) {
-        goto out_close;
-    }
+    i = 0;
+    while (i < n) {
+        struct kvm_get_htab_header *hdr;
+        int m = n < HPTES_PER_GROUP ? n : HPTES_PER_GROUP;
+        char buf[sizeof(*hdr) + m * HASH_PTE_SIZE_64];
 
-    close(htab_fd);
-    return (uint64_t)(uintptr_t) hpte_buf->hpte;
+        rc = read(fd, buf, sizeof(buf));
+        if (rc < 0) {
+            hw_error("kvmppc_read_hptes: Unable to read HPTEs");
+        }
 
-out_close:
-    g_free(hpte_buf);
-    close(htab_fd);
-error_out:
-    return 0;
-}
+        hdr = (struct kvm_get_htab_header *)buf;
+        while ((i < n) && ((char *)hdr < (buf + rc))) {
+            int invalid = hdr->n_invalid;
 
-void kvmppc_hash64_free_pteg(uint64_t token)
-{
-    struct kvm_get_htab_buf *htab_buf;
+            if (hdr->index != (ptex + i)) {
+                hw_error("kvmppc_read_hptes: Unexpected HPTE index %"PRIu32
+                         " != (%"HWADDR_PRIu" + %d", hdr->index, ptex, i);
+            }
+
+            memcpy(hptes + i, hdr + 1, HASH_PTE_SIZE_64 * hdr->n_valid);
+            i += hdr->n_valid;
 
-    htab_buf = container_of((void *)(uintptr_t) token, struct kvm_get_htab_buf,
-                            hpte);
-    g_free(htab_buf);
-    return;
+            if ((n - i) < invalid) {
+                invalid = n - i;
+            }
+            memset(hptes + i, 0, invalid * HASH_PTE_SIZE_64);
+            i += hdr->n_invalid;
+
+            hdr = (struct kvm_get_htab_header *)
+                ((char *)(hdr + 1) + HASH_PTE_SIZE_64 * hdr->n_valid);
+        }
+    }
+
+    close(fd);
 }
 
-void kvmppc_hash64_write_pte(CPUPPCState *env, target_ulong pte_index,
-                             target_ulong pte0, target_ulong pte1)
+void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1)
 {
-    int htab_fd;
+    int fd, rc;
     struct kvm_get_htab_fd ghf;
-    struct kvm_get_htab_buf hpte_buf;
+    struct {
+        struct kvm_get_htab_header hdr;
+        uint64_t pte0;
+        uint64_t pte1;
+    } buf;
 
     ghf.flags = 0;
     ghf.start_index = 0;     /* Ignored */
-    htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
-    if (htab_fd < 0) {
-        goto error_out;
-    }
-
-    hpte_buf.header.n_valid = 1;
-    hpte_buf.header.n_invalid = 0;
-    hpte_buf.header.index = pte_index;
-    hpte_buf.hpte[0] = pte0;
-    hpte_buf.hpte[1] = pte1;
-    /*
-     * Write the hpte entry.
-     * CAUTION: write() has the warn_unused_result attribute. Hence we
-     * need to check the return value, even though we do nothing.
-     */
-    if (write(htab_fd, &hpte_buf, sizeof(hpte_buf)) < 0) {
-        goto out_close;
+    fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
+    if (fd < 0) {
+        hw_error("kvmppc_write_hpte: Unable to open HPT fd");
     }
 
-out_close:
-    close(htab_fd);
-    return;
+    buf.hdr.n_valid = 1;
+    buf.hdr.n_invalid = 0;
+    buf.hdr.index = ptex;
+    buf.pte0 = cpu_to_be64(pte0);
+    buf.pte1 = cpu_to_be64(pte1);
 
-error_out:
-    return;
+    rc = write(fd, &buf, sizeof(buf));
+    if (rc != sizeof(buf)) {
+        hw_error("kvmppc_write_hpte: Unable to update KVM HPT");
+    }
+    close(fd);
 }
 
 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h
index 8da2ee418a..8e9f42d0c6 100644
--- a/target/ppc/kvm_ppc.h
+++ b/target/ppc/kvm_ppc.h
@@ -49,11 +49,8 @@ int kvmppc_get_htab_fd(bool write);
 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns);
 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
                            uint16_t n_valid, uint16_t n_invalid);
-uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu, target_ulong pte_index);
-void kvmppc_hash64_free_pteg(uint64_t token);
-
-void kvmppc_hash64_write_pte(CPUPPCState *env, target_ulong pte_index,
-                             target_ulong pte0, target_ulong pte1);
+void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n);
+void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1);
 bool kvmppc_has_cap_fixup_hcalls(void);
 bool kvmppc_has_cap_htm(void);
 int kvmppc_enable_hwrng(void);
@@ -234,20 +231,13 @@ static inline int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
     abort();
 }
 
-static inline uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu,
-                                               target_ulong pte_index)
-{
-    abort();
-}
-
-static inline void kvmppc_hash64_free_pteg(uint64_t token)
+static inline void kvmppc_read_hptes(ppc_hash_pte64_t *hptes,
+                                     hwaddr ptex, int n)
 {
     abort();
 }
 
-static inline void kvmppc_hash64_write_pte(CPUPPCState *env,
-                                           target_ulong pte_index,
-                                           target_ulong pte0, target_ulong pte1)
+static inline void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1)
 {
     abort();
 }
diff --git a/target/ppc/machine.c b/target/ppc/machine.c
index df9f7a4e05..6cb3a48db1 100644
--- a/target/ppc/machine.c
+++ b/target/ppc/machine.c
@@ -76,7 +76,7 @@ static int cpu_load_old(QEMUFile *f, void *opaque, int version_id)
         qemu_get_betls(f, &env->pb[i]);
     for (i = 0; i < 1024; i++)
         qemu_get_betls(f, &env->spr[i]);
-    if (!env->external_htab) {
+    if (!cpu->vhyp) {
         ppc_store_sdr1(env, sdr1);
     }
     qemu_get_be32s(f, &env->vscr);
@@ -228,8 +228,7 @@ static int cpu_post_load(void *opaque, int version_id)
         env->IBAT[1][i+4] = env->spr[SPR_IBAT4U + 2*i + 1];
     }
 
-    if (!env->external_htab) {
-        /* Restore htab_base and htab_mask variables */
+    if (!cpu->vhyp) {
         ppc_store_sdr1(env, env->spr[SPR_SDR1]);
     }
 
diff --git a/target/ppc/misc_helper.c b/target/ppc/misc_helper.c
index ab432bafaf..fa573dd7d2 100644
--- a/target/ppc/misc_helper.c
+++ b/target/ppc/misc_helper.c
@@ -82,11 +82,9 @@ void helper_store_sdr1(CPUPPCState *env, target_ulong val)
 {
     PowerPCCPU *cpu = ppc_env_get_cpu(env);
 
-    if (!env->external_htab) {
-        if (env->spr[SPR_SDR1] != val) {
-            ppc_store_sdr1(env, val);
-            tlb_flush(CPU(cpu));
-        }
+    if (env->spr[SPR_SDR1] != val) {
+        ppc_store_sdr1(env, val);
+        tlb_flush(CPU(cpu));
     }
 }
 
diff --git a/target/ppc/mmu-hash32.c b/target/ppc/mmu-hash32.c
index 29bace622a..03ae3c1279 100644
--- a/target/ppc/mmu-hash32.c
+++ b/target/ppc/mmu-hash32.c
@@ -304,9 +304,9 @@ static int ppc_hash32_direct_store(PowerPCCPU *cpu, target_ulong sr,
 
 hwaddr get_pteg_offset32(PowerPCCPU *cpu, hwaddr hash)
 {
-    CPUPPCState *env = &cpu->env;
+    target_ulong mask = ppc_hash32_hpt_mask(cpu);
 
-    return (hash * HASH_PTEG_SIZE_32) & env->htab_mask;
+    return (hash * HASH_PTEG_SIZE_32) & mask;
 }
 
 static hwaddr ppc_hash32_pteg_search(PowerPCCPU *cpu, hwaddr pteg_off,
@@ -339,7 +339,6 @@ static hwaddr ppc_hash32_htab_lookup(PowerPCCPU *cpu,
                                      target_ulong sr, target_ulong eaddr,
                                      ppc_hash_pte32_t *pte)
 {
-    CPUPPCState *env = &cpu->env;
     hwaddr pteg_off, pte_offset;
     hwaddr hash;
     uint32_t vsid, pgidx, ptem;
@@ -353,21 +352,22 @@ static hwaddr ppc_hash32_htab_lookup(PowerPCCPU *cpu,
     qemu_log_mask(CPU_LOG_MMU, "htab_base " TARGET_FMT_plx
             " htab_mask " TARGET_FMT_plx
             " hash " TARGET_FMT_plx "\n",
-            env->htab_base, env->htab_mask, hash);
+            ppc_hash32_hpt_base(cpu), ppc_hash32_hpt_mask(cpu), hash);
 
     /* Primary PTEG lookup */
     qemu_log_mask(CPU_LOG_MMU, "0 htab=" TARGET_FMT_plx "/" TARGET_FMT_plx
             " vsid=%" PRIx32 " ptem=%" PRIx32
             " hash=" TARGET_FMT_plx "\n",
-            env->htab_base, env->htab_mask, vsid, ptem, hash);
+            ppc_hash32_hpt_base(cpu), ppc_hash32_hpt_mask(cpu),
+            vsid, ptem, hash);
     pteg_off = get_pteg_offset32(cpu, hash);
     pte_offset = ppc_hash32_pteg_search(cpu, pteg_off, 0, ptem, pte);
     if (pte_offset == -1) {
         /* Secondary PTEG lookup */
         qemu_log_mask(CPU_LOG_MMU, "1 htab=" TARGET_FMT_plx "/" TARGET_FMT_plx
                 " vsid=%" PRIx32 " api=%" PRIx32
-                " hash=" TARGET_FMT_plx "\n", env->htab_base,
-                env->htab_mask, vsid, ptem, ~hash);
+                " hash=" TARGET_FMT_plx "\n", ppc_hash32_hpt_base(cpu),
+                ppc_hash32_hpt_mask(cpu), vsid, ptem, ~hash);
         pteg_off = get_pteg_offset32(cpu, ~hash);
         pte_offset = ppc_hash32_pteg_search(cpu, pteg_off, 1, ptem, pte);
     }
diff --git a/target/ppc/mmu-hash32.h b/target/ppc/mmu-hash32.h
index 5b9fb08d1a..898021f0d8 100644
--- a/target/ppc/mmu-hash32.h
+++ b/target/ppc/mmu-hash32.h
@@ -44,6 +44,8 @@ int ppc_hash32_handle_mmu_fault(PowerPCCPU *cpu, vaddr address, int rw,
 /*
  * Hash page table definitions
  */
+#define SDR_32_HTABORG         0xFFFF0000UL
+#define SDR_32_HTABMASK        0x000001FFUL
 
 #define HPTES_PER_GROUP         8
 #define HASH_PTE_SIZE_32        8
@@ -65,42 +67,46 @@ int ppc_hash32_handle_mmu_fault(PowerPCCPU *cpu, vaddr address, int rw,
 #define HPTE32_R_WIMG           0x00000078
 #define HPTE32_R_PP             0x00000003
 
+static inline hwaddr ppc_hash32_hpt_base(PowerPCCPU *cpu)
+{
+    return cpu->env.spr[SPR_SDR1] & SDR_32_HTABORG;
+}
+
+static inline hwaddr ppc_hash32_hpt_mask(PowerPCCPU *cpu)
+{
+    return ((cpu->env.spr[SPR_SDR1] & SDR_32_HTABMASK) << 16) | 0xFFFF;
+}
+
 static inline target_ulong ppc_hash32_load_hpte0(PowerPCCPU *cpu,
                                                  hwaddr pte_offset)
 {
-    CPUPPCState *env = &cpu->env;
+    target_ulong base = ppc_hash32_hpt_base(cpu);
 
-    assert(!env->external_htab); /* Not supported on 32-bit for now */
-    return ldl_phys(CPU(cpu)->as, env->htab_base + pte_offset);
+    return ldl_phys(CPU(cpu)->as, base + pte_offset);
 }
 
 static inline target_ulong ppc_hash32_load_hpte1(PowerPCCPU *cpu,
                                                  hwaddr pte_offset)
 {
-    CPUPPCState *env = &cpu->env;
+    target_ulong base = ppc_hash32_hpt_base(cpu);
 
-    assert(!env->external_htab); /* Not supported on 32-bit for now */
-    return ldl_phys(CPU(cpu)->as,
-                    env->htab_base + pte_offset + HASH_PTE_SIZE_32 / 2);
+    return ldl_phys(CPU(cpu)->as, base + pte_offset + HASH_PTE_SIZE_32 / 2);
 }
 
 static inline void ppc_hash32_store_hpte0(PowerPCCPU *cpu,
                                           hwaddr pte_offset, target_ulong pte0)
 {
-    CPUPPCState *env = &cpu->env;
+    target_ulong base = ppc_hash32_hpt_base(cpu);
 
-    assert(!env->external_htab); /* Not supported on 32-bit for now */
-    stl_phys(CPU(cpu)->as, env->htab_base + pte_offset, pte0);
+    stl_phys(CPU(cpu)->as, base + pte_offset, pte0);
 }
 
 static inline void ppc_hash32_store_hpte1(PowerPCCPU *cpu,
                                           hwaddr pte_offset, target_ulong pte1)
 {
-    CPUPPCState *env = &cpu->env;
+    target_ulong base = ppc_hash32_hpt_base(cpu);
 
-    assert(!env->external_htab); /* Not supported on 32-bit for now */
-    stl_phys(CPU(cpu)->as,
-             env->htab_base + pte_offset + HASH_PTE_SIZE_32 / 2, pte1);
+    stl_phys(CPU(cpu)->as, base + pte_offset + HASH_PTE_SIZE_32 / 2, pte1);
 }
 
 typedef struct {
diff --git a/target/ppc/mmu-hash64.c b/target/ppc/mmu-hash64.c
index 76669ed82c..d44f2bb432 100644
--- a/target/ppc/mmu-hash64.c
+++ b/target/ppc/mmu-hash64.c
@@ -27,6 +27,7 @@
 #include "kvm_ppc.h"
 #include "mmu-hash64.h"
 #include "exec/log.h"
+#include "hw/hw.h"
 
 //#define DEBUG_SLB
 
@@ -37,12 +38,6 @@
 #endif
 
 /*
- * Used to indicate that a CPU has its hash page table (HPT) managed
- * within the host kernel
- */
-#define MMU_HASH64_KVM_MANAGED_HPT      ((void *)-1)
-
-/*
  * SLB handling
  */
 
@@ -294,55 +289,6 @@ target_ulong helper_load_slb_vsid(CPUPPCState *env, target_ulong rb)
     return rt;
 }
 
-/*
- * 64-bit hash table MMU handling
- */
-void ppc_hash64_set_sdr1(PowerPCCPU *cpu, target_ulong value,
-                         Error **errp)
-{
-    CPUPPCState *env = &cpu->env;
-    target_ulong htabsize = value & SDR_64_HTABSIZE;
-
-    env->spr[SPR_SDR1] = value;
-    if (htabsize > 28) {
-        error_setg(errp,
-                   "Invalid HTABSIZE 0x" TARGET_FMT_lx" stored in SDR1",
-                   htabsize);
-        htabsize = 28;
-    }
-    env->htab_mask = (1ULL << (htabsize + 18 - 7)) - 1;
-    env->htab_base = value & SDR_64_HTABORG;
-}
-
-void ppc_hash64_set_external_hpt(PowerPCCPU *cpu, void *hpt, int shift,
-                                 Error **errp)
-{
-    CPUPPCState *env = &cpu->env;
-    Error *local_err = NULL;
-
-    if (hpt) {
-        env->external_htab = hpt;
-    } else {
-        env->external_htab = MMU_HASH64_KVM_MANAGED_HPT;
-    }
-    ppc_hash64_set_sdr1(cpu, (target_ulong)(uintptr_t)hpt | (shift - 18),
-                        &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        return;
-    }
-
-    /* Not strictly necessary, but makes it clearer that an external
-     * htab is in use when debugging */
-    env->htab_base = -1;
-
-    if (kvm_enabled()) {
-        if (kvmppc_put_books_sregs(cpu) < 0) {
-            error_setg(errp, "Unable to update SDR1 in KVM");
-        }
-    }
-}
-
 static int ppc_hash64_pte_prot(PowerPCCPU *cpu,
                                ppc_slb_t *slb, ppc_hash_pte64_t pte)
 {
@@ -431,34 +377,43 @@ static int ppc_hash64_amr_prot(PowerPCCPU *cpu, ppc_hash_pte64_t pte)
     return prot;
 }
 
-uint64_t ppc_hash64_start_access(PowerPCCPU *cpu, target_ulong pte_index)
+const ppc_hash_pte64_t *ppc_hash64_map_hptes(PowerPCCPU *cpu,
+                                             hwaddr ptex, int n)
 {
-    uint64_t token = 0;
-    hwaddr pte_offset;
+    hwaddr pte_offset = ptex * HASH_PTE_SIZE_64;
+    hwaddr base = ppc_hash64_hpt_base(cpu);
+    hwaddr plen = n * HASH_PTE_SIZE_64;
+    const ppc_hash_pte64_t *hptes;
+
+    if (cpu->vhyp) {
+        PPCVirtualHypervisorClass *vhc =
+            PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
+        return vhc->map_hptes(cpu->vhyp, ptex, n);
+    }
 
-    pte_offset = pte_index * HASH_PTE_SIZE_64;
-    if (cpu->env.external_htab == MMU_HASH64_KVM_MANAGED_HPT) {
-        /*
-         * HTAB is controlled by KVM. Fetch the PTEG into a new buffer.
-         */
-        token = kvmppc_hash64_read_pteg(cpu, pte_index);
-    } else if (cpu->env.external_htab) {
-        /*
-         * HTAB is controlled by QEMU. Just point to the internally
-         * accessible PTEG.
-         */
-        token = (uint64_t)(uintptr_t) cpu->env.external_htab + pte_offset;
-    } else if (cpu->env.htab_base) {
-        token = cpu->env.htab_base + pte_offset;
+    if (!base) {
+        return NULL;
     }
-    return token;
+
+    hptes = address_space_map(CPU(cpu)->as, base + pte_offset, &plen, false);
+    if (plen < (n * HASH_PTE_SIZE_64)) {
+        hw_error("%s: Unable to map all requested HPTEs\n", __func__);
+    }
+    return hptes;
 }
 
-void ppc_hash64_stop_access(PowerPCCPU *cpu, uint64_t token)
+void ppc_hash64_unmap_hptes(PowerPCCPU *cpu, const ppc_hash_pte64_t *hptes,
+                            hwaddr ptex, int n)
 {
-    if (cpu->env.external_htab == MMU_HASH64_KVM_MANAGED_HPT) {
-        kvmppc_hash64_free_pteg(token);
+    if (cpu->vhyp) {
+        PPCVirtualHypervisorClass *vhc =
+            PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
+        vhc->unmap_hptes(cpu->vhyp, hptes, ptex, n);
+        return;
     }
+
+    address_space_unmap(CPU(cpu)->as, (void *)hptes, n * HASH_PTE_SIZE_64,
+                        false, n * HASH_PTE_SIZE_64);
 }
 
 static unsigned hpte_page_shift(const struct ppc_one_seg_page_size *sps,
@@ -503,20 +458,19 @@ static hwaddr ppc_hash64_pteg_search(PowerPCCPU *cpu, hwaddr hash,
                                      target_ulong ptem,
                                      ppc_hash_pte64_t *pte, unsigned *pshift)
 {
-    CPUPPCState *env = &cpu->env;
     int i;
-    uint64_t token;
+    const ppc_hash_pte64_t *pteg;
     target_ulong pte0, pte1;
-    target_ulong pte_index;
+    target_ulong ptex;
 
-    pte_index = (hash & env->htab_mask) * HPTES_PER_GROUP;
-    token = ppc_hash64_start_access(cpu, pte_index);
-    if (!token) {
+    ptex = (hash & ppc_hash64_hpt_mask(cpu)) * HPTES_PER_GROUP;
+    pteg = ppc_hash64_map_hptes(cpu, ptex, HPTES_PER_GROUP);
+    if (!pteg) {
         return -1;
     }
     for (i = 0; i < HPTES_PER_GROUP; i++) {
-        pte0 = ppc_hash64_load_hpte0(cpu, token, i);
-        pte1 = ppc_hash64_load_hpte1(cpu, token, i);
+        pte0 = ppc_hash64_hpte0(cpu, pteg, i);
+        pte1 = ppc_hash64_hpte1(cpu, pteg, i);
 
         /* This compares V, B, H (secondary) and the AVPN */
         if (HPTE64_V_COMPARE(pte0, ptem)) {
@@ -536,11 +490,11 @@ static hwaddr ppc_hash64_pteg_search(PowerPCCPU *cpu, hwaddr hash,
              */
             pte->pte0 = pte0;
             pte->pte1 = pte1;
-            ppc_hash64_stop_access(cpu, token);
-            return (pte_index + i) * HASH_PTE_SIZE_64;
+            ppc_hash64_unmap_hptes(cpu, pteg, ptex, HPTES_PER_GROUP);
+            return ptex + i;
         }
     }
-    ppc_hash64_stop_access(cpu, token);
+    ppc_hash64_unmap_hptes(cpu, pteg, ptex, HPTES_PER_GROUP);
     /*
      * We didn't find a valid entry.
      */
@@ -552,8 +506,7 @@ static hwaddr ppc_hash64_htab_lookup(PowerPCCPU *cpu,
                                      ppc_hash_pte64_t *pte, unsigned *pshift)
 {
     CPUPPCState *env = &cpu->env;
-    hwaddr pte_offset;
-    hwaddr hash;
+    hwaddr hash, ptex;
     uint64_t vsid, epnmask, epn, ptem;
     const struct ppc_one_seg_page_size *sps = slb->sps;
 
@@ -588,29 +541,30 @@ static hwaddr ppc_hash64_htab_lookup(PowerPCCPU *cpu,
     qemu_log_mask(CPU_LOG_MMU,
             "htab_base " TARGET_FMT_plx " htab_mask " TARGET_FMT_plx
             " hash " TARGET_FMT_plx "\n",
-            env->htab_base, env->htab_mask, hash);
+            ppc_hash64_hpt_base(cpu), ppc_hash64_hpt_mask(cpu), hash);
 
     /* Primary PTEG lookup */
     qemu_log_mask(CPU_LOG_MMU,
             "0 htab=" TARGET_FMT_plx "/" TARGET_FMT_plx
             " vsid=" TARGET_FMT_lx " ptem=" TARGET_FMT_lx
             " hash=" TARGET_FMT_plx "\n",
-            env->htab_base, env->htab_mask, vsid, ptem,  hash);
-    pte_offset = ppc_hash64_pteg_search(cpu, hash, sps, ptem, pte, pshift);
+            ppc_hash64_hpt_base(cpu), ppc_hash64_hpt_mask(cpu),
+            vsid, ptem,  hash);
+    ptex = ppc_hash64_pteg_search(cpu, hash, sps, ptem, pte, pshift);
 
-    if (pte_offset == -1) {
+    if (ptex == -1) {
         /* Secondary PTEG lookup */
         ptem |= HPTE64_V_SECONDARY;
         qemu_log_mask(CPU_LOG_MMU,
                 "1 htab=" TARGET_FMT_plx "/" TARGET_FMT_plx
                 " vsid=" TARGET_FMT_lx " api=" TARGET_FMT_lx
-                " hash=" TARGET_FMT_plx "\n", env->htab_base,
-                env->htab_mask, vsid, ptem, ~hash);
+                " hash=" TARGET_FMT_plx "\n", ppc_hash64_hpt_base(cpu),
+                ppc_hash64_hpt_mask(cpu), vsid, ptem, ~hash);
 
-        pte_offset = ppc_hash64_pteg_search(cpu, ~hash, sps, ptem, pte, pshift);
+        ptex = ppc_hash64_pteg_search(cpu, ~hash, sps, ptem, pte, pshift);
     }
 
-    return pte_offset;
+    return ptex;
 }
 
 unsigned ppc_hash64_hpte_page_shift_noslb(PowerPCCPU *cpu,
@@ -708,7 +662,7 @@ int ppc_hash64_handle_mmu_fault(PowerPCCPU *cpu, vaddr eaddr,
     CPUPPCState *env = &cpu->env;
     ppc_slb_t *slb;
     unsigned apshift;
-    hwaddr pte_offset;
+    hwaddr ptex;
     ppc_hash_pte64_t pte;
     int pp_prot, amr_prot, prot;
     uint64_t new_pte1, dsisr;
@@ -792,8 +746,8 @@ skip_slb_search:
     }
 
     /* 4. Locate the PTE in the hash table */
-    pte_offset = ppc_hash64_htab_lookup(cpu, slb, eaddr, &pte, &apshift);
-    if (pte_offset == -1) {
+    ptex = ppc_hash64_htab_lookup(cpu, slb, eaddr, &pte, &apshift);
+    if (ptex == -1) {
         dsisr = 0x40000000;
         if (rwx == 2) {
             ppc_hash64_set_isi(cs, env, dsisr);
@@ -806,7 +760,7 @@ skip_slb_search:
         return 1;
     }
     qemu_log_mask(CPU_LOG_MMU,
-                "found PTE at offset %08" HWADDR_PRIx "\n", pte_offset);
+                  "found PTE at index %08" HWADDR_PRIx "\n", ptex);
 
     /* 5. Check access permissions */
 
@@ -849,8 +803,7 @@ skip_slb_search:
     }
 
     if (new_pte1 != pte.pte1) {
-        ppc_hash64_store_hpte(cpu, pte_offset / HASH_PTE_SIZE_64,
-                              pte.pte0, new_pte1);
+        ppc_hash64_store_hpte(cpu, ptex, pte.pte0, new_pte1);
     }
 
     /* 7. Determine the real address from the PTE */
@@ -867,7 +820,7 @@ hwaddr ppc_hash64_get_phys_page_debug(PowerPCCPU *cpu, target_ulong addr)
 {
     CPUPPCState *env = &cpu->env;
     ppc_slb_t *slb;
-    hwaddr pte_offset, raddr;
+    hwaddr ptex, raddr;
     ppc_hash_pte64_t pte;
     unsigned apshift;
 
@@ -900,8 +853,8 @@ hwaddr ppc_hash64_get_phys_page_debug(PowerPCCPU *cpu, target_ulong addr)
         }
     }
 
-    pte_offset = ppc_hash64_htab_lookup(cpu, slb, addr, &pte, &apshift);
-    if (pte_offset == -1) {
+    ptex = ppc_hash64_htab_lookup(cpu, slb, addr, &pte, &apshift);
+    if (ptex == -1) {
         return -1;
     }
 
@@ -909,30 +862,24 @@ hwaddr ppc_hash64_get_phys_page_debug(PowerPCCPU *cpu, target_ulong addr)
         & TARGET_PAGE_MASK;
 }
 
-void ppc_hash64_store_hpte(PowerPCCPU *cpu,
-                           target_ulong pte_index,
-                           target_ulong pte0, target_ulong pte1)
+void ppc_hash64_store_hpte(PowerPCCPU *cpu, hwaddr ptex,
+                           uint64_t pte0, uint64_t pte1)
 {
-    CPUPPCState *env = &cpu->env;
+    hwaddr base = ppc_hash64_hpt_base(cpu);
+    hwaddr offset = ptex * HASH_PTE_SIZE_64;
 
-    if (env->external_htab == MMU_HASH64_KVM_MANAGED_HPT) {
-        kvmppc_hash64_write_pte(env, pte_index, pte0, pte1);
+    if (cpu->vhyp) {
+        PPCVirtualHypervisorClass *vhc =
+            PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
+        vhc->store_hpte(cpu->vhyp, ptex, pte0, pte1);
         return;
     }
 
-    pte_index *= HASH_PTE_SIZE_64;
-    if (env->external_htab) {
-        stq_p(env->external_htab + pte_index, pte0);
-        stq_p(env->external_htab + pte_index + HASH_PTE_SIZE_64 / 2, pte1);
-    } else {
-        stq_phys(CPU(cpu)->as, env->htab_base + pte_index, pte0);
-        stq_phys(CPU(cpu)->as,
-                 env->htab_base + pte_index + HASH_PTE_SIZE_64 / 2, pte1);
-    }
+    stq_phys(CPU(cpu)->as, base + offset, pte0);
+    stq_phys(CPU(cpu)->as, base + offset + HASH_PTE_SIZE_64 / 2, pte1);
 }
 
-void ppc_hash64_tlb_flush_hpte(PowerPCCPU *cpu,
-                               target_ulong pte_index,
+void ppc_hash64_tlb_flush_hpte(PowerPCCPU *cpu, target_ulong ptex,
                                target_ulong pte0, target_ulong pte1)
 {
     /*
diff --git a/target/ppc/mmu-hash64.h b/target/ppc/mmu-hash64.h
index 7a0b7fca41..54f1e37655 100644
--- a/target/ppc/mmu-hash64.h
+++ b/target/ppc/mmu-hash64.h
@@ -10,8 +10,8 @@ int ppc_store_slb(PowerPCCPU *cpu, target_ulong slot,
 hwaddr ppc_hash64_get_phys_page_debug(PowerPCCPU *cpu, target_ulong addr);
 int ppc_hash64_handle_mmu_fault(PowerPCCPU *cpu, vaddr address, int rw,
                                 int mmu_idx);
-void ppc_hash64_store_hpte(PowerPCCPU *cpu, target_ulong index,
-                           target_ulong pte0, target_ulong pte1);
+void ppc_hash64_store_hpte(PowerPCCPU *cpu, hwaddr ptex,
+                           uint64_t pte0, uint64_t pte1);
 void ppc_hash64_tlb_flush_hpte(PowerPCCPU *cpu,
                                target_ulong pte_index,
                                target_ulong pte0, target_ulong pte1);
@@ -56,6 +56,9 @@ void ppc_hash64_update_rmls(CPUPPCState *env);
  * Hash page table definitions
  */
 
+#define SDR_64_HTABORG         0x0FFFFFFFFFFC0000ULL
+#define SDR_64_HTABSIZE        0x000000000000001FULL
+
 #define HPTES_PER_GROUP         8
 #define HASH_PTE_SIZE_64        16
 #define HASH_PTEG_SIZE_64       (HASH_PTE_SIZE_64 * HPTES_PER_GROUP)
@@ -91,45 +94,41 @@ void ppc_hash64_update_rmls(CPUPPCState *env);
 #define HPTE64_V_1TB_SEG        0x4000000000000000ULL
 #define HPTE64_V_VRMA_MASK      0x4001ffffff000000ULL
 
-void ppc_hash64_set_sdr1(PowerPCCPU *cpu, target_ulong value,
-                         Error **errp);
-void ppc_hash64_set_external_hpt(PowerPCCPU *cpu, void *hpt, int shift,
-                                 Error **errp);
-
-uint64_t ppc_hash64_start_access(PowerPCCPU *cpu, target_ulong pte_index);
-void ppc_hash64_stop_access(PowerPCCPU *cpu, uint64_t token);
-
-static inline target_ulong ppc_hash64_load_hpte0(PowerPCCPU *cpu,
-                                                 uint64_t token, int index)
+static inline hwaddr ppc_hash64_hpt_base(PowerPCCPU *cpu)
 {
-    CPUPPCState *env = &cpu->env;
-    uint64_t addr;
-
-    addr = token + (index * HASH_PTE_SIZE_64);
-    if (env->external_htab) {
-        return  ldq_p((const void *)(uintptr_t)addr);
-    } else {
-        return ldq_phys(CPU(cpu)->as, addr);
-    }
+    return cpu->env.spr[SPR_SDR1] & SDR_64_HTABORG;
 }
 
-static inline target_ulong ppc_hash64_load_hpte1(PowerPCCPU *cpu,
-                                                 uint64_t token, int index)
+static inline hwaddr ppc_hash64_hpt_mask(PowerPCCPU *cpu)
 {
-    CPUPPCState *env = &cpu->env;
-    uint64_t addr;
-
-    addr = token + (index * HASH_PTE_SIZE_64) + HASH_PTE_SIZE_64/2;
-    if (env->external_htab) {
-        return  ldq_p((const void *)(uintptr_t)addr);
-    } else {
-        return ldq_phys(CPU(cpu)->as, addr);
+    if (cpu->vhyp) {
+        PPCVirtualHypervisorClass *vhc =
+            PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
+        return vhc->hpt_mask(cpu->vhyp);
     }
+    return (1ULL << ((cpu->env.spr[SPR_SDR1] & SDR_64_HTABSIZE) + 18 - 7)) - 1;
 }
 
-typedef struct {
+struct ppc_hash_pte64 {
     uint64_t pte0, pte1;
-} ppc_hash_pte64_t;
+};
+
+const ppc_hash_pte64_t *ppc_hash64_map_hptes(PowerPCCPU *cpu,
+                                             hwaddr ptex, int n);
+void ppc_hash64_unmap_hptes(PowerPCCPU *cpu, const ppc_hash_pte64_t *hptes,
+                            hwaddr ptex, int n);
+
+static inline uint64_t ppc_hash64_hpte0(PowerPCCPU *cpu,
+                                        const ppc_hash_pte64_t *hptes, int i)
+{
+    return ldq_p(&(hptes[i].pte0));
+}
+
+static inline uint64_t ppc_hash64_hpte1(PowerPCCPU *cpu,
+                                        const ppc_hash_pte64_t *hptes, int i)
+{
+    return ldq_p(&(hptes[i].pte1));
+}
 
 #endif /* CONFIG_USER_ONLY */
 
diff --git a/target/ppc/mmu_helper.c b/target/ppc/mmu_helper.c
index eb2d482ef7..a1af3d6bf2 100644
--- a/target/ppc/mmu_helper.c
+++ b/target/ppc/mmu_helper.c
@@ -28,6 +28,7 @@
 #include "exec/cpu_ldst.h"
 #include "exec/log.h"
 #include "helper_regs.h"
+#include "qemu/error-report.h"
 
 //#define DEBUG_MMU
 //#define DEBUG_BATS
@@ -466,6 +467,7 @@ static int get_bat_6xx_tlb(CPUPPCState *env, mmu_ctx_t *ctx,
 static inline int get_segment_6xx_tlb(CPUPPCState *env, mmu_ctx_t *ctx,
                                       target_ulong eaddr, int rw, int type)
 {
+    PowerPCCPU *cpu = ppc_env_get_cpu(env);
     hwaddr hash;
     target_ulong vsid;
     int ds, pr, target_page_bits;
@@ -503,7 +505,7 @@ static inline int get_segment_6xx_tlb(CPUPPCState *env, mmu_ctx_t *ctx,
             qemu_log_mask(CPU_LOG_MMU, "htab_base " TARGET_FMT_plx
                     " htab_mask " TARGET_FMT_plx
                     " hash " TARGET_FMT_plx "\n",
-                    env->htab_base, env->htab_mask, hash);
+                    ppc_hash32_hpt_base(cpu), ppc_hash32_hpt_mask(cpu), hash);
             ctx->hash[0] = hash;
             ctx->hash[1] = ~hash;
 
@@ -518,9 +520,11 @@ static inline int get_segment_6xx_tlb(CPUPPCState *env, mmu_ctx_t *ctx,
                 uint32_t a0, a1, a2, a3;
 
                 qemu_log("Page table: " TARGET_FMT_plx " len " TARGET_FMT_plx
-                         "\n", env->htab_base, env->htab_mask + 0x80);
-                for (curaddr = env->htab_base;
-                     curaddr < (env->htab_base + env->htab_mask + 0x80);
+                         "\n", ppc_hash32_hpt_base(cpu),
+                         ppc_hash32_hpt_mask(env) + 0x80);
+                for (curaddr = ppc_hash32_hpt_base(cpu);
+                     curaddr < (ppc_hash32_hpt_base(cpu)
+                                + ppc_hash32_hpt_mask(cpu) + 0x80);
                      curaddr += 16) {
                     a0 = ldl_phys(cs->as, curaddr);
                     a1 = ldl_phys(cs->as, curaddr + 4);
@@ -1205,12 +1209,13 @@ static void mmu6xx_dump_BATs(FILE *f, fprintf_function cpu_fprintf,
 static void mmu6xx_dump_mmu(FILE *f, fprintf_function cpu_fprintf,
                             CPUPPCState *env)
 {
+    PowerPCCPU *cpu = ppc_env_get_cpu(env);
     ppc6xx_tlb_t *tlb;
     target_ulong sr;
     int type, way, entry, i;
 
-    cpu_fprintf(f, "HTAB base = 0x%"HWADDR_PRIx"\n", env->htab_base);
-    cpu_fprintf(f, "HTAB mask = 0x%"HWADDR_PRIx"\n", env->htab_mask);
+    cpu_fprintf(f, "HTAB base = 0x%"HWADDR_PRIx"\n", ppc_hash32_hpt_base(cpu));
+    cpu_fprintf(f, "HTAB mask = 0x%"HWADDR_PRIx"\n", ppc_hash32_hpt_mask(cpu));
 
     cpu_fprintf(f, "\nSegment registers:\n");
     for (i = 0; i < 32; i++) {
@@ -1592,9 +1597,9 @@ static int cpu_ppc_handle_mmu_fault(CPUPPCState *env, target_ulong address,
                     env->spr[SPR_DCMP] = 0x80000000 | ctx.ptem;
                 tlb_miss:
                     env->error_code |= ctx.key << 19;
-                    env->spr[SPR_HASH1] = env->htab_base +
+                    env->spr[SPR_HASH1] = ppc_hash32_hpt_base(cpu) +
                         get_pteg_offset32(cpu, ctx.hash[0]);
-                    env->spr[SPR_HASH2] = env->htab_base +
+                    env->spr[SPR_HASH2] = ppc_hash32_hpt_base(cpu) +
                         get_pteg_offset32(cpu, ctx.hash[1]);
                     break;
                 case POWERPC_MMU_SOFT_74xx:
@@ -1997,26 +2002,28 @@ void ppc_tlb_invalidate_one(CPUPPCState *env, target_ulong addr)
 /* Special registers manipulation */
 void ppc_store_sdr1(CPUPPCState *env, target_ulong value)
 {
+    PowerPCCPU *cpu = ppc_env_get_cpu(env);
     qemu_log_mask(CPU_LOG_MMU, "%s: " TARGET_FMT_lx "\n", __func__, value);
-    assert(!env->external_htab);
-    env->spr[SPR_SDR1] = value;
+    assert(!cpu->vhyp);
 #if defined(TARGET_PPC64)
     if (env->mmu_model & POWERPC_MMU_64) {
-        PowerPCCPU *cpu = ppc_env_get_cpu(env);
-        Error *local_err = NULL;
+        target_ulong sdr_mask = SDR_64_HTABORG | SDR_64_HTABSIZE;
+        target_ulong htabsize = value & SDR_64_HTABSIZE;
 
-        ppc_hash64_set_sdr1(cpu, value, &local_err);
-        if (local_err) {
-            error_report_err(local_err);
-            error_free(local_err);
+        if (value & ~sdr_mask) {
+            error_report("Invalid bits 0x"TARGET_FMT_lx" set in SDR1",
+                         value & ~sdr_mask);
+            value &= sdr_mask;
+        }
+        if (htabsize > 28) {
+            error_report("Invalid HTABSIZE 0x" TARGET_FMT_lx" stored in SDR1",
+                         htabsize);
+            return;
         }
-    } else
-#endif /* defined(TARGET_PPC64) */
-    {
-        /* FIXME: Should check for valid HTABMASK values */
-        env->htab_mask = ((value & SDR_32_HTABMASK) << 16) | 0xFFFF;
-        env->htab_base = value & SDR_32_HTABORG;
     }
+#endif /* defined(TARGET_PPC64) */
+    /* FIXME: Should check for valid HTABMASK values in 32-bit case */
+    env->spr[SPR_SDR1] = value;
 }
 
 /* Segment registers load and store */
diff --git a/target/ppc/translate.c b/target/ppc/translate.c
index 3ba2616b8a..6e6868b7a0 100644
--- a/target/ppc/translate.c
+++ b/target/ppc/translate.c
@@ -71,7 +71,7 @@ static TCGv cpu_lr;
 #if defined(TARGET_PPC64)
 static TCGv cpu_cfar;
 #endif
-static TCGv cpu_xer, cpu_so, cpu_ov, cpu_ca;
+static TCGv cpu_xer, cpu_so, cpu_ov, cpu_ca, cpu_ov32, cpu_ca32;
 static TCGv cpu_reserve;
 static TCGv cpu_fpscr;
 static TCGv_i32 cpu_access_type;
@@ -173,6 +173,10 @@ void ppc_translate_init(void)
                                 offsetof(CPUPPCState, ov), "OV");
     cpu_ca = tcg_global_mem_new(cpu_env,
                                 offsetof(CPUPPCState, ca), "CA");
+    cpu_ov32 = tcg_global_mem_new(cpu_env,
+                                  offsetof(CPUPPCState, ov32), "OV32");
+    cpu_ca32 = tcg_global_mem_new(cpu_env,
+                                  offsetof(CPUPPCState, ca32), "CA32");
 
     cpu_reserve = tcg_global_mem_new(cpu_env,
                                      offsetof(CPUPPCState, reserve_addr),
@@ -806,12 +810,40 @@ static inline void gen_op_arith_compute_ov(DisasContext *ctx, TCGv arg0,
     }
     tcg_temp_free(t0);
     if (NARROW_MODE(ctx)) {
-        tcg_gen_ext32s_tl(cpu_ov, cpu_ov);
+        tcg_gen_extract_tl(cpu_ov, cpu_ov, 31, 1);
+        if (is_isa300(ctx)) {
+            tcg_gen_mov_tl(cpu_ov32, cpu_ov);
+        }
+    } else {
+        if (is_isa300(ctx)) {
+            tcg_gen_extract_tl(cpu_ov32, cpu_ov, 31, 1);
+        }
+        tcg_gen_extract_tl(cpu_ov, cpu_ov, 63, 1);
     }
-    tcg_gen_shri_tl(cpu_ov, cpu_ov, TARGET_LONG_BITS - 1);
     tcg_gen_or_tl(cpu_so, cpu_so, cpu_ov);
 }
 
+static inline void gen_op_arith_compute_ca32(DisasContext *ctx,
+                                             TCGv res, TCGv arg0, TCGv arg1,
+                                             int sub)
+{
+    TCGv t0;
+
+    if (!is_isa300(ctx)) {
+        return;
+    }
+
+    t0 = tcg_temp_new();
+    if (sub) {
+        tcg_gen_eqv_tl(t0, arg0, arg1);
+    } else {
+        tcg_gen_xor_tl(t0, arg0, arg1);
+    }
+    tcg_gen_xor_tl(t0, t0, res);
+    tcg_gen_extract_tl(cpu_ca32, t0, 32, 1);
+    tcg_temp_free(t0);
+}
+
 /* Common add function */
 static inline void gen_op_arith_add(DisasContext *ctx, TCGv ret, TCGv arg1,
                                     TCGv arg2, bool add_ca, bool compute_ca,
@@ -838,6 +870,9 @@ static inline void gen_op_arith_add(DisasContext *ctx, TCGv ret, TCGv arg1,
             tcg_temp_free(t1);
             tcg_gen_shri_tl(cpu_ca, cpu_ca, 32);   /* extract bit 32 */
             tcg_gen_andi_tl(cpu_ca, cpu_ca, 1);
+            if (is_isa300(ctx)) {
+                tcg_gen_mov_tl(cpu_ca32, cpu_ca);
+            }
         } else {
             TCGv zero = tcg_const_tl(0);
             if (add_ca) {
@@ -846,6 +881,7 @@ static inline void gen_op_arith_add(DisasContext *ctx, TCGv ret, TCGv arg1,
             } else {
                 tcg_gen_add2_tl(t0, cpu_ca, arg1, zero, arg2, zero);
             }
+            gen_op_arith_compute_ca32(ctx, t0, arg1, arg2, 0);
             tcg_temp_free(zero);
         }
     } else {
@@ -985,6 +1021,9 @@ static inline void gen_op_arith_divw(DisasContext *ctx, TCGv ret, TCGv arg1,
     }
     if (compute_ov) {
         tcg_gen_extu_i32_tl(cpu_ov, t2);
+        if (is_isa300(ctx)) {
+            tcg_gen_extu_i32_tl(cpu_ov32, t2);
+        }
         tcg_gen_or_tl(cpu_so, cpu_so, cpu_ov);
     }
     tcg_temp_free_i32(t0);
@@ -1056,6 +1095,9 @@ static inline void gen_op_arith_divd(DisasContext *ctx, TCGv ret, TCGv arg1,
     }
     if (compute_ov) {
         tcg_gen_mov_tl(cpu_ov, t2);
+        if (is_isa300(ctx)) {
+            tcg_gen_mov_tl(cpu_ov32, t2);
+        }
         tcg_gen_or_tl(cpu_so, cpu_so, cpu_ov);
     }
     tcg_temp_free_i64(t0);
@@ -1074,10 +1116,10 @@ static void glue(gen_, name)(DisasContext *ctx)
                       cpu_gpr[rA(ctx->opcode)], cpu_gpr[rB(ctx->opcode)],     \
                       sign, compute_ov);                                      \
 }
-/* divwu  divwu.  divwuo  divwuo.   */
+/* divdu  divdu.  divduo  divduo.   */
 GEN_INT_ARITH_DIVD(divdu, 0x0E, 0, 0);
 GEN_INT_ARITH_DIVD(divduo, 0x1E, 0, 1);
-/* divw  divw.  divwo  divwo.   */
+/* divd  divd.  divdo  divdo.   */
 GEN_INT_ARITH_DIVD(divd, 0x0F, 1, 0);
 GEN_INT_ARITH_DIVD(divdo, 0x1F, 1, 1);
 
@@ -1249,6 +1291,9 @@ static void gen_mullwo(DisasContext *ctx)
     tcg_gen_sari_i32(t0, t0, 31);
     tcg_gen_setcond_i32(TCG_COND_NE, t0, t0, t1);
     tcg_gen_extu_i32_tl(cpu_ov, t0);
+    if (is_isa300(ctx)) {
+        tcg_gen_mov_tl(cpu_ov32, cpu_ov);
+    }
     tcg_gen_or_tl(cpu_so, cpu_so, cpu_ov);
 
     tcg_temp_free_i32(t0);
@@ -1310,6 +1355,9 @@ static void gen_mulldo(DisasContext *ctx)
 
     tcg_gen_sari_i64(t0, t0, 63);
     tcg_gen_setcond_i64(TCG_COND_NE, cpu_ov, t0, t1);
+    if (is_isa300(ctx)) {
+        tcg_gen_mov_tl(cpu_ov32, cpu_ov);
+    }
     tcg_gen_or_tl(cpu_so, cpu_so, cpu_ov);
 
     tcg_temp_free_i64(t0);
@@ -1353,17 +1401,22 @@ static inline void gen_op_arith_subf(DisasContext *ctx, TCGv ret, TCGv arg1,
             tcg_temp_free(t1);
             tcg_gen_shri_tl(cpu_ca, cpu_ca, 32);    /* extract bit 32 */
             tcg_gen_andi_tl(cpu_ca, cpu_ca, 1);
+            if (is_isa300(ctx)) {
+                tcg_gen_mov_tl(cpu_ca32, cpu_ca);
+            }
         } else if (add_ca) {
             TCGv zero, inv1 = tcg_temp_new();
             tcg_gen_not_tl(inv1, arg1);
             zero = tcg_const_tl(0);
             tcg_gen_add2_tl(t0, cpu_ca, arg2, zero, cpu_ca, zero);
             tcg_gen_add2_tl(t0, cpu_ca, t0, cpu_ca, inv1, zero);
+            gen_op_arith_compute_ca32(ctx, t0, inv1, arg2, 0);
             tcg_temp_free(zero);
             tcg_temp_free(inv1);
         } else {
             tcg_gen_setcond_tl(TCG_COND_GEU, cpu_ca, arg2, arg1);
             tcg_gen_sub_tl(t0, arg2, arg1);
+            gen_op_arith_compute_ca32(ctx, t0, arg1, arg2, 1);
         }
     } else if (add_ca) {
         /* Since we're ignoring carry-out, we can simplify the
@@ -1442,7 +1495,10 @@ static inline void gen_op_arith_neg(DisasContext *ctx, bool compute_ov)
 
 static void gen_neg(DisasContext *ctx)
 {
-    gen_op_arith_neg(ctx, 0);
+    tcg_gen_neg_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)]);
+    if (unlikely(Rc(ctx->opcode))) {
+        gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]);
+    }
 }
 
 static void gen_nego(DisasContext *ctx)
@@ -3703,7 +3759,7 @@ static void gen_tdi(DisasContext *ctx)
 
 /***                          Processor control                            ***/
 
-static void gen_read_xer(TCGv dst)
+static void gen_read_xer(DisasContext *ctx, TCGv dst)
 {
     TCGv t0 = tcg_temp_new();
     TCGv t1 = tcg_temp_new();
@@ -3715,6 +3771,12 @@ static void gen_read_xer(TCGv dst)
     tcg_gen_or_tl(t0, t0, t1);
     tcg_gen_or_tl(dst, dst, t2);
     tcg_gen_or_tl(dst, dst, t0);
+    if (is_isa300(ctx)) {
+        tcg_gen_shli_tl(t0, cpu_ov32, XER_OV32);
+        tcg_gen_or_tl(dst, dst, t0);
+        tcg_gen_shli_tl(t0, cpu_ca32, XER_CA32);
+        tcg_gen_or_tl(dst, dst, t0);
+    }
     tcg_temp_free(t0);
     tcg_temp_free(t1);
     tcg_temp_free(t2);
@@ -3722,14 +3784,16 @@ static void gen_read_xer(TCGv dst)
 
 static void gen_write_xer(TCGv src)
 {
+    /* Write all flags, while reading back check for isa300 */
     tcg_gen_andi_tl(cpu_xer, src,
-                    ~((1u << XER_SO) | (1u << XER_OV) | (1u << XER_CA)));
-    tcg_gen_shri_tl(cpu_so, src, XER_SO);
-    tcg_gen_shri_tl(cpu_ov, src, XER_OV);
-    tcg_gen_shri_tl(cpu_ca, src, XER_CA);
-    tcg_gen_andi_tl(cpu_so, cpu_so, 1);
-    tcg_gen_andi_tl(cpu_ov, cpu_ov, 1);
-    tcg_gen_andi_tl(cpu_ca, cpu_ca, 1);
+                    ~((1u << XER_SO) |
+                      (1u << XER_OV) | (1u << XER_OV32) |
+                      (1u << XER_CA) | (1u << XER_CA32)));
+    tcg_gen_extract_tl(cpu_ov32, src, XER_OV32, 1);
+    tcg_gen_extract_tl(cpu_ca32, src, XER_CA32, 1);
+    tcg_gen_extract_tl(cpu_so, src, XER_SO, 1);
+    tcg_gen_extract_tl(cpu_ov, src, XER_OV, 1);
+    tcg_gen_extract_tl(cpu_ca, src, XER_CA, 1);
 }
 
 /* mcrxr */
@@ -3755,6 +3819,28 @@ static void gen_mcrxr(DisasContext *ctx)
     tcg_gen_movi_tl(cpu_ca, 0);
 }
 
+#ifdef TARGET_PPC64
+/* mcrxrx */
+static void gen_mcrxrx(DisasContext *ctx)
+{
+    TCGv t0 = tcg_temp_new();
+    TCGv t1 = tcg_temp_new();
+    TCGv_i32 dst = cpu_crf[crfD(ctx->opcode)];
+
+    /* copy OV and OV32 */
+    tcg_gen_shli_tl(t0, cpu_ov, 1);
+    tcg_gen_or_tl(t0, t0, cpu_ov32);
+    tcg_gen_shli_tl(t0, t0, 2);
+    /* copy CA and CA32 */
+    tcg_gen_shli_tl(t1, cpu_ca, 1);
+    tcg_gen_or_tl(t1, t1, cpu_ca32);
+    tcg_gen_or_tl(t0, t0, t1);
+    tcg_gen_trunc_tl_i32(dst, t0);
+    tcg_temp_free(t0);
+    tcg_temp_free(t1);
+}
+#endif
+
 /* mfcr mfocrf */
 static void gen_mfcr(DisasContext *ctx)
 {
@@ -6424,6 +6510,7 @@ GEN_HANDLER(mtcrf, 0x1F, 0x10, 0x04, 0x00000801, PPC_MISC),
 #if defined(TARGET_PPC64)
 GEN_HANDLER(mtmsrd, 0x1F, 0x12, 0x05, 0x001EF801, PPC_64B),
 GEN_HANDLER_E(setb, 0x1F, 0x00, 0x04, 0x0003F801, PPC_NONE, PPC2_ISA300),
+GEN_HANDLER_E(mcrxrx, 0x1F, 0x00, 0x12, 0x007FF801, PPC_NONE, PPC2_ISA300),
 #endif
 GEN_HANDLER(mtmsr, 0x1F, 0x12, 0x04, 0x001EF801, PPC_MISC),
 GEN_HANDLER(mtspr, 0x1F, 0x13, 0x0E, 0x00000000, PPC_MISC),
diff --git a/target/ppc/translate_init.c b/target/ppc/translate_init.c
index be35cbd3a2..37f74be984 100644
--- a/target/ppc/translate_init.c
+++ b/target/ppc/translate_init.c
@@ -107,7 +107,7 @@ static void spr_access_nop(DisasContext *ctx, int sprn, int gprn)
 /* XER */
 static void spr_read_xer (DisasContext *ctx, int gprn, int sprn)
 {
-    gen_read_xer(cpu_gpr[gprn]);
+    gen_read_xer(ctx, cpu_gpr[gprn]);
 }
 
 static void spr_write_xer (DisasContext *ctx, int sprn, int gprn)
@@ -740,10 +740,22 @@ static void gen_spr_ne_601 (CPUPPCState *env)
                  &spr_read_decr, &spr_write_decr,
                  0x00000000);
     /* Memory management */
-    spr_register(env, SPR_SDR1, "SDR1",
-                 SPR_NOACCESS, SPR_NOACCESS,
-                 &spr_read_generic, &spr_write_sdr1,
-                 0x00000000);
+#ifndef CONFIG_USER_ONLY
+    if (env->has_hv_mode) {
+        /* SDR1 is a hypervisor resource on CPUs which have a
+         * hypervisor mode */
+        spr_register_hv(env, SPR_SDR1, "SDR1",
+                        SPR_NOACCESS, SPR_NOACCESS,
+                        SPR_NOACCESS, SPR_NOACCESS,
+                        &spr_read_generic, &spr_write_sdr1,
+                        0x00000000);
+    } else {
+        spr_register(env, SPR_SDR1, "SDR1",
+                     SPR_NOACCESS, SPR_NOACCESS,
+                     &spr_read_generic, &spr_write_sdr1,
+                     0x00000000);
+    }
+#endif
 }
 
 /* BATs 0-3 */
@@ -8835,18 +8847,14 @@ POWERPC_FAMILY(POWER9)(ObjectClass *oc, void *data)
 }
 
 #if !defined(CONFIG_USER_ONLY)
-
-void cpu_ppc_set_vhyp(PowerPCCPU *cpu, PPCVirtualHypervisor *vhyp)
-{
-    cpu->vhyp = vhyp;
-}
-
-void cpu_ppc_set_papr(PowerPCCPU *cpu)
+void cpu_ppc_set_papr(PowerPCCPU *cpu, PPCVirtualHypervisor *vhyp)
 {
     CPUPPCState *env = &cpu->env;
     ppc_spr_t *lpcr = &env->spr_cb[SPR_LPCR];
     ppc_spr_t *amor = &env->spr_cb[SPR_AMOR];
 
+    cpu->vhyp = vhyp;
+
     /* PAPR always has exception vectors in RAM not ROM. To ensure this,
      * MSR[IP] should never be set.
      *
@@ -10489,11 +10497,12 @@ static void ppc_cpu_class_init(ObjectClass *oc, void *data)
 #else
     cc->get_phys_page_debug = ppc_cpu_get_phys_page_debug;
     cc->vmsd = &vmstate_ppc_cpu;
-#if defined(TARGET_PPC64)
-    cc->write_elf64_note = ppc64_cpu_write_elf64_note;
-#endif
 #endif
     cc->cpu_exec_enter = ppc_cpu_exec_enter;
+#if defined(CONFIG_SOFTMMU)
+    cc->write_elf64_note = ppc64_cpu_write_elf64_note;
+    cc->write_elf32_note = ppc32_cpu_write_elf32_note;
+#endif
 
     cc->gdb_num_core_regs = 71;
 
diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c
index 5b66d3325d..2a894eec65 100644
--- a/target/s390x/cpu_models.c
+++ b/target/s390x/cpu_models.c
@@ -671,7 +671,7 @@ static S390CPUModel *get_max_cpu_model(Error **errp)
     if (kvm_enabled()) {
         kvm_s390_get_host_cpu_model(&max_model, errp);
     } else {
-        /* TCG enulates a z900 */
+        /* TCG emulates a z900 */
         max_model.def = &s390_cpu_defs[0];
         bitmap_copy(max_model.features, max_model.def->default_feat,
                     S390_FEAT_MAX);
diff --git a/target/sparc/translate.c b/target/sparc/translate.c
index 655060cd9a..aa6734d54e 100644
--- a/target/sparc/translate.c
+++ b/target/sparc/translate.c
@@ -2448,8 +2448,31 @@ static void gen_ldstub_asi(DisasContext *dc, TCGv dst, TCGv addr, int insn)
         gen_ldstub(dc, dst, addr, da.mem_idx);
         break;
     default:
-        /* ??? Should be DAE_invalid_asi.  */
-        gen_exception(dc, TT_DATA_ACCESS);
+        /* ??? In theory, this should be raise DAE_invalid_asi.
+           But the SS-20 roms do ldstuba [%l0] #ASI_M_CTL, %o1.  */
+        if (parallel_cpus) {
+            gen_helper_exit_atomic(cpu_env);
+        } else {
+            TCGv_i32 r_asi = tcg_const_i32(da.asi);
+            TCGv_i32 r_mop = tcg_const_i32(MO_UB);
+            TCGv_i64 s64, t64;
+
+            save_state(dc);
+            t64 = tcg_temp_new_i64();
+            gen_helper_ld_asi(t64, cpu_env, addr, r_asi, r_mop);
+
+            s64 = tcg_const_i64(0xff);
+            gen_helper_st_asi(cpu_env, addr, s64, r_asi, r_mop);
+            tcg_temp_free_i64(s64);
+            tcg_temp_free_i32(r_mop);
+            tcg_temp_free_i32(r_asi);
+
+            tcg_gen_trunc_i64_tl(dst, t64);
+            tcg_temp_free_i64(t64);
+
+            /* End the TB.  */
+            dc->npc = DYNAMIC_PC;
+        }
         break;
     }
 }
diff --git a/tcg/aarch64/tcg-target.inc.c b/tcg/aarch64/tcg-target.inc.c
index 6d227a5a6a..290de6dae6 100644
--- a/tcg/aarch64/tcg-target.inc.c
+++ b/tcg/aarch64/tcg-target.inc.c
@@ -866,7 +866,7 @@ static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
     }
 }
 
-static void tcg_out_brcond(TCGContext *s, TCGMemOp ext, TCGCond c, TCGArg a,
+static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a,
                            TCGArg b, bool b_const, TCGLabel *l)
 {
     intptr_t offset;
@@ -937,7 +937,7 @@ static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
     }
 }
 
-static inline void tcg_out_addsub2(TCGContext *s, int ext, TCGReg rl,
+static inline void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
                                    TCGReg rh, TCGReg al, TCGReg ah,
                                    tcg_target_long bl, tcg_target_long bh,
                                    bool const_bl, bool const_bh, bool sub)
diff --git a/tests/Makefile.include b/tests/Makefile.include
index e60bb6ce58..364ef1bd23 100644
--- a/tests/Makefile.include
+++ b/tests/Makefile.include
@@ -308,8 +308,7 @@ check-qtest-sparc-y = tests/prom-env-test$(EXESUF)
 check-qtest-sparc64-y = tests/endianness-test$(EXESUF)
 #check-qtest-sparc64-y += tests/m48t59-test$(EXESUF)
 #gcov-files-sparc64-y += hw/timer/m48t59.c
-#Disabled for now, triggers a TCG bug on 32-bit hosts
-#check-qtest-sparc64-y += tests/prom-env-test$(EXESUF)
+check-qtest-sparc64-y += tests/prom-env-test$(EXESUF)
 
 check-qtest-arm-y = tests/tmp105-test$(EXESUF)
 check-qtest-arm-y += tests/ds1338-test$(EXESUF)
@@ -670,7 +669,7 @@ tests/hd-geo-test$(EXESUF): tests/hd-geo-test.o
 tests/boot-order-test$(EXESUF): tests/boot-order-test.o $(libqos-obj-y)
 tests/boot-serial-test$(EXESUF): tests/boot-serial-test.o $(libqos-obj-y)
 tests/bios-tables-test$(EXESUF): tests/bios-tables-test.o \
-	tests/boot-sector.o $(libqos-obj-y)
+	tests/boot-sector.o tests/acpi-utils.o $(libqos-obj-y)
 tests/pxe-test$(EXESUF): tests/pxe-test.o tests/boot-sector.o $(libqos-obj-y)
 tests/tmp105-test$(EXESUF): tests/tmp105-test.o $(libqos-omap-obj-y)
 tests/ds1338-test$(EXESUF): tests/ds1338-test.o $(libqos-imx-obj-y)
diff --git a/tests/acpi-test-data/q35/DSDT b/tests/acpi-test-data/q35/DSDT
index d11567c3dc..0dccad439b 100644
--- a/tests/acpi-test-data/q35/DSDT
+++ b/tests/acpi-test-data/q35/DSDT
Binary files differdiff --git a/tests/acpi-test-data/q35/DSDT.bridge b/tests/acpi-test-data/q35/DSDT.bridge
index 412a6e9104..8cd66c3b31 100644
--- a/tests/acpi-test-data/q35/DSDT.bridge
+++ b/tests/acpi-test-data/q35/DSDT.bridge
Binary files differdiff --git a/tests/acpi-test-data/q35/DSDT.cphp b/tests/acpi-test-data/q35/DSDT.cphp
index 79902d0d30..3c28a17a69 100644
--- a/tests/acpi-test-data/q35/DSDT.cphp
+++ b/tests/acpi-test-data/q35/DSDT.cphp
Binary files differdiff --git a/tests/acpi-test-data/q35/DSDT.ipmibt b/tests/acpi-test-data/q35/DSDT.ipmibt
index b658329c5b..3ceb876127 100644
--- a/tests/acpi-test-data/q35/DSDT.ipmibt
+++ b/tests/acpi-test-data/q35/DSDT.ipmibt
Binary files differdiff --git a/tests/acpi-test-data/q35/DSDT.memhp b/tests/acpi-test-data/q35/DSDT.memhp
index e46c1fb5a2..bdbefd47a5 100644
--- a/tests/acpi-test-data/q35/DSDT.memhp
+++ b/tests/acpi-test-data/q35/DSDT.memhp
Binary files differdiff --git a/tests/acpi-utils.c b/tests/acpi-utils.c
new file mode 100644
index 0000000000..41dc1ea9b4
--- /dev/null
+++ b/tests/acpi-utils.c
@@ -0,0 +1,65 @@
+/*
+ * ACPI Utility Functions
+ *
+ * Copyright (c) 2013 Red Hat Inc.
+ * Copyright (c) 2017 Skyport Systems
+ *
+ * Authors:
+ *  Michael S. Tsirkin <mst@redhat.com>,
+ *  Ben Warren <ben@skyportsystems.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include <glib/gstdio.h>
+#include "qemu-common.h"
+#include "hw/smbios/smbios.h"
+#include "qemu/bitmap.h"
+#include "acpi-utils.h"
+#include "boot-sector.h"
+
+uint8_t acpi_calc_checksum(const uint8_t *data, int len)
+{
+    int i;
+    uint8_t sum = 0;
+
+    for (i = 0; i < len; i++) {
+        sum += data[i];
+    }
+
+    return sum;
+}
+
+uint32_t acpi_find_rsdp_address(void)
+{
+    uint32_t off;
+
+    /* RSDP location can vary across a narrow range */
+    for (off = 0xf0000; off < 0x100000; off += 0x10) {
+        uint8_t sig[] = "RSD PTR ";
+        int i;
+
+        for (i = 0; i < sizeof sig - 1; ++i) {
+            sig[i] = readb(off + i);
+        }
+
+        if (!memcmp(sig, "RSD PTR ", sizeof sig)) {
+            break;
+        }
+    }
+    return off;
+}
+
+void acpi_parse_rsdp_table(uint32_t addr, AcpiRsdpDescriptor *rsdp_table)
+{
+    ACPI_READ_FIELD(rsdp_table->signature, addr);
+    ACPI_ASSERT_CMP64(rsdp_table->signature, "RSD PTR ");
+
+    ACPI_READ_FIELD(rsdp_table->checksum, addr);
+    ACPI_READ_ARRAY(rsdp_table->oem_id, addr);
+    ACPI_READ_FIELD(rsdp_table->revision, addr);
+    ACPI_READ_FIELD(rsdp_table->rsdt_physical_address, addr);
+    ACPI_READ_FIELD(rsdp_table->length, addr);
+}
diff --git a/tests/acpi-utils.h b/tests/acpi-utils.h
new file mode 100644
index 0000000000..9f9a2d532c
--- /dev/null
+++ b/tests/acpi-utils.h
@@ -0,0 +1,94 @@
+/*
+ * Utilities for working with ACPI tables
+ *
+ * Copyright (c) 2013 Red Hat Inc.
+ *
+ * Authors:
+ *  Michael S. Tsirkin <mst@redhat.com>,
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef TEST_ACPI_UTILS_H
+#define TEST_ACPI_UTILS_H
+
+#include "hw/acpi/acpi-defs.h"
+#include "libqtest.h"
+
+/* DSDT and SSDTs format */
+typedef struct {
+    AcpiTableHeader header;
+    gchar *aml;            /* aml bytecode from guest */
+    gsize aml_len;
+    gchar *aml_file;
+    gchar *asl;            /* asl code generated from aml */
+    gsize asl_len;
+    gchar *asl_file;
+    bool tmp_files_retain;   /* do not delete the temp asl/aml */
+} QEMU_PACKED AcpiSdtTable;
+
+#define ACPI_READ_FIELD(field, addr)           \
+    do {                                       \
+        switch (sizeof(field)) {               \
+        case 1:                                \
+            field = readb(addr);               \
+            break;                             \
+        case 2:                                \
+            field = readw(addr);               \
+            break;                             \
+        case 4:                                \
+            field = readl(addr);               \
+            break;                             \
+        case 8:                                \
+            field = readq(addr);               \
+            break;                             \
+        default:                               \
+            g_assert(false);                   \
+        }                                      \
+        addr += sizeof(field);                  \
+    } while (0);
+
+#define ACPI_READ_ARRAY_PTR(arr, length, addr)  \
+    do {                                        \
+        int idx;                                \
+        for (idx = 0; idx < length; ++idx) {    \
+            ACPI_READ_FIELD(arr[idx], addr);    \
+        }                                       \
+    } while (0);
+
+#define ACPI_READ_ARRAY(arr, addr)                               \
+    ACPI_READ_ARRAY_PTR(arr, sizeof(arr) / sizeof(arr[0]), addr)
+
+#define ACPI_READ_TABLE_HEADER(table, addr)                      \
+    do {                                                         \
+        ACPI_READ_FIELD((table)->signature, addr);               \
+        ACPI_READ_FIELD((table)->length, addr);                  \
+        ACPI_READ_FIELD((table)->revision, addr);                \
+        ACPI_READ_FIELD((table)->checksum, addr);                \
+        ACPI_READ_ARRAY((table)->oem_id, addr);                  \
+        ACPI_READ_ARRAY((table)->oem_table_id, addr);            \
+        ACPI_READ_FIELD((table)->oem_revision, addr);            \
+        ACPI_READ_ARRAY((table)->asl_compiler_id, addr);         \
+        ACPI_READ_FIELD((table)->asl_compiler_revision, addr);   \
+    } while (0);
+
+#define ACPI_ASSERT_CMP(actual, expected) do { \
+    uint32_t ACPI_ASSERT_CMP_le = cpu_to_le32(actual); \
+    char ACPI_ASSERT_CMP_str[5] = {}; \
+    memcpy(ACPI_ASSERT_CMP_str, &ACPI_ASSERT_CMP_le, 4); \
+    g_assert_cmpstr(ACPI_ASSERT_CMP_str, ==, expected); \
+} while (0)
+
+#define ACPI_ASSERT_CMP64(actual, expected) do { \
+    uint64_t ACPI_ASSERT_CMP_le = cpu_to_le64(actual); \
+    char ACPI_ASSERT_CMP_str[9] = {}; \
+    memcpy(ACPI_ASSERT_CMP_str, &ACPI_ASSERT_CMP_le, 8); \
+    g_assert_cmpstr(ACPI_ASSERT_CMP_str, ==, expected); \
+} while (0)
+
+uint8_t acpi_calc_checksum(const uint8_t *data, int len);
+uint32_t acpi_find_rsdp_address(void);
+void acpi_parse_rsdp_table(uint32_t addr, AcpiRsdpDescriptor *rsdp_table);
+
+#endif  /* TEST_ACPI_UTILS_H */
diff --git a/tests/bios-tables-test.c b/tests/bios-tables-test.c
index 54048050c0..88dbf97853 100644
--- a/tests/bios-tables-test.c
+++ b/tests/bios-tables-test.c
@@ -13,10 +13,9 @@
 #include "qemu/osdep.h"
 #include <glib/gstdio.h>
 #include "qemu-common.h"
-#include "libqtest.h"
-#include "hw/acpi/acpi-defs.h"
 #include "hw/smbios/smbios.h"
 #include "qemu/bitmap.h"
+#include "acpi-utils.h"
 #include "boot-sector.h"
 
 #define MACHINE_PC "pc"
@@ -24,18 +23,6 @@
 
 #define ACPI_REBUILD_EXPECTED_AML "TEST_ACPI_REBUILD_AML"
 
-/* DSDT and SSDTs format */
-typedef struct {
-    AcpiTableHeader header;
-    gchar *aml;            /* aml bytecode from guest */
-    gsize aml_len;
-    gchar *aml_file;
-    gchar *asl;            /* asl code generated from aml */
-    gsize asl_len;
-    gchar *asl_file;
-    bool tmp_files_retain;   /* do not delete the temp asl/aml */
-} QEMU_PACKED AcpiSdtTable;
-
 typedef struct {
     const char *machine;
     const char *variant;
@@ -53,65 +40,6 @@ typedef struct {
     int required_struct_types_len;
 } test_data;
 
-#define ACPI_READ_FIELD(field, addr)           \
-    do {                                       \
-        switch (sizeof(field)) {               \
-        case 1:                                \
-            field = readb(addr);               \
-            break;                             \
-        case 2:                                \
-            field = readw(addr);               \
-            break;                             \
-        case 4:                                \
-            field = readl(addr);               \
-            break;                             \
-        case 8:                                \
-            field = readq(addr);               \
-            break;                             \
-        default:                               \
-            g_assert(false);                   \
-        }                                      \
-        addr += sizeof(field);                  \
-    } while (0);
-
-#define ACPI_READ_ARRAY_PTR(arr, length, addr)  \
-    do {                                        \
-        int idx;                                \
-        for (idx = 0; idx < length; ++idx) {    \
-            ACPI_READ_FIELD(arr[idx], addr);    \
-        }                                       \
-    } while (0);
-
-#define ACPI_READ_ARRAY(arr, addr)                               \
-    ACPI_READ_ARRAY_PTR(arr, sizeof(arr)/sizeof(arr[0]), addr)
-
-#define ACPI_READ_TABLE_HEADER(table, addr)                      \
-    do {                                                         \
-        ACPI_READ_FIELD((table)->signature, addr);               \
-        ACPI_READ_FIELD((table)->length, addr);                  \
-        ACPI_READ_FIELD((table)->revision, addr);                \
-        ACPI_READ_FIELD((table)->checksum, addr);                \
-        ACPI_READ_ARRAY((table)->oem_id, addr);                  \
-        ACPI_READ_ARRAY((table)->oem_table_id, addr);            \
-        ACPI_READ_FIELD((table)->oem_revision, addr);            \
-        ACPI_READ_ARRAY((table)->asl_compiler_id, addr);         \
-        ACPI_READ_FIELD((table)->asl_compiler_revision, addr);   \
-    } while (0);
-
-#define ACPI_ASSERT_CMP(actual, expected) do { \
-    uint32_t ACPI_ASSERT_CMP_le = cpu_to_le32(actual); \
-    char ACPI_ASSERT_CMP_str[5] = {}; \
-    memcpy(ACPI_ASSERT_CMP_str, &ACPI_ASSERT_CMP_le, 4); \
-    g_assert_cmpstr(ACPI_ASSERT_CMP_str, ==, expected); \
-} while (0)
-
-#define ACPI_ASSERT_CMP64(actual, expected) do { \
-    uint64_t ACPI_ASSERT_CMP_le = cpu_to_le64(actual); \
-    char ACPI_ASSERT_CMP_str[9] = {}; \
-    memcpy(ACPI_ASSERT_CMP_str, &ACPI_ASSERT_CMP_le, 8); \
-    g_assert_cmpstr(ACPI_ASSERT_CMP_str, ==, expected); \
-} while (0)
-
 static char disk[] = "tests/acpi-test-disk-XXXXXX";
 static const char *data_dir = "tests/acpi-test-data";
 #ifdef CONFIG_IASL
@@ -144,39 +72,12 @@ static void free_test_data(test_data *data)
         g_free(temp->asl_file);
     }
 
-    g_array_free(data->tables, false);
-}
-
-static uint8_t acpi_checksum(const uint8_t *data, int len)
-{
-    int i;
-    uint8_t sum = 0;
-
-    for (i = 0; i < len; i++) {
-        sum += data[i];
-    }
-
-    return sum;
+    g_array_free(data->tables, true);
 }
 
 static void test_acpi_rsdp_address(test_data *data)
 {
-    uint32_t off;
-
-    /* OK, now find RSDP */
-    for (off = 0xf0000; off < 0x100000; off += 0x10) {
-        uint8_t sig[] = "RSD PTR ";
-        int i;
-
-        for (i = 0; i < sizeof sig - 1; ++i) {
-            sig[i] = readb(off + i);
-        }
-
-        if (!memcmp(sig, "RSD PTR ", sizeof sig)) {
-            break;
-        }
-    }
-
+    uint32_t off = acpi_find_rsdp_address();
     g_assert_cmphex(off, <, 0x100000);
     data->rsdp_addr = off;
 }
@@ -186,17 +87,10 @@ static void test_acpi_rsdp_table(test_data *data)
     AcpiRsdpDescriptor *rsdp_table = &data->rsdp_table;
     uint32_t addr = data->rsdp_addr;
 
-    ACPI_READ_FIELD(rsdp_table->signature, addr);
-    ACPI_ASSERT_CMP64(rsdp_table->signature, "RSD PTR ");
-
-    ACPI_READ_FIELD(rsdp_table->checksum, addr);
-    ACPI_READ_ARRAY(rsdp_table->oem_id, addr);
-    ACPI_READ_FIELD(rsdp_table->revision, addr);
-    ACPI_READ_FIELD(rsdp_table->rsdt_physical_address, addr);
-    ACPI_READ_FIELD(rsdp_table->length, addr);
+    acpi_parse_rsdp_table(addr, rsdp_table);
 
     /* rsdp checksum is not for the whole table, but for the first 20 bytes */
-    g_assert(!acpi_checksum((uint8_t *)rsdp_table, 20));
+    g_assert(!acpi_calc_checksum((uint8_t *)rsdp_table, 20));
 }
 
 static void test_acpi_rsdt_table(test_data *data)
@@ -220,8 +114,9 @@ static void test_acpi_rsdt_table(test_data *data)
     tables = g_new0(uint32_t, tables_nr);
     ACPI_READ_ARRAY_PTR(tables, tables_nr, addr);
 
-    checksum = acpi_checksum((uint8_t *)rsdt_table, rsdt_table->length) +
-               acpi_checksum((uint8_t *)tables, tables_nr * sizeof(uint32_t));
+    checksum = acpi_calc_checksum((uint8_t *)rsdt_table, rsdt_table->length) +
+               acpi_calc_checksum((uint8_t *)tables,
+                                  tables_nr * sizeof(uint32_t));
     g_assert(!checksum);
 
    /* SSDT tables after FADT */
@@ -279,7 +174,7 @@ static void test_acpi_fadt_table(test_data *data)
     ACPI_READ_FIELD(fadt_table->flags, addr);
 
     ACPI_ASSERT_CMP(fadt_table->signature, "FACP");
-    g_assert(!acpi_checksum((uint8_t *)fadt_table, fadt_table->length));
+    g_assert(!acpi_calc_checksum((uint8_t *)fadt_table, fadt_table->length));
 }
 
 static void test_acpi_facs_table(test_data *data)
@@ -308,8 +203,10 @@ static void test_dst_table(AcpiSdtTable *sdt_table, uint32_t addr)
     sdt_table->aml = g_malloc0(sdt_table->aml_len);
     ACPI_READ_ARRAY_PTR(sdt_table->aml, sdt_table->aml_len, addr);
 
-    checksum = acpi_checksum((uint8_t *)sdt_table, sizeof(AcpiTableHeader)) +
-               acpi_checksum((uint8_t *)sdt_table->aml, sdt_table->aml_len);
+    checksum = acpi_calc_checksum((uint8_t *)sdt_table,
+                                  sizeof(AcpiTableHeader)) +
+               acpi_calc_checksum((uint8_t *)sdt_table->aml,
+                                  sdt_table->aml_len);
     g_assert(!checksum);
 }
 
@@ -608,8 +505,9 @@ static bool smbios_ep_table_ok(test_data *data)
         return false;
     }
     ACPI_READ_FIELD(ep_table->smbios_bcd_revision, addr);
-    if (acpi_checksum((uint8_t *)ep_table, sizeof *ep_table) ||
-        acpi_checksum((uint8_t *)ep_table + 0x10, sizeof *ep_table - 0x10)) {
+    if (acpi_calc_checksum((uint8_t *)ep_table, sizeof *ep_table) ||
+        acpi_calc_checksum((uint8_t *)ep_table + 0x10,
+                           sizeof *ep_table - 0x10)) {
         return false;
     }
     return true;
diff --git a/tests/docker/dockerfiles/debian-s390x-cross.docker b/tests/docker/dockerfiles/debian-s390x-cross.docker
new file mode 100644
index 0000000000..bbb21ed088
--- /dev/null
+++ b/tests/docker/dockerfiles/debian-s390x-cross.docker
@@ -0,0 +1,22 @@
+#
+# Docker s390 cross-compiler target
+#
+# This docker target is based on stretch (testing) as the stable build
+# doesn't have the cross compiler available.
+#
+FROM debian:testing-slim
+
+# Duplicate deb line as deb-src
+RUN cat /etc/apt/sources.list | sed "s/deb/deb-src/" >> /etc/apt/sources.list
+
+# Add the s390x architecture
+RUN dpkg --add-architecture s390x
+
+# Grab the updated list of packages
+RUN apt update
+RUN apt dist-upgrade -yy
+RUN apt-get build-dep -yy -a s390x qemu || apt-get -f install
+RUN apt install -yy gcc-multilib-s390x-linux-gnu binutils-multiarch
+
+# Specify the cross prefix for this image (see tests/docker/common.rc)
+ENV QEMU_CONFIGURE_OPTS --cross-prefix=s390x-linux-gnu-
diff --git a/tests/e1000-test.c b/tests/e1000-test.c
index 59cab68a60..0c5fcdcc44 100644
--- a/tests/e1000-test.c
+++ b/tests/e1000-test.c
@@ -44,6 +44,7 @@ int main(int argc, char **argv)
 
         path = g_strdup_printf("e1000/%s", models[i]);
         qtest_add_data_func(path, models[i], test_device);
+        g_free(path);
     }
 
     return g_test_run();
diff --git a/tests/e1000e-test.c b/tests/e1000e-test.c
index 8c42ca919f..c612dc64ec 100644
--- a/tests/e1000e-test.c
+++ b/tests/e1000e-test.c
@@ -99,7 +99,10 @@ static QPCIBus *test_bus;
 
 static void e1000e_pci_foreach_callback(QPCIDevice *dev, int devfn, void *data)
 {
-    *(QPCIDevice **) data = dev;
+    QPCIDevice **res = data;
+
+    g_assert_null(*res);
+    *res = dev;
 }
 
 static QPCIDevice *e1000e_device_find(QPCIBus *bus)
@@ -403,6 +406,7 @@ static void data_test_clear(e1000e_device *d)
     e1000e_device_clear(test_bus, d);
     close(test_sockets[0]);
     pc_alloc_uninit(test_alloc);
+    g_free(d->pci_dev);
     qpci_free_pc(test_bus);
     qtest_end();
 }
diff --git a/tests/eepro100-test.c b/tests/eepro100-test.c
index ed23258b0f..bdc8a67d57 100644
--- a/tests/eepro100-test.c
+++ b/tests/eepro100-test.c
@@ -54,6 +54,7 @@ int main(int argc, char **argv)
 
         path = g_strdup_printf("eepro100/%s", models[i]);
         qtest_add_data_func(path, models[i], test_device);
+        g_free(path);
     }
 
     return g_test_run();
diff --git a/tests/endianness-test.c b/tests/endianness-test.c
index cf8d41b7b4..ed0bf52019 100644
--- a/tests/endianness-test.c
+++ b/tests/endianness-test.c
@@ -295,14 +295,17 @@ int main(int argc, char **argv)
         path = g_strdup_printf("endianness/%s",
                                test_cases[i].machine);
         qtest_add_data_func(path, &test_cases[i], test_endianness);
+        g_free(path);
 
         path = g_strdup_printf("endianness/split/%s",
                                test_cases[i].machine);
         qtest_add_data_func(path, &test_cases[i], test_endianness_split);
+        g_free(path);
 
         path = g_strdup_printf("endianness/combine/%s",
                                test_cases[i].machine);
         qtest_add_data_func(path, &test_cases[i], test_endianness_combine);
+        g_free(path);
     }
 
     return g_test_run();
diff --git a/tests/hd-geo-test.c b/tests/hd-geo-test.c
index 6176e81ab2..24870b38f4 100644
--- a/tests/hd-geo-test.c
+++ b/tests/hd-geo-test.c
@@ -19,6 +19,8 @@
 #include "qemu-common.h"
 #include "libqtest.h"
 
+#define ARGV_SIZE 256
+
 static char *create_test_img(int secs)
 {
     char *template = strdup("/tmp/qtest.XXXXXX");
@@ -66,7 +68,7 @@ static const CHST hd_chst[backend_last][mbr_last] = {
     },
 };
 
-static const char *img_file_name[backend_last];
+static char *img_file_name[backend_last];
 
 static const CHST *cur_ide[4];
 
@@ -234,28 +236,36 @@ static int setup_ide(int argc, char *argv[], int argv_sz,
  */
 static void test_ide_none(void)
 {
-    char *argv[256];
-
-    setup_common(argv, ARRAY_SIZE(argv));
-    qtest_start(g_strjoinv(" ", argv));
+    char **argv = g_new0(char *, ARGV_SIZE);
+    char *args;
+
+    setup_common(argv, ARGV_SIZE);
+    args = g_strjoinv(" ", argv);
+    qtest_start(args);
+    g_strfreev(argv);
+    g_free(args);
     test_cmos();
     qtest_end();
 }
 
 static void test_ide_mbr(bool use_device, MBRcontents mbr)
 {
-    char *argv[256];
+    char **argv = g_new0(char *, ARGV_SIZE);
+    char *args;
     int argc;
     Backend i;
     const char *dev;
 
-    argc = setup_common(argv, ARRAY_SIZE(argv));
+    argc = setup_common(argv, ARGV_SIZE);
     for (i = 0; i < backend_last; i++) {
         cur_ide[i] = &hd_chst[i][mbr];
         dev = use_device ? (is_hd(cur_ide[i]) ? "ide-hd" : "ide-cd") : NULL;
-        argc = setup_ide(argc, argv, ARRAY_SIZE(argv), i, dev, i, mbr, "");
+        argc = setup_ide(argc, argv, ARGV_SIZE, i, dev, i, mbr, "");
     }
-    qtest_start(g_strjoinv(" ", argv));
+    args = g_strjoinv(" ", argv);
+    qtest_start(args);
+    g_strfreev(argv);
+    g_free(args);
     test_cmos();
     qtest_end();
 }
@@ -310,12 +320,13 @@ static void test_ide_device_mbr_chs(void)
 
 static void test_ide_drive_user(const char *dev, bool trans)
 {
-    char *argv[256], *opts;
+    char **argv = g_new0(char *, ARGV_SIZE);
+    char *args, *opts;
     int argc;
     int secs = img_secs[backend_small];
     const CHST expected_chst = { secs / (4 * 32) , 4, 32, trans };
 
-    argc = setup_common(argv, ARRAY_SIZE(argv));
+    argc = setup_common(argv, ARGV_SIZE);
     opts = g_strdup_printf("%s,%s%scyls=%d,heads=%d,secs=%d",
                            dev ?: "",
                            trans && dev ? "bios-chs-" : "",
@@ -323,11 +334,14 @@ static void test_ide_drive_user(const char *dev, bool trans)
                            expected_chst.cyls, expected_chst.heads,
                            expected_chst.secs);
     cur_ide[0] = &expected_chst;
-    argc = setup_ide(argc, argv, ARRAY_SIZE(argv),
+    argc = setup_ide(argc, argv, ARGV_SIZE,
                      0, dev ? opts : NULL, backend_small, mbr_chs,
                      dev ? "" : opts);
     g_free(opts);
-    qtest_start(g_strjoinv(" ", argv));
+    args = g_strjoinv(" ", argv);
+    qtest_start(args);
+    g_strfreev(argv);
+    g_free(args);
     test_cmos();
     qtest_end();
 }
@@ -369,18 +383,22 @@ static void test_ide_device_user_chst(void)
  */
 static void test_ide_drive_cd_0(void)
 {
-    char *argv[256];
+    char **argv = g_new0(char *, ARGV_SIZE);
+    char *args;
     int argc, ide_idx;
     Backend i;
 
-    argc = setup_common(argv, ARRAY_SIZE(argv));
+    argc = setup_common(argv, ARGV_SIZE);
     for (i = 0; i <= backend_empty; i++) {
         ide_idx = backend_empty - i;
         cur_ide[ide_idx] = &hd_chst[i][mbr_blank];
-        argc = setup_ide(argc, argv, ARRAY_SIZE(argv),
+        argc = setup_ide(argc, argv, ARGV_SIZE,
                          ide_idx, NULL, i, mbr_blank, "");
     }
-    qtest_start(g_strjoinv(" ", argv));
+    args = g_strjoinv(" ", argv);
+    qtest_start(args);
+    g_strfreev(argv);
+    g_free(args);
     test_cmos();
     qtest_end();
 }
@@ -418,6 +436,7 @@ int main(int argc, char **argv)
     for (i = 0; i < backend_last; i++) {
         if (img_file_name[i]) {
             unlink(img_file_name[i]);
+            free(img_file_name[i]);
         }
     }
 
diff --git a/tests/i440fx-test.c b/tests/i440fx-test.c
index da2d5a53f0..e9d05c87d1 100644
--- a/tests/i440fx-test.c
+++ b/tests/i440fx-test.c
@@ -134,6 +134,8 @@ static void test_i440fx_defaults(gconstpointer opaque)
     /* 3.2.26 */
     g_assert_cmpint(qpci_config_readb(dev, 0x93), ==, 0x00); /* TRC */
 
+    g_free(dev);
+    qpci_free_pc(bus);
     qtest_end();
 }
 
@@ -270,6 +272,9 @@ static void test_i440fx_pam(gconstpointer opaque)
         /* Verify the area is not our new mask */
         g_assert(!verify_area(pam_area[i].start, pam_area[i].end, 0x82));
     }
+
+    g_free(dev);
+    qpci_free_pc(bus);
     qtest_end();
 }
 
diff --git a/tests/ide-test.c b/tests/ide-test.c
index fb541f88b5..139ebc0ec6 100644
--- a/tests/ide-test.c
+++ b/tests/ide-test.c
@@ -339,6 +339,7 @@ static void test_bmdma_simple_rw(void)
     g_assert(memcmp(buf, cmpbuf, len) == 0);
 
 
+    free_pci_device(dev);
     g_free(buf);
     g_free(cmpbuf);
 }
@@ -369,6 +370,7 @@ static void test_bmdma_short_prdt(void)
                               prdt, ARRAY_SIZE(prdt), NULL);
     g_assert_cmphex(status, ==, 0);
     assert_bit_clear(qpci_io_readb(dev, ide_bar, reg_status), DF | ERR);
+    free_pci_device(dev);
 }
 
 static void test_bmdma_one_sector_short_prdt(void)
@@ -398,6 +400,7 @@ static void test_bmdma_one_sector_short_prdt(void)
                               prdt, ARRAY_SIZE(prdt), NULL);
     g_assert_cmphex(status, ==, 0);
     assert_bit_clear(qpci_io_readb(dev, ide_bar, reg_status), DF | ERR);
+    free_pci_device(dev);
 }
 
 static void test_bmdma_long_prdt(void)
@@ -426,6 +429,7 @@ static void test_bmdma_long_prdt(void)
                               prdt, ARRAY_SIZE(prdt), NULL);
     g_assert_cmphex(status, ==, BM_STS_INTR);
     assert_bit_clear(qpci_io_readb(dev, ide_bar, reg_status), DF | ERR);
+    free_pci_device(dev);
 }
 
 static void test_bmdma_no_busmaster(void)
@@ -449,6 +453,7 @@ static void test_bmdma_no_busmaster(void)
      * in practice. At least we want to be aware of any changes. */
     g_assert_cmphex(status, ==, BM_STS_ACTIVE | BM_STS_INTR);
     assert_bit_clear(qpci_io_readb(dev, ide_bar, reg_status), DF | ERR);
+    free_pci_device(dev);
 }
 
 static void test_bmdma_setup(void)
@@ -525,6 +530,7 @@ static void test_identify(void)
     assert_bit_set(buf[85], 0x20);
 
     ide_test_quit();
+    free_pci_device(dev);
 }
 
 /*
@@ -544,6 +550,7 @@ static void make_dirty(uint8_t device)
 
     guest_buf = guest_alloc(guest_malloc, len);
     buf = g_malloc(len);
+    memset(buf, rand() % 255 + 1, len);
     g_assert(guest_buf);
     g_assert(buf);
 
@@ -562,6 +569,7 @@ static void make_dirty(uint8_t device)
     assert_bit_clear(qpci_io_readb(dev, ide_bar, reg_status), DF | ERR);
 
     g_free(buf);
+    free_pci_device(dev);
 }
 
 static void test_flush(void)
@@ -608,6 +616,7 @@ static void test_flush(void)
     assert_bit_clear(data, BSY | DF | ERR | DRQ);
 
     ide_test_quit();
+    free_pci_device(dev);
 }
 
 static void test_retry_flush(const char *machine)
@@ -658,6 +667,7 @@ static void test_retry_flush(const char *machine)
     assert_bit_clear(data, BSY | DF | ERR | DRQ);
 
     ide_test_quit();
+    free_pci_device(dev);
 }
 
 static void test_flush_nodev(void)
@@ -675,6 +685,7 @@ static void test_flush_nodev(void)
 
     /* Just testing that qemu doesn't crash... */
 
+    free_pci_device(dev);
     ide_test_quit();
 }
 
@@ -741,6 +752,7 @@ static uint8_t ide_wait_clear(uint8_t flag)
     while (true) {
         data = qpci_io_readb(dev, ide_bar, reg_status);
         if (!(data & flag)) {
+            free_pci_device(dev);
             return data;
         }
         if (difftime(time(NULL), st) > 5.0) {
@@ -850,6 +862,7 @@ static void cdrom_pio_impl(int nblocks)
     g_free(pattern);
     g_free(rx);
     test_bmdma_teardown();
+    free_pci_device(dev);
 }
 
 static void test_cdrom_pio(void)
diff --git a/tests/ipmi-bt-test.c b/tests/ipmi-bt-test.c
index e84dd6889b..7e21a9bbcb 100644
--- a/tests/ipmi-bt-test.c
+++ b/tests/ipmi-bt-test.c
@@ -420,6 +420,7 @@ int main(int argc, char **argv)
           " -device ipmi-bmc-extern,chardev=ipmi0,id=bmc0"
           " -device isa-ipmi-bt,bmc=bmc0", emu_port);
     qtest_start(cmdline);
+    g_free(cmdline);
     qtest_irq_intercept_in(global_qtest, "ioapic");
     qtest_add_func("/ipmi/extern/connect", test_connect);
     qtest_add_func("/ipmi/extern/bt_base", test_bt_base);
diff --git a/tests/ipmi-kcs-test.c b/tests/ipmi-kcs-test.c
index 9cf0b34a33..178ffc1797 100644
--- a/tests/ipmi-kcs-test.c
+++ b/tests/ipmi-kcs-test.c
@@ -279,6 +279,7 @@ int main(int argc, char **argv)
     cmdline = g_strdup_printf("-device ipmi-bmc-sim,id=bmc0"
                               " -device isa-ipmi-kcs,bmc=bmc0");
     qtest_start(cmdline);
+    g_free(cmdline);
     qtest_irq_intercept_in(global_qtest, "ioapic");
     qtest_add_func("/ipmi/local/kcs_base", test_kcs_base);
     qtest_add_func("/ipmi/local/kcs_abort", test_kcs_abort);
diff --git a/tests/libqos/usb.c b/tests/libqos/usb.c
index 72d7a961fe..0cdfaecda7 100644
--- a/tests/libqos/usb.c
+++ b/tests/libqos/usb.c
@@ -24,6 +24,11 @@ void qusb_pci_init_one(QPCIBus *pcibus, struct qhc *hc, uint32_t devfn, int bar)
     hc->bar = qpci_iomap(hc->dev, bar, NULL);
 }
 
+void uhci_deinit(struct qhc *hc)
+{
+    g_free(hc->dev);
+}
+
 void uhci_port_test(struct qhc *hc, int port, uint16_t expect)
 {
     uint16_t value = qpci_io_readw(hc->dev, hc->bar, 0x10 + 2 * port);
@@ -64,4 +69,5 @@ void usb_test_hotplug(const char *hcd_id, const int port,
     g_assert(response);
     g_assert(qdict_haskey(response, "event"));
     g_assert(!strcmp(qdict_get_str(response, "event"), "DEVICE_DELETED"));
+    QDECREF(response);
 }
diff --git a/tests/libqos/usb.h b/tests/libqos/usb.h
index 423dcfd82f..297cfc564d 100644
--- a/tests/libqos/usb.h
+++ b/tests/libqos/usb.h
@@ -11,6 +11,7 @@ struct qhc {
 void qusb_pci_init_one(QPCIBus *pcibus, struct qhc *hc,
                        uint32_t devfn, int bar);
 void uhci_port_test(struct qhc *hc, int port, uint16_t expect);
+void uhci_deinit(struct qhc *hc);
 
 void usb_test_hotplug(const char *bus_name, const int port,
                       void (*port_check)(void));
diff --git a/tests/libqos/virtio-pci.c b/tests/libqos/virtio-pci.c
index d4bf841f23..7ac15c04e1 100644
--- a/tests/libqos/virtio-pci.c
+++ b/tests/libqos/virtio-pci.c
@@ -24,9 +24,17 @@
 typedef struct QVirtioPCIForeachData {
     void (*func)(QVirtioDevice *d, void *data);
     uint16_t device_type;
+    bool has_slot;
+    int slot;
     void *user_data;
 } QVirtioPCIForeachData;
 
+void qvirtio_pci_device_free(QVirtioPCIDevice *dev)
+{
+    g_free(dev->pdev);
+    g_free(dev);
+}
+
 static QVirtioPCIDevice *qpcidevice_to_qvirtiodevice(QPCIDevice *pdev)
 {
     QVirtioPCIDevice *vpcidev;
@@ -49,16 +57,18 @@ static void qvirtio_pci_foreach_callback(
     QVirtioPCIForeachData *d = data;
     QVirtioPCIDevice *vpcidev = qpcidevice_to_qvirtiodevice(dev);
 
-    if (vpcidev->vdev.device_type == d->device_type) {
+    if (vpcidev->vdev.device_type == d->device_type &&
+        (!d->has_slot || vpcidev->pdev->devfn == d->slot << 3)) {
         d->func(&vpcidev->vdev, d->user_data);
     } else {
-        g_free(vpcidev);
+        qvirtio_pci_device_free(vpcidev);
     }
 }
 
 static void qvirtio_pci_assign_device(QVirtioDevice *d, void *data)
 {
     QVirtioPCIDevice **vpcidev = data;
+    assert(!*vpcidev);
     *vpcidev = (QVirtioPCIDevice *)d;
 }
 
@@ -284,21 +294,39 @@ const QVirtioBus qvirtio_pci = {
     .virtqueue_kick = qvirtio_pci_virtqueue_kick,
 };
 
-void qvirtio_pci_foreach(QPCIBus *bus, uint16_t device_type,
+static void qvirtio_pci_foreach(QPCIBus *bus, uint16_t device_type,
+                bool has_slot, int slot,
                 void (*func)(QVirtioDevice *d, void *data), void *data)
 {
     QVirtioPCIForeachData d = { .func = func,
                                 .device_type = device_type,
+                                .has_slot = has_slot,
+                                .slot = slot,
                                 .user_data = data };
 
     qpci_device_foreach(bus, PCI_VENDOR_ID_REDHAT_QUMRANET, -1,
-                                qvirtio_pci_foreach_callback, &d);
+                        qvirtio_pci_foreach_callback, &d);
 }
 
 QVirtioPCIDevice *qvirtio_pci_device_find(QPCIBus *bus, uint16_t device_type)
 {
     QVirtioPCIDevice *dev = NULL;
-    qvirtio_pci_foreach(bus, device_type, qvirtio_pci_assign_device, &dev);
+
+    qvirtio_pci_foreach(bus, device_type, false, 0,
+                        qvirtio_pci_assign_device, &dev);
+
+    dev->vdev.bus = &qvirtio_pci;
+
+    return dev;
+}
+
+QVirtioPCIDevice *qvirtio_pci_device_find_slot(QPCIBus *bus,
+                                               uint16_t device_type, int slot)
+{
+    QVirtioPCIDevice *dev = NULL;
+
+    qvirtio_pci_foreach(bus, device_type, true, slot,
+                        qvirtio_pci_assign_device, &dev);
 
     dev->vdev.bus = &qvirtio_pci;
 
diff --git a/tests/libqos/virtio-pci.h b/tests/libqos/virtio-pci.h
index 38c54c63ea..6ef19094cb 100644
--- a/tests/libqos/virtio-pci.h
+++ b/tests/libqos/virtio-pci.h
@@ -31,9 +31,11 @@ typedef struct QVirtQueuePCI {
 
 extern const QVirtioBus qvirtio_pci;
 
-void qvirtio_pci_foreach(QPCIBus *bus, uint16_t device_type,
-                void (*func)(QVirtioDevice *d, void *data), void *data);
 QVirtioPCIDevice *qvirtio_pci_device_find(QPCIBus *bus, uint16_t device_type);
+QVirtioPCIDevice *qvirtio_pci_device_find_slot(QPCIBus *bus,
+                                               uint16_t device_type, int slot);
+void qvirtio_pci_device_free(QVirtioPCIDevice *dev);
+
 void qvirtio_pci_device_enable(QVirtioPCIDevice *d);
 void qvirtio_pci_device_disable(QVirtioPCIDevice *d);
 
diff --git a/tests/libqtest.c b/tests/libqtest.c
index e54354de8a..3a0e0d63a7 100644
--- a/tests/libqtest.c
+++ b/tests/libqtest.c
@@ -805,17 +805,7 @@ void qtest_add_data_func_full(const char *str, void *data,
                               GDestroyNotify data_free_func)
 {
     gchar *path = g_strdup_printf("/%s/%s", qtest_get_arch(), str);
-#if GLIB_CHECK_VERSION(2, 34, 0)
     g_test_add_data_func_full(path, data, fn, data_free_func);
-#elif GLIB_CHECK_VERSION(2, 26, 0)
-    /* back-compat casts, remove this once we can require new-enough glib */
-    g_test_add_vtable(path, 0, data, NULL,
-                      (GTestFixtureFunc)fn, (GTestFixtureFunc) data_free_func);
-#else
-    /* back-compat casts, remove this once we can require new-enough glib */
-    g_test_add_vtable(path, 0, data, NULL,
-                      (void (*)(void)) fn, (void (*)(void)) data_free_func);
-#endif
     g_free(path);
 }
 
diff --git a/tests/postcopy-test.c b/tests/postcopy-test.c
index dafe8beba4..de35a18903 100644
--- a/tests/postcopy-test.c
+++ b/tests/postcopy-test.c
@@ -482,7 +482,7 @@ static void test_migrate(void)
         usleep(10 * 1000);
     } while (dest_byte_a == dest_byte_b);
 
-    qmp("{ 'execute' : 'stop'}");
+    qmp_discard_response("{ 'execute' : 'stop'}");
     /* With it stopped, check nothing changes */
     qtest_memread(to, start_address, &dest_byte_c, 1);
     sleep(1);
diff --git a/tests/prom-env-test.c b/tests/prom-env-test.c
index bd33bc353d..eac207b30e 100644
--- a/tests/prom-env-test.c
+++ b/tests/prom-env-test.c
@@ -76,7 +76,7 @@ static void add_tests(const char *machines[])
 int main(int argc, char *argv[])
 {
     const char *sparc_machines[] = { "SPARCbook", "Voyager", "SS-20", NULL };
-    const char *sparc64_machines[] = { "sun4u", "sun4v", NULL };
+    const char *sparc64_machines[] = { "sun4u", NULL };
     const char *ppc_machines[] = { "mac99", "g3beige", NULL };
     const char *ppc64_machines[] = { "mac99", "g3beige", "pseries", NULL };
     const char *arch = qtest_get_arch();
diff --git a/tests/ptimer-test-stubs.c b/tests/ptimer-test-stubs.c
index 21d4ebb0fe..8a1b0a336c 100644
--- a/tests/ptimer-test-stubs.c
+++ b/tests/ptimer-test-stubs.c
@@ -108,6 +108,11 @@ QEMUBH *qemu_bh_new(QEMUBHFunc *cb, void *opaque)
     return bh;
 }
 
+void qemu_bh_delete(QEMUBH *bh)
+{
+    g_free(bh);
+}
+
 void replay_bh_schedule_event(QEMUBH *bh)
 {
     bh->cb(bh->opaque);
diff --git a/tests/ptimer-test.c b/tests/ptimer-test.c
index b36a476483..5d1a2a8188 100644
--- a/tests/ptimer-test.c
+++ b/tests/ptimer-test.c
@@ -73,6 +73,7 @@ static void check_set_count(gconstpointer arg)
     ptimer_set_count(ptimer, 1000);
     g_assert_cmpuint(ptimer_get_count(ptimer), ==, 1000);
     g_assert_false(triggered);
+    ptimer_free(ptimer);
 }
 
 static void check_set_limit(gconstpointer arg)
@@ -92,6 +93,7 @@ static void check_set_limit(gconstpointer arg)
     g_assert_cmpuint(ptimer_get_count(ptimer), ==, 2000);
     g_assert_cmpuint(ptimer_get_limit(ptimer), ==, 2000);
     g_assert_false(triggered);
+    ptimer_free(ptimer);
 }
 
 static void check_oneshot(gconstpointer arg)
@@ -194,6 +196,7 @@ static void check_oneshot(gconstpointer arg)
 
     g_assert_cmpuint(ptimer_get_count(ptimer), ==, 0);
     g_assert_false(triggered);
+    ptimer_free(ptimer);
 }
 
 static void check_periodic(gconstpointer arg)
@@ -360,6 +363,7 @@ static void check_periodic(gconstpointer arg)
     g_assert_cmpuint(ptimer_get_count(ptimer), ==,
                     (no_round_down ? 8 : 7) + (wrap_policy ? 1 : 0));
     g_assert_false(triggered);
+    ptimer_free(ptimer);
 }
 
 static void check_on_the_fly_mode_change(gconstpointer arg)
@@ -406,6 +410,7 @@ static void check_on_the_fly_mode_change(gconstpointer arg)
 
     g_assert_cmpuint(ptimer_get_count(ptimer), ==, 0);
     g_assert_true(triggered);
+    ptimer_free(ptimer);
 }
 
 static void check_on_the_fly_period_change(gconstpointer arg)
@@ -438,6 +443,7 @@ static void check_on_the_fly_period_change(gconstpointer arg)
 
     g_assert_cmpuint(ptimer_get_count(ptimer), ==, 0);
     g_assert_true(triggered);
+    ptimer_free(ptimer);
 }
 
 static void check_on_the_fly_freq_change(gconstpointer arg)
@@ -470,6 +476,7 @@ static void check_on_the_fly_freq_change(gconstpointer arg)
 
     g_assert_cmpuint(ptimer_get_count(ptimer), ==, 0);
     g_assert_true(triggered);
+    ptimer_free(ptimer);
 }
 
 static void check_run_with_period_0(gconstpointer arg)
@@ -487,6 +494,7 @@ static void check_run_with_period_0(gconstpointer arg)
 
     g_assert_cmpuint(ptimer_get_count(ptimer), ==, 99);
     g_assert_false(triggered);
+    ptimer_free(ptimer);
 }
 
 static void check_run_with_delta_0(gconstpointer arg)
@@ -591,6 +599,7 @@ static void check_run_with_delta_0(gconstpointer arg)
     g_assert_true(triggered);
 
     ptimer_stop(ptimer);
+    ptimer_free(ptimer);
 }
 
 static void check_periodic_with_load_0(gconstpointer arg)
@@ -649,6 +658,7 @@ static void check_periodic_with_load_0(gconstpointer arg)
     }
 
     ptimer_stop(ptimer);
+    ptimer_free(ptimer);
 }
 
 static void check_oneshot_with_load_0(gconstpointer arg)
@@ -682,14 +692,14 @@ static void check_oneshot_with_load_0(gconstpointer arg)
     } else {
         g_assert_false(triggered);
     }
+
+    ptimer_free(ptimer);
 }
 
 static void add_ptimer_tests(uint8_t policy)
 {
-    uint8_t *ppolicy = g_malloc(1);
-    char *policy_name = g_malloc0(256);
-
-    *ppolicy = policy;
+    char policy_name[256] = "";
+    char *tmp;
 
     if (policy == PTIMER_POLICY_DEFAULT) {
         g_sprintf(policy_name, "default");
@@ -715,49 +725,67 @@ static void add_ptimer_tests(uint8_t policy)
         g_strlcat(policy_name, "no_counter_rounddown,", 256);
     }
 
-    g_test_add_data_func(
-        g_strdup_printf("/ptimer/set_count policy=%s", policy_name),
-        ppolicy, check_set_count);
-
-    g_test_add_data_func(
-        g_strdup_printf("/ptimer/set_limit policy=%s", policy_name),
-        ppolicy, check_set_limit);
-
-    g_test_add_data_func(
-        g_strdup_printf("/ptimer/oneshot policy=%s", policy_name),
-        ppolicy, check_oneshot);
-
-    g_test_add_data_func(
-        g_strdup_printf("/ptimer/periodic policy=%s", policy_name),
-        ppolicy, check_periodic);
-
-    g_test_add_data_func(
-        g_strdup_printf("/ptimer/on_the_fly_mode_change policy=%s", policy_name),
-        ppolicy, check_on_the_fly_mode_change);
-
-    g_test_add_data_func(
-        g_strdup_printf("/ptimer/on_the_fly_period_change policy=%s", policy_name),
-        ppolicy, check_on_the_fly_period_change);
-
-    g_test_add_data_func(
-        g_strdup_printf("/ptimer/on_the_fly_freq_change policy=%s", policy_name),
-        ppolicy, check_on_the_fly_freq_change);
-
-    g_test_add_data_func(
-        g_strdup_printf("/ptimer/run_with_period_0 policy=%s", policy_name),
-        ppolicy, check_run_with_period_0);
-
-    g_test_add_data_func(
-        g_strdup_printf("/ptimer/run_with_delta_0 policy=%s", policy_name),
-        ppolicy, check_run_with_delta_0);
-
-    g_test_add_data_func(
-        g_strdup_printf("/ptimer/periodic_with_load_0 policy=%s", policy_name),
-        ppolicy, check_periodic_with_load_0);
-
-    g_test_add_data_func(
-        g_strdup_printf("/ptimer/oneshot_with_load_0 policy=%s", policy_name),
-        ppolicy, check_oneshot_with_load_0);
+    g_test_add_data_func_full(
+        tmp = g_strdup_printf("/ptimer/set_count policy=%s", policy_name),
+        g_memdup(&policy, 1), check_set_count, g_free);
+    g_free(tmp);
+
+    g_test_add_data_func_full(
+        tmp = g_strdup_printf("/ptimer/set_limit policy=%s", policy_name),
+        g_memdup(&policy, 1), check_set_limit, g_free);
+    g_free(tmp);
+
+    g_test_add_data_func_full(
+        tmp = g_strdup_printf("/ptimer/oneshot policy=%s", policy_name),
+        g_memdup(&policy, 1), check_oneshot, g_free);
+    g_free(tmp);
+
+    g_test_add_data_func_full(
+        tmp = g_strdup_printf("/ptimer/periodic policy=%s", policy_name),
+        g_memdup(&policy, 1), check_periodic, g_free);
+    g_free(tmp);
+
+    g_test_add_data_func_full(
+        tmp = g_strdup_printf("/ptimer/on_the_fly_mode_change policy=%s",
+                              policy_name),
+        g_memdup(&policy, 1), check_on_the_fly_mode_change, g_free);
+    g_free(tmp);
+
+    g_test_add_data_func_full(
+        tmp = g_strdup_printf("/ptimer/on_the_fly_period_change policy=%s",
+                              policy_name),
+        g_memdup(&policy, 1), check_on_the_fly_period_change, g_free);
+    g_free(tmp);
+
+    g_test_add_data_func_full(
+        tmp = g_strdup_printf("/ptimer/on_the_fly_freq_change policy=%s",
+                              policy_name),
+        g_memdup(&policy, 1), check_on_the_fly_freq_change, g_free);
+    g_free(tmp);
+
+    g_test_add_data_func_full(
+        tmp = g_strdup_printf("/ptimer/run_with_period_0 policy=%s",
+                              policy_name),
+        g_memdup(&policy, 1), check_run_with_period_0, g_free);
+    g_free(tmp);
+
+    g_test_add_data_func_full(
+        tmp = g_strdup_printf("/ptimer/run_with_delta_0 policy=%s",
+                              policy_name),
+        g_memdup(&policy, 1), check_run_with_delta_0, g_free);
+    g_free(tmp);
+
+    g_test_add_data_func_full(
+        tmp = g_strdup_printf("/ptimer/periodic_with_load_0 policy=%s",
+                              policy_name),
+        g_memdup(&policy, 1), check_periodic_with_load_0, g_free);
+    g_free(tmp);
+
+    g_test_add_data_func_full(
+        tmp = g_strdup_printf("/ptimer/oneshot_with_load_0 policy=%s",
+                              policy_name),
+        g_memdup(&policy, 1), check_oneshot_with_load_0, g_free);
+    g_free(tmp);
 }
 
 static void add_all_ptimer_policies_comb_tests(void)
diff --git a/tests/pvpanic-test.c b/tests/pvpanic-test.c
index 3bfa678667..71ebb5c02c 100644
--- a/tests/pvpanic-test.c
+++ b/tests/pvpanic-test.c
@@ -27,6 +27,7 @@ static void test_panic(void)
     data = qdict_get_qdict(response, "data");
     g_assert(qdict_haskey(data, "action"));
     g_assert_cmpstr(qdict_get_str(data, "action"), ==, "pause");
+    QDECREF(response);
 }
 
 int main(int argc, char **argv)
diff --git a/tests/q35-test.c b/tests/q35-test.c
index 763fe3d6ae..cc58f3ecf4 100644
--- a/tests/q35-test.c
+++ b/tests/q35-test.c
@@ -71,6 +71,9 @@ static void test_smram_lock(void)
     g_assert(smram_test_bit(pcidev, MCH_HOST_BRIDGE_SMRAM_D_OPEN) == false);
     smram_set_bit(pcidev, MCH_HOST_BRIDGE_SMRAM_D_OPEN, true);
     g_assert(smram_test_bit(pcidev, MCH_HOST_BRIDGE_SMRAM_D_OPEN) == true);
+
+    g_free(pcidev);
+    qpci_free_pc(pcibus);
 }
 
 int main(int argc, char **argv)
diff --git a/tests/qemu-iotests/049.out b/tests/qemu-iotests/049.out
index 4673b67f37..34e66db691 100644
--- a/tests/qemu-iotests/049.out
+++ b/tests/qemu-iotests/049.out
@@ -95,14 +95,14 @@ qemu-img create -f qcow2 TEST_DIR/t.qcow2 -- -1024
 qemu-img: Image size must be less than 8 EiB!
 
 qemu-img create -f qcow2 -o size=-1024 TEST_DIR/t.qcow2
-qemu-img: Parameter 'size' expects a non-negative number below 2^64
+qemu-img: Value '-1024' is out of range for parameter 'size'
 qemu-img: TEST_DIR/t.qcow2: Invalid options for file format 'qcow2'
 
 qemu-img create -f qcow2 TEST_DIR/t.qcow2 -- -1k
 qemu-img: Image size must be less than 8 EiB!
 
 qemu-img create -f qcow2 -o size=-1k TEST_DIR/t.qcow2
-qemu-img: Parameter 'size' expects a non-negative number below 2^64
+qemu-img: Value '-1k' is out of range for parameter 'size'
 qemu-img: TEST_DIR/t.qcow2: Invalid options for file format 'qcow2'
 
 qemu-img create -f qcow2 TEST_DIR/t.qcow2 -- 1kilobyte
@@ -110,15 +110,19 @@ qemu-img: Invalid image size specified! You may use k, M, G, T, P or E suffixes
 qemu-img: kilobytes, megabytes, gigabytes, terabytes, petabytes and exabytes.
 
 qemu-img create -f qcow2 -o size=1kilobyte TEST_DIR/t.qcow2
-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=1024 encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16
+qemu-img: Parameter 'size' expects a non-negative number below 2^64
+Optional suffix k, M, G, T, P or E means kilo-, mega-, giga-, tera-, peta-
+and exabytes, respectively.
+qemu-img: TEST_DIR/t.qcow2: Invalid options for file format 'qcow2'
 
 qemu-img create -f qcow2 TEST_DIR/t.qcow2 -- foobar
 qemu-img: Invalid image size specified! You may use k, M, G, T, P or E suffixes for
 qemu-img: kilobytes, megabytes, gigabytes, terabytes, petabytes and exabytes.
 
 qemu-img create -f qcow2 -o size=foobar TEST_DIR/t.qcow2
-qemu-img: Parameter 'size' expects a size
-You may use k, M, G or T suffixes for kilobytes, megabytes, gigabytes and terabytes.
+qemu-img: Parameter 'size' expects a non-negative number below 2^64
+Optional suffix k, M, G, T, P or E means kilo-, mega-, giga-, tera-, peta-
+and exabytes, respectively.
 qemu-img: TEST_DIR/t.qcow2: Invalid options for file format 'qcow2'
 
 == Check correct interpretation of suffixes for cluster size ==
diff --git a/tests/qemu-iotests/051.pc.out b/tests/qemu-iotests/051.pc.out
index e206ad6c29..c6f4eef215 100644
--- a/tests/qemu-iotests/051.pc.out
+++ b/tests/qemu-iotests/051.pc.out
@@ -179,7 +179,7 @@ qququiquit
 
 Testing: -drive file=TEST_DIR/t.qcow2,if=ide,readonly=on
 QEMU X.Y.Z monitor - type 'help' for more information
-(qemu) QEMU_PROG: Can't use a read-only drive
+(qemu) QEMU_PROG: Block node is read-only
 QEMU_PROG: Initialization of device ide-hd failed: Device initialization failed.
 
 Testing: -drive file=TEST_DIR/t.qcow2,if=scsi,readonly=on
@@ -201,12 +201,12 @@ QEMU X.Y.Z monitor - type 'help' for more information
 
 Testing: -drive file=TEST_DIR/t.qcow2,if=none,id=disk,readonly=on -device ide-drive,drive=disk
 QEMU X.Y.Z monitor - type 'help' for more information
-(qemu) QEMU_PROG: -device ide-drive,drive=disk: Can't use a read-only drive
+(qemu) QEMU_PROG: -device ide-drive,drive=disk: Block node is read-only
 QEMU_PROG: -device ide-drive,drive=disk: Device initialization failed.
 
 Testing: -drive file=TEST_DIR/t.qcow2,if=none,id=disk,readonly=on -device ide-hd,drive=disk
 QEMU X.Y.Z monitor - type 'help' for more information
-(qemu) QEMU_PROG: -device ide-hd,drive=disk: Can't use a read-only drive
+(qemu) QEMU_PROG: -device ide-hd,drive=disk: Block node is read-only
 QEMU_PROG: -device ide-hd,drive=disk: Device initialization failed.
 
 Testing: -drive file=TEST_DIR/t.qcow2,if=none,id=disk,readonly=on -device lsi53c895a -device scsi-disk,drive=disk
diff --git a/tests/qemu-iotests/055 b/tests/qemu-iotests/055
index 1d3fd04b65..aafcd249f6 100755
--- a/tests/qemu-iotests/055
+++ b/tests/qemu-iotests/055
@@ -48,7 +48,8 @@ class TestSingleDrive(iotests.QMPTestCase):
     def setUp(self):
         qemu_img('create', '-f', iotests.imgfmt, blockdev_target_img, str(image_len))
 
-        self.vm = iotests.VM().add_drive(test_img).add_drive(blockdev_target_img)
+        self.vm = iotests.VM().add_drive(test_img)
+        self.vm.add_drive(blockdev_target_img, interface="none")
         if iotests.qemu_default_machine == 'pc':
             self.vm.add_drive(None, 'media=cdrom', 'ide')
         self.vm.launch()
@@ -164,7 +165,8 @@ class TestSetSpeed(iotests.QMPTestCase):
     def setUp(self):
         qemu_img('create', '-f', iotests.imgfmt, blockdev_target_img, str(image_len))
 
-        self.vm = iotests.VM().add_drive(test_img).add_drive(blockdev_target_img)
+        self.vm = iotests.VM().add_drive(test_img)
+        self.vm.add_drive(blockdev_target_img, interface="none")
         self.vm.launch()
 
     def tearDown(self):
@@ -247,7 +249,8 @@ class TestSingleTransaction(iotests.QMPTestCase):
     def setUp(self):
         qemu_img('create', '-f', iotests.imgfmt, blockdev_target_img, str(image_len))
 
-        self.vm = iotests.VM().add_drive(test_img).add_drive(blockdev_target_img)
+        self.vm = iotests.VM().add_drive(test_img)
+        self.vm.add_drive(blockdev_target_img, interface="none")
         if iotests.qemu_default_machine == 'pc':
             self.vm.add_drive(None, 'media=cdrom', 'ide')
         self.vm.launch()
@@ -460,7 +463,7 @@ class TestDriveCompression(iotests.QMPTestCase):
 
         qemu_img('create', '-f', fmt, blockdev_target_img,
                  str(TestDriveCompression.image_len), *args)
-        self.vm.add_drive(blockdev_target_img, format=fmt)
+        self.vm.add_drive(blockdev_target_img, format=fmt, interface="none")
 
         self.vm.launch()
 
diff --git a/tests/qemu-iotests/085.out b/tests/qemu-iotests/085.out
index 08e4bb7218..182acb42cf 100644
--- a/tests/qemu-iotests/085.out
+++ b/tests/qemu-iotests/085.out
@@ -74,7 +74,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 backing_file=TEST_DIR/
 
 === Invalid command - snapshot node used as backing hd ===
 
-{"error": {"class": "GenericError", "desc": "Node 'snap_11' is busy: node is used as backing hd of 'virtio0'"}}
+{"error": {"class": "GenericError", "desc": "Node 'snap_11' is busy: node is used as backing hd of 'snap_12'"}}
 
 === Invalid command - snapshot node has a backing image ===
 
diff --git a/tests/qemu-iotests/141 b/tests/qemu-iotests/141
index 3ba79f027a..6d8f0a1a84 100755
--- a/tests/qemu-iotests/141
+++ b/tests/qemu-iotests/141
@@ -67,7 +67,7 @@ test_blockjob()
     _send_qemu_cmd $QEMU_HANDLE \
         "{'execute': 'x-blockdev-del',
           'arguments': {'node-name': 'drv0'}}" \
-        'error'
+        'error' | _filter_generated_node_ids
 
     _send_qemu_cmd $QEMU_HANDLE \
         "{'execute': 'block-job-cancel',
diff --git a/tests/qemu-iotests/141.out b/tests/qemu-iotests/141.out
index 195ca1a604..82e763b68d 100644
--- a/tests/qemu-iotests/141.out
+++ b/tests/qemu-iotests/141.out
@@ -20,7 +20,7 @@ Formatting 'TEST_DIR/o.IMGFMT', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/t.
 Formatting 'TEST_DIR/o.IMGFMT', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/t.IMGFMT backing_fmt=IMGFMT
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "mirror"}}
 {"return": {}}
-{"error": {"class": "GenericError", "desc": "Node drv0 is in use"}}
+{"error": {"class": "GenericError", "desc": "Node 'drv0' is busy: node is used as backing hd of 'NODE_NAME'"}}
 {"return": {}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "mirror"}}
 {"return": {}}
@@ -30,7 +30,7 @@ Formatting 'TEST_DIR/o.IMGFMT', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/t.
 {"return": {}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "commit"}}
 {"return": {}}
-{"error": {"class": "GenericError", "desc": "Node drv0 is in use"}}
+{"error": {"class": "GenericError", "desc": "Node 'drv0' is busy: node is used as backing hd of 'NODE_NAME'"}}
 {"return": {}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "commit"}}
 {"return": {}}
diff --git a/tests/qemu-iotests/172.out b/tests/qemu-iotests/172.out
index 6b7edaf28f..54b53293d7 100644
--- a/tests/qemu-iotests/172.out
+++ b/tests/qemu-iotests/172.out
@@ -28,6 +28,7 @@ Testing:
                 opt_io_size = 0 (0x0)
                 discard_granularity = 4294967295 (0xffffffff)
                 write-cache = "auto"
+                share-rw = false
                 drive-type = "288"
 
 
@@ -57,6 +58,7 @@ Testing: -fda TEST_DIR/t.qcow2
                 opt_io_size = 0 (0x0)
                 discard_granularity = 4294967295 (0xffffffff)
                 write-cache = "auto"
+                share-rw = false
                 drive-type = "144"
 
 Testing: -fdb TEST_DIR/t.qcow2
@@ -83,6 +85,7 @@ Testing: -fdb TEST_DIR/t.qcow2
                 opt_io_size = 0 (0x0)
                 discard_granularity = 4294967295 (0xffffffff)
                 write-cache = "auto"
+                share-rw = false
                 drive-type = "144"
               dev: floppy, id ""
                 unit = 0 (0x0)
@@ -93,6 +96,7 @@ Testing: -fdb TEST_DIR/t.qcow2
                 opt_io_size = 0 (0x0)
                 discard_granularity = 4294967295 (0xffffffff)
                 write-cache = "auto"
+                share-rw = false
                 drive-type = "288"
 
 Testing: -fda TEST_DIR/t.qcow2 -fdb TEST_DIR/t.qcow2
@@ -119,6 +123,7 @@ Testing: -fda TEST_DIR/t.qcow2 -fdb TEST_DIR/t.qcow2
                 opt_io_size = 0 (0x0)
                 discard_granularity = 4294967295 (0xffffffff)
                 write-cache = "auto"
+                share-rw = false
                 drive-type = "144"
               dev: floppy, id ""
                 unit = 0 (0x0)
@@ -129,6 +134,7 @@ Testing: -fda TEST_DIR/t.qcow2 -fdb TEST_DIR/t.qcow2
                 opt_io_size = 0 (0x0)
                 discard_granularity = 4294967295 (0xffffffff)
                 write-cache = "auto"
+                share-rw = false
                 drive-type = "144"
 
 
@@ -158,6 +164,7 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2
                 opt_io_size = 0 (0x0)
                 discard_granularity = 4294967295 (0xffffffff)
                 write-cache = "auto"
+                share-rw = false
                 drive-type = "144"
 
 Testing: -drive if=floppy,file=TEST_DIR/t.qcow2,index=1
@@ -184,6 +191,7 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2,index=1
                 opt_io_size = 0 (0x0)
                 discard_granularity = 4294967295 (0xffffffff)
                 write-cache = "auto"
+                share-rw = false
                 drive-type = "144"
               dev: floppy, id ""
                 unit = 0 (0x0)
@@ -194,6 +202,7 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2,index=1
                 opt_io_size = 0 (0x0)
                 discard_granularity = 4294967295 (0xffffffff)
                 write-cache = "auto"
+                share-rw = false
                 drive-type = "288"
 
 Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=floppy,file=TEST_DIR/t.qcow2,index=1
@@ -220,6 +229,7 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=floppy,file=TEST_DIR/t
                 opt_io_size = 0 (0x0)
                 discard_granularity = 4294967295 (0xffffffff)
                 write-cache = "auto"
+                share-rw = false
                 drive-type = "144"
               dev: floppy, id ""
                 unit = 0 (0x0)
@@ -230,6 +240,7 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=floppy,file=TEST_DIR/t
                 opt_io_size = 0 (0x0)
                 discard_granularity = 4294967295 (0xffffffff)
                 write-cache = "auto"
+                share-rw = false
                 drive-type = "144"
 
 
@@ -259,6 +270,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveA=none0
                 opt_io_size = 0 (0x0)
                 discard_granularity = 4294967295 (0xffffffff)
                 write-cache = "auto"
+                share-rw = false
                 drive-type = "144"
 
 Testing: -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveB=none0
@@ -285,6 +297,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveB=none0
                 opt_io_size = 0 (0x0)
                 discard_granularity = 4294967295 (0xffffffff)
                 write-cache = "auto"
+                share-rw = false
                 drive-type = "144"
 
 Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveA=none0 -global isa-fdc.driveB=none1
@@ -311,6 +324,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco
                 opt_io_size = 0 (0x0)
                 discard_granularity = 4294967295 (0xffffffff)
                 write-cache = "auto"
+                share-rw = false
                 drive-type = "144"
               dev: floppy, id ""
                 unit = 0 (0x0)
@@ -321,6 +335,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco
                 opt_io_size = 0 (0x0)
                 discard_granularity = 4294967295 (0xffffffff)
                 write-cache = "auto"
+                share-rw = false
                 drive-type = "144"
 
 
@@ -350,6 +365,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0
                 opt_io_size = 0 (0x0)
                 discard_granularity = 4294967295 (0xffffffff)
                 write-cache = "auto"
+                share-rw = false
                 drive-type = "144"
 
 Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=1
@@ -376,6 +392,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=1
                 opt_io_size = 0 (0x0)
                 discard_granularity = 4294967295 (0xffffffff)
                 write-cache = "auto"
+                share-rw = false
                 drive-type = "144"
 
 Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0 -device floppy,drive=none1,unit=1
@@ -402,6 +419,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco
                 opt_io_size = 0 (0x0)
                 discard_granularity = 4294967295 (0xffffffff)
                 write-cache = "auto"
+                share-rw = false
                 drive-type = "144"
               dev: floppy, id ""
                 unit = 0 (0x0)
@@ -412,6 +430,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco
                 opt_io_size = 0 (0x0)
                 discard_granularity = 4294967295 (0xffffffff)
                 write-cache = "auto"
+                share-rw = false
                 drive-type = "144"
 
 
@@ -441,6 +460,7 @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-
                 opt_io_size = 0 (0x0)
                 discard_granularity = 4294967295 (0xffffffff)
                 write-cache = "auto"
+                share-rw = false
                 drive-type = "144"
               dev: floppy, id ""
                 unit = 0 (0x0)
@@ -451,6 +471,7 @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-
                 opt_io_size = 0 (0x0)
                 discard_granularity = 4294967295 (0xffffffff)
                 write-cache = "auto"
+                share-rw = false
                 drive-type = "144"
 
 Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveA=none0
@@ -477,6 +498,7 @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-
                 opt_io_size = 0 (0x0)
                 discard_granularity = 4294967295 (0xffffffff)
                 write-cache = "auto"
+                share-rw = false
                 drive-type = "144"
               dev: floppy, id ""
                 unit = 0 (0x0)
@@ -487,6 +509,7 @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-
                 opt_io_size = 0 (0x0)
                 discard_granularity = 4294967295 (0xffffffff)
                 write-cache = "auto"
+                share-rw = false
                 drive-type = "144"
 
 Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveA=none0
@@ -513,6 +536,7 @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-
                 opt_io_size = 0 (0x0)
                 discard_granularity = 4294967295 (0xffffffff)
                 write-cache = "auto"
+                share-rw = false
                 drive-type = "144"
 
 Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveB=none0
@@ -539,6 +563,7 @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-
                 opt_io_size = 0 (0x0)
                 discard_granularity = 4294967295 (0xffffffff)
                 write-cache = "auto"
+                share-rw = false
                 drive-type = "144"
 
 
@@ -568,6 +593,7 @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device flop
                 opt_io_size = 0 (0x0)
                 discard_granularity = 4294967295 (0xffffffff)
                 write-cache = "auto"
+                share-rw = false
                 drive-type = "144"
               dev: floppy, id ""
                 unit = 0 (0x0)
@@ -578,6 +604,7 @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device flop
                 opt_io_size = 0 (0x0)
                 discard_granularity = 4294967295 (0xffffffff)
                 write-cache = "auto"
+                share-rw = false
                 drive-type = "144"
 
 Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=1
@@ -604,6 +631,7 @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device flop
                 opt_io_size = 0 (0x0)
                 discard_granularity = 4294967295 (0xffffffff)
                 write-cache = "auto"
+                share-rw = false
                 drive-type = "144"
               dev: floppy, id ""
                 unit = 0 (0x0)
@@ -614,6 +642,7 @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device flop
                 opt_io_size = 0 (0x0)
                 discard_granularity = 4294967295 (0xffffffff)
                 write-cache = "auto"
+                share-rw = false
                 drive-type = "144"
 
 Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0
@@ -640,6 +669,7 @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device flop
                 opt_io_size = 0 (0x0)
                 discard_granularity = 4294967295 (0xffffffff)
                 write-cache = "auto"
+                share-rw = false
                 drive-type = "144"
               dev: floppy, id ""
                 unit = 1 (0x1)
@@ -650,6 +680,7 @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device flop
                 opt_io_size = 0 (0x0)
                 discard_granularity = 4294967295 (0xffffffff)
                 write-cache = "auto"
+                share-rw = false
                 drive-type = "144"
 
 Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=0
@@ -676,6 +707,7 @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device flop
                 opt_io_size = 0 (0x0)
                 discard_granularity = 4294967295 (0xffffffff)
                 write-cache = "auto"
+                share-rw = false
                 drive-type = "144"
               dev: floppy, id ""
                 unit = 1 (0x1)
@@ -686,6 +718,7 @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device flop
                 opt_io_size = 0 (0x0)
                 discard_granularity = 4294967295 (0xffffffff)
                 write-cache = "auto"
+                share-rw = false
                 drive-type = "144"
 
 Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=0
@@ -723,6 +756,7 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.q
                 opt_io_size = 0 (0x0)
                 discard_granularity = 4294967295 (0xffffffff)
                 write-cache = "auto"
+                share-rw = false
                 drive-type = "144"
               dev: floppy, id ""
                 unit = 0 (0x0)
@@ -733,6 +767,7 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.q
                 opt_io_size = 0 (0x0)
                 discard_granularity = 4294967295 (0xffffffff)
                 write-cache = "auto"
+                share-rw = false
                 drive-type = "144"
 
 Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=1
@@ -759,6 +794,7 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.q
                 opt_io_size = 0 (0x0)
                 discard_granularity = 4294967295 (0xffffffff)
                 write-cache = "auto"
+                share-rw = false
                 drive-type = "144"
               dev: floppy, id ""
                 unit = 0 (0x0)
@@ -769,6 +805,7 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.q
                 opt_io_size = 0 (0x0)
                 discard_granularity = 4294967295 (0xffffffff)
                 write-cache = "auto"
+                share-rw = false
                 drive-type = "144"
 
 Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=0
@@ -802,6 +839,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco
                 opt_io_size = 0 (0x0)
                 discard_granularity = 4294967295 (0xffffffff)
                 write-cache = "auto"
+                share-rw = false
                 drive-type = "144"
               dev: floppy, id ""
                 unit = 0 (0x0)
@@ -812,6 +850,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco
                 opt_io_size = 0 (0x0)
                 discard_granularity = 4294967295 (0xffffffff)
                 write-cache = "auto"
+                share-rw = false
                 drive-type = "144"
 
 Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveA=none0 -device floppy,drive=none1,unit=1
@@ -838,6 +877,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco
                 opt_io_size = 0 (0x0)
                 discard_granularity = 4294967295 (0xffffffff)
                 write-cache = "auto"
+                share-rw = false
                 drive-type = "144"
               dev: floppy, id ""
                 unit = 0 (0x0)
@@ -848,6 +888,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco
                 opt_io_size = 0 (0x0)
                 discard_granularity = 4294967295 (0xffffffff)
                 write-cache = "auto"
+                share-rw = false
                 drive-type = "144"
 
 Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveB=none0 -device floppy,drive=none1
@@ -874,6 +915,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco
                 opt_io_size = 0 (0x0)
                 discard_granularity = 4294967295 (0xffffffff)
                 write-cache = "auto"
+                share-rw = false
                 drive-type = "144"
               dev: floppy, id ""
                 unit = 1 (0x1)
@@ -884,6 +926,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco
                 opt_io_size = 0 (0x0)
                 discard_granularity = 4294967295 (0xffffffff)
                 write-cache = "auto"
+                share-rw = false
                 drive-type = "144"
 
 Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveB=none0 -device floppy,drive=none1,unit=0
@@ -910,6 +953,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco
                 opt_io_size = 0 (0x0)
                 discard_granularity = 4294967295 (0xffffffff)
                 write-cache = "auto"
+                share-rw = false
                 drive-type = "144"
               dev: floppy, id ""
                 unit = 1 (0x1)
@@ -920,6 +964,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco
                 opt_io_size = 0 (0x0)
                 discard_granularity = 4294967295 (0xffffffff)
                 write-cache = "auto"
+                share-rw = false
                 drive-type = "144"
 
 Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveA=none0 -device floppy,drive=none1,unit=0
@@ -964,6 +1009,7 @@ Testing: -device floppy
                 opt_io_size = 0 (0x0)
                 discard_granularity = 4294967295 (0xffffffff)
                 write-cache = "auto"
+                share-rw = false
                 drive-type = "288"
 
 Testing: -device floppy,drive-type=120
@@ -990,6 +1036,7 @@ Testing: -device floppy,drive-type=120
                 opt_io_size = 0 (0x0)
                 discard_granularity = 4294967295 (0xffffffff)
                 write-cache = "auto"
+                share-rw = false
                 drive-type = "120"
 
 Testing: -device floppy,drive-type=144
@@ -1016,6 +1063,7 @@ Testing: -device floppy,drive-type=144
                 opt_io_size = 0 (0x0)
                 discard_granularity = 4294967295 (0xffffffff)
                 write-cache = "auto"
+                share-rw = false
                 drive-type = "144"
 
 Testing: -device floppy,drive-type=288
@@ -1042,6 +1090,7 @@ Testing: -device floppy,drive-type=288
                 opt_io_size = 0 (0x0)
                 discard_granularity = 4294967295 (0xffffffff)
                 write-cache = "auto"
+                share-rw = false
                 drive-type = "288"
 
 
@@ -1071,6 +1120,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,drive-t
                 opt_io_size = 0 (0x0)
                 discard_granularity = 4294967295 (0xffffffff)
                 write-cache = "auto"
+                share-rw = false
                 drive-type = "120"
 
 Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,drive-type=288
@@ -1097,6 +1147,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,drive-t
                 opt_io_size = 0 (0x0)
                 discard_granularity = 4294967295 (0xffffffff)
                 write-cache = "auto"
+                share-rw = false
                 drive-type = "288"
 
 
@@ -1126,6 +1177,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,logical
                 opt_io_size = 0 (0x0)
                 discard_granularity = 4294967295 (0xffffffff)
                 write-cache = "auto"
+                share-rw = false
                 drive-type = "144"
 
 Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,physical_block_size=512
@@ -1152,6 +1204,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,physica
                 opt_io_size = 0 (0x0)
                 discard_granularity = 4294967295 (0xffffffff)
                 write-cache = "auto"
+                share-rw = false
                 drive-type = "144"
 
 Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,logical_block_size=4096
diff --git a/tests/tco-test.c b/tests/tco-test.c
index ef02ec5903..c4c264eb3d 100644
--- a/tests/tco-test.c
+++ b/tests/tco-test.c
@@ -42,11 +42,18 @@ typedef struct {
     bool noreboot;
     QPCIDevice *dev;
     QPCIBar tco_io_bar;
+    QPCIBus *bus;
 } TestData;
 
+static void test_end(TestData *d)
+{
+    g_free(d->dev);
+    qpci_free_pc(d->bus);
+    qtest_end();
+}
+
 static void test_init(TestData *d)
 {
-    QPCIBus *bus;
     QTestState *qs;
     char *s;
 
@@ -57,8 +64,8 @@ static void test_init(TestData *d)
     qtest_irq_intercept_in(qs, "ioapic");
     g_free(s);
 
-    bus = qpci_init_pc(NULL);
-    d->dev = qpci_device_find(bus, QPCI_DEVFN(0x1f, 0x00));
+    d->bus = qpci_init_pc(NULL);
+    d->dev = qpci_device_find(d->bus, QPCI_DEVFN(0x1f, 0x00));
     g_assert(d->dev != NULL);
 
     qpci_device_enable(d->dev);
@@ -148,7 +155,7 @@ static void test_tco_defaults(void)
                     SW_IRQ_GEN_DEFAULT);
     g_assert_cmpint(qpci_io_readw(d.dev, d.tco_io_bar, TCO_TMR), ==,
                     TCO_TMR_DEFAULT);
-    qtest_end();
+    test_end(&d);
 }
 
 static void test_tco_timeout(void)
@@ -192,7 +199,7 @@ static void test_tco_timeout(void)
     g_assert(ret == 1);
 
     stop_tco(&d);
-    qtest_end();
+    test_end(&d);
 }
 
 static void test_tco_max_timeout(void)
@@ -225,7 +232,7 @@ static void test_tco_max_timeout(void)
     g_assert(ret == 1);
 
     stop_tco(&d);
-    qtest_end();
+    test_end(&d);
 }
 
 static QDict *get_watchdog_action(void)
@@ -262,7 +269,7 @@ static void test_tco_second_timeout_pause(void)
     QDECREF(ad);
 
     stop_tco(&td);
-    qtest_end();
+    test_end(&td);
 }
 
 static void test_tco_second_timeout_reset(void)
@@ -287,7 +294,7 @@ static void test_tco_second_timeout_reset(void)
     QDECREF(ad);
 
     stop_tco(&td);
-    qtest_end();
+    test_end(&td);
 }
 
 static void test_tco_second_timeout_shutdown(void)
@@ -312,7 +319,7 @@ static void test_tco_second_timeout_shutdown(void)
     QDECREF(ad);
 
     stop_tco(&td);
-    qtest_end();
+    test_end(&td);
 }
 
 static void test_tco_second_timeout_none(void)
@@ -337,7 +344,7 @@ static void test_tco_second_timeout_none(void)
     QDECREF(ad);
 
     stop_tco(&td);
-    qtest_end();
+    test_end(&td);
 }
 
 static void test_tco_ticks_counter(void)
@@ -365,7 +372,7 @@ static void test_tco_ticks_counter(void)
     } while (!(qpci_io_readw(d.dev, d.tco_io_bar, TCO1_STS) & TCO_TIMEOUT));
 
     stop_tco(&d);
-    qtest_end();
+    test_end(&d);
 }
 
 static void test_tco1_control_bits(void)
@@ -383,7 +390,7 @@ static void test_tco1_control_bits(void)
     qpci_io_writew(d.dev, d.tco_io_bar, TCO1_CNT, val);
     g_assert_cmpint(qpci_io_readw(d.dev, d.tco_io_bar, TCO1_CNT), ==,
                     TCO_LOCK);
-    qtest_end();
+    test_end(&d);
 }
 
 static void test_tco1_status_bits(void)
@@ -412,7 +419,7 @@ static void test_tco1_status_bits(void)
     g_assert(ret == 1);
     qpci_io_writew(d.dev, d.tco_io_bar, TCO1_STS, val);
     g_assert_cmpint(qpci_io_readw(d.dev, d.tco_io_bar, TCO1_STS), ==, 0);
-    qtest_end();
+    test_end(&d);
 }
 
 static void test_tco2_status_bits(void)
@@ -439,7 +446,7 @@ static void test_tco2_status_bits(void)
     g_assert(ret == 1);
     qpci_io_writew(d.dev, d.tco_io_bar, TCO2_STS, val);
     g_assert_cmpint(qpci_io_readw(d.dev, d.tco_io_bar, TCO2_STS), ==, 0);
-    qtest_end();
+    test_end(&d);
 }
 
 int main(int argc, char **argv)
diff --git a/tests/test-blockjob-txn.c b/tests/test-blockjob-txn.c
index f6dfd08746..4ccbda14af 100644
--- a/tests/test-blockjob-txn.c
+++ b/tests/test-blockjob-txn.c
@@ -101,9 +101,9 @@ static BlockJob *test_block_job_start(unsigned int iterations,
     g_assert_nonnull(bs);
 
     snprintf(job_id, sizeof(job_id), "job%u", counter++);
-    s = block_job_create(job_id, &test_block_job_driver, bs, 0,
-                         BLOCK_JOB_DEFAULT, test_block_job_cb,
-                         data, &error_abort);
+    s = block_job_create(job_id, &test_block_job_driver, bs,
+                         0, BLK_PERM_ALL, 0, BLOCK_JOB_DEFAULT,
+                         test_block_job_cb, data, &error_abort);
     s->iterations = iterations;
     s->use_timer = use_timer;
     s->rc = rc;
diff --git a/tests/test-blockjob.c b/tests/test-blockjob.c
index 068c9e419b..740e740398 100644
--- a/tests/test-blockjob.c
+++ b/tests/test-blockjob.c
@@ -30,8 +30,9 @@ static BlockJob *do_test_id(BlockBackend *blk, const char *id,
     BlockJob *job;
     Error *errp = NULL;
 
-    job = block_job_create(id, &test_block_job_driver, blk_bs(blk), 0,
-                           BLOCK_JOB_DEFAULT, block_job_cb, NULL, &errp);
+    job = block_job_create(id, &test_block_job_driver, blk_bs(blk),
+                           0, BLK_PERM_ALL, 0, BLOCK_JOB_DEFAULT, block_job_cb,
+                           NULL, &errp);
     if (should_succeed) {
         g_assert_null(errp);
         g_assert_nonnull(job);
@@ -53,13 +54,14 @@ static BlockJob *do_test_id(BlockBackend *blk, const char *id,
  * BlockDriverState inserted. */
 static BlockBackend *create_blk(const char *name)
 {
-    BlockBackend *blk = blk_new();
+    /* No I/O is performed on this device */
+    BlockBackend *blk = blk_new(0, BLK_PERM_ALL);
     BlockDriverState *bs;
 
     bs = bdrv_open("null-co://", NULL, NULL, 0, &error_abort);
     g_assert_nonnull(bs);
 
-    blk_insert_bs(blk, bs);
+    blk_insert_bs(blk, bs, &error_abort);
     bdrv_unref(bs);
 
     if (name) {
diff --git a/tests/test-filter-mirror.c b/tests/test-filter-mirror.c
index ffaaffabd0..9f84402493 100644
--- a/tests/test-filter-mirror.c
+++ b/tests/test-filter-mirror.c
@@ -57,7 +57,7 @@ static void test_mirror(void)
     };
 
     /* send a qmp command to guarantee that 'connected' is setting to true. */
-    qmp("{ 'execute' : 'query-status'}");
+    qmp_discard_response("{ 'execute' : 'query-status'}");
     ret = iov_send(send_sock[0], iov, 2, 0, sizeof(size) + sizeof(send_buf));
     g_assert_cmpint(ret, ==, sizeof(send_buf) + sizeof(size));
     close(send_sock[0]);
diff --git a/tests/test-filter-redirector.c b/tests/test-filter-redirector.c
index c63b68f03a..0c4b8d52ef 100644
--- a/tests/test-filter-redirector.c
+++ b/tests/test-filter-redirector.c
@@ -99,7 +99,7 @@ static void test_redirector_tx(void)
     g_assert_cmpint(recv_sock, !=, -1);
 
     /* send a qmp command to guarantee that 'connected' is setting to true. */
-    qmp("{ 'execute' : 'query-status'}");
+    qmp_discard_response("{ 'execute' : 'query-status'}");
 
     struct iovec iov[] = {
         {
@@ -184,7 +184,7 @@ static void test_redirector_rx(void)
     send_sock = unix_connect(sock_path1, NULL);
     g_assert_cmpint(send_sock, !=, -1);
     /* send a qmp command to guarantee that 'connected' is setting to true. */
-    qmp("{ 'execute' : 'query-status'}");
+    qmp_discard_response("{ 'execute' : 'query-status'}");
 
     ret = iov_send(send_sock, iov, 2, 0, sizeof(size) + sizeof(send_buf));
     g_assert_cmpint(ret, ==, sizeof(send_buf) + sizeof(size));
diff --git a/tests/test-throttle.c b/tests/test-throttle.c
index 363b59a38f..bd7c501b2e 100644
--- a/tests/test-throttle.c
+++ b/tests/test-throttle.c
@@ -593,9 +593,10 @@ static void test_groups(void)
     BlockBackend *blk1, *blk2, *blk3;
     BlockBackendPublic *blkp1, *blkp2, *blkp3;
 
-    blk1 = blk_new();
-    blk2 = blk_new();
-    blk3 = blk_new();
+    /* No actual I/O is performed on these devices */
+    blk1 = blk_new(0, BLK_PERM_ALL);
+    blk2 = blk_new(0, BLK_PERM_ALL);
+    blk3 = blk_new(0, BLK_PERM_ALL);
 
     blkp1 = blk_get_public(blk1);
     blkp2 = blk_get_public(blk2);
diff --git a/tests/test-vmstate.c b/tests/test-vmstate.c
index 39f338a4c4..f694a89782 100644
--- a/tests/test-vmstate.c
+++ b/tests/test-vmstate.c
@@ -476,6 +476,8 @@ const VMStateDescription vmsd_tst = {
     }
 };
 
+/* test array migration */
+
 #define AR_SIZE 4
 
 typedef struct {
@@ -492,20 +494,22 @@ const VMStateDescription vmsd_arps = {
         VMSTATE_END_OF_LIST()
     }
 };
+
+static uint8_t wire_arr_ptr_no0[] = {
+    0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x01,
+    0x00, 0x00, 0x00, 0x02,
+    0x00, 0x00, 0x00, 0x03,
+    QEMU_VM_EOF
+};
+
 static void test_arr_ptr_str_no0_save(void)
 {
     TestStructTriv ar[AR_SIZE] = {{.i = 0}, {.i = 1}, {.i = 2}, {.i = 3} };
     TestArrayOfPtrToStuct sample = {.ar = {&ar[0], &ar[1], &ar[2], &ar[3]} };
-    uint8_t wire_sample[] = {
-        0x00, 0x00, 0x00, 0x00,
-        0x00, 0x00, 0x00, 0x01,
-        0x00, 0x00, 0x00, 0x02,
-        0x00, 0x00, 0x00, 0x03,
-        QEMU_VM_EOF
-    };
 
     save_vmstate(&vmsd_arps, &sample);
-    compare_vmstate(wire_sample, sizeof(wire_sample));
+    compare_vmstate(wire_arr_ptr_no0, sizeof(wire_arr_ptr_no0));
 }
 
 static void test_arr_ptr_str_no0_load(void)
@@ -514,21 +518,98 @@ static void test_arr_ptr_str_no0_load(void)
     TestStructTriv ar[AR_SIZE] = {};
     TestArrayOfPtrToStuct obj = {.ar = {&ar[0], &ar[1], &ar[2], &ar[3]} };
     int idx;
-    uint8_t wire_sample[] = {
-        0x00, 0x00, 0x00, 0x00,
-        0x00, 0x00, 0x00, 0x01,
-        0x00, 0x00, 0x00, 0x02,
-        0x00, 0x00, 0x00, 0x03,
-        QEMU_VM_EOF
-    };
 
-    save_buffer(wire_sample, sizeof(wire_sample));
+    save_buffer(wire_arr_ptr_no0, sizeof(wire_arr_ptr_no0));
+    SUCCESS(load_vmstate_one(&vmsd_arps, &obj, 1,
+                          wire_arr_ptr_no0, sizeof(wire_arr_ptr_no0)));
+    for (idx = 0; idx < AR_SIZE; ++idx) {
+        /* compare the target array ar with the ground truth array ar_gt */
+        g_assert_cmpint(ar_gt[idx].i, ==, ar[idx].i);
+    }
+}
+
+static uint8_t wire_arr_ptr_0[] = {
+    0x00, 0x00, 0x00, 0x00,
+    VMS_NULLPTR_MARKER,
+    0x00, 0x00, 0x00, 0x02,
+    0x00, 0x00, 0x00, 0x03,
+    QEMU_VM_EOF
+};
+
+static void test_arr_ptr_str_0_save(void)
+{
+    TestStructTriv ar[AR_SIZE] = {{.i = 0}, {.i = 1}, {.i = 2}, {.i = 3} };
+    TestArrayOfPtrToStuct sample = {.ar = {&ar[0], NULL, &ar[2], &ar[3]} };
+
+    save_vmstate(&vmsd_arps, &sample);
+    compare_vmstate(wire_arr_ptr_0, sizeof(wire_arr_ptr_0));
+}
+
+static void test_arr_ptr_str_0_load(void)
+{
+    TestStructTriv ar_gt[AR_SIZE] = {{.i = 0}, {.i = 0}, {.i = 2}, {.i = 3} };
+    TestStructTriv ar[AR_SIZE] = {};
+    TestArrayOfPtrToStuct obj = {.ar = {&ar[0], NULL, &ar[2], &ar[3]} };
+    int idx;
+
+    save_buffer(wire_arr_ptr_0, sizeof(wire_arr_ptr_0));
     SUCCESS(load_vmstate_one(&vmsd_arps, &obj, 1,
-                          wire_sample, sizeof(wire_sample)));
+                          wire_arr_ptr_0, sizeof(wire_arr_ptr_0)));
     for (idx = 0; idx < AR_SIZE; ++idx) {
         /* compare the target array ar with the ground truth array ar_gt */
         g_assert_cmpint(ar_gt[idx].i, ==, ar[idx].i);
     }
+    for (idx = 0; idx < AR_SIZE; ++idx) {
+        if (idx == 1) {
+            g_assert_cmpint((uintptr_t)(obj.ar[idx]), ==, 0);
+        } else {
+            g_assert_cmpint((uintptr_t)(obj.ar[idx]), !=, 0);
+        }
+    }
+}
+
+typedef struct TestArrayOfPtrToInt {
+    int32_t *ar[AR_SIZE];
+} TestArrayOfPtrToInt;
+
+const VMStateDescription vmsd_arpp = {
+    .name = "test/arps",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_ARRAY_OF_POINTER(ar, TestArrayOfPtrToInt,
+                AR_SIZE, 0, vmstate_info_int32, int32_t*),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static void test_arr_ptr_prim_0_save(void)
+{
+    int32_t ar[AR_SIZE] = {0 , 1, 2, 3};
+    TestArrayOfPtrToInt  sample = {.ar = {&ar[0], NULL, &ar[2], &ar[3]} };
+
+    save_vmstate(&vmsd_arpp, &sample);
+    compare_vmstate(wire_arr_ptr_0, sizeof(wire_arr_ptr_0));
+}
+
+static void test_arr_ptr_prim_0_load(void)
+{
+    int32_t ar_gt[AR_SIZE] = {0, 1, 2, 3};
+    int32_t ar[AR_SIZE] = {3 , 42, 1, 0};
+    TestArrayOfPtrToInt obj = {.ar = {&ar[0], NULL, &ar[2], &ar[3]} };
+    int idx;
+
+    save_buffer(wire_arr_ptr_0, sizeof(wire_arr_ptr_0));
+    SUCCESS(load_vmstate_one(&vmsd_arpp, &obj, 1,
+                          wire_arr_ptr_0, sizeof(wire_arr_ptr_0)));
+    for (idx = 0; idx < AR_SIZE; ++idx) {
+        /* compare the target array ar with the ground truth array ar_gt */
+        if (idx == 1) {
+            g_assert_cmpint(42, ==, ar[idx]);
+        } else {
+            g_assert_cmpint(ar_gt[idx], ==, ar[idx]);
+        }
+    }
 }
 
 /* test QTAILQ migration */
@@ -781,6 +862,13 @@ int main(int argc, char **argv)
                     test_arr_ptr_str_no0_save);
     g_test_add_func("/vmstate/array/ptr/str/no0/load",
                     test_arr_ptr_str_no0_load);
+    g_test_add_func("/vmstate/array/ptr/str/0/save", test_arr_ptr_str_0_save);
+    g_test_add_func("/vmstate/array/ptr/str/0/load",
+                    test_arr_ptr_str_0_load);
+    g_test_add_func("/vmstate/array/ptr/prim/0/save",
+                    test_arr_ptr_prim_0_save);
+    g_test_add_func("/vmstate/array/ptr/prim/0/load",
+                    test_arr_ptr_prim_0_load);
     g_test_add_func("/vmstate/qtailq/save/saveq", test_save_q);
     g_test_add_func("/vmstate/qtailq/load/loadq", test_load_q);
     g_test_add_func("/vmstate/tmp_struct", test_tmp_struct);
diff --git a/tests/usb-hcd-ehci-test.c b/tests/usb-hcd-ehci-test.c
index 57af8a034e..944eb1c088 100644
--- a/tests/usb-hcd-ehci-test.c
+++ b/tests/usb-hcd-ehci-test.c
@@ -50,11 +50,8 @@ static void ehci_port_test(struct qhc *hc, int port, uint32_t expect)
 
 /* tests */
 
-static void pci_init(void)
+static void test_init(void)
 {
-    if (pcibus) {
-        return;
-    }
     pcibus = qpci_init_pc(NULL);
     g_assert(pcibus != NULL);
 
@@ -64,6 +61,15 @@ static void pci_init(void)
     qusb_pci_init_one(pcibus, &ehci1, QPCI_DEVFN(0x1d, 7), 0);
 }
 
+static void test_deinit(void)
+{
+    uhci_deinit(&uhci1);
+    uhci_deinit(&uhci2);
+    uhci_deinit(&uhci3);
+    uhci_deinit(&ehci1);
+    qpci_free_pc(pcibus);
+}
+
 static void pci_uhci_port_1(void)
 {
     g_assert(pcibus != NULL);
@@ -142,7 +148,7 @@ int main(int argc, char **argv)
     int ret;
 
     g_test_init(&argc, &argv, NULL);
-    qtest_add_func("/ehci/pci/init", pci_init);
+
     qtest_add_func("/ehci/pci/uhci-port-1", pci_uhci_port_1);
     qtest_add_func("/ehci/pci/ehci-port-1", pci_ehci_port_1);
     qtest_add_func("/ehci/pci/ehci-config", pci_ehci_config);
@@ -161,7 +167,10 @@ int main(int argc, char **argv)
                 "-drive if=none,id=usbcdrom,media=cdrom "
                 "-device usb-tablet,bus=ich9-ehci-1.0,port=1,usb_version=1 "
                 "-device usb-storage,bus=ich9-ehci-1.0,port=2,drive=usbcdrom ");
+
+    test_init();
     ret = g_test_run();
+    test_deinit();
 
     qtest_end();
 
diff --git a/tests/usb-hcd-uhci-test.c b/tests/usb-hcd-uhci-test.c
index e956b9ccb7..f25bae5e6c 100644
--- a/tests/usb-hcd-uhci-test.c
+++ b/tests/usb-hcd-uhci-test.c
@@ -28,6 +28,7 @@ static void test_port(int port)
     g_assert(port > 0);
     qusb_pci_init_one(qs->pcibus, &uhci, QPCI_DEVFN(0x1d, 0), 4);
     uhci_port_test(&uhci, port - 1, UHCI_PORT_CCS);
+    uhci_deinit(&uhci);
 }
 
 static void test_port_1(void)
diff --git a/tests/vhost-user-test.c b/tests/vhost-user-test.c
index 2c45c7b29f..a61896c32d 100644
--- a/tests/vhost-user-test.c
+++ b/tests/vhost-user-test.c
@@ -139,6 +139,7 @@ enum {
 };
 
 typedef struct TestServer {
+    QPCIBus *bus;
     gchar *socket_path;
     gchar *mig_path;
     gchar *chr_name;
@@ -160,14 +161,13 @@ static const char *root;
 
 static void init_virtio_dev(TestServer *s)
 {
-    QPCIBus *bus;
     QVirtioPCIDevice *dev;
     uint32_t features;
 
-    bus = qpci_init_pc(NULL);
-    g_assert_nonnull(bus);
+    s->bus = qpci_init_pc(NULL);
+    g_assert_nonnull(s->bus);
 
-    dev = qvirtio_pci_device_find(bus, VIRTIO_ID_NET);
+    dev = qvirtio_pci_device_find(s->bus, VIRTIO_ID_NET);
     g_assert_nonnull(dev);
 
     qvirtio_pci_device_enable(dev);
@@ -180,6 +180,7 @@ static void init_virtio_dev(TestServer *s)
     qvirtio_set_features(&dev->vdev, features);
 
     qvirtio_set_driver_ok(&dev->vdev);
+    qvirtio_pci_device_free(dev);
 }
 
 static void wait_for_fds(TestServer *s)
@@ -507,6 +508,8 @@ static gboolean _test_server_free(TestServer *server)
     g_free(server->mig_path);
 
     g_free(server->chr_name);
+    qpci_free_pc(server->bus);
+
     g_free(server);
 
     return FALSE;
diff --git a/tests/virtio-9p-test.c b/tests/virtio-9p-test.c
index 9556291567..43a1ad813f 100644
--- a/tests/virtio-9p-test.c
+++ b/tests/virtio-9p-test.c
@@ -80,7 +80,7 @@ static void qvirtio_9p_pci_stop(QVirtIO9P *v9p)
 {
     qvirtqueue_cleanup(v9p->dev->bus, v9p->vq, v9p->qs->alloc);
     qvirtio_pci_device_disable(container_of(v9p->dev, QVirtioPCIDevice, vdev));
-    g_free(v9p->dev);
+    qvirtio_pci_device_free((QVirtioPCIDevice *)v9p->dev);
     qvirtio_9p_stop(v9p);
 }
 
diff --git a/tests/virtio-blk-test.c b/tests/virtio-blk-test.c
index 0e32e416dd..1eee95df49 100644
--- a/tests/virtio-blk-test.c
+++ b/tests/virtio-blk-test.c
@@ -108,7 +108,7 @@ static QVirtioPCIDevice *virtio_blk_pci_init(QPCIBus *bus, int slot)
 {
     QVirtioPCIDevice *dev;
 
-    dev = qvirtio_pci_device_find(bus, VIRTIO_ID_BLOCK);
+    dev = qvirtio_pci_device_find_slot(bus, VIRTIO_ID_BLOCK, slot);
     g_assert(dev != NULL);
     g_assert_cmphex(dev->vdev.device_type, ==, VIRTIO_ID_BLOCK);
     g_assert_cmphex(dev->pdev->devfn, ==, ((slot << 3) | PCI_FN));
@@ -296,7 +296,7 @@ static void pci_basic(void)
     /* End test */
     qvirtqueue_cleanup(dev->vdev.bus, &vqpci->vq, qs->alloc);
     qvirtio_pci_device_disable(dev);
-    g_free(dev);
+    qvirtio_pci_device_free(dev);
     qtest_shutdown(qs);
 }
 
@@ -389,7 +389,7 @@ static void pci_indirect(void)
     /* End test */
     qvirtqueue_cleanup(dev->vdev.bus, &vqpci->vq, qs->alloc);
     qvirtio_pci_device_disable(dev);
-    g_free(dev);
+    qvirtio_pci_device_free(dev);
     qtest_shutdown(qs);
 }
 
@@ -409,15 +409,16 @@ static void pci_config(void)
 
     qvirtio_set_driver_ok(&dev->vdev);
 
-    qmp("{ 'execute': 'block_resize', 'arguments': { 'device': 'drive0', "
-                                                    " 'size': %d } }", n_size);
+    qmp_discard_response("{ 'execute': 'block_resize', "
+                         " 'arguments': { 'device': 'drive0', "
+                         " 'size': %d } }", n_size);
     qvirtio_wait_config_isr(&dev->vdev, QVIRTIO_BLK_TIMEOUT_US);
 
     capacity = qvirtio_config_readq(&dev->vdev, 0);
     g_assert_cmpint(capacity, ==, n_size / 512);
 
     qvirtio_pci_device_disable(dev);
-    g_free(dev);
+    qvirtio_pci_device_free(dev);
 
     qtest_shutdown(qs);
 }
@@ -458,8 +459,9 @@ static void pci_msix(void)
 
     qvirtio_set_driver_ok(&dev->vdev);
 
-    qmp("{ 'execute': 'block_resize', 'arguments': { 'device': 'drive0', "
-                                                    " 'size': %d } }", n_size);
+    qmp_discard_response("{ 'execute': 'block_resize', "
+                         " 'arguments': { 'device': 'drive0', "
+                         " 'size': %d } }", n_size);
 
     qvirtio_wait_config_isr(&dev->vdev, QVIRTIO_BLK_TIMEOUT_US);
 
@@ -524,7 +526,7 @@ static void pci_msix(void)
     qvirtqueue_cleanup(dev->vdev.bus, &vqpci->vq, qs->alloc);
     qpci_msix_disable(dev->pdev);
     qvirtio_pci_device_disable(dev);
-    g_free(dev);
+    qvirtio_pci_device_free(dev);
     qtest_shutdown(qs);
 }
 
@@ -640,7 +642,7 @@ static void pci_idx(void)
     qvirtqueue_cleanup(dev->vdev.bus, &vqpci->vq, qs->alloc);
     qpci_msix_disable(dev->pdev);
     qvirtio_pci_device_disable(dev);
-    g_free(dev);
+    qvirtio_pci_device_free(dev);
     qtest_shutdown(qs);
 }
 
@@ -659,7 +661,7 @@ static void pci_hotplug(void)
     dev = virtio_blk_pci_init(qs->pcibus, PCI_SLOT_HP);
     g_assert(dev);
     qvirtio_pci_device_disable(dev);
-    g_free(dev);
+    qvirtio_pci_device_free(dev);
 
     /* unplug secondary disk */
     if (strcmp(arch, "i386") == 0 || strcmp(arch, "x86_64") == 0) {
@@ -691,8 +693,9 @@ static void mmio_basic(void)
 
     test_basic(&dev->vdev, alloc, vq);
 
-    qmp("{ 'execute': 'block_resize', 'arguments': { 'device': 'drive0', "
-                                                    " 'size': %d } }", n_size);
+    qmp_discard_response("{ 'execute': 'block_resize', "
+                         " 'arguments': { 'device': 'drive0', "
+                         " 'size': %d } }", n_size);
 
     qvirtio_wait_queue_isr(&dev->vdev, vq, QVIRTIO_BLK_TIMEOUT_US);
 
diff --git a/tests/virtio-scsi-test.c b/tests/virtio-scsi-test.c
index 69220ef07b..0eabd56fd9 100644
--- a/tests/virtio-scsi-test.c
+++ b/tests/virtio-scsi-test.c
@@ -63,7 +63,7 @@ static void qvirtio_scsi_pci_free(QVirtIOSCSI *vs)
         qvirtqueue_cleanup(vs->dev->bus, vs->vq[i], vs->qs->alloc);
     }
     qvirtio_pci_device_disable(container_of(vs->dev, QVirtioPCIDevice, vdev));
-    g_free(vs->dev);
+    qvirtio_pci_device_free((QVirtioPCIDevice *)vs->dev);
     qvirtio_scsi_stop(vs->qs);
     g_free(vs);
 }
diff --git a/ui/console.c b/ui/console.c
index 49d0740b40..d1ff7504ec 100644
--- a/ui/console.c
+++ b/ui/console.c
@@ -1735,16 +1735,30 @@ QEMUGLContext dpy_gl_ctx_get_current(QemuConsole *con)
     return con->gl->ops->dpy_gl_ctx_get_current(con->gl);
 }
 
-void dpy_gl_scanout(QemuConsole *con,
-                    uint32_t backing_id, bool backing_y_0_top,
-                    uint32_t backing_width, uint32_t backing_height,
-                    uint32_t x, uint32_t y, uint32_t width, uint32_t height)
+void dpy_gl_scanout_disable(QemuConsole *con)
 {
     assert(con->gl);
-    con->gl->ops->dpy_gl_scanout(con->gl, backing_id,
-                                 backing_y_0_top,
-                                 backing_width, backing_height,
-                                 x, y, width, height);
+    if (con->gl->ops->dpy_gl_scanout_disable) {
+        con->gl->ops->dpy_gl_scanout_disable(con->gl);
+    } else {
+        con->gl->ops->dpy_gl_scanout_texture(con->gl, 0, false, 0, 0,
+                                             0, 0, 0, 0);
+    }
+}
+
+void dpy_gl_scanout_texture(QemuConsole *con,
+                            uint32_t backing_id,
+                            bool backing_y_0_top,
+                            uint32_t backing_width,
+                            uint32_t backing_height,
+                            uint32_t x, uint32_t y,
+                            uint32_t width, uint32_t height)
+{
+    assert(con->gl);
+    con->gl->ops->dpy_gl_scanout_texture(con->gl, backing_id,
+                                         backing_y_0_top,
+                                         backing_width, backing_height,
+                                         x, y, width, height);
 }
 
 void dpy_gl_update(QemuConsole *con,
diff --git a/ui/gtk-egl.c b/ui/gtk-egl.c
index 3f5d328c7b..d53288f027 100644
--- a/ui/gtk-egl.c
+++ b/ui/gtk-egl.c
@@ -170,11 +170,21 @@ QEMUGLContext gd_egl_create_context(DisplayChangeListener *dcl,
     return qemu_egl_create_context(dcl, params);
 }
 
-void gd_egl_scanout(DisplayChangeListener *dcl,
-                    uint32_t backing_id, bool backing_y_0_top,
-                    uint32_t backing_width, uint32_t backing_height,
-                    uint32_t x, uint32_t y,
-                    uint32_t w, uint32_t h)
+void gd_egl_scanout_disable(DisplayChangeListener *dcl)
+{
+    VirtualConsole *vc = container_of(dcl, VirtualConsole, gfx.dcl);
+
+    vc->gfx.w = 0;
+    vc->gfx.h = 0;
+    vc->gfx.tex_id = 0;
+    gtk_egl_set_scanout_mode(vc, false);
+}
+
+void gd_egl_scanout_texture(DisplayChangeListener *dcl,
+                            uint32_t backing_id, bool backing_y_0_top,
+                            uint32_t backing_width, uint32_t backing_height,
+                            uint32_t x, uint32_t y,
+                            uint32_t w, uint32_t h)
 {
     VirtualConsole *vc = container_of(dcl, VirtualConsole, gfx.dcl);
 
@@ -188,11 +198,6 @@ void gd_egl_scanout(DisplayChangeListener *dcl,
     eglMakeCurrent(qemu_egl_display, vc->gfx.esurface,
                    vc->gfx.esurface, vc->gfx.ectx);
 
-    if (vc->gfx.tex_id == 0 || vc->gfx.w == 0 || vc->gfx.h == 0) {
-        gtk_egl_set_scanout_mode(vc, false);
-        return;
-    }
-
     gtk_egl_set_scanout_mode(vc, true);
     if (!vc->gfx.fbo_id) {
         glGenFramebuffers(1, &vc->gfx.fbo_id);
diff --git a/ui/gtk-gl-area.c b/ui/gtk-gl-area.c
index 0df5a36a9f..b05c665cbb 100644
--- a/ui/gtk-gl-area.c
+++ b/ui/gtk-gl-area.c
@@ -167,11 +167,13 @@ void gd_gl_area_destroy_context(DisplayChangeListener *dcl, QEMUGLContext ctx)
     /* FIXME */
 }
 
-void gd_gl_area_scanout(DisplayChangeListener *dcl,
-                        uint32_t backing_id, bool backing_y_0_top,
-                        uint32_t backing_width, uint32_t backing_height,
-                        uint32_t x, uint32_t y,
-                        uint32_t w, uint32_t h)
+void gd_gl_area_scanout_texture(DisplayChangeListener *dcl,
+                                uint32_t backing_id,
+                                bool backing_y_0_top,
+                                uint32_t backing_width,
+                                uint32_t backing_height,
+                                uint32_t x, uint32_t y,
+                                uint32_t w, uint32_t h)
 {
     VirtualConsole *vc = container_of(dcl, VirtualConsole, gfx.dcl);
 
diff --git a/ui/gtk.c b/ui/gtk.c
index f21e9e7f7b..a86848f3b0 100644
--- a/ui/gtk.c
+++ b/ui/gtk.c
@@ -669,7 +669,7 @@ static const DisplayChangeListenerOps dcl_gl_area_ops = {
     .dpy_gl_ctx_destroy      = gd_gl_area_destroy_context,
     .dpy_gl_ctx_make_current = gd_gl_area_make_current,
     .dpy_gl_ctx_get_current  = gd_gl_area_get_current_context,
-    .dpy_gl_scanout          = gd_gl_area_scanout,
+    .dpy_gl_scanout_texture  = gd_gl_area_scanout_texture,
     .dpy_gl_update           = gd_gl_area_scanout_flush,
 };
 
@@ -688,7 +688,8 @@ static const DisplayChangeListenerOps dcl_egl_ops = {
     .dpy_gl_ctx_destroy      = qemu_egl_destroy_context,
     .dpy_gl_ctx_make_current = gd_egl_make_current,
     .dpy_gl_ctx_get_current  = qemu_egl_get_current_context,
-    .dpy_gl_scanout          = gd_egl_scanout,
+    .dpy_gl_scanout_disable  = gd_egl_scanout_disable,
+    .dpy_gl_scanout_texture  = gd_egl_scanout_texture,
     .dpy_gl_update           = gd_egl_scanout_flush,
 };
 
@@ -2200,11 +2201,12 @@ static void gd_set_keycode_type(GtkDisplayState *s)
     GdkDisplay *display = gtk_widget_get_display(s->window);
     if (GDK_IS_X11_DISPLAY(display)) {
         Display *x11_display = gdk_x11_display_get_xdisplay(display);
-        XkbDescPtr desc = XkbGetKeyboard(x11_display, XkbGBN_AllComponentsMask,
-                                         XkbUseCoreKbd);
+        XkbDescPtr desc = XkbGetMap(x11_display, XkbGBN_AllComponentsMask,
+                                    XkbUseCoreKbd);
         char *keycodes = NULL;
 
-        if (desc && desc->names) {
+        if (desc &&
+            (XkbGetNames(x11_display, XkbKeycodesNameMask, desc) == Success)) {
             keycodes = XGetAtomName(x11_display, desc->names->keycodes);
         }
         if (keycodes == NULL) {
diff --git a/ui/sdl.c b/ui/sdl.c
index 19e8a848a7..37c21a00fb 100644
--- a/ui/sdl.c
+++ b/ui/sdl.c
@@ -233,10 +233,12 @@ static int check_for_evdev(void)
     if (!SDL_GetWMInfo(&info)) {
         return 0;
     }
-    desc = XkbGetKeyboard(info.info.x11.display,
-                          XkbGBN_AllComponentsMask,
-                          XkbUseCoreKbd);
-    if (desc && desc->names) {
+    desc = XkbGetMap(info.info.x11.display,
+                     XkbGBN_AllComponentsMask,
+                     XkbUseCoreKbd);
+    if (desc &&
+        (XkbGetNames(info.info.x11.display,
+                     XkbKeycodesNameMask, desc) == Success)) {
         keycodes = XGetAtomName(info.info.x11.display, desc->names->keycodes);
         if (keycodes == NULL) {
             fprintf(stderr, "could not lookup keycode name\n");
diff --git a/ui/sdl2-gl.c b/ui/sdl2-gl.c
index 039645df3e..1cd77e2c16 100644
--- a/ui/sdl2-gl.c
+++ b/ui/sdl2-gl.c
@@ -184,11 +184,24 @@ QEMUGLContext sdl2_gl_get_current_context(DisplayChangeListener *dcl)
     return (QEMUGLContext)sdlctx;
 }
 
-void sdl2_gl_scanout(DisplayChangeListener *dcl,
-                     uint32_t backing_id, bool backing_y_0_top,
-                     uint32_t backing_width, uint32_t backing_height,
-                     uint32_t x, uint32_t y,
-                     uint32_t w, uint32_t h)
+void sdl2_gl_scanout_disable(DisplayChangeListener *dcl)
+{
+    struct sdl2_console *scon = container_of(dcl, struct sdl2_console, dcl);
+
+    assert(scon->opengl);
+    scon->w = 0;
+    scon->h = 0;
+    scon->tex_id = 0;
+    sdl2_set_scanout_mode(scon, false);
+}
+
+void sdl2_gl_scanout_texture(DisplayChangeListener *dcl,
+                             uint32_t backing_id,
+                             bool backing_y_0_top,
+                             uint32_t backing_width,
+                             uint32_t backing_height,
+                             uint32_t x, uint32_t y,
+                             uint32_t w, uint32_t h)
 {
     struct sdl2_console *scon = container_of(dcl, struct sdl2_console, dcl);
 
@@ -202,11 +215,6 @@ void sdl2_gl_scanout(DisplayChangeListener *dcl,
 
     SDL_GL_MakeCurrent(scon->real_window, scon->winctx);
 
-    if (scon->tex_id == 0 || scon->w == 0 || scon->h == 0) {
-        sdl2_set_scanout_mode(scon, false);
-        return;
-    }
-
     sdl2_set_scanout_mode(scon, true);
     if (!scon->fbo_id) {
         glGenFramebuffers(1, &scon->fbo_id);
diff --git a/ui/sdl2.c b/ui/sdl2.c
index 91fb111aa5..faf9bdff5c 100644
--- a/ui/sdl2.c
+++ b/ui/sdl2.c
@@ -733,7 +733,8 @@ static const DisplayChangeListenerOps dcl_gl_ops = {
     .dpy_gl_ctx_destroy      = sdl2_gl_destroy_context,
     .dpy_gl_ctx_make_current = sdl2_gl_make_context_current,
     .dpy_gl_ctx_get_current  = sdl2_gl_get_current_context,
-    .dpy_gl_scanout          = sdl2_gl_scanout,
+    .dpy_gl_scanout_disable  = sdl2_gl_scanout_disable,
+    .dpy_gl_scanout_texture  = sdl2_gl_scanout_texture,
     .dpy_gl_update           = sdl2_gl_scanout_flush,
 };
 #endif
diff --git a/ui/spice-core.c b/ui/spice-core.c
index 39ccab7561..804abc5c0f 100644
--- a/ui/spice-core.c
+++ b/ui/spice-core.c
@@ -497,6 +497,12 @@ static QemuOptsList qemu_spice_opts = {
         },{
             .name = "seamless-migration",
             .type = QEMU_OPT_BOOL,
+        },{
+            .name = "display",
+            .type = QEMU_OPT_STRING,
+        },{
+            .name = "head",
+            .type = QEMU_OPT_NUMBER,
 #ifdef HAVE_SPICE_GL
         },{
             .name = "gl",
diff --git a/ui/spice-display.c b/ui/spice-display.c
index 64e472eeb0..b353445f58 100644
--- a/ui/spice-display.c
+++ b/ui/spice-display.c
@@ -928,39 +928,44 @@ static QEMUGLContext qemu_spice_gl_create_context(DisplayChangeListener *dcl,
     return qemu_egl_create_context(dcl, params);
 }
 
-static void qemu_spice_gl_scanout(DisplayChangeListener *dcl,
-                                  uint32_t tex_id,
-                                  bool y_0_top,
-                                  uint32_t backing_width,
-                                  uint32_t backing_height,
-                                  uint32_t x, uint32_t y,
-                                  uint32_t w, uint32_t h)
+static void qemu_spice_gl_scanout_disable(DisplayChangeListener *dcl)
+{
+    SimpleSpiceDisplay *ssd = container_of(dcl, SimpleSpiceDisplay, dcl);
+
+    dprint(1, "%s: no framebuffer\n", __func__);
+    spice_qxl_gl_scanout(&ssd->qxl, -1, 0, 0, 0, 0, false);
+    qemu_spice_gl_monitor_config(ssd, 0, 0, 0, 0);
+    ssd->have_surface = false;
+    ssd->have_scanout = false;
+}
+
+static void qemu_spice_gl_scanout_texture(DisplayChangeListener *dcl,
+                                          uint32_t tex_id,
+                                          bool y_0_top,
+                                          uint32_t backing_width,
+                                          uint32_t backing_height,
+                                          uint32_t x, uint32_t y,
+                                          uint32_t w, uint32_t h)
 {
     SimpleSpiceDisplay *ssd = container_of(dcl, SimpleSpiceDisplay, dcl);
     EGLint stride = 0, fourcc = 0;
     int fd = -1;
 
-    if (tex_id) {
-        fd = egl_get_fd_for_texture(tex_id, &stride, &fourcc);
-        if (fd < 0) {
-            fprintf(stderr, "%s: failed to get fd for texture\n", __func__);
-            return;
-        }
-        dprint(1, "%s: %dx%d (stride %d, fourcc 0x%x)\n", __func__,
-               w, h, stride, fourcc);
-    } else {
-        dprint(1, "%s: no texture (no framebuffer)\n", __func__);
+    assert(tex_id);
+    fd = egl_get_fd_for_texture(tex_id, &stride, &fourcc);
+    if (fd < 0) {
+        fprintf(stderr, "%s: failed to get fd for texture\n", __func__);
+        return;
     }
-
-    assert(!tex_id || fd >= 0);
+    dprint(1, "%s: %dx%d (stride %d, fourcc 0x%x)\n", __func__,
+           w, h, stride, fourcc);
 
     /* note: spice server will close the fd */
     spice_qxl_gl_scanout(&ssd->qxl, fd, backing_width, backing_height,
                          stride, fourcc, y_0_top);
-    ssd->have_surface = false;
-    ssd->have_scanout = (tex_id != 0);
-
     qemu_spice_gl_monitor_config(ssd, x, y, w, h);
+    ssd->have_surface = false;
+    ssd->have_scanout = true;
 }
 
 static void qemu_spice_gl_update(DisplayChangeListener *dcl,
@@ -993,7 +998,8 @@ static const DisplayChangeListenerOps display_listener_gl_ops = {
     .dpy_gl_ctx_make_current = qemu_egl_make_context_current,
     .dpy_gl_ctx_get_current  = qemu_egl_get_current_context,
 
-    .dpy_gl_scanout          = qemu_spice_gl_scanout,
+    .dpy_gl_scanout_disable  = qemu_spice_gl_scanout_disable,
+    .dpy_gl_scanout_texture  = qemu_spice_gl_scanout_texture,
     .dpy_gl_update           = qemu_spice_gl_update,
 };
 
@@ -1029,9 +1035,26 @@ static void qemu_spice_display_init_one(QemuConsole *con)
 
 void qemu_spice_display_init(void)
 {
-    QemuConsole *con;
+    QemuOptsList *olist = qemu_find_opts("spice");
+    QemuOpts *opts = QTAILQ_FIRST(&olist->head);
+    QemuConsole *spice_con, *con;
+    const char *str;
     int i;
 
+    str = qemu_opt_get(opts, "display");
+    if (str) {
+        int head = qemu_opt_get_number(opts, "head", 0);
+        Error *err = NULL;
+
+        spice_con = qemu_console_lookup_by_device_name(str, head, &err);
+        if (err) {
+            error_report("Failed to lookup display/head");
+            exit(1);
+        }
+    } else {
+        spice_con = NULL;
+    }
+
     for (i = 0;; i++) {
         con = qemu_console_lookup_by_index(i);
         if (!con || !qemu_console_is_graphic(con)) {
@@ -1040,6 +1063,9 @@ void qemu_spice_display_init(void)
         if (qemu_spice_have_display_interface(con)) {
             continue;
         }
+        if (spice_con != NULL && spice_con != con) {
+            continue;
+        }
         qemu_spice_display_init_one(con);
     }
 }
diff --git a/ui/vnc.c b/ui/vnc.c
index 62e85edf5d..51f4b30959 100644
--- a/ui/vnc.c
+++ b/ui/vnc.c
@@ -3181,6 +3181,7 @@ static void vnc_display_close(VncDisplay *vd)
     g_free(vd->lsock);
     g_free(vd->lsock_tag);
     vd->lsock = NULL;
+    vd->lsock_tag = NULL;
     vd->nlsock = 0;
 
     for (i = 0; i < vd->nlwebsock; i++) {
@@ -3192,6 +3193,7 @@ static void vnc_display_close(VncDisplay *vd)
     g_free(vd->lwebsock);
     g_free(vd->lwebsock_tag);
     vd->lwebsock = NULL;
+    vd->lwebsock_tag = NULL;
     vd->nlwebsock = 0;
 
     vd->auth = VNC_AUTH_INVALID;
@@ -3204,6 +3206,7 @@ static void vnc_display_close(VncDisplay *vd)
     vd->tlsaclname = NULL;
     if (vd->lock_key_sync) {
         qemu_remove_led_event_handler(vd->led);
+        vd->led = NULL;
     }
 }
 
diff --git a/util/qemu-option.c b/util/qemu-option.c
index 419f2528b8..5ce1b5c246 100644
--- a/util/qemu-option.c
+++ b/util/qemu-option.c
@@ -179,7 +179,7 @@ void parse_option_size(const char *name, const char *value,
 
     err = qemu_strtosz(value, NULL, &size);
     if (err == -ERANGE) {
-        error_setg(errp, "Value '%s' is too large for parameter '%s'",
+        error_setg(errp, "Value '%s' is out of range for parameter '%s'",
                    value, name);
         return;
     }
diff --git a/util/qemu-timer.c b/util/qemu-timer.c
index ff620ecff7..6cf70b96f6 100644
--- a/util/qemu-timer.c
+++ b/util/qemu-timer.c
@@ -355,11 +355,6 @@ void timer_deinit(QEMUTimer *ts)
     ts->timer_list = NULL;
 }
 
-void timer_free(QEMUTimer *ts)
-{
-    g_free(ts);
-}
-
 static void timer_del_locked(QEMUTimerList *timer_list, QEMUTimer *ts)
 {
     QEMUTimer **pt, *t;