summary refs log tree commit diff stats
path: root/hw
diff options
context:
space:
mode:
Diffstat (limited to 'hw')
-rw-r--r--hw/acpi/core.c4
-rw-r--r--hw/alpha/typhoon.c2
-rw-r--r--hw/arm/highbank.c4
-rw-r--r--hw/arm/integratorcp.c2
-rw-r--r--hw/arm/musicpal.c2
-rw-r--r--hw/arm/omap1.c10
-rw-r--r--hw/arm/omap2.c2
-rw-r--r--hw/arm/spitz.c2
-rw-r--r--hw/arm/stellaris.c2
-rw-r--r--hw/arm/tosa.c2
-rw-r--r--hw/core/machine.c3
-rw-r--r--hw/dma/rc4030.c2
-rw-r--r--hw/i386/acpi-build.c7
-rw-r--r--hw/i386/amd_iommu.c4
-rw-r--r--hw/i386/intel_iommu.c313
-rw-r--r--hw/i386/intel_iommu_internal.h1
-rw-r--r--hw/i386/pc.c2
-rw-r--r--hw/i386/trace-events2
-rw-r--r--hw/i386/x86-iommu.c48
-rw-r--r--hw/i386/xen/xen-hvm.c9
-rw-r--r--hw/input/pckbd.c4
-rw-r--r--hw/intc/xics.c5
-rw-r--r--hw/intc/xics_kvm.c33
-rw-r--r--hw/intc/xics_spapr.c3
-rw-r--r--hw/ipmi/ipmi.c4
-rw-r--r--hw/isa/lpc_ich9.c2
-rw-r--r--hw/mips/boston.c2
-rw-r--r--hw/mips/mips_malta.c2
-rw-r--r--hw/mips/mips_r4k.c4
-rw-r--r--hw/misc/arm_sysctl.c8
-rw-r--r--hw/misc/cbus.c2
-rw-r--r--hw/misc/macio/cuda.c4
-rw-r--r--hw/misc/slavio_misc.c4
-rw-r--r--hw/misc/zynq_slcr.c2
-rw-r--r--hw/net/virtio-net.c17
-rw-r--r--hw/pci-host/apb.c6
-rw-r--r--hw/pci-host/bonito.c2
-rw-r--r--hw/pci-host/piix.c2
-rw-r--r--hw/ppc/e500.c2
-rw-r--r--hw/ppc/mpc8544_guts.c2
-rw-r--r--hw/ppc/ppc.c2
-rw-r--r--hw/ppc/ppc405_uc.c2
-rw-r--r--hw/ppc/spapr.c239
-rw-r--r--hw/ppc/spapr_cpu_core.c19
-rw-r--r--hw/ppc/spapr_drc.c92
-rw-r--r--hw/ppc/spapr_events.c52
-rw-r--r--hw/ppc/spapr_hcall.c56
-rw-r--r--hw/ppc/spapr_iommu.c2
-rw-r--r--hw/ppc/spapr_pci.c5
-rw-r--r--hw/ppc/spapr_rtas.c4
-rw-r--r--hw/s390x/3270-ccw.c6
-rw-r--r--hw/s390x/Makefile.objs1
-rw-r--r--hw/s390x/css-bridge.c2
-rw-r--r--hw/s390x/css.c290
-rw-r--r--hw/s390x/ipl.c2
-rw-r--r--hw/s390x/s390-ccw.c153
-rw-r--r--hw/s390x/s390-pci-bus.c2
-rw-r--r--hw/s390x/s390-pci-inst.c2
-rw-r--r--hw/s390x/s390-virtio-ccw.c32
-rw-r--r--hw/s390x/virtio-ccw.c7
-rw-r--r--hw/sh4/r2d.c2
-rw-r--r--hw/timer/etraxfs_timer.c2
-rw-r--r--hw/timer/m48t59.c4
-rw-r--r--hw/timer/milkymist-sysctl.c4
-rw-r--r--hw/timer/pxa2xx_timer.c2
-rw-r--r--hw/vfio/Makefile.objs1
-rw-r--r--hw/vfio/ccw.c434
-rw-r--r--hw/vfio/common.c2
-rw-r--r--hw/virtio/vhost-user.c12
-rw-r--r--hw/watchdog/watchdog.c2
-rw-r--r--hw/xenpv/xen_domainbuild.c2
-rw-r--r--hw/xtensa/xtfpga.c2
72 files changed, 1573 insertions, 399 deletions
diff --git a/hw/acpi/core.c b/hw/acpi/core.c
index e890a5d675..95fcac95a2 100644
--- a/hw/acpi/core.c
+++ b/hw/acpi/core.c
@@ -561,7 +561,7 @@ static void acpi_pm1_cnt_write(ACPIREGS *ar, uint16_t val)
         uint16_t sus_typ = (val >> 10) & 7;
         switch(sus_typ) {
         case 0: /* soft power off */
-            qemu_system_shutdown_request();
+            qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
             break;
         case 1:
             qemu_system_suspend_request();
@@ -569,7 +569,7 @@ static void acpi_pm1_cnt_write(ACPIREGS *ar, uint16_t val)
         default:
             if (sus_typ == ar->pm1.cnt.s4_val) { /* S4 request */
                 qapi_event_send_suspend_disk(&error_abort);
-                qemu_system_shutdown_request();
+                qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
             }
             break;
         }
diff --git a/hw/alpha/typhoon.c b/hw/alpha/typhoon.c
index f50f5cf186..c1cf7802a4 100644
--- a/hw/alpha/typhoon.c
+++ b/hw/alpha/typhoon.c
@@ -664,7 +664,7 @@ static bool window_translate(TyphoonWindow *win, hwaddr addr,
 /* TODO: A translation failure here ought to set PCI error codes on the
    Pchip and generate a machine check interrupt.  */
 static IOMMUTLBEntry typhoon_translate_iommu(MemoryRegion *iommu, hwaddr addr,
-                                             bool is_write)
+                                             IOMMUAccessFlags flag)
 {
     TyphoonPchip *pchip = container_of(iommu, TyphoonPchip, iommu);
     IOMMUTLBEntry ret;
diff --git a/hw/arm/highbank.c b/hw/arm/highbank.c
index 0a4508cef3..d209b97dee 100644
--- a/hw/arm/highbank.c
+++ b/hw/arm/highbank.c
@@ -108,9 +108,9 @@ static void hb_regs_write(void *opaque, hwaddr offset,
 
     if (offset == 0xf00) {
         if (value == 1 || value == 2) {
-            qemu_system_reset_request();
+            qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
         } else if (value == 3) {
-            qemu_system_shutdown_request();
+            qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
         }
     }
 
diff --git a/hw/arm/integratorcp.c b/hw/arm/integratorcp.c
index 5610ffc9ce..ca3eca1d16 100644
--- a/hw/arm/integratorcp.c
+++ b/hw/arm/integratorcp.c
@@ -158,7 +158,7 @@ static void integratorcm_do_remap(IntegratorCMState *s)
 static void integratorcm_set_ctrl(IntegratorCMState *s, uint32_t value)
 {
     if (value & 8) {
-        qemu_system_reset_request();
+        qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
     }
     if ((s->cm_ctrl ^ value) & 1) {
         /* (value & 1) != 0 means the green "MISC LED" is lit.
diff --git a/hw/arm/musicpal.c b/hw/arm/musicpal.c
index cbbca4e17a..9c710f74b4 100644
--- a/hw/arm/musicpal.c
+++ b/hw/arm/musicpal.c
@@ -898,7 +898,7 @@ static void mv88w8618_pit_write(void *opaque, hwaddr offset,
 
     case MP_BOARD_RESET:
         if (value == MP_BOARD_RESET_MAGIC) {
-            qemu_system_reset_request();
+            qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
         }
         break;
     }
diff --git a/hw/arm/omap1.c b/hw/arm/omap1.c
index b3cf0ec690..54582bd148 100644
--- a/hw/arm/omap1.c
+++ b/hw/arm/omap1.c
@@ -355,7 +355,7 @@ static void omap_wd_timer_write(void *opaque, hwaddr addr,
                 /* XXX: on T|E hardware somehow this has no effect,
                  * on Zire 71 it works as specified.  */
                 s->reset = 1;
-                qemu_system_reset_request();
+                qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
             }
         }
         s->last_wr = value & 0xff;
@@ -1545,8 +1545,10 @@ static inline void omap_clkm_idlect1_update(struct omap_mpu_state_s *s,
     if (value & (1 << 11)) {                            /* SETARM_IDLE */
         cpu_interrupt(CPU(s->cpu), CPU_INTERRUPT_HALT);
     }
-    if (!(value & (1 << 10)))				/* WKUP_MODE */
-        qemu_system_shutdown_request();	/* XXX: disable wakeup from IRQ */
+    if (!(value & (1 << 10))) {                         /* WKUP_MODE */
+        /* XXX: disable wakeup from IRQ */
+        qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
+    }
 
 #define SET_CANIDLE(clock, bit)				\
     if (diff & (1 << bit)) {				\
@@ -1693,7 +1695,7 @@ static void omap_clkm_write(void *opaque, hwaddr addr,
         diff = s->clkm.arm_rstct1 ^ value;
         s->clkm.arm_rstct1 = value & 0x0007;
         if (value & 9) {
-            qemu_system_reset_request();
+            qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
             s->clkm.cold_start = 0xa;
         }
         if (diff & ~value & 4) {				/* DSP_RST */
diff --git a/hw/arm/omap2.c b/hw/arm/omap2.c
index cf1b4ba58f..8afb854c74 100644
--- a/hw/arm/omap2.c
+++ b/hw/arm/omap2.c
@@ -1610,7 +1610,7 @@ static void omap_prcm_write(void *opaque, hwaddr addr,
     case 0x450:	/* RM_RSTCTRL_WKUP */
         /* TODO: reset */
         if (value & 2)
-            qemu_system_reset_request();
+            qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
         break;
     case 0x454:	/* RM_RSTTIME_WKUP */
         s->rsttime_wkup = value & 0x1fff;
diff --git a/hw/arm/spitz.c b/hw/arm/spitz.c
index 324626847c..93bde14743 100644
--- a/hw/arm/spitz.c
+++ b/hw/arm/spitz.c
@@ -848,7 +848,7 @@ static void spitz_lcd_hsync_handler(void *opaque, int line, int level)
 static void spitz_reset(void *opaque, int line, int level)
 {
     if (level) {
-        qemu_system_reset_request();
+        qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
     }
 }
 
diff --git a/hw/arm/stellaris.c b/hw/arm/stellaris.c
index ea7a8094e1..cf6e7be083 100644
--- a/hw/arm/stellaris.c
+++ b/hw/arm/stellaris.c
@@ -1197,7 +1197,7 @@ static
 void do_sys_reset(void *opaque, int n, int level)
 {
     if (level) {
-        qemu_system_reset_request();
+        qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
     }
 }
 
diff --git a/hw/arm/tosa.c b/hw/arm/tosa.c
index 9f58a23fb5..2421b8150d 100644
--- a/hw/arm/tosa.c
+++ b/hw/arm/tosa.c
@@ -90,7 +90,7 @@ static void tosa_out_switch(void *opaque, int line, int level)
 static void tosa_reset(void *opaque, int line, int level)
 {
     if (level) {
-        qemu_system_reset_request();
+        qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
     }
 }
 
diff --git a/hw/core/machine.c b/hw/core/machine.c
index fd6a436064..3adebf14c4 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -21,6 +21,7 @@
 #include "qemu/error-report.h"
 #include "qemu/cutils.h"
 #include "sysemu/numa.h"
+#include "sysemu/qtest.h"
 
 static char *machine_get_accel(Object *obj, Error **errp)
 {
@@ -722,7 +723,7 @@ static void machine_numa_validate(MachineState *machine)
             g_free(cpu_str);
         }
     }
-    if (s->len) {
+    if (s->len && !qtest_enabled()) {
         error_report("warning: CPU(s) not present in any NUMA nodes: %s",
                      s->str);
         error_report("warning: All CPU(s) up to maxcpus should be described "
diff --git a/hw/dma/rc4030.c b/hw/dma/rc4030.c
index 0080141905..edf9432051 100644
--- a/hw/dma/rc4030.c
+++ b/hw/dma/rc4030.c
@@ -489,7 +489,7 @@ static const MemoryRegionOps jazzio_ops = {
 };
 
 static IOMMUTLBEntry rc4030_dma_translate(MemoryRegion *iommu, hwaddr addr,
-                                          bool is_write)
+                                          IOMMUAccessFlags flag)
 {
     rc4030State *s = container_of(iommu, rc4030State, dma_mr);
     IOMMUTLBEntry ret = {
diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index afcadacd2e..82bd44f38e 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -2404,14 +2404,17 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine)
     }
 
     /*
-     * Entry is required for Windows to enable memory hotplug in OS.
+     * Entry is required for Windows to enable memory hotplug in OS
+     * and for Linux to enable SWIOTLB when booted with less than
+     * 4G of RAM. Windows works better if the entry sets proximity
+     * to the highest NUMA node in the machine.
      * Memory devices may override proximity set by this entry,
      * providing _PXM method if necessary.
      */
     if (hotplugabble_address_space_size) {
         numamem = acpi_data_push(table_data, sizeof *numamem);
         build_srat_memory(numamem, pcms->hotplug_memory.base,
-                          hotplugabble_address_space_size, 0,
+                          hotplugabble_address_space_size, pcms->numa_nodes - 1,
                           MEM_AFFINITY_HOTPLUGGABLE | MEM_AFFINITY_ENABLED);
     }
 
diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c
index 329058dac8..7b6d4ea3f3 100644
--- a/hw/i386/amd_iommu.c
+++ b/hw/i386/amd_iommu.c
@@ -988,7 +988,7 @@ static inline bool amdvi_is_interrupt_addr(hwaddr addr)
 }
 
 static IOMMUTLBEntry amdvi_translate(MemoryRegion *iommu, hwaddr addr,
-                                     bool is_write)
+                                     IOMMUAccessFlags flag)
 {
     AMDVIAddressSpace *as = container_of(iommu, AMDVIAddressSpace, iommu);
     AMDVIState *s = as->iommu_state;
@@ -1017,7 +1017,7 @@ static IOMMUTLBEntry amdvi_translate(MemoryRegion *iommu, hwaddr addr,
         return ret;
     }
 
-    amdvi_do_translate(as, addr, is_write, &ret);
+    amdvi_do_translate(as, addr, flag & IOMMU_WO, &ret);
     trace_amdvi_translation_result(as->bus_num, PCI_SLOT(as->devfn),
             PCI_FUNC(as->devfn), addr, ret.translated_addr);
     return ret;
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 9ba2162cd9..15610b9de8 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -512,7 +512,7 @@ static int vtd_get_root_entry(IntelIOMMUState *s, uint8_t index,
     return 0;
 }
 
-static inline bool vtd_context_entry_present(VTDContextEntry *context)
+static inline bool vtd_ce_present(VTDContextEntry *context)
 {
     return context->lo & VTD_CONTEXT_ENTRY_P;
 }
@@ -533,7 +533,7 @@ static int vtd_get_context_entry_from_root(VTDRootEntry *root, uint8_t index,
     return 0;
 }
 
-static inline dma_addr_t vtd_get_slpt_base_from_context(VTDContextEntry *ce)
+static inline dma_addr_t vtd_ce_get_slpt_base(VTDContextEntry *ce)
 {
     return ce->lo & VTD_CONTEXT_ENTRY_SLPTPTR;
 }
@@ -585,19 +585,49 @@ static inline bool vtd_is_level_supported(IntelIOMMUState *s, uint32_t level)
 /* Get the page-table level that hardware should use for the second-level
  * page-table walk from the Address Width field of context-entry.
  */
-static inline uint32_t vtd_get_level_from_context_entry(VTDContextEntry *ce)
+static inline uint32_t vtd_ce_get_level(VTDContextEntry *ce)
 {
     return 2 + (ce->hi & VTD_CONTEXT_ENTRY_AW);
 }
 
-static inline uint32_t vtd_get_agaw_from_context_entry(VTDContextEntry *ce)
+static inline uint32_t vtd_ce_get_agaw(VTDContextEntry *ce)
 {
     return 30 + (ce->hi & VTD_CONTEXT_ENTRY_AW) * 9;
 }
 
+static inline uint32_t vtd_ce_get_type(VTDContextEntry *ce)
+{
+    return ce->lo & VTD_CONTEXT_ENTRY_TT;
+}
+
+/* Return true if check passed, otherwise false */
+static inline bool vtd_ce_type_check(X86IOMMUState *x86_iommu,
+                                     VTDContextEntry *ce)
+{
+    switch (vtd_ce_get_type(ce)) {
+    case VTD_CONTEXT_TT_MULTI_LEVEL:
+        /* Always supported */
+        break;
+    case VTD_CONTEXT_TT_DEV_IOTLB:
+        if (!x86_iommu->dt_supported) {
+            return false;
+        }
+        break;
+    case VTD_CONTEXT_TT_PASS_THROUGH:
+        if (!x86_iommu->pt_supported) {
+            return false;
+        }
+        break;
+    default:
+        /* Unknwon type */
+        return false;
+    }
+    return true;
+}
+
 static inline uint64_t vtd_iova_limit(VTDContextEntry *ce)
 {
-    uint32_t ce_agaw = vtd_get_agaw_from_context_entry(ce);
+    uint32_t ce_agaw = vtd_ce_get_agaw(ce);
     return 1ULL << MIN(ce_agaw, VTD_MGAW);
 }
 
@@ -635,6 +665,29 @@ static bool vtd_slpte_nonzero_rsvd(uint64_t slpte, uint32_t level)
     }
 }
 
+/* Find the VTD address space associated with a given bus number */
+static VTDBus *vtd_find_as_from_bus_num(IntelIOMMUState *s, uint8_t bus_num)
+{
+    VTDBus *vtd_bus = s->vtd_as_by_bus_num[bus_num];
+    if (!vtd_bus) {
+        /*
+         * Iterate over the registered buses to find the one which
+         * currently hold this bus number, and update the bus_num
+         * lookup table:
+         */
+        GHashTableIter iter;
+
+        g_hash_table_iter_init(&iter, s->vtd_as_by_busptr);
+        while (g_hash_table_iter_next(&iter, NULL, (void **)&vtd_bus)) {
+            if (pci_bus_num(vtd_bus->bus) == bus_num) {
+                s->vtd_as_by_bus_num[bus_num] = vtd_bus;
+                return vtd_bus;
+            }
+        }
+    }
+    return vtd_bus;
+}
+
 /* Given the @iova, get relevant @slptep. @slpte_level will be the last level
  * of the translation, can be used for deciding the size of large page.
  */
@@ -642,8 +695,8 @@ static int vtd_iova_to_slpte(VTDContextEntry *ce, uint64_t iova, bool is_write,
                              uint64_t *slptep, uint32_t *slpte_level,
                              bool *reads, bool *writes)
 {
-    dma_addr_t addr = vtd_get_slpt_base_from_context(ce);
-    uint32_t level = vtd_get_level_from_context_entry(ce);
+    dma_addr_t addr = vtd_ce_get_slpt_base(ce);
+    uint32_t level = vtd_ce_get_level(ce);
     uint32_t offset;
     uint64_t slpte;
     uint64_t access_right_check;
@@ -664,7 +717,7 @@ static int vtd_iova_to_slpte(VTDContextEntry *ce, uint64_t iova, bool is_write,
             VTD_DPRINTF(GENERAL, "error: fail to access second-level paging "
                         "entry at level %"PRIu32 " for iova 0x%"PRIx64,
                         level, iova);
-            if (level == vtd_get_level_from_context_entry(ce)) {
+            if (level == vtd_ce_get_level(ce)) {
                 /* Invalid programming of context-entry */
                 return -VTD_FR_CONTEXT_ENTRY_INV;
             } else {
@@ -809,8 +862,8 @@ static int vtd_page_walk(VTDContextEntry *ce, uint64_t start, uint64_t end,
                          vtd_page_walk_hook hook_fn, void *private,
                          bool notify_unmap)
 {
-    dma_addr_t addr = vtd_get_slpt_base_from_context(ce);
-    uint32_t level = vtd_get_level_from_context_entry(ce);
+    dma_addr_t addr = vtd_ce_get_slpt_base(ce);
+    uint32_t level = vtd_ce_get_level(ce);
 
     if (!vtd_iova_range_check(start, ce)) {
         return -VTD_FR_ADDR_BEYOND_MGAW;
@@ -831,6 +884,7 @@ static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num,
 {
     VTDRootEntry re;
     int ret_fr;
+    X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s);
 
     ret_fr = vtd_get_root_entry(s, bus_num, &re);
     if (ret_fr) {
@@ -841,7 +895,9 @@ static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num,
         /* Not error - it's okay we don't have root entry. */
         trace_vtd_re_not_present(bus_num);
         return -VTD_FR_ROOT_ENTRY_P;
-    } else if (re.rsvd || (re.val & VTD_ROOT_ENTRY_RSVD)) {
+    }
+
+    if (re.rsvd || (re.val & VTD_ROOT_ENTRY_RSVD)) {
         trace_vtd_re_invalid(re.rsvd, re.val);
         return -VTD_FR_ROOT_ENTRY_RSVD;
     }
@@ -851,31 +907,116 @@ static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num,
         return ret_fr;
     }
 
-    if (!vtd_context_entry_present(ce)) {
+    if (!vtd_ce_present(ce)) {
         /* Not error - it's okay we don't have context entry. */
         trace_vtd_ce_not_present(bus_num, devfn);
         return -VTD_FR_CONTEXT_ENTRY_P;
-    } else if ((ce->hi & VTD_CONTEXT_ENTRY_RSVD_HI) ||
-               (ce->lo & VTD_CONTEXT_ENTRY_RSVD_LO)) {
+    }
+
+    if ((ce->hi & VTD_CONTEXT_ENTRY_RSVD_HI) ||
+        (ce->lo & VTD_CONTEXT_ENTRY_RSVD_LO)) {
         trace_vtd_ce_invalid(ce->hi, ce->lo);
         return -VTD_FR_CONTEXT_ENTRY_RSVD;
     }
+
     /* Check if the programming of context-entry is valid */
-    if (!vtd_is_level_supported(s, vtd_get_level_from_context_entry(ce))) {
+    if (!vtd_is_level_supported(s, vtd_ce_get_level(ce))) {
+        trace_vtd_ce_invalid(ce->hi, ce->lo);
+        return -VTD_FR_CONTEXT_ENTRY_INV;
+    }
+
+    /* Do translation type check */
+    if (!vtd_ce_type_check(x86_iommu, ce)) {
         trace_vtd_ce_invalid(ce->hi, ce->lo);
         return -VTD_FR_CONTEXT_ENTRY_INV;
+    }
+
+    return 0;
+}
+
+/*
+ * Fetch translation type for specific device. Returns <0 if error
+ * happens, otherwise return the shifted type to check against
+ * VTD_CONTEXT_TT_*.
+ */
+static int vtd_dev_get_trans_type(VTDAddressSpace *as)
+{
+    IntelIOMMUState *s;
+    VTDContextEntry ce;
+    int ret;
+
+    s = as->iommu_state;
+
+    ret = vtd_dev_to_context_entry(s, pci_bus_num(as->bus),
+                                   as->devfn, &ce);
+    if (ret) {
+        return ret;
+    }
+
+    return vtd_ce_get_type(&ce);
+}
+
+static bool vtd_dev_pt_enabled(VTDAddressSpace *as)
+{
+    int ret;
+
+    assert(as);
+
+    ret = vtd_dev_get_trans_type(as);
+    if (ret < 0) {
+        /*
+         * Possibly failed to parse the context entry for some reason
+         * (e.g., during init, or any guest configuration errors on
+         * context entries). We should assume PT not enabled for
+         * safety.
+         */
+        return false;
+    }
+
+    return ret == VTD_CONTEXT_TT_PASS_THROUGH;
+}
+
+/* Return whether the device is using IOMMU translation. */
+static bool vtd_switch_address_space(VTDAddressSpace *as)
+{
+    bool use_iommu;
+
+    assert(as);
+
+    use_iommu = as->iommu_state->dmar_enabled & !vtd_dev_pt_enabled(as);
+
+    trace_vtd_switch_address_space(pci_bus_num(as->bus),
+                                   VTD_PCI_SLOT(as->devfn),
+                                   VTD_PCI_FUNC(as->devfn),
+                                   use_iommu);
+
+    /* Turn off first then on the other */
+    if (use_iommu) {
+        memory_region_set_enabled(&as->sys_alias, false);
+        memory_region_set_enabled(&as->iommu, true);
     } else {
-        switch (ce->lo & VTD_CONTEXT_ENTRY_TT) {
-        case VTD_CONTEXT_TT_MULTI_LEVEL:
-            /* fall through */
-        case VTD_CONTEXT_TT_DEV_IOTLB:
-            break;
-        default:
-            trace_vtd_ce_invalid(ce->hi, ce->lo);
-            return -VTD_FR_CONTEXT_ENTRY_INV;
+        memory_region_set_enabled(&as->iommu, false);
+        memory_region_set_enabled(&as->sys_alias, true);
+    }
+
+    return use_iommu;
+}
+
+static void vtd_switch_address_space_all(IntelIOMMUState *s)
+{
+    GHashTableIter iter;
+    VTDBus *vtd_bus;
+    int i;
+
+    g_hash_table_iter_init(&iter, s->vtd_as_by_busptr);
+    while (g_hash_table_iter_next(&iter, NULL, (void **)&vtd_bus)) {
+        for (i = 0; i < X86_IOMMU_PCI_DEVFN_MAX; i++) {
+            if (!vtd_bus->dev_as[i]) {
+                continue;
+            }
+            vtd_switch_address_space(vtd_bus->dev_as[i]);
         }
     }
-    return 0;
 }
 
 static inline uint16_t vtd_make_source_id(uint8_t bus_num, uint8_t devfn)
@@ -915,6 +1056,31 @@ static inline bool vtd_is_interrupt_addr(hwaddr addr)
     return VTD_INTERRUPT_ADDR_FIRST <= addr && addr <= VTD_INTERRUPT_ADDR_LAST;
 }
 
+static void vtd_pt_enable_fast_path(IntelIOMMUState *s, uint16_t source_id)
+{
+    VTDBus *vtd_bus;
+    VTDAddressSpace *vtd_as;
+    bool success = false;
+
+    vtd_bus = vtd_find_as_from_bus_num(s, VTD_SID_TO_BUS(source_id));
+    if (!vtd_bus) {
+        goto out;
+    }
+
+    vtd_as = vtd_bus->dev_as[VTD_SID_TO_DEVFN(source_id)];
+    if (!vtd_as) {
+        goto out;
+    }
+
+    if (vtd_switch_address_space(vtd_as) == false) {
+        /* We switched off IOMMU region successfully. */
+        success = true;
+    }
+
+out:
+    trace_vtd_pt_enable_fast_path(source_id, success);
+}
+
 /* Map dev to context-entry then do a paging-structures walk to do a iommu
  * translation.
  *
@@ -986,6 +1152,30 @@ static void vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
         cc_entry->context_cache_gen = s->context_cache_gen;
     }
 
+    /*
+     * We don't need to translate for pass-through context entries.
+     * Also, let's ignore IOTLB caching as well for PT devices.
+     */
+    if (vtd_ce_get_type(&ce) == VTD_CONTEXT_TT_PASS_THROUGH) {
+        entry->translated_addr = entry->iova;
+        entry->addr_mask = VTD_PAGE_SIZE - 1;
+        entry->perm = IOMMU_RW;
+        trace_vtd_translate_pt(source_id, entry->iova);
+
+        /*
+         * When this happens, it means firstly caching-mode is not
+         * enabled, and this is the first passthrough translation for
+         * the device. Let's enable the fast path for passthrough.
+         *
+         * When passthrough is disabled again for the device, we can
+         * capture it via the context entry invalidation, then the
+         * IOMMU region can be swapped back.
+         */
+        vtd_pt_enable_fast_path(s, source_id);
+
+        return;
+    }
+
     ret_fr = vtd_iova_to_slpte(&ce, addr, is_write, &slpte, &level,
                                &reads, &writes);
     if (ret_fr) {
@@ -1005,7 +1195,7 @@ out:
     entry->iova = addr & page_mask;
     entry->translated_addr = vtd_get_slpte_addr(slpte) & page_mask;
     entry->addr_mask = ~page_mask;
-    entry->perm = (writes ? 2 : 0) + (reads ? 1 : 0);
+    entry->perm = IOMMU_ACCESS_FLAG(reads, writes);
 }
 
 static void vtd_root_table_setup(IntelIOMMUState *s)
@@ -1055,6 +1245,7 @@ static void vtd_context_global_invalidate(IntelIOMMUState *s)
     if (s->context_cache_gen == VTD_CONTEXT_CACHE_GEN_MAX) {
         vtd_reset_context_cache(s);
     }
+    vtd_switch_address_space_all(s);
     /*
      * From VT-d spec 6.5.2.1, a global context entry invalidation
      * should be followed by a IOTLB global invalidation, so we should
@@ -1065,29 +1256,6 @@ static void vtd_context_global_invalidate(IntelIOMMUState *s)
     vtd_iommu_replay_all(s);
 }
 
-
-/* Find the VTD address space currently associated with a given bus number,
- */
-static VTDBus *vtd_find_as_from_bus_num(IntelIOMMUState *s, uint8_t bus_num)
-{
-    VTDBus *vtd_bus = s->vtd_as_by_bus_num[bus_num];
-    if (!vtd_bus) {
-        /* Iterate over the registered buses to find the one
-         * which currently hold this bus number, and update the bus_num lookup table:
-         */
-        GHashTableIter iter;
-
-        g_hash_table_iter_init(&iter, s->vtd_as_by_busptr);
-        while (g_hash_table_iter_next (&iter, NULL, (void**)&vtd_bus)) {
-            if (pci_bus_num(vtd_bus->bus) == bus_num) {
-                s->vtd_as_by_bus_num[bus_num] = vtd_bus;
-                return vtd_bus;
-            }
-        }
-    }
-    return vtd_bus;
-}
-
 /* Do a context-cache device-selective invalidation.
  * @func_mask: FM field after shifting
  */
@@ -1130,6 +1298,11 @@ static void vtd_context_device_invalidate(IntelIOMMUState *s,
                                              VTD_PCI_FUNC(devfn_it));
                 vtd_as->context_cache_entry.context_cache_gen = 0;
                 /*
+                 * Do switch address space when needed, in case if the
+                 * device passthrough bit is switched.
+                 */
+                vtd_switch_address_space(vtd_as);
+                /*
                  * So a device is moving out of (or moving into) a
                  * domain, a replay() suites here to notify all the
                  * IOMMU_NOTIFIER_MAP registers about this change.
@@ -1361,42 +1534,6 @@ static void vtd_handle_gcmd_sirtp(IntelIOMMUState *s)
     vtd_set_clear_mask_long(s, DMAR_GSTS_REG, 0, VTD_GSTS_IRTPS);
 }
 
-static void vtd_switch_address_space(VTDAddressSpace *as)
-{
-    assert(as);
-
-    trace_vtd_switch_address_space(pci_bus_num(as->bus),
-                                   VTD_PCI_SLOT(as->devfn),
-                                   VTD_PCI_FUNC(as->devfn),
-                                   as->iommu_state->dmar_enabled);
-
-    /* Turn off first then on the other */
-    if (as->iommu_state->dmar_enabled) {
-        memory_region_set_enabled(&as->sys_alias, false);
-        memory_region_set_enabled(&as->iommu, true);
-    } else {
-        memory_region_set_enabled(&as->iommu, false);
-        memory_region_set_enabled(&as->sys_alias, true);
-    }
-}
-
-static void vtd_switch_address_space_all(IntelIOMMUState *s)
-{
-    GHashTableIter iter;
-    VTDBus *vtd_bus;
-    int i;
-
-    g_hash_table_iter_init(&iter, s->vtd_as_by_busptr);
-    while (g_hash_table_iter_next(&iter, NULL, (void **)&vtd_bus)) {
-        for (i = 0; i < X86_IOMMU_PCI_DEVFN_MAX; i++) {
-            if (!vtd_bus->dev_as[i]) {
-                continue;
-            }
-            vtd_switch_address_space(vtd_bus->dev_as[i]);
-        }
-    }
-}
-
 /* Handle Translation Enable/Disable */
 static void vtd_handle_gcmd_te(IntelIOMMUState *s, bool en)
 {
@@ -2221,7 +2358,7 @@ static void vtd_mem_write(void *opaque, hwaddr addr,
 }
 
 static IOMMUTLBEntry vtd_iommu_translate(MemoryRegion *iommu, hwaddr addr,
-                                         bool is_write)
+                                         IOMMUAccessFlags flag)
 {
     VTDAddressSpace *vtd_as = container_of(iommu, VTDAddressSpace, iommu);
     IntelIOMMUState *s = vtd_as->iommu_state;
@@ -2243,7 +2380,7 @@ static IOMMUTLBEntry vtd_iommu_translate(MemoryRegion *iommu, hwaddr addr,
     }
 
     vtd_do_iommu_translate(vtd_as, vtd_as->bus, vtd_as->devfn, addr,
-                           is_write, &ret);
+                           flag & IOMMU_WO, &ret);
     VTD_DPRINTF(MMU,
                 "bus %"PRIu8 " slot %"PRIu8 " func %"PRIu8 " devfn %"PRIu8
                 " iova 0x%"PRIx64 " hpa 0x%"PRIx64, pci_bus_num(vtd_as->bus),
@@ -2844,6 +2981,10 @@ static void vtd_init(IntelIOMMUState *s)
         s->ecap |= VTD_ECAP_DT;
     }
 
+    if (x86_iommu->pt_supported) {
+        s->ecap |= VTD_ECAP_PT;
+    }
+
     if (s->caching_mode) {
         s->cap |= VTD_CAP_CM;
     }
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index 29d67075f4..0e73a65bf2 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -187,6 +187,7 @@
 /* Interrupt Remapping support */
 #define VTD_ECAP_IR                 (1ULL << 3)
 #define VTD_ECAP_EIM                (1ULL << 4)
+#define VTD_ECAP_PT                 (1ULL << 6)
 #define VTD_ECAP_MHMV               (15ULL << 20)
 
 /* CAP_REG */
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 816bfa872c..107a34125b 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -519,7 +519,7 @@ static void port92_write(void *opaque, hwaddr addr, uint64_t val,
     s->outport = val;
     qemu_set_irq(s->a20_out, (val >> 1) & 1);
     if ((val & 1) && !(oldval & 1)) {
-        qemu_system_reset_request();
+        qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
     }
 }
 
diff --git a/hw/i386/trace-events b/hw/i386/trace-events
index 04a6980800..72556dad48 100644
--- a/hw/i386/trace-events
+++ b/hw/i386/trace-events
@@ -38,6 +38,8 @@ vtd_page_walk_skip_perm(uint64_t iova, uint64_t next) "Page walk skip iova 0x%"P
 vtd_page_walk_skip_reserve(uint64_t iova, uint64_t next) "Page walk skip iova 0x%"PRIx64" - 0x%"PRIx64" due to rsrv set"
 vtd_switch_address_space(uint8_t bus, uint8_t slot, uint8_t fn, bool on) "Device %02x:%02x.%x switching address space (iommu enabled=%d)"
 vtd_as_unmap_whole(uint8_t bus, uint8_t slot, uint8_t fn, uint64_t iova, uint64_t size) "Device %02x:%02x.%x start 0x%"PRIx64" size 0x%"PRIx64
+vtd_translate_pt(uint16_t sid, uint64_t addr) "source id 0x%"PRIu16", iova 0x%"PRIx64
+vtd_pt_enable_fast_path(uint16_t sid, bool success) "sid 0x%"PRIu16" %d"
 
 # hw/i386/amd_iommu.c
 amdvi_evntlog_fail(uint64_t addr, uint32_t head) "error: fail to write at addr 0x%"PRIx64" +  offset 0x%"PRIx32
diff --git a/hw/i386/x86-iommu.c b/hw/i386/x86-iommu.c
index 23dcd3f039..293caf83ef 100644
--- a/hw/i386/x86-iommu.c
+++ b/hw/i386/x86-iommu.c
@@ -88,55 +88,23 @@ static void x86_iommu_realize(DeviceState *dev, Error **errp)
     x86_iommu_set_default(X86_IOMMU_DEVICE(dev));
 }
 
+static Property x86_iommu_properties[] = {
+    DEFINE_PROP_BOOL("intremap", X86IOMMUState, intr_supported, false),
+    DEFINE_PROP_BOOL("device-iotlb", X86IOMMUState, dt_supported, false),
+    DEFINE_PROP_BOOL("pt", X86IOMMUState, pt_supported, true),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
 static void x86_iommu_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
     dc->realize = x86_iommu_realize;
-}
-
-static bool x86_iommu_intremap_prop_get(Object *o, Error **errp)
-{
-    X86IOMMUState *s = X86_IOMMU_DEVICE(o);
-    return s->intr_supported;
-}
-
-static void x86_iommu_intremap_prop_set(Object *o, bool value, Error **errp)
-{
-    X86IOMMUState *s = X86_IOMMU_DEVICE(o);
-    s->intr_supported = value;
-}
-
-static bool x86_iommu_device_iotlb_prop_get(Object *o, Error **errp)
-{
-    X86IOMMUState *s = X86_IOMMU_DEVICE(o);
-    return s->dt_supported;
-}
-
-static void x86_iommu_device_iotlb_prop_set(Object *o, bool value, Error **errp)
-{
-    X86IOMMUState *s = X86_IOMMU_DEVICE(o);
-    s->dt_supported = value;
-}
-
-static void x86_iommu_instance_init(Object *o)
-{
-    X86IOMMUState *s = X86_IOMMU_DEVICE(o);
-
-    /* By default, do not support IR */
-    s->intr_supported = false;
-    object_property_add_bool(o, "intremap", x86_iommu_intremap_prop_get,
-                             x86_iommu_intremap_prop_set, NULL);
-    s->dt_supported = false;
-    object_property_add_bool(o, "device-iotlb",
-                             x86_iommu_device_iotlb_prop_get,
-                             x86_iommu_device_iotlb_prop_set,
-                             NULL);
+    dc->props = x86_iommu_properties;
 }
 
 static const TypeInfo x86_iommu_info = {
     .name          = TYPE_X86_IOMMU_DEVICE,
     .parent        = TYPE_SYS_BUS_DEVICE,
-    .instance_init = x86_iommu_instance_init,
     .instance_size = sizeof(X86IOMMUState),
     .class_init    = x86_iommu_class_init,
     .class_size    = sizeof(X86IOMMUClass),
diff --git a/hw/i386/xen/xen-hvm.c b/hw/i386/xen/xen-hvm.c
index b1c05ffb86..919f09b694 100644
--- a/hw/i386/xen/xen-hvm.c
+++ b/hw/i386/xen/xen-hvm.c
@@ -1089,11 +1089,14 @@ static void cpu_handle_ioreq(void *opaque)
          * causes Xen to powerdown the domain.
          */
         if (runstate_is_running()) {
+            ShutdownCause request;
+
             if (qemu_shutdown_requested_get()) {
                 destroy_hvm_domain(false);
             }
-            if (qemu_reset_requested_get()) {
-                qemu_system_reset(VMRESET_REPORT);
+            request = qemu_reset_requested_get();
+            if (request) {
+                qemu_system_reset(request);
                 destroy_hvm_domain(true);
             }
         }
@@ -1395,7 +1398,7 @@ void xen_shutdown_fatal_error(const char *fmt, ...)
     va_end(ap);
     fprintf(stderr, "Will destroy the domain.\n");
     /* destroy the domain */
-    qemu_system_shutdown_request();
+    qemu_system_shutdown_request(SHUTDOWN_CAUSE_HOST_ERROR);
 }
 
 void xen_hvm_modified_memory(ram_addr_t start, ram_addr_t length)
diff --git a/hw/input/pckbd.c b/hw/input/pckbd.c
index d414288839..c479f827b6 100644
--- a/hw/input/pckbd.c
+++ b/hw/input/pckbd.c
@@ -226,7 +226,7 @@ static void outport_write(KBDState *s, uint32_t val)
     s->outport = val;
     qemu_set_irq(s->a20_out, (val >> 1) & 1);
     if (!(val & 1)) {
-        qemu_system_reset_request();
+        qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
     }
 }
 
@@ -301,7 +301,7 @@ static void kbd_write_command(void *opaque, hwaddr addr,
         s->outport &= ~KBD_OUT_A20;
         break;
     case KBD_CCMD_RESET:
-        qemu_system_reset_request();
+        qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
         break;
     case KBD_CCMD_NO_OP:
         /* ignore that */
diff --git a/hw/intc/xics.c b/hw/intc/xics.c
index 292fffecd3..ea3516794a 100644
--- a/hw/intc/xics.c
+++ b/hw/intc/xics.c
@@ -357,6 +357,10 @@ static void icp_realize(DeviceState *dev, Error **errp)
     qemu_register_reset(icp_reset, dev);
 }
 
+static void icp_unrealize(DeviceState *dev, Error **errp)
+{
+    qemu_unregister_reset(icp_reset, dev);
+}
 
 static void icp_class_init(ObjectClass *klass, void *data)
 {
@@ -364,6 +368,7 @@ static void icp_class_init(ObjectClass *klass, void *data)
 
     dc->vmsd = &vmstate_icp_server;
     dc->realize = icp_realize;
+    dc->unrealize = icp_unrealize;
 }
 
 static const TypeInfo icp_info = {
diff --git a/hw/intc/xics_kvm.c b/hw/intc/xics_kvm.c
index dd93531ae3..14b8f6f6e4 100644
--- a/hw/intc/xics_kvm.c
+++ b/hw/intc/xics_kvm.c
@@ -42,6 +42,14 @@
 
 static int kernel_xics_fd = -1;
 
+typedef struct KVMEnabledICP {
+    unsigned long vcpu_id;
+    QLIST_ENTRY(KVMEnabledICP) node;
+} KVMEnabledICP;
+
+static QLIST_HEAD(, KVMEnabledICP)
+    kvm_enabled_icps = QLIST_HEAD_INITIALIZER(&kvm_enabled_icps);
+
 /*
  * ICP-KVM
  */
@@ -121,6 +129,8 @@ static void icp_kvm_reset(void *dev)
 static void icp_kvm_cpu_setup(ICPState *icp, PowerPCCPU *cpu)
 {
     CPUState *cs = CPU(cpu);
+    KVMEnabledICP *enabled_icp;
+    unsigned long vcpu_id = kvm_arch_vcpu_id(cs);
     int ret;
 
     if (kernel_xics_fd == -1) {
@@ -132,18 +142,21 @@ static void icp_kvm_cpu_setup(ICPState *icp, PowerPCCPU *cpu)
      * which was hot-removed earlier we don't have to renable
      * KVM_CAP_IRQ_XICS capability again.
      */
-    if (icp->cap_irq_xics_enabled) {
-        return;
+    QLIST_FOREACH(enabled_icp, &kvm_enabled_icps, node) {
+        if (enabled_icp->vcpu_id == vcpu_id) {
+            return;
+        }
     }
 
-    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_IRQ_XICS, 0, kernel_xics_fd,
-                              kvm_arch_vcpu_id(cs));
+    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_IRQ_XICS, 0, kernel_xics_fd, vcpu_id);
     if (ret < 0) {
-        error_report("Unable to connect CPU%ld to kernel XICS: %s",
-                     kvm_arch_vcpu_id(cs), strerror(errno));
+        error_report("Unable to connect CPU%ld to kernel XICS: %s", vcpu_id,
+                     strerror(errno));
         exit(1);
     }
-    icp->cap_irq_xics_enabled = true;
+    enabled_icp = g_malloc(sizeof(*enabled_icp));
+    enabled_icp->vcpu_id = vcpu_id;
+    QLIST_INSERT_HEAD(&kvm_enabled_icps, enabled_icp, node);
 }
 
 static void icp_kvm_realize(DeviceState *dev, Error **errp)
@@ -151,12 +164,18 @@ static void icp_kvm_realize(DeviceState *dev, Error **errp)
     qemu_register_reset(icp_kvm_reset, dev);
 }
 
+static void icp_kvm_unrealize(DeviceState *dev, Error **errp)
+{
+    qemu_unregister_reset(icp_kvm_reset, dev);
+}
+
 static void icp_kvm_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
     ICPStateClass *icpc = ICP_CLASS(klass);
 
     dc->realize = icp_kvm_realize;
+    dc->unrealize = icp_kvm_unrealize;
     icpc->pre_save = icp_get_kvm_state;
     icpc->post_load = icp_set_kvm_state;
     icpc->cpu_setup = icp_kvm_cpu_setup;
diff --git a/hw/intc/xics_spapr.c b/hw/intc/xics_spapr.c
index f05308b897..d98ea8b130 100644
--- a/hw/intc/xics_spapr.c
+++ b/hw/intc/xics_spapr.c
@@ -229,7 +229,7 @@ static void rtas_int_on(PowerPCCPU *cpu, sPAPRMachineState *spapr,
     rtas_st(rets, 0, RTAS_OUT_SUCCESS);
 }
 
-int xics_spapr_init(sPAPRMachineState *spapr, Error **errp)
+void xics_spapr_init(sPAPRMachineState *spapr)
 {
     /* Registration of global state belongs into realize */
     spapr_rtas_register(RTAS_IBM_SET_XIVE, "ibm,set-xive", rtas_set_xive);
@@ -243,7 +243,6 @@ int xics_spapr_init(sPAPRMachineState *spapr, Error **errp)
     spapr_register_hypercall(H_XIRR_X, h_xirr_x);
     spapr_register_hypercall(H_EOI, h_eoi);
     spapr_register_hypercall(H_IPOLL, h_ipoll);
-    return 0;
 }
 
 #define ICS_IRQ_FREE(ics, srcno)   \
diff --git a/hw/ipmi/ipmi.c b/hw/ipmi/ipmi.c
index 5cf1caa88a..afafe1400f 100644
--- a/hw/ipmi/ipmi.c
+++ b/hw/ipmi/ipmi.c
@@ -44,14 +44,14 @@ static int ipmi_do_hw_op(IPMIInterface *s, enum ipmi_op op, int checkonly)
         if (checkonly) {
             return 0;
         }
-        qemu_system_reset_request();
+        qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
         return 0;
 
     case IPMI_POWEROFF_CHASSIS:
         if (checkonly) {
             return 0;
         }
-        qemu_system_shutdown_request();
+        qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
         return 0;
 
     case IPMI_SEND_NMI:
diff --git a/hw/isa/lpc_ich9.c b/hw/isa/lpc_ich9.c
index e2215dcf4d..ac8416d42b 100644
--- a/hw/isa/lpc_ich9.c
+++ b/hw/isa/lpc_ich9.c
@@ -606,7 +606,7 @@ static void ich9_rst_cnt_write(void *opaque, hwaddr addr, uint64_t val,
     ICH9LPCState *lpc = opaque;
 
     if (val & 4) {
-        qemu_system_reset_request();
+        qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
         return;
     }
     lpc->rst_cnt = val & 0xA; /* keep FULL_RST (bit 3) and SYS_RST (bit 1) */
diff --git a/hw/mips/boston.c b/hw/mips/boston.c
index 83f7b82386..53d1e0ce45 100644
--- a/hw/mips/boston.c
+++ b/hw/mips/boston.c
@@ -232,7 +232,7 @@ static void boston_platreg_write(void *opaque, hwaddr addr,
         break;
     case PLAT_SOFTRST_CTL:
         if (val & PLAT_SOFTRST_CTL_SYSRESET) {
-            qemu_system_reset_request();
+            qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
         }
         break;
     default:
diff --git a/hw/mips/mips_malta.c b/hw/mips/mips_malta.c
index 5dd177e961..7814c39654 100644
--- a/hw/mips/mips_malta.c
+++ b/hw/mips/mips_malta.c
@@ -470,7 +470,7 @@ static void malta_fpga_write(void *opaque, hwaddr addr,
     /* SOFTRES Register */
     case 0x00500:
         if (val == 0x42)
-            qemu_system_reset_request ();
+            qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
         break;
 
     /* BRKRES Register */
diff --git a/hw/mips/mips_r4k.c b/hw/mips/mips_r4k.c
index 748586ed77..f4de9fc343 100644
--- a/hw/mips/mips_r4k.c
+++ b/hw/mips/mips_r4k.c
@@ -53,9 +53,9 @@ static void mips_qemu_write (void *opaque, hwaddr addr,
                              uint64_t val, unsigned size)
 {
     if ((addr & 0xffff) == 0 && val == 42)
-        qemu_system_reset_request ();
+        qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
     else if ((addr & 0xffff) == 4 && val == 42)
-        qemu_system_shutdown_request ();
+        qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
 }
 
 static uint64_t mips_qemu_read (void *opaque, hwaddr addr,
diff --git a/hw/misc/arm_sysctl.c b/hw/misc/arm_sysctl.c
index 8524008708..b20b44ea20 100644
--- a/hw/misc/arm_sysctl.c
+++ b/hw/misc/arm_sysctl.c
@@ -351,13 +351,13 @@ static bool vexpress_cfgctrl_write(arm_sysctl_state *s, unsigned int dcc,
         break;
     case SYS_CFG_SHUTDOWN:
         if (site == SYS_CFG_SITE_MB && device == 0) {
-            qemu_system_shutdown_request();
+            qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
             return true;
         }
         break;
     case SYS_CFG_REBOOT:
         if (site == SYS_CFG_SITE_MB && device == 0) {
-            qemu_system_reset_request();
+            qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
             return true;
         }
         break;
@@ -429,7 +429,7 @@ static void arm_sysctl_write(void *opaque, hwaddr offset,
             if (s->lockval == LOCK_VALUE) {
                 s->resetlevel = val;
                 if (val & 0x100) {
-                    qemu_system_reset_request();
+                    qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
                 }
             }
             break;
@@ -438,7 +438,7 @@ static void arm_sysctl_write(void *opaque, hwaddr offset,
             if (s->lockval == LOCK_VALUE) {
                 s->resetlevel = val;
                 if (val & 0x04) {
-                    qemu_system_reset_request();
+                    qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
                 }
             }
             break;
diff --git a/hw/misc/cbus.c b/hw/misc/cbus.c
index 0c207e3104..677274ce3e 100644
--- a/hw/misc/cbus.c
+++ b/hw/misc/cbus.c
@@ -356,7 +356,7 @@ static inline void retu_write(CBusRetu *s, int reg, uint16_t val)
 
     case RETU_REG_WATCHDOG:
         if (val == 0 && (s->cc[0] & 2))
-            qemu_system_shutdown_request();
+            qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
         break;
 
     case RETU_REG_TXCR:
diff --git a/hw/misc/macio/cuda.c b/hw/misc/macio/cuda.c
index 05c02fb3a4..008d8bd4d5 100644
--- a/hw/misc/macio/cuda.c
+++ b/hw/misc/macio/cuda.c
@@ -612,7 +612,7 @@ static bool cuda_cmd_powerdown(CUDAState *s,
         return false;
     }
 
-    qemu_system_shutdown_request();
+    qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
     return true;
 }
 
@@ -624,7 +624,7 @@ static bool cuda_cmd_reset_system(CUDAState *s,
         return false;
     }
 
-    qemu_system_reset_request();
+    qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
     return true;
 }
 
diff --git a/hw/misc/slavio_misc.c b/hw/misc/slavio_misc.c
index edd5de0702..18ff677512 100644
--- a/hw/misc/slavio_misc.c
+++ b/hw/misc/slavio_misc.c
@@ -258,7 +258,7 @@ static void slavio_aux2_mem_writeb(void *opaque, hwaddr addr,
         val &= AUX2_PWROFF;
     s->aux2 = val;
     if (val & AUX2_PWROFF)
-        qemu_system_shutdown_request();
+        qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
     slavio_misc_update_irq(s);
 }
 
@@ -338,7 +338,7 @@ static void slavio_sysctrl_mem_writel(void *opaque, hwaddr addr,
     case 0:
         if (val & SYS_RESET) {
             s->sysctrl = SYS_RESETSTAT;
-            qemu_system_reset_request();
+            qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
         }
         break;
     default:
diff --git a/hw/misc/zynq_slcr.c b/hw/misc/zynq_slcr.c
index 7891219001..44304d48be 100644
--- a/hw/misc/zynq_slcr.c
+++ b/hw/misc/zynq_slcr.c
@@ -405,7 +405,7 @@ static void zynq_slcr_write(void *opaque, hwaddr offset,
     switch (offset) {
     case PSS_RST_CTRL:
         if (val & R_PSS_RST_CTRL_SOFT_RST) {
-            qemu_system_reset_request();
+            qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
         }
         break;
     }
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 98bd683f31..9a3d769aa2 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -589,7 +589,15 @@ static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
     if (!get_vhost_net(nc->peer)) {
         return features;
     }
-    return vhost_net_get_features(get_vhost_net(nc->peer), features);
+    features = vhost_net_get_features(get_vhost_net(nc->peer), features);
+    vdev->backend_features = features;
+
+    if (n->mtu_bypass_backend &&
+            (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
+        features |= (1ULL << VIRTIO_NET_F_MTU);
+    }
+
+    return features;
 }
 
 static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
@@ -640,6 +648,11 @@ static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
     VirtIONet *n = VIRTIO_NET(vdev);
     int i;
 
+    if (n->mtu_bypass_backend &&
+            !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) {
+        features &= ~(1ULL << VIRTIO_NET_F_MTU);
+    }
+
     virtio_net_set_multiqueue(n,
                               virtio_has_feature(features, VIRTIO_NET_F_MQ));
 
@@ -2093,6 +2106,8 @@ static Property virtio_net_properties[] = {
     DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size,
                        VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE),
     DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0),
+    DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend,
+                     true),
     DEFINE_PROP_END_OF_LIST(),
 };
 
diff --git a/hw/pci-host/apb.c b/hw/pci-host/apb.c
index edc88f4c65..326f5ef024 100644
--- a/hw/pci-host/apb.c
+++ b/hw/pci-host/apb.c
@@ -209,7 +209,7 @@ static AddressSpace *pbm_pci_dma_iommu(PCIBus *bus, void *opaque, int devfn)
 
 /* Called from RCU critical section */
 static IOMMUTLBEntry pbm_translate_iommu(MemoryRegion *iommu, hwaddr addr,
-                                         bool is_write)
+                                         IOMMUAccessFlags flag)
 {
     IOMMUState *is = container_of(iommu, IOMMUState, iommu);
     hwaddr baseaddr, offset;
@@ -482,9 +482,9 @@ static void apb_config_writel (void *opaque, hwaddr addr,
             s->reset_control |= val & RESET_WMASK;
             if (val & SOFT_POR) {
                 s->nr_resets = 0;
-                qemu_system_reset_request();
+                qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
             } else if (val & SOFT_XIR) {
-                qemu_system_reset_request();
+                qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
             }
         }
         break;
diff --git a/hw/pci-host/bonito.c b/hw/pci-host/bonito.c
index 85a3bb0dd2..89133a9dd3 100644
--- a/hw/pci-host/bonito.c
+++ b/hw/pci-host/bonito.c
@@ -269,7 +269,7 @@ static void bonito_writel(void *opaque, hwaddr addr,
         }
         s->regs[saddr] = val;
         if (reset) {
-            qemu_system_reset_request();
+            qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
         }
         break;
     case BONITO_INTENSET:
diff --git a/hw/pci-host/piix.c b/hw/pci-host/piix.c
index 2d02de12d9..4ce201ea65 100644
--- a/hw/pci-host/piix.c
+++ b/hw/pci-host/piix.c
@@ -632,7 +632,7 @@ static void rcr_write(void *opaque, hwaddr addr, uint64_t val, unsigned len)
     PIIX3State *d = opaque;
 
     if (val & 4) {
-        qemu_system_reset_request();
+        qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
         return;
     }
     d->rcr = val & 2; /* keep System Reset type only */
diff --git a/hw/ppc/e500.c b/hw/ppc/e500.c
index f7df2388c1..62f1857206 100644
--- a/hw/ppc/e500.c
+++ b/hw/ppc/e500.c
@@ -774,7 +774,7 @@ static qemu_irq *ppce500_init_mpic(MachineState *machine, PPCE500Params *params,
 static void ppce500_power_off(void *opaque, int line, int on)
 {
     if (on) {
-        qemu_system_shutdown_request();
+        qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
     }
 }
 
diff --git a/hw/ppc/mpc8544_guts.c b/hw/ppc/mpc8544_guts.c
index ba69178d69..ce1254b5d4 100644
--- a/hw/ppc/mpc8544_guts.c
+++ b/hw/ppc/mpc8544_guts.c
@@ -98,7 +98,7 @@ static void mpc8544_guts_write(void *opaque, hwaddr addr,
     switch (addr) {
     case MPC8544_GUTS_ADDR_RSTCR:
         if (value & MPC8544_GUTS_RSTCR_RESET) {
-            qemu_system_reset_request();
+            qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
         }
         break;
     default:
diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c
index 5f93083d4a..224184d66d 100644
--- a/hw/ppc/ppc.c
+++ b/hw/ppc/ppc.c
@@ -412,7 +412,7 @@ static void ppce500_set_irq(void *opaque, int pin, int level)
             if (level) {
                 LOG_IRQ("%s: reset the PowerPC system\n",
                             __func__);
-                qemu_system_reset_request();
+                qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
             }
             break;
         case PPCE500_INPUT_RESET_CORE:
diff --git a/hw/ppc/ppc405_uc.c b/hw/ppc/ppc405_uc.c
index d5df94aa6e..fc32e96bf4 100644
--- a/hw/ppc/ppc405_uc.c
+++ b/hw/ppc/ppc405_uc.c
@@ -1807,7 +1807,7 @@ void ppc40x_chip_reset(PowerPCCPU *cpu)
 void ppc40x_system_reset(PowerPCCPU *cpu)
 {
     printf("Reset PowerPC system\n");
-    qemu_system_reset_request();
+    qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
 }
 
 void store_40x_dbcr0 (CPUPPCState *env, uint32_t val)
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 0980d733cd..ab3aab1279 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -101,21 +101,26 @@ static ICSState *spapr_ics_create(sPAPRMachineState *spapr,
                                   const char *type_ics,
                                   int nr_irqs, Error **errp)
 {
-    Error *err = NULL, *local_err = NULL;
+    Error *local_err = NULL;
     Object *obj;
 
     obj = object_new(type_ics);
-    object_property_add_child(OBJECT(spapr), "ics", obj, NULL);
+    object_property_add_child(OBJECT(spapr), "ics", obj, &error_abort);
     object_property_add_const_link(obj, "xics", OBJECT(spapr), &error_abort);
-    object_property_set_int(obj, nr_irqs, "nr-irqs", &err);
+    object_property_set_int(obj, nr_irqs, "nr-irqs", &local_err);
+    if (local_err) {
+        goto error;
+    }
     object_property_set_bool(obj, true, "realized", &local_err);
-    error_propagate(&err, local_err);
-    if (err) {
-        error_propagate(errp, err);
-        return NULL;
+    if (local_err) {
+        goto error;
     }
 
     return ICS_SIMPLE(obj);
+
+error:
+    error_propagate(errp, local_err);
+    return NULL;
 }
 
 static void xics_system_init(MachineState *machine, int nr_irqs, Error **errp)
@@ -123,25 +128,24 @@ static void xics_system_init(MachineState *machine, int nr_irqs, Error **errp)
     sPAPRMachineState *spapr = SPAPR_MACHINE(machine);
 
     if (kvm_enabled()) {
-        Error *err = NULL;
-
         if (machine_kernel_irqchip_allowed(machine) &&
             !xics_kvm_init(spapr, errp)) {
             spapr->icp_type = TYPE_KVM_ICP;
-            spapr->ics = spapr_ics_create(spapr, TYPE_ICS_KVM, nr_irqs, &err);
+            spapr->ics = spapr_ics_create(spapr, TYPE_ICS_KVM, nr_irqs, errp);
         }
         if (machine_kernel_irqchip_required(machine) && !spapr->ics) {
-            error_reportf_err(err,
-                              "kernel_irqchip requested but unavailable: ");
-        } else {
-            error_free(err);
+            error_prepend(errp, "kernel_irqchip requested but unavailable: ");
+            return;
         }
     }
 
     if (!spapr->ics) {
-        xics_spapr_init(spapr, errp);
+        xics_spapr_init(spapr);
         spapr->icp_type = TYPE_ICP;
         spapr->ics = spapr_ics_create(spapr, TYPE_ICS_SIMPLE, nr_irqs, errp);
+        if (!spapr->ics) {
+            return;
+        }
     }
 }
 
@@ -1222,16 +1226,21 @@ static int spapr_hpt_shift_for_ramsize(uint64_t ramsize)
     return shift;
 }
 
+void spapr_free_hpt(sPAPRMachineState *spapr)
+{
+    g_free(spapr->htab);
+    spapr->htab = NULL;
+    spapr->htab_shift = 0;
+    close_htab_fd(spapr);
+}
+
 static void spapr_reallocate_hpt(sPAPRMachineState *spapr, int shift,
                                  Error **errp)
 {
     long rc;
 
     /* Clean up any HPT info from a previous boot */
-    g_free(spapr->htab);
-    spapr->htab = NULL;
-    spapr->htab_shift = 0;
-    close_htab_fd(spapr);
+    spapr_free_hpt(spapr);
 
     rc = kvmppc_reset_htab(shift);
     if (rc < 0) {
@@ -2050,6 +2059,7 @@ static void ppc_spapr_init(MachineState *machine)
     msi_nonbroken = true;
 
     QLIST_INIT(&spapr->phbs);
+    QTAILQ_INIT(&spapr->pending_dimm_unplugs);
 
     /* Allocate RMA if necessary */
     rma_alloc_size = kvmppc_alloc_rma(&rma);
@@ -2569,20 +2579,6 @@ static void spapr_memory_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
     uint64_t align = memory_region_get_alignment(mr);
     uint64_t size = memory_region_size(mr);
     uint64_t addr;
-    char *mem_dev;
-
-    if (size % SPAPR_MEMORY_BLOCK_SIZE) {
-        error_setg(&local_err, "Hotplugged memory size must be a multiple of "
-                      "%lld MB", SPAPR_MEMORY_BLOCK_SIZE/M_BYTE);
-        goto out;
-    }
-
-    mem_dev = object_property_get_str(OBJECT(dimm), PC_DIMM_MEMDEV_PROP, NULL);
-    if (mem_dev && !kvmppc_is_mem_backend_page_size_ok(mem_dev)) {
-        error_setg(&local_err, "Memory backend has bad page size. "
-                   "Use 'memory-backend-file' with correct mem-path.");
-        goto out;
-    }
 
     pc_dimm_memory_plug(dev, &ms->hotplug_memory, mr, align, &local_err);
     if (local_err) {
@@ -2603,56 +2599,121 @@ out:
     error_propagate(errp, local_err);
 }
 
-typedef struct sPAPRDIMMState {
+static void spapr_memory_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
+                                  Error **errp)
+{
+    PCDIMMDevice *dimm = PC_DIMM(dev);
+    PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm);
+    MemoryRegion *mr = ddc->get_memory_region(dimm);
+    uint64_t size = memory_region_size(mr);
+    char *mem_dev;
+
+    if (size % SPAPR_MEMORY_BLOCK_SIZE) {
+        error_setg(errp, "Hotplugged memory size must be a multiple of "
+                      "%lld MB", SPAPR_MEMORY_BLOCK_SIZE / M_BYTE);
+        return;
+    }
+
+    mem_dev = object_property_get_str(OBJECT(dimm), PC_DIMM_MEMDEV_PROP, NULL);
+    if (mem_dev && !kvmppc_is_mem_backend_page_size_ok(mem_dev)) {
+        error_setg(errp, "Memory backend has bad page size. "
+                   "Use 'memory-backend-file' with correct mem-path.");
+        return;
+    }
+}
+
+struct sPAPRDIMMState {
+    PCDIMMDevice *dimm;
     uint32_t nr_lmbs;
-} sPAPRDIMMState;
+    QTAILQ_ENTRY(sPAPRDIMMState) next;
+};
 
-static void spapr_lmb_release(DeviceState *dev, void *opaque)
+static sPAPRDIMMState *spapr_pending_dimm_unplugs_find(sPAPRMachineState *s,
+                                                       PCDIMMDevice *dimm)
 {
-    sPAPRDIMMState *ds = (sPAPRDIMMState *)opaque;
-    HotplugHandler *hotplug_ctrl;
+    sPAPRDIMMState *dimm_state = NULL;
 
-    if (--ds->nr_lmbs) {
-        return;
+    QTAILQ_FOREACH(dimm_state, &s->pending_dimm_unplugs, next) {
+        if (dimm_state->dimm == dimm) {
+            break;
+        }
     }
+    return dimm_state;
+}
 
-    g_free(ds);
+static void spapr_pending_dimm_unplugs_add(sPAPRMachineState *spapr,
+                                           sPAPRDIMMState *dimm_state)
+{
+    g_assert(!spapr_pending_dimm_unplugs_find(spapr, dimm_state->dimm));
+    QTAILQ_INSERT_HEAD(&spapr->pending_dimm_unplugs, dimm_state, next);
+}
 
-    /*
-     * Now that all the LMBs have been removed by the guest, call the
-     * pc-dimm unplug handler to cleanup up the pc-dimm device.
-     */
-    hotplug_ctrl = qdev_get_hotplug_handler(dev);
-    hotplug_handler_unplug(hotplug_ctrl, dev, &error_abort);
+static void spapr_pending_dimm_unplugs_remove(sPAPRMachineState *spapr,
+                                              sPAPRDIMMState *dimm_state)
+{
+    QTAILQ_REMOVE(&spapr->pending_dimm_unplugs, dimm_state, next);
+    g_free(dimm_state);
 }
 
-static void spapr_del_lmbs(DeviceState *dev, uint64_t addr_start, uint64_t size,
-                           Error **errp)
+static sPAPRDIMMState *spapr_recover_pending_dimm_state(sPAPRMachineState *ms,
+                                                        PCDIMMDevice *dimm)
 {
     sPAPRDRConnector *drc;
-    sPAPRDRConnectorClass *drck;
+    PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm);
+    MemoryRegion *mr = ddc->get_memory_region(dimm);
+    uint64_t size = memory_region_size(mr);
     uint32_t nr_lmbs = size / SPAPR_MEMORY_BLOCK_SIZE;
+    uint32_t avail_lmbs = 0;
+    uint64_t addr_start, addr;
     int i;
-    sPAPRDIMMState *ds = g_malloc0(sizeof(sPAPRDIMMState));
-    uint64_t addr = addr_start;
+    sPAPRDIMMState *ds;
 
-    ds->nr_lmbs = nr_lmbs;
+    addr_start = object_property_get_int(OBJECT(dimm), PC_DIMM_ADDR_PROP,
+                                         &error_abort);
+
+    addr = addr_start;
     for (i = 0; i < nr_lmbs; i++) {
         drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_LMB,
-                addr / SPAPR_MEMORY_BLOCK_SIZE);
+                                       addr / SPAPR_MEMORY_BLOCK_SIZE);
         g_assert(drc);
-
-        drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
-        drck->detach(drc, dev, spapr_lmb_release, ds, errp);
+        if (drc->indicator_state != SPAPR_DR_INDICATOR_STATE_INACTIVE) {
+            avail_lmbs++;
+        }
         addr += SPAPR_MEMORY_BLOCK_SIZE;
     }
 
-    drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_LMB,
-                                   addr_start / SPAPR_MEMORY_BLOCK_SIZE);
-    drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
-    spapr_hotplug_req_remove_by_count_indexed(SPAPR_DR_CONNECTOR_TYPE_LMB,
-                                              nr_lmbs,
-                                              drck->get_index(drc));
+    ds = g_malloc0(sizeof(sPAPRDIMMState));
+    ds->nr_lmbs = avail_lmbs;
+    ds->dimm = dimm;
+    spapr_pending_dimm_unplugs_add(ms, ds);
+    return ds;
+}
+
+/* Callback to be called during DRC release. */
+void spapr_lmb_release(DeviceState *dev)
+{
+    HotplugHandler *hotplug_ctrl = qdev_get_hotplug_handler(dev);
+    sPAPRMachineState *spapr = SPAPR_MACHINE(hotplug_ctrl);
+    sPAPRDIMMState *ds = spapr_pending_dimm_unplugs_find(spapr, PC_DIMM(dev));
+
+    /* This information will get lost if a migration occurs
+     * during the unplug process. In this case recover it. */
+    if (ds == NULL) {
+        ds = spapr_recover_pending_dimm_state(spapr, PC_DIMM(dev));
+        if (ds->nr_lmbs) {
+            return;
+        }
+    } else if (--ds->nr_lmbs) {
+        return;
+    }
+
+    spapr_pending_dimm_unplugs_remove(spapr, ds);
+
+    /*
+     * Now that all the LMBs have been removed by the guest, call the
+     * pc-dimm unplug handler to cleanup up the pc-dimm device.
+     */
+    hotplug_handler_unplug(hotplug_ctrl, dev, &error_abort);
 }
 
 static void spapr_memory_unplug(HotplugHandler *hotplug_dev, DeviceState *dev,
@@ -2670,19 +2731,47 @@ static void spapr_memory_unplug(HotplugHandler *hotplug_dev, DeviceState *dev,
 static void spapr_memory_unplug_request(HotplugHandler *hotplug_dev,
                                         DeviceState *dev, Error **errp)
 {
+    sPAPRMachineState *spapr = SPAPR_MACHINE(hotplug_dev);
     Error *local_err = NULL;
     PCDIMMDevice *dimm = PC_DIMM(dev);
     PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm);
     MemoryRegion *mr = ddc->get_memory_region(dimm);
     uint64_t size = memory_region_size(mr);
-    uint64_t addr;
+    uint32_t nr_lmbs = size / SPAPR_MEMORY_BLOCK_SIZE;
+    uint64_t addr_start, addr;
+    int i;
+    sPAPRDRConnector *drc;
+    sPAPRDRConnectorClass *drck;
+    sPAPRDIMMState *ds;
 
-    addr = object_property_get_int(OBJECT(dimm), PC_DIMM_ADDR_PROP, &local_err);
+    addr_start = object_property_get_int(OBJECT(dimm), PC_DIMM_ADDR_PROP,
+                                         &local_err);
     if (local_err) {
         goto out;
     }
 
-    spapr_del_lmbs(dev, addr, size, &error_abort);
+    ds = g_malloc0(sizeof(sPAPRDIMMState));
+    ds->nr_lmbs = nr_lmbs;
+    ds->dimm = dimm;
+    spapr_pending_dimm_unplugs_add(spapr, ds);
+
+    addr = addr_start;
+    for (i = 0; i < nr_lmbs; i++) {
+        drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_LMB,
+                addr / SPAPR_MEMORY_BLOCK_SIZE);
+        g_assert(drc);
+
+        drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
+        drck->detach(drc, dev, errp);
+        addr += SPAPR_MEMORY_BLOCK_SIZE;
+    }
+
+    drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_LMB,
+                                   addr_start / SPAPR_MEMORY_BLOCK_SIZE);
+    drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
+    spapr_hotplug_req_remove_by_count_indexed(SPAPR_DR_CONNECTOR_TYPE_LMB,
+                                              nr_lmbs,
+                                              drck->get_index(drc));
 out:
     error_propagate(errp, local_err);
 }
@@ -2715,11 +2804,13 @@ static void spapr_core_unplug(HotplugHandler *hotplug_dev, DeviceState *dev,
     CPUCore *cc = CPU_CORE(dev);
     CPUArchId *core_slot = spapr_find_cpu_slot(ms, cc->core_id, NULL);
 
+    assert(core_slot);
     core_slot->cpu = NULL;
     object_unparent(OBJECT(dev));
 }
 
-static void spapr_core_release(DeviceState *dev, void *opaque)
+/* Callback to be called during DRC release. */
+void spapr_core_release(DeviceState *dev)
 {
     HotplugHandler *hotplug_ctrl;
 
@@ -2752,7 +2843,7 @@ void spapr_core_unplug_request(HotplugHandler *hotplug_dev, DeviceState *dev,
     g_assert(drc);
 
     drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
-    drck->detach(drc, dev, spapr_core_release, NULL, &local_err);
+    drck->detach(drc, dev, &local_err);
     if (local_err) {
         error_propagate(errp, local_err);
         return;
@@ -2853,7 +2944,13 @@ static void spapr_core_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
         goto out;
     }
 
-    if (cc->nr_threads != smp_threads) {
+    /*
+     * In general we should have homogeneous threads-per-core, but old
+     * (pre hotplug support) machine types allow the last core to have
+     * reduced threads as a compatibility hack for when we allowed
+     * total vcpus not a multiple of threads-per-core.
+     */
+    if (mc->has_hotpluggable_cpus && (cc->nr_threads != smp_threads)) {
         error_setg(errp, "invalid nr-threads %d, must be %d",
                    cc->nr_threads, smp_threads);
         return;
@@ -2990,7 +3087,9 @@ static void spapr_machine_device_unplug_request(HotplugHandler *hotplug_dev,
 static void spapr_machine_device_pre_plug(HotplugHandler *hotplug_dev,
                                           DeviceState *dev, Error **errp)
 {
-    if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_CPU_CORE)) {
+    if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
+        spapr_memory_pre_plug(hotplug_dev, dev, errp);
+    } else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_CPU_CORE)) {
         spapr_core_pre_plug(hotplug_dev, dev, errp);
     }
 }
diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c
index a17ea07ef1..ff7058ecc0 100644
--- a/hw/ppc/spapr_cpu_core.c
+++ b/hw/ppc/spapr_cpu_core.c
@@ -143,29 +143,30 @@ static void spapr_cpu_core_realize_child(Object *child, Error **errp)
     Object *obj;
 
     obj = object_new(spapr->icp_type);
-    object_property_add_child(OBJECT(cpu), "icp", obj, NULL);
+    object_property_add_child(OBJECT(cpu), "icp", obj, &error_abort);
+    object_unref(obj);
     object_property_add_const_link(obj, "xics", OBJECT(spapr), &error_abort);
     object_property_set_bool(obj, true, "realized", &local_err);
     if (local_err) {
-        error_propagate(errp, local_err);
-        return;
+        goto error;
     }
 
     object_property_set_bool(child, true, "realized", &local_err);
     if (local_err) {
-        object_unparent(obj);
-        error_propagate(errp, local_err);
-        return;
+        goto error;
     }
 
     spapr_cpu_init(spapr, cpu, &local_err);
     if (local_err) {
-        object_unparent(obj);
-        error_propagate(errp, local_err);
-        return;
+        goto error;
     }
 
     xics_cpu_setup(XICS_FABRIC(spapr), cpu, ICP(obj));
+    return;
+
+error:
+    object_unparent(obj);
+    error_propagate(errp, local_err);
 }
 
 static void spapr_cpu_core_realize(DeviceState *dev, Error **errp)
diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c
index 9fa5545991..cc2400bcd5 100644
--- a/hw/ppc/spapr_drc.c
+++ b/hw/ppc/spapr_drc.c
@@ -20,6 +20,7 @@
 #include "qapi/visitor.h"
 #include "qemu/error-report.h"
 #include "hw/ppc/spapr.h" /* for RTAS return codes */
+#include "hw/pci-host/spapr.h" /* spapr_phb_remove_pci_device_cb callback */
 #include "trace.h"
 
 #define DRC_CONTAINER_PATH "/dr-connector"
@@ -99,8 +100,7 @@ static uint32_t set_isolation_state(sPAPRDRConnector *drc,
         if (drc->awaiting_release) {
             if (drc->configured) {
                 trace_spapr_drc_set_isolation_state_finalizing(get_index(drc));
-                drck->detach(drc, DEVICE(drc->dev), drc->detach_cb,
-                             drc->detach_cb_opaque, NULL);
+                drck->detach(drc, DEVICE(drc->dev), NULL);
             } else {
                 trace_spapr_drc_set_isolation_state_deferring(get_index(drc));
             }
@@ -153,8 +153,7 @@ static uint32_t set_allocation_state(sPAPRDRConnector *drc,
         if (drc->awaiting_release &&
             drc->allocation_state == SPAPR_DR_ALLOCATION_STATE_UNUSABLE) {
             trace_spapr_drc_set_allocation_state_finalizing(get_index(drc));
-            drck->detach(drc, DEVICE(drc->dev), drc->detach_cb,
-                         drc->detach_cb_opaque, NULL);
+            drck->detach(drc, DEVICE(drc->dev), NULL);
         } else if (drc->allocation_state == SPAPR_DR_ALLOCATION_STATE_USABLE) {
             drc->awaiting_allocation = false;
         }
@@ -404,15 +403,10 @@ static void attach(sPAPRDRConnector *drc, DeviceState *d, void *fdt,
                              NULL, 0, NULL);
 }
 
-static void detach(sPAPRDRConnector *drc, DeviceState *d,
-                   spapr_drc_detach_cb *detach_cb,
-                   void *detach_cb_opaque, Error **errp)
+static void detach(sPAPRDRConnector *drc, DeviceState *d, Error **errp)
 {
     trace_spapr_drc_detach(get_index(drc));
 
-    drc->detach_cb = detach_cb;
-    drc->detach_cb_opaque = detach_cb_opaque;
-
     /* if we've signalled device presence to the guest, or if the guest
      * has gone ahead and configured the device (via manually-executed
      * device add via drmgr in guest, namely), we need to wait
@@ -456,8 +450,21 @@ static void detach(sPAPRDRConnector *drc, DeviceState *d,
 
     drc->indicator_state = SPAPR_DR_INDICATOR_STATE_INACTIVE;
 
-    if (drc->detach_cb) {
-        drc->detach_cb(drc->dev, drc->detach_cb_opaque);
+    /* Calling release callbacks based on drc->type. */
+    switch (drc->type) {
+    case SPAPR_DR_CONNECTOR_TYPE_CPU:
+        spapr_core_release(drc->dev);
+        break;
+    case SPAPR_DR_CONNECTOR_TYPE_PCI:
+        spapr_phb_remove_pci_device_cb(drc->dev);
+        break;
+    case SPAPR_DR_CONNECTOR_TYPE_LMB:
+        spapr_lmb_release(drc->dev);
+        break;
+    case SPAPR_DR_CONNECTOR_TYPE_PHB:
+    case SPAPR_DR_CONNECTOR_TYPE_VIO:
+    default:
+        g_assert(false);
     }
 
     drc->awaiting_release = false;
@@ -467,8 +474,6 @@ static void detach(sPAPRDRConnector *drc, DeviceState *d,
     drc->fdt_start_offset = 0;
     object_property_del(OBJECT(drc), "device", NULL);
     drc->dev = NULL;
-    drc->detach_cb = NULL;
-    drc->detach_cb_opaque = NULL;
 }
 
 static bool release_pending(sPAPRDRConnector *drc)
@@ -498,8 +503,7 @@ static void reset(DeviceState *d)
          * force removal if we are
          */
         if (drc->awaiting_release) {
-            drck->detach(drc, DEVICE(drc->dev), drc->detach_cb,
-                         drc->detach_cb_opaque, NULL);
+            drck->detach(drc, DEVICE(drc->dev), NULL);
         }
 
         /* non-PCI devices may be awaiting a transition to UNUSABLE */
@@ -515,6 +519,60 @@ static void reset(DeviceState *d)
     }
 }
 
+static bool spapr_drc_needed(void *opaque)
+{
+    sPAPRDRConnector *drc = (sPAPRDRConnector *)opaque;
+    sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
+    bool rc = false;
+    sPAPRDREntitySense value;
+    drck->entity_sense(drc, &value);
+
+    /* If no dev is plugged in there is no need to migrate the DRC state */
+    if (value != SPAPR_DR_ENTITY_SENSE_PRESENT) {
+        return false;
+    }
+
+    /*
+     * If there is dev plugged in, we need to migrate the DRC state when
+     * it is different from cold-plugged state
+     */
+    switch (drc->type) {
+    case SPAPR_DR_CONNECTOR_TYPE_PCI:
+        rc = !((drc->isolation_state == SPAPR_DR_ISOLATION_STATE_UNISOLATED) &&
+               (drc->allocation_state == SPAPR_DR_ALLOCATION_STATE_USABLE) &&
+               drc->configured && drc->signalled && !drc->awaiting_release);
+        break;
+    case SPAPR_DR_CONNECTOR_TYPE_CPU:
+    case SPAPR_DR_CONNECTOR_TYPE_LMB:
+        rc = !((drc->isolation_state == SPAPR_DR_ISOLATION_STATE_ISOLATED) &&
+               (drc->allocation_state == SPAPR_DR_ALLOCATION_STATE_UNUSABLE) &&
+               drc->configured && drc->signalled && !drc->awaiting_release);
+        break;
+    case SPAPR_DR_CONNECTOR_TYPE_PHB:
+    case SPAPR_DR_CONNECTOR_TYPE_VIO:
+    default:
+        g_assert(false);
+    }
+    return rc;
+}
+
+static const VMStateDescription vmstate_spapr_drc = {
+    .name = "spapr_drc",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = spapr_drc_needed,
+    .fields  = (VMStateField []) {
+        VMSTATE_UINT32(isolation_state, sPAPRDRConnector),
+        VMSTATE_UINT32(allocation_state, sPAPRDRConnector),
+        VMSTATE_UINT32(indicator_state, sPAPRDRConnector),
+        VMSTATE_BOOL(configured, sPAPRDRConnector),
+        VMSTATE_BOOL(awaiting_release, sPAPRDRConnector),
+        VMSTATE_BOOL(awaiting_allocation, sPAPRDRConnector),
+        VMSTATE_BOOL(signalled, sPAPRDRConnector),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
 static void realize(DeviceState *d, Error **errp)
 {
     sPAPRDRConnector *drc = SPAPR_DR_CONNECTOR(d);
@@ -543,6 +601,8 @@ static void realize(DeviceState *d, Error **errp)
         object_unref(OBJECT(drc));
     }
     g_free(child_name);
+    vmstate_register(DEVICE(drc), drck->get_index(drc), &vmstate_spapr_drc,
+                     drc);
     trace_spapr_drc_realize_complete(drck->get_index(drc));
 }
 
diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
index f0b28d8112..73e2a1884f 100644
--- a/hw/ppc/spapr_events.c
+++ b/hw/ppc/spapr_events.c
@@ -342,20 +342,18 @@ static int rtas_event_log_to_irq(sPAPRMachineState *spapr, int log_type)
     return source->irq;
 }
 
-static void rtas_event_log_queue(int log_type, void *data, bool exception)
+static void rtas_event_log_queue(int log_type, void *data)
 {
     sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
     sPAPREventLogEntry *entry = g_new(sPAPREventLogEntry, 1);
 
     g_assert(data);
     entry->log_type = log_type;
-    entry->exception = exception;
     entry->data = data;
     QTAILQ_INSERT_TAIL(&spapr->pending_events, entry, next);
 }
 
-static sPAPREventLogEntry *rtas_event_log_dequeue(uint32_t event_mask,
-                                                  bool exception)
+static sPAPREventLogEntry *rtas_event_log_dequeue(uint32_t event_mask)
 {
     sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
     sPAPREventLogEntry *entry = NULL;
@@ -364,10 +362,6 @@ static sPAPREventLogEntry *rtas_event_log_dequeue(uint32_t event_mask,
         const sPAPREventSource *source =
             rtas_event_log_to_source(spapr, entry->log_type);
 
-        if (entry->exception != exception) {
-            continue;
-        }
-
         if (source->mask & event_mask) {
             break;
         }
@@ -380,7 +374,7 @@ static sPAPREventLogEntry *rtas_event_log_dequeue(uint32_t event_mask,
     return entry;
 }
 
-static bool rtas_event_log_contains(uint32_t event_mask, bool exception)
+static bool rtas_event_log_contains(uint32_t event_mask)
 {
     sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
     sPAPREventLogEntry *entry = NULL;
@@ -389,10 +383,6 @@ static bool rtas_event_log_contains(uint32_t event_mask, bool exception)
         const sPAPREventSource *source =
             rtas_event_log_to_source(spapr, entry->log_type);
 
-        if (entry->exception != exception) {
-            continue;
-        }
-
         if (source->mask & event_mask) {
             return true;
         }
@@ -479,7 +469,7 @@ static void spapr_powerdown_req(Notifier *n, void *opaque)
     epow->event_modifier = RTAS_LOG_V6_EPOW_MODIFIER_NORMAL;
     epow->extended_modifier = RTAS_LOG_V6_EPOW_XMODIFIER_PARTITION_SPECIFIC;
 
-    rtas_event_log_queue(RTAS_LOG_TYPE_EPOW, new_epow, true);
+    rtas_event_log_queue(RTAS_LOG_TYPE_EPOW, new_epow);
 
     qemu_irq_pulse(xics_get_qirq(XICS_FABRIC(spapr),
                                  rtas_event_log_to_irq(spapr,
@@ -572,7 +562,7 @@ static void spapr_hotplug_req_event(uint8_t hp_id, uint8_t hp_action,
             cpu_to_be32(drc_id->count_indexed.index);
     }
 
-    rtas_event_log_queue(RTAS_LOG_TYPE_HOTPLUG, new_hp, true);
+    rtas_event_log_queue(RTAS_LOG_TYPE_HOTPLUG, new_hp);
 
     qemu_irq_pulse(xics_get_qirq(XICS_FABRIC(spapr),
                                  rtas_event_log_to_irq(spapr,
@@ -667,7 +657,7 @@ static void check_exception(PowerPCCPU *cpu, sPAPRMachineState *spapr,
         xinfo |= (uint64_t)rtas_ld(args, 6) << 32;
     }
 
-    event = rtas_event_log_dequeue(mask, true);
+    event = rtas_event_log_dequeue(mask);
     if (!event) {
         goto out_no_events;
     }
@@ -690,7 +680,7 @@ static void check_exception(PowerPCCPU *cpu, sPAPRMachineState *spapr,
      * interrupts.
      */
     for (i = 0; i < EVENT_CLASS_MAX; i++) {
-        if (rtas_event_log_contains(EVENT_CLASS_MASK(i), true)) {
+        if (rtas_event_log_contains(EVENT_CLASS_MASK(i))) {
             const sPAPREventSource *source =
                 spapr_event_sources_get_source(spapr->event_sources, i);
 
@@ -710,38 +700,10 @@ static void event_scan(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                        target_ulong args,
                        uint32_t nret, target_ulong rets)
 {
-    uint32_t mask, buf, len, event_len;
-    sPAPREventLogEntry *event;
-    struct rtas_error_log *hdr;
-
     if (nargs != 4 || nret != 1) {
         rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
         return;
     }
-
-    mask = rtas_ld(args, 0);
-    buf = rtas_ld(args, 2);
-    len = rtas_ld(args, 3);
-
-    event = rtas_event_log_dequeue(mask, false);
-    if (!event) {
-        goto out_no_events;
-    }
-
-    hdr = event->data;
-    event_len = be32_to_cpu(hdr->extended_length) + sizeof(*hdr);
-
-    if (event_len < len) {
-        len = event_len;
-    }
-
-    cpu_physical_memory_write(buf, event->data, len);
-    rtas_st(rets, 0, RTAS_OUT_SUCCESS);
-    g_free(event->data);
-    g_free(event);
-    return;
-
-out_no_events:
     rtas_st(rets, 0, RTAS_OUT_NO_ERRORS_FOUND);
 }
 
diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
index 0d608d6e28..aae5a62a61 100644
--- a/hw/ppc/spapr_hcall.c
+++ b/hw/ppc/spapr_hcall.c
@@ -913,10 +913,7 @@ static void spapr_check_setup_free_hpt(sPAPRMachineState *spapr,
         /* We assume RADIX, so this catches all the "Do Nothing" cases */
     } else if (!(patbe_old & PATBE1_GR)) {
         /* HASH->RADIX : Free HPT */
-        g_free(spapr->htab);
-        spapr->htab = NULL;
-        spapr->htab_shift = 0;
-        close_htab_fd(spapr);
+        spapr_free_hpt(spapr);
     } else if (!(patbe_new & PATBE1_GR)) {
         /* RADIX->HASH || NOTHING->HASH : Allocate HPT */
         spapr_setup_hpt_and_vrma(spapr);
@@ -1047,19 +1044,13 @@ static target_ulong h_signal_sys_reset(PowerPCCPU *cpu,
     }
 }
 
-static target_ulong h_client_architecture_support(PowerPCCPU *cpu,
-                                                  sPAPRMachineState *spapr,
-                                                  target_ulong opcode,
-                                                  target_ulong *args)
+static uint32_t cas_check_pvr(PowerPCCPU *cpu, target_ulong *addr,
+                              Error **errp)
 {
-    target_ulong list = ppc64_phys_to_real(args[0]);
-    target_ulong ov_table;
     bool explicit_match = false; /* Matched the CPU's real PVR */
     uint32_t max_compat = cpu->max_compat;
     uint32_t best_compat = 0;
     int i;
-    sPAPROptionVector *ov1_guest, *ov5_guest, *ov5_cas_old, *ov5_updates;
-    bool guest_radix;
 
     /*
      * We scan the supplied table of PVRs looking for two things
@@ -1069,9 +1060,9 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu,
     for (i = 0; i < 512; ++i) {
         uint32_t pvr, pvr_mask;
 
-        pvr_mask = ldl_be_phys(&address_space_memory, list);
-        pvr = ldl_be_phys(&address_space_memory, list + 4);
-        list += 8;
+        pvr_mask = ldl_be_phys(&address_space_memory, *addr);
+        pvr = ldl_be_phys(&address_space_memory, *addr + 4);
+        *addr += 8;
 
         if (~pvr_mask & pvr) {
             break; /* Terminator record */
@@ -1090,17 +1081,38 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu,
         /* We couldn't find a suitable compatibility mode, and either
          * the guest doesn't support "raw" mode for this CPU, or raw
          * mode is disabled because a maximum compat mode is set */
-        return H_HARDWARE;
+        error_setg(errp, "Couldn't negotiate a suitable PVR during CAS");
+        return 0;
     }
 
     /* Parsing finished */
     trace_spapr_cas_pvr(cpu->compat_pvr, explicit_match, best_compat);
 
-    /* Update CPUs */
-    if (cpu->compat_pvr != best_compat) {
-        Error *local_err = NULL;
+    return best_compat;
+}
 
-        ppc_set_compat_all(best_compat, &local_err);
+static target_ulong h_client_architecture_support(PowerPCCPU *cpu,
+                                                  sPAPRMachineState *spapr,
+                                                  target_ulong opcode,
+                                                  target_ulong *args)
+{
+    /* Working address in data buffer */
+    target_ulong addr = ppc64_phys_to_real(args[0]);
+    target_ulong ov_table;
+    uint32_t cas_pvr;
+    sPAPROptionVector *ov1_guest, *ov5_guest, *ov5_cas_old, *ov5_updates;
+    bool guest_radix;
+    Error *local_err = NULL;
+
+    cas_pvr = cas_check_pvr(cpu, &addr, &local_err);
+    if (local_err) {
+        error_report_err(local_err);
+        return H_HARDWARE;
+    }
+
+    /* Update CPUs */
+    if (cpu->compat_pvr != cas_pvr) {
+        ppc_set_compat_all(cas_pvr, &local_err);
         if (local_err) {
             error_report_err(local_err);
             return H_HARDWARE;
@@ -1108,7 +1120,7 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu,
     }
 
     /* For the future use: here @ov_table points to the first option vector */
-    ov_table = list;
+    ov_table = addr;
 
     ov1_guest = spapr_ovec_parse_vector(ov_table, 1);
     ov5_guest = spapr_ovec_parse_vector(ov_table, 5);
@@ -1162,7 +1174,7 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu,
     spapr_ovec_cleanup(ov5_updates);
 
     if (spapr->cas_reboot) {
-        qemu_system_reset_request();
+        qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
     } else {
         /* If ppc_spapr_reset() did not set up a HPT but one is necessary
          * (because the guest isn't going to use radix) then set it up here. */
diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
index 29c80bb3c8..0341bc069d 100644
--- a/hw/ppc/spapr_iommu.c
+++ b/hw/ppc/spapr_iommu.c
@@ -111,7 +111,7 @@ static void spapr_tce_free_table(uint64_t *table, int fd, uint32_t nb_table)
 
 /* Called from RCU critical section */
 static IOMMUTLBEntry spapr_tce_translate_iommu(MemoryRegion *iommu, hwaddr addr,
-                                               bool is_write)
+                                               IOMMUAccessFlags flag)
 {
     sPAPRTCETable *tcet = container_of(iommu, sPAPRTCETable, iommu);
     uint64_t tce;
diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index a7cff32bbf..e4daf8d5f1 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -1369,7 +1369,8 @@ out:
     }
 }
 
-static void spapr_phb_remove_pci_device_cb(DeviceState *dev, void *opaque)
+/* Callback to be called during DRC release. */
+void spapr_phb_remove_pci_device_cb(DeviceState *dev)
 {
     /* some version guests do not wait for completion of a device
      * cleanup (generally done asynchronously by the kernel) before
@@ -1392,7 +1393,7 @@ static void spapr_phb_remove_pci_device(sPAPRDRConnector *drc,
 {
     sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
 
-    drck->detach(drc, DEVICE(pdev), spapr_phb_remove_pci_device_cb, phb, errp);
+    drck->detach(drc, DEVICE(pdev), errp);
 }
 
 static sPAPRDRConnector *spapr_phb_get_pci_func_drc(sPAPRPHBState *phb,
diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
index 619f32c054..128d993d04 100644
--- a/hw/ppc/spapr_rtas.c
+++ b/hw/ppc/spapr_rtas.c
@@ -110,7 +110,7 @@ static void rtas_power_off(PowerPCCPU *cpu, sPAPRMachineState *spapr,
         rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
         return;
     }
-    qemu_system_shutdown_request();
+    qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
     cpu_stop_current();
     rtas_st(rets, 0, RTAS_OUT_SUCCESS);
 }
@@ -124,7 +124,7 @@ static void rtas_system_reboot(PowerPCCPU *cpu, sPAPRMachineState *spapr,
         rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
         return;
     }
-    qemu_system_reset_request();
+    qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
     rtas_st(rets, 0, RTAS_OUT_SUCCESS);
 }
 
diff --git a/hw/s390x/3270-ccw.c b/hw/s390x/3270-ccw.c
index a7a5b412e4..6e6eee4e90 100644
--- a/hw/s390x/3270-ccw.c
+++ b/hw/s390x/3270-ccw.c
@@ -98,9 +98,13 @@ static void emulated_ccw_3270_realize(DeviceState *ds, Error **errp)
     EmulatedCcw3270Class *ck = EMULATED_CCW_3270_GET_CLASS(dev);
     CcwDevice *cdev = CCW_DEVICE(ds);
     CCWDeviceClass *cdk = CCW_DEVICE_GET_CLASS(cdev);
-    SubchDev *sch = css_create_virtual_sch(cdev->devno, errp);
+    DeviceState *parent = DEVICE(cdev);
+    BusState *qbus = qdev_get_parent_bus(parent);
+    VirtualCssBus *cbus = VIRTUAL_CSS_BUS(qbus);
+    SubchDev *sch;
     Error *err = NULL;
 
+    sch = css_create_sch(cdev->devno, true, cbus->squash_mcss, errp);
     if (!sch) {
         return;
     }
diff --git a/hw/s390x/Makefile.objs b/hw/s390x/Makefile.objs
index 36bd4b1645..a8e5575a8a 100644
--- a/hw/s390x/Makefile.objs
+++ b/hw/s390x/Makefile.objs
@@ -14,3 +14,4 @@ obj-y += ccw-device.o
 obj-y += s390-pci-bus.o s390-pci-inst.o
 obj-y += s390-skeys.o
 obj-$(CONFIG_KVM) += s390-skeys-kvm.o
+obj-y += s390-ccw.o
diff --git a/hw/s390x/css-bridge.c b/hw/s390x/css-bridge.c
index b54ac01d37..823747fcd7 100644
--- a/hw/s390x/css-bridge.c
+++ b/hw/s390x/css-bridge.c
@@ -17,6 +17,7 @@
 #include "hw/s390x/css.h"
 #include "ccw-device.h"
 #include "hw/s390x/css-bridge.h"
+#include "cpu.h"
 
 /*
  * Invoke device-specific unplug handler, disable the subchannel
@@ -103,6 +104,7 @@ VirtualCssBus *virtual_css_bus_init(void)
     /* Create bus on bridge device */
     bus = qbus_create(TYPE_VIRTUAL_CSS_BUS, dev, "virtual-css");
     cbus = VIRTUAL_CSS_BUS(bus);
+    cbus->squash_mcss = s390_get_squash_mcss();
 
     /* Enable hotplugging */
     qbus_set_hotplug_handler(bus, dev, &error_abort);
diff --git a/hw/s390x/css.c b/hw/s390x/css.c
index 15c4f4b249..1e2f26b65a 100644
--- a/hw/s390x/css.c
+++ b/hw/s390x/css.c
@@ -13,6 +13,7 @@
 #include "qapi/error.h"
 #include "qapi/visitor.h"
 #include "hw/qdev.h"
+#include "qemu/error-report.h"
 #include "qemu/bitops.h"
 #include "exec/address-spaces.h"
 #include "cpu.h"
@@ -258,7 +259,7 @@ uint16_t css_build_subchannel_id(SubchDev *sch)
     return css_do_build_subchannel_id(sch->cssid, sch->ssid);
 }
 
-static void css_inject_io_interrupt(SubchDev *sch)
+void css_inject_io_interrupt(SubchDev *sch)
 {
     uint8_t isc = (sch->curr_status.pmcw.flags & PMCW_FLAGS_MASK_ISC) >> 11;
 
@@ -523,7 +524,7 @@ static int css_interpret_ccw(SubchDev *sch, hwaddr ccw_addr,
     return ret;
 }
 
-static void sch_handle_start_func(SubchDev *sch, ORB *orb)
+static void sch_handle_start_func_virtual(SubchDev *sch, ORB *orb)
 {
 
     PMCW *p = &sch->curr_status.pmcw;
@@ -625,13 +626,58 @@ static void sch_handle_start_func(SubchDev *sch, ORB *orb)
 
 }
 
+static int sch_handle_start_func_passthrough(SubchDev *sch, ORB *orb)
+{
+
+    PMCW *p = &sch->curr_status.pmcw;
+    SCSW *s = &sch->curr_status.scsw;
+    int ret;
+
+    if (!(s->ctrl & SCSW_ACTL_SUSP)) {
+        assert(orb != NULL);
+        p->intparm = orb->intparm;
+    }
+
+    /*
+     * Only support prefetch enable mode.
+     * Only support 64bit addressing idal.
+     */
+    if (!(orb->ctrl0 & ORB_CTRL0_MASK_PFCH) ||
+        !(orb->ctrl0 & ORB_CTRL0_MASK_C64)) {
+        return -EINVAL;
+    }
+
+    ret = s390_ccw_cmd_request(orb, s, sch->driver_data);
+    switch (ret) {
+    /* Currently we don't update control block and just return the cc code. */
+    case 0:
+        break;
+    case -EBUSY:
+        break;
+    case -ENODEV:
+        break;
+    case -EACCES:
+        /* Let's reflect an inaccessible host device by cc 3. */
+        ret = -ENODEV;
+        break;
+    default:
+       /*
+        * All other return codes will trigger a program check,
+        * or set cc to 1.
+        */
+       break;
+    };
+
+    return ret;
+}
+
 /*
  * On real machines, this would run asynchronously to the main vcpus.
  * We might want to make some parts of the ssch handling (interpreting
  * read/writes) asynchronous later on if we start supporting more than
  * our current very simple devices.
  */
-static void do_subchannel_work(SubchDev *sch, ORB *orb)
+int do_subchannel_work_virtual(SubchDev *sch, ORB *orb)
 {
 
     SCSW *s = &sch->curr_status.scsw;
@@ -642,12 +688,45 @@ static void do_subchannel_work(SubchDev *sch, ORB *orb)
         sch_handle_halt_func(sch);
     } else if (s->ctrl & SCSW_FCTL_START_FUNC) {
         /* Triggered by both ssch and rsch. */
-        sch_handle_start_func(sch, orb);
+        sch_handle_start_func_virtual(sch, orb);
     } else {
         /* Cannot happen. */
-        return;
+        return 0;
     }
     css_inject_io_interrupt(sch);
+    return 0;
+}
+
+int do_subchannel_work_passthrough(SubchDev *sch, ORB *orb)
+{
+    int ret;
+    SCSW *s = &sch->curr_status.scsw;
+
+    if (s->ctrl & SCSW_FCTL_CLEAR_FUNC) {
+        /* TODO: Clear handling */
+        sch_handle_clear_func(sch);
+        ret = 0;
+    } else if (s->ctrl & SCSW_FCTL_HALT_FUNC) {
+        /* TODO: Halt handling */
+        sch_handle_halt_func(sch);
+        ret = 0;
+    } else if (s->ctrl & SCSW_FCTL_START_FUNC) {
+        ret = sch_handle_start_func_passthrough(sch, orb);
+    } else {
+        /* Cannot happen. */
+        return -ENODEV;
+    }
+
+    return ret;
+}
+
+static int do_subchannel_work(SubchDev *sch, ORB *orb)
+{
+    if (sch->do_subchannel_work) {
+        return sch->do_subchannel_work(sch, orb);
+    } else {
+        return -EINVAL;
+    }
 }
 
 static void copy_pmcw_to_guest(PMCW *dest, const PMCW *src)
@@ -670,7 +749,7 @@ static void copy_pmcw_to_guest(PMCW *dest, const PMCW *src)
     dest->chars = cpu_to_be32(src->chars);
 }
 
-static void copy_scsw_to_guest(SCSW *dest, const SCSW *src)
+void copy_scsw_to_guest(SCSW *dest, const SCSW *src)
 {
     dest->flags = cpu_to_be16(src->flags);
     dest->ctrl = cpu_to_be16(src->ctrl);
@@ -966,8 +1045,7 @@ int css_do_ssch(SubchDev *sch, ORB *orb)
     s->ctrl |= (SCSW_FCTL_START_FUNC | SCSW_ACTL_START_PEND);
     s->flags &= ~SCSW_FLAGS_MASK_PNO;
 
-    do_subchannel_work(sch, orb);
-    ret = 0;
+    ret = do_subchannel_work(sch, orb);
 
 out:
     return ret;
@@ -1326,7 +1404,8 @@ unsigned int css_find_free_chpid(uint8_t cssid)
     return MAX_CHPID + 1;
 }
 
-static int css_add_virtual_chpid(uint8_t cssid, uint8_t chpid, uint8_t type)
+static int css_add_chpid(uint8_t cssid, uint8_t chpid, uint8_t type,
+                         bool is_virt)
 {
     CssImage *css;
 
@@ -1340,7 +1419,7 @@ static int css_add_virtual_chpid(uint8_t cssid, uint8_t chpid, uint8_t type)
     }
     css->chpids[chpid].in_use = 1;
     css->chpids[chpid].type = type;
-    css->chpids[chpid].is_virtual = 1;
+    css->chpids[chpid].is_virtual = is_virt;
 
     css_generate_chp_crws(cssid, chpid);
 
@@ -1364,7 +1443,7 @@ void css_sch_build_virtual_schib(SubchDev *sch, uint8_t chpid, uint8_t type)
     p->pam = 0x80;
     p->chpid[0] = chpid;
     if (!css->chpids[chpid].in_use) {
-        css_add_virtual_chpid(sch->cssid, chpid, type);
+        css_add_chpid(sch->cssid, chpid, type, true);
     }
 
     memset(s, 0, sizeof(SCSW));
@@ -1946,28 +2025,59 @@ PropertyInfo css_devid_ro_propinfo = {
     .get = get_css_devid,
 };
 
-SubchDev *css_create_virtual_sch(CssDevId bus_id, Error **errp)
+SubchDev *css_create_sch(CssDevId bus_id, bool is_virtual, bool squash_mcss,
+                         Error **errp)
 {
     uint16_t schid = 0;
     SubchDev *sch;
 
     if (bus_id.valid) {
-        /* Enforce use of virtual cssid. */
-        if (bus_id.cssid != VIRTUAL_CSSID) {
-            error_setg(errp, "cssid %hhx not valid for virtual devices",
-                       bus_id.cssid);
+        if (is_virtual != (bus_id.cssid == VIRTUAL_CSSID)) {
+            error_setg(errp, "cssid %hhx not valid for %s devices",
+                       bus_id.cssid,
+                       (is_virtual ? "virtual" : "non-virtual"));
             return NULL;
         }
+    }
+
+    if (bus_id.valid) {
+        if (squash_mcss) {
+            bus_id.cssid = channel_subsys.default_cssid;
+        } else if (!channel_subsys.css[bus_id.cssid]) {
+            css_create_css_image(bus_id.cssid, false);
+        }
+
         if (!css_find_free_subch_for_devno(bus_id.cssid, bus_id.ssid,
                                            bus_id.devid, &schid, errp)) {
             return NULL;
         }
-    } else {
-        bus_id.cssid = VIRTUAL_CSSID;
+    } else if (squash_mcss || is_virtual) {
+        bus_id.cssid = channel_subsys.default_cssid;
+
         if (!css_find_free_subch_and_devno(bus_id.cssid, &bus_id.ssid,
                                            &bus_id.devid, &schid, errp)) {
             return NULL;
         }
+    } else {
+        for (bus_id.cssid = 0; bus_id.cssid < MAX_CSSID; ++bus_id.cssid) {
+            if (bus_id.cssid == VIRTUAL_CSSID) {
+                continue;
+            }
+
+            if (!channel_subsys.css[bus_id.cssid]) {
+                css_create_css_image(bus_id.cssid, false);
+            }
+
+            if   (css_find_free_subch_and_devno(bus_id.cssid, &bus_id.ssid,
+                                                &bus_id.devid, &schid,
+                                                NULL)) {
+                break;
+            }
+            if (bus_id.cssid == MAX_CSSID) {
+                error_setg(errp, "Virtual channel subsystem is full!");
+                return NULL;
+            }
+        }
     }
 
     sch = g_malloc0(sizeof(*sch));
@@ -1978,3 +2088,147 @@ SubchDev *css_create_virtual_sch(CssDevId bus_id, Error **errp)
     css_subch_assign(sch->cssid, sch->ssid, schid, sch->devno, sch);
     return sch;
 }
+
+static int css_sch_get_chpids(SubchDev *sch, CssDevId *dev_id)
+{
+    char *fid_path;
+    FILE *fd;
+    uint32_t chpid[8];
+    int i;
+    PMCW *p = &sch->curr_status.pmcw;
+
+    fid_path = g_strdup_printf("/sys/bus/css/devices/%x.%x.%04x/chpids",
+                               dev_id->cssid, dev_id->ssid, dev_id->devid);
+    fd = fopen(fid_path, "r");
+    if (fd == NULL) {
+        error_report("%s: open %s failed", __func__, fid_path);
+        g_free(fid_path);
+        return -EINVAL;
+    }
+
+    if (fscanf(fd, "%x %x %x %x %x %x %x %x",
+        &chpid[0], &chpid[1], &chpid[2], &chpid[3],
+        &chpid[4], &chpid[5], &chpid[6], &chpid[7]) != 8) {
+        fclose(fd);
+        g_free(fid_path);
+        return -EINVAL;
+    }
+
+    for (i = 0; i < ARRAY_SIZE(p->chpid); i++) {
+        p->chpid[i] = chpid[i];
+    }
+
+    fclose(fd);
+    g_free(fid_path);
+
+    return 0;
+}
+
+static int css_sch_get_path_masks(SubchDev *sch, CssDevId *dev_id)
+{
+    char *fid_path;
+    FILE *fd;
+    uint32_t pim, pam, pom;
+    PMCW *p = &sch->curr_status.pmcw;
+
+    fid_path = g_strdup_printf("/sys/bus/css/devices/%x.%x.%04x/pimpampom",
+                               dev_id->cssid, dev_id->ssid, dev_id->devid);
+    fd = fopen(fid_path, "r");
+    if (fd == NULL) {
+        error_report("%s: open %s failed", __func__, fid_path);
+        g_free(fid_path);
+        return -EINVAL;
+    }
+
+    if (fscanf(fd, "%x %x %x", &pim, &pam, &pom) != 3) {
+        fclose(fd);
+        g_free(fid_path);
+        return -EINVAL;
+    }
+
+    p->pim = pim;
+    p->pam = pam;
+    p->pom = pom;
+    fclose(fd);
+    g_free(fid_path);
+
+    return 0;
+}
+
+static int css_sch_get_chpid_type(uint8_t chpid, uint32_t *type,
+                                  CssDevId *dev_id)
+{
+    char *fid_path;
+    FILE *fd;
+
+    fid_path = g_strdup_printf("/sys/devices/css%x/chp0.%02x/type",
+                               dev_id->cssid, chpid);
+    fd = fopen(fid_path, "r");
+    if (fd == NULL) {
+        error_report("%s: open %s failed", __func__, fid_path);
+        g_free(fid_path);
+        return -EINVAL;
+    }
+
+    if (fscanf(fd, "%x", type) != 1) {
+        fclose(fd);
+        g_free(fid_path);
+        return -EINVAL;
+    }
+
+    fclose(fd);
+    g_free(fid_path);
+
+    return 0;
+}
+
+/*
+ * We currently retrieve the real device information from sysfs to build the
+ * guest subchannel information block without considering the migration feature.
+ * We need to revisit this problem when we want to add migration support.
+ */
+int css_sch_build_schib(SubchDev *sch, CssDevId *dev_id)
+{
+    CssImage *css = channel_subsys.css[sch->cssid];
+    PMCW *p = &sch->curr_status.pmcw;
+    SCSW *s = &sch->curr_status.scsw;
+    uint32_t type;
+    int i, ret;
+
+    assert(css != NULL);
+    memset(p, 0, sizeof(PMCW));
+    p->flags |= PMCW_FLAGS_MASK_DNV;
+    /* We are dealing with I/O subchannels only. */
+    p->devno = sch->devno;
+
+    /* Grab path mask from sysfs. */
+    ret = css_sch_get_path_masks(sch, dev_id);
+    if (ret) {
+        return ret;
+    }
+
+    /* Grab chpids from sysfs. */
+    ret = css_sch_get_chpids(sch, dev_id);
+    if (ret) {
+        return ret;
+    }
+
+   /* Build chpid type. */
+    for (i = 0; i < ARRAY_SIZE(p->chpid); i++) {
+        if (p->chpid[i] && !css->chpids[p->chpid[i]].in_use) {
+            ret = css_sch_get_chpid_type(p->chpid[i], &type, dev_id);
+            if (ret) {
+                return ret;
+            }
+            css_add_chpid(sch->cssid, p->chpid[i], type, false);
+        }
+    }
+
+    memset(s, 0, sizeof(SCSW));
+    sch->curr_status.mba = 0;
+    for (i = 0; i < ARRAY_SIZE(sch->curr_status.mda); i++) {
+        sch->curr_status.mda[i] = 0;
+    }
+
+    return 0;
+}
diff --git a/hw/s390x/ipl.c b/hw/s390x/ipl.c
index 75d3c681a4..4e6469db0f 100644
--- a/hw/s390x/ipl.c
+++ b/hw/s390x/ipl.c
@@ -396,7 +396,7 @@ void s390_reipl_request(void)
     S390IPLState *ipl = get_ipl_device();
 
     ipl->reipl_requested = true;
-    qemu_system_reset_request();
+    qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
 }
 
 void s390_ipl_prepare_cpu(S390CPU *cpu)
diff --git a/hw/s390x/s390-ccw.c b/hw/s390x/s390-ccw.c
new file mode 100644
index 0000000000..8614dda6f8
--- /dev/null
+++ b/hw/s390x/s390-ccw.c
@@ -0,0 +1,153 @@
+/*
+ * s390 CCW Assignment Support
+ *
+ * Copyright 2017 IBM Corp
+ * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
+ *            Xiao Feng Ren <renxiaof@linux.vnet.ibm.com>
+ *            Pierre Morel <pmorel@linux.vnet.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2
+ * or (at your option) any later version. See the COPYING file in the
+ * top-level directory.
+ */
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "hw/sysbus.h"
+#include "libgen.h"
+#include "hw/s390x/css.h"
+#include "hw/s390x/css-bridge.h"
+#include "hw/s390x/s390-ccw.h"
+
+int s390_ccw_cmd_request(ORB *orb, SCSW *scsw, void *data)
+{
+    S390CCWDeviceClass *cdc = S390_CCW_DEVICE_GET_CLASS(data);
+
+    if (cdc->handle_request) {
+        return cdc->handle_request(orb, scsw, data);
+    } else {
+        return -ENOSYS;
+    }
+}
+
+static void s390_ccw_get_dev_info(S390CCWDevice *cdev,
+                                  char *sysfsdev,
+                                  Error **errp)
+{
+    unsigned int cssid, ssid, devid;
+    char dev_path[PATH_MAX] = {0}, *tmp;
+
+    if (!sysfsdev) {
+        error_setg(errp, "No host device provided");
+        error_append_hint(errp,
+                          "Use -device vfio-ccw,sysfsdev=PATH_TO_DEVICE\n");
+        return;
+    }
+
+    if (!realpath(sysfsdev, dev_path)) {
+        error_setg_errno(errp, errno, "Host device '%s' not found", sysfsdev);
+        return;
+    }
+
+    cdev->mdevid = g_strdup(basename(dev_path));
+
+    tmp = basename(dirname(dev_path));
+    if (sscanf(tmp, "%2x.%1x.%4x", &cssid, &ssid, &devid) != 3) {
+        error_setg_errno(errp, errno, "Failed to read %s", tmp);
+        return;
+    }
+
+    cdev->hostid.cssid = cssid;
+    cdev->hostid.ssid = ssid;
+    cdev->hostid.devid = devid;
+    cdev->hostid.valid = true;
+}
+
+static void s390_ccw_realize(S390CCWDevice *cdev, char *sysfsdev, Error **errp)
+{
+    CcwDevice *ccw_dev = CCW_DEVICE(cdev);
+    CCWDeviceClass *ck = CCW_DEVICE_GET_CLASS(ccw_dev);
+    DeviceState *parent = DEVICE(ccw_dev);
+    BusState *qbus = qdev_get_parent_bus(parent);
+    VirtualCssBus *cbus = VIRTUAL_CSS_BUS(qbus);
+    SubchDev *sch;
+    int ret;
+    Error *err = NULL;
+
+    s390_ccw_get_dev_info(cdev, sysfsdev, &err);
+    if (err) {
+        goto out_err_propagate;
+    }
+
+    sch = css_create_sch(ccw_dev->devno, false, cbus->squash_mcss, &err);
+    if (!sch) {
+        goto out_mdevid_free;
+    }
+    sch->driver_data = cdev;
+    sch->do_subchannel_work = do_subchannel_work_passthrough;
+
+    ccw_dev->sch = sch;
+    ret = css_sch_build_schib(sch, &cdev->hostid);
+    if (ret) {
+        error_setg_errno(&err, -ret, "%s: Failed to build initial schib",
+                         __func__);
+        goto out_err;
+    }
+
+    ck->realize(ccw_dev, &err);
+    if (err) {
+        goto out_err;
+    }
+
+    css_generate_sch_crws(sch->cssid, sch->ssid, sch->schid,
+                          parent->hotplugged, 1);
+    return;
+
+out_err:
+    css_subch_assign(sch->cssid, sch->ssid, sch->schid, sch->devno, NULL);
+    ccw_dev->sch = NULL;
+    g_free(sch);
+out_mdevid_free:
+    g_free(cdev->mdevid);
+out_err_propagate:
+    error_propagate(errp, err);
+}
+
+static void s390_ccw_unrealize(S390CCWDevice *cdev, Error **errp)
+{
+    CcwDevice *ccw_dev = CCW_DEVICE(cdev);
+    SubchDev *sch = ccw_dev->sch;
+
+    if (sch) {
+        css_subch_assign(sch->cssid, sch->ssid, sch->schid, sch->devno, NULL);
+        g_free(sch);
+        ccw_dev->sch = NULL;
+    }
+
+    g_free(cdev->mdevid);
+}
+
+static void s390_ccw_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    S390CCWDeviceClass *cdc = S390_CCW_DEVICE_CLASS(klass);
+
+    dc->bus_type = TYPE_VIRTUAL_CSS_BUS;
+    cdc->realize = s390_ccw_realize;
+    cdc->unrealize = s390_ccw_unrealize;
+}
+
+static const TypeInfo s390_ccw_info = {
+    .name          = TYPE_S390_CCW,
+    .parent        = TYPE_CCW_DEVICE,
+    .instance_size = sizeof(S390CCWDevice),
+    .class_size    = sizeof(S390CCWDeviceClass),
+    .class_init    = s390_ccw_class_init,
+    .abstract      = true,
+};
+
+static void register_s390_ccw_type(void)
+{
+    type_register_static(&s390_ccw_info);
+}
+
+type_init(register_s390_ccw_type)
diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
index 66a6fbeb8c..5651483781 100644
--- a/hw/s390x/s390-pci-bus.c
+++ b/hw/s390x/s390-pci-bus.c
@@ -357,7 +357,7 @@ out:
 }
 
 static IOMMUTLBEntry s390_translate_iommu(MemoryRegion *mr, hwaddr addr,
-                                          bool is_write)
+                                          IOMMUAccessFlags flag)
 {
     uint64_t pte;
     uint32_t flags;
diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c
index 314a9cbad4..8bc7c98682 100644
--- a/hw/s390x/s390-pci-inst.c
+++ b/hw/s390x/s390-pci-inst.c
@@ -624,7 +624,7 @@ int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2)
 
     mr = &iommu->iommu_mr;
     while (start < end) {
-        entry = mr->iommu_ops->translate(mr, start, 0);
+        entry = mr->iommu_ops->translate(mr, start, IOMMU_NONE);
 
         if (!entry.translated_addr) {
             pbdev->state = ZPCI_FS_ERROR;
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index fdd4384ff0..c9021f2fa9 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -136,10 +136,15 @@ static void ccw_init(MachineState *machine)
         kvm_s390_enable_css_support(s390_cpu_addr2state(0));
     }
     /*
-     * Create virtual css and set it as default so that non mcss-e
-     * enabled guests only see virtio devices.
+     * Non mcss-e enabled guests only see the devices from the default
+     * css, which is determined by the value of the squash_mcss property.
+     * Note: we must not squash non virtual devices to css 0xFE.
      */
-    ret = css_create_css_image(VIRTUAL_CSSID, true);
+    if (css_bus->squash_mcss) {
+        ret = css_create_css_image(0, true);
+    } else {
+        ret = css_create_css_image(VIRTUAL_CSSID, true);
+    }
     assert(ret == 0);
 
     /* Create VirtIO network adapters */
@@ -303,6 +308,20 @@ static void machine_set_loadparm(Object *obj, const char *val, Error **errp)
         ms->loadparm[i] = ' '; /* pad right with spaces */
     }
 }
+static inline bool machine_get_squash_mcss(Object *obj, Error **errp)
+{
+    S390CcwMachineState *ms = S390_CCW_MACHINE(obj);
+
+    return ms->s390_squash_mcss;
+}
+
+static inline void machine_set_squash_mcss(Object *obj, bool value,
+                                           Error **errp)
+{
+    S390CcwMachineState *ms = S390_CCW_MACHINE(obj);
+
+    ms->s390_squash_mcss = value;
+}
 
 static inline void s390_machine_initfn(Object *obj)
 {
@@ -328,6 +347,13 @@ static inline void s390_machine_initfn(Object *obj)
             " to upper case) to pass to machine loader, boot manager,"
             " and guest kernel",
             NULL);
+    object_property_add_bool(obj, "s390-squash-mcss",
+                             machine_get_squash_mcss,
+                             machine_set_squash_mcss, NULL);
+    object_property_set_description(obj, "s390-squash-mcss",
+            "enable/disable squashing subchannels into the default css",
+            NULL);
+    object_property_set_bool(obj, false, "s390-squash-mcss", NULL);
 }
 
 static const TypeInfo ccw_machine_info = {
diff --git a/hw/s390x/virtio-ccw.c b/hw/s390x/virtio-ccw.c
index e7167e3d05..e6a6f74be3 100644
--- a/hw/s390x/virtio-ccw.c
+++ b/hw/s390x/virtio-ccw.c
@@ -680,9 +680,13 @@ static void virtio_ccw_device_realize(VirtioCcwDevice *dev, Error **errp)
     VirtIOCCWDeviceClass *k = VIRTIO_CCW_DEVICE_GET_CLASS(dev);
     CcwDevice *ccw_dev = CCW_DEVICE(dev);
     CCWDeviceClass *ck = CCW_DEVICE_GET_CLASS(ccw_dev);
-    SubchDev *sch = css_create_virtual_sch(ccw_dev->devno, errp);
+    DeviceState *parent = DEVICE(ccw_dev);
+    BusState *qbus = qdev_get_parent_bus(parent);
+    VirtualCssBus *cbus = VIRTUAL_CSS_BUS(qbus);
+    SubchDev *sch;
     Error *err = NULL;
 
+    sch = css_create_sch(ccw_dev->devno, true, cbus->squash_mcss, errp);
     if (!sch) {
         return;
     }
@@ -697,6 +701,7 @@ static void virtio_ccw_device_realize(VirtioCcwDevice *dev, Error **errp)
     sch->disable_cb = virtio_sch_disable_cb;
     sch->id.reserved = 0xff;
     sch->id.cu_type = VIRTIO_CCW_CU_TYPE;
+    sch->do_subchannel_work = do_subchannel_work_virtual;
     ccw_dev->sch = sch;
     dev->indicators = NULL;
     dev->revision = -1;
diff --git a/hw/sh4/r2d.c b/hw/sh4/r2d.c
index 8f520cec1c..e6fc74ed87 100644
--- a/hw/sh4/r2d.c
+++ b/hw/sh4/r2d.c
@@ -164,7 +164,7 @@ r2d_fpga_write(void *opaque, hwaddr addr, uint64_t value, unsigned int size)
 	break;
     case PA_POWOFF:
         if (value & 1) {
-            qemu_system_shutdown_request();
+            qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
         }
         break;
     case PA_VERREG:
diff --git a/hw/timer/etraxfs_timer.c b/hw/timer/etraxfs_timer.c
index 8e18236c5a..d13bc30b2d 100644
--- a/hw/timer/etraxfs_timer.c
+++ b/hw/timer/etraxfs_timer.c
@@ -207,7 +207,7 @@ static void watchdog_hit(void *opaque)
         qemu_irq_raise(t->nmi);
     }
     else
-        qemu_system_reset_request();
+        qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
 
     t->wd_hits++;
 }
diff --git a/hw/timer/m48t59.c b/hw/timer/m48t59.c
index 474981a6ac..4a064fbfd2 100644
--- a/hw/timer/m48t59.c
+++ b/hw/timer/m48t59.c
@@ -1,7 +1,7 @@
 /*
  * QEMU M48T59 and M48T08 NVRAM emulation for PPC PREP and Sparc platforms
  *
- * Copyright (c) 2003-2005, 2007 Jocelyn Mayer
+ * Copyright (c) 2003-2005, 2007, 2017 Jocelyn Mayer
  * Copyright (c) 2013 Hervé Poussineau
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
@@ -159,7 +159,7 @@ static void watchdog_cb (void *opaque)
 	NVRAM->buffer[0x1FF7] = 0x00;
 	NVRAM->buffer[0x1FFC] &= ~0x40;
         /* May it be a hw CPU Reset instead ? */
-        qemu_system_reset_request();
+        qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
     } else {
 	qemu_set_irq(NVRAM->IRQ, 1);
 	qemu_set_irq(NVRAM->IRQ, 0);
diff --git a/hw/timer/milkymist-sysctl.c b/hw/timer/milkymist-sysctl.c
index 44885907c9..93bc6e1790 100644
--- a/hw/timer/milkymist-sysctl.c
+++ b/hw/timer/milkymist-sysctl.c
@@ -90,7 +90,7 @@ static void sysctl_icap_write(MilkymistSysctlState *s, uint32_t value)
     trace_milkymist_sysctl_icap_write(value);
     switch (value & 0xffff) {
     case 0x000e:
-        qemu_system_shutdown_request();
+        qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
         break;
     }
 }
@@ -195,7 +195,7 @@ static void sysctl_write(void *opaque, hwaddr addr, uint64_t value,
         s->regs[addr] = 1;
         break;
     case R_SYSTEM_ID:
-        qemu_system_reset_request();
+        qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
         break;
 
     case R_GPIO_IN:
diff --git a/hw/timer/pxa2xx_timer.c b/hw/timer/pxa2xx_timer.c
index 59002b407e..68ba5a70b3 100644
--- a/hw/timer/pxa2xx_timer.c
+++ b/hw/timer/pxa2xx_timer.c
@@ -401,7 +401,7 @@ static void pxa2xx_timer_tick(void *opaque)
     if (t->num == 3)
         if (i->reset3 & 1) {
             i->reset3 = 0;
-            qemu_system_reset_request();
+            qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
         }
 }
 
diff --git a/hw/vfio/Makefile.objs b/hw/vfio/Makefile.objs
index 05e7fbb93f..c3ab9097f1 100644
--- a/hw/vfio/Makefile.objs
+++ b/hw/vfio/Makefile.objs
@@ -1,6 +1,7 @@
 ifeq ($(CONFIG_LINUX), y)
 obj-$(CONFIG_SOFTMMU) += common.o
 obj-$(CONFIG_PCI) += pci.o pci-quirks.o
+obj-$(CONFIG_VFIO_CCW) += ccw.o
 obj-$(CONFIG_SOFTMMU) += platform.o
 obj-$(CONFIG_VFIO_XGMAC) += calxeda-xgmac.o
 obj-$(CONFIG_VFIO_AMD_XGBE) += amd-xgbe.o
diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c
new file mode 100644
index 0000000000..12d0262336
--- /dev/null
+++ b/hw/vfio/ccw.c
@@ -0,0 +1,434 @@
+/*
+ * vfio based subchannel assignment support
+ *
+ * Copyright 2017 IBM Corp.
+ * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
+ *            Xiao Feng Ren <renxiaof@linux.vnet.ibm.com>
+ *            Pierre Morel <pmorel@linux.vnet.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or(at
+ * your option) any version. See the COPYING file in the top-level
+ * directory.
+ */
+
+#include <linux/vfio.h>
+#include <linux/vfio_ccw.h>
+#include <sys/ioctl.h>
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "hw/sysbus.h"
+#include "hw/vfio/vfio.h"
+#include "hw/vfio/vfio-common.h"
+#include "hw/s390x/s390-ccw.h"
+#include "hw/s390x/ccw-device.h"
+#include "qemu/error-report.h"
+
+#define TYPE_VFIO_CCW "vfio-ccw"
+typedef struct VFIOCCWDevice {
+    S390CCWDevice cdev;
+    VFIODevice vdev;
+    uint64_t io_region_size;
+    uint64_t io_region_offset;
+    struct ccw_io_region *io_region;
+    EventNotifier io_notifier;
+} VFIOCCWDevice;
+
+static void vfio_ccw_compute_needs_reset(VFIODevice *vdev)
+{
+    vdev->needs_reset = false;
+}
+
+/*
+ * We don't need vfio_hot_reset_multi and vfio_eoi operations for
+ * vfio_ccw device now.
+ */
+struct VFIODeviceOps vfio_ccw_ops = {
+    .vfio_compute_needs_reset = vfio_ccw_compute_needs_reset,
+};
+
+static int vfio_ccw_handle_request(ORB *orb, SCSW *scsw, void *data)
+{
+    S390CCWDevice *cdev = data;
+    VFIOCCWDevice *vcdev = DO_UPCAST(VFIOCCWDevice, cdev, cdev);
+    struct ccw_io_region *region = vcdev->io_region;
+    int ret;
+
+    QEMU_BUILD_BUG_ON(sizeof(region->orb_area) != sizeof(ORB));
+    QEMU_BUILD_BUG_ON(sizeof(region->scsw_area) != sizeof(SCSW));
+    QEMU_BUILD_BUG_ON(sizeof(region->irb_area) != sizeof(IRB));
+
+    memset(region, 0, sizeof(*region));
+
+    memcpy(region->orb_area, orb, sizeof(ORB));
+    memcpy(region->scsw_area, scsw, sizeof(SCSW));
+
+again:
+    ret = pwrite(vcdev->vdev.fd, region,
+                 vcdev->io_region_size, vcdev->io_region_offset);
+    if (ret != vcdev->io_region_size) {
+        if (errno == EAGAIN) {
+            goto again;
+        }
+        error_report("vfio-ccw: wirte I/O region failed with errno=%d", errno);
+        return -errno;
+    }
+
+    return region->ret_code;
+}
+
+static void vfio_ccw_reset(DeviceState *dev)
+{
+    CcwDevice *ccw_dev = DO_UPCAST(CcwDevice, parent_obj, dev);
+    S390CCWDevice *cdev = DO_UPCAST(S390CCWDevice, parent_obj, ccw_dev);
+    VFIOCCWDevice *vcdev = DO_UPCAST(VFIOCCWDevice, cdev, cdev);
+
+    ioctl(vcdev->vdev.fd, VFIO_DEVICE_RESET);
+}
+
+static void vfio_ccw_io_notifier_handler(void *opaque)
+{
+    VFIOCCWDevice *vcdev = opaque;
+    struct ccw_io_region *region = vcdev->io_region;
+    S390CCWDevice *cdev = S390_CCW_DEVICE(vcdev);
+    CcwDevice *ccw_dev = CCW_DEVICE(cdev);
+    SubchDev *sch = ccw_dev->sch;
+    SCSW *s = &sch->curr_status.scsw;
+    PMCW *p = &sch->curr_status.pmcw;
+    IRB irb;
+    int size;
+
+    if (!event_notifier_test_and_clear(&vcdev->io_notifier)) {
+        return;
+    }
+
+    size = pread(vcdev->vdev.fd, region, vcdev->io_region_size,
+                 vcdev->io_region_offset);
+    if (size == -1) {
+        switch (errno) {
+        case ENODEV:
+            /* Generate a deferred cc 3 condition. */
+            s->flags |= SCSW_FLAGS_MASK_CC;
+            s->ctrl &= ~SCSW_CTRL_MASK_STCTL;
+            s->ctrl |= (SCSW_STCTL_ALERT | SCSW_STCTL_STATUS_PEND);
+            goto read_err;
+        case EFAULT:
+            /* Memory problem, generate channel data check. */
+            s->ctrl &= ~SCSW_ACTL_START_PEND;
+            s->cstat = SCSW_CSTAT_DATA_CHECK;
+            s->ctrl &= ~SCSW_CTRL_MASK_STCTL;
+            s->ctrl |= SCSW_STCTL_PRIMARY | SCSW_STCTL_SECONDARY |
+                       SCSW_STCTL_ALERT | SCSW_STCTL_STATUS_PEND;
+            goto read_err;
+        default:
+            /* Error, generate channel program check. */
+            s->ctrl &= ~SCSW_ACTL_START_PEND;
+            s->cstat = SCSW_CSTAT_PROG_CHECK;
+            s->ctrl &= ~SCSW_CTRL_MASK_STCTL;
+            s->ctrl |= SCSW_STCTL_PRIMARY | SCSW_STCTL_SECONDARY |
+                       SCSW_STCTL_ALERT | SCSW_STCTL_STATUS_PEND;
+            goto read_err;
+        }
+    } else if (size != vcdev->io_region_size) {
+        /* Information transfer error, generate channel-control check. */
+        s->ctrl &= ~SCSW_ACTL_START_PEND;
+        s->cstat = SCSW_CSTAT_CHN_CTRL_CHK;
+        s->ctrl &= ~SCSW_CTRL_MASK_STCTL;
+        s->ctrl |= SCSW_STCTL_PRIMARY | SCSW_STCTL_SECONDARY |
+                   SCSW_STCTL_ALERT | SCSW_STCTL_STATUS_PEND;
+        goto read_err;
+    }
+
+    memcpy(&irb, region->irb_area, sizeof(IRB));
+
+    /* Update control block via irb. */
+    copy_scsw_to_guest(s, &irb.scsw);
+
+    /* If a uint check is pending, copy sense data. */
+    if ((s->dstat & SCSW_DSTAT_UNIT_CHECK) &&
+        (p->chars & PMCW_CHARS_MASK_CSENSE)) {
+        memcpy(sch->sense_data, irb.ecw, sizeof(irb.ecw));
+    }
+
+read_err:
+    css_inject_io_interrupt(sch);
+}
+
+static void vfio_ccw_register_io_notifier(VFIOCCWDevice *vcdev, Error **errp)
+{
+    VFIODevice *vdev = &vcdev->vdev;
+    struct vfio_irq_info *irq_info;
+    struct vfio_irq_set *irq_set;
+    size_t argsz;
+    int32_t *pfd;
+
+    if (vdev->num_irqs < VFIO_CCW_IO_IRQ_INDEX + 1) {
+        error_setg(errp, "vfio: unexpected number of io irqs %u",
+                   vdev->num_irqs);
+        return;
+    }
+
+    argsz = sizeof(*irq_set);
+    irq_info = g_malloc0(argsz);
+    irq_info->index = VFIO_CCW_IO_IRQ_INDEX;
+    irq_info->argsz = argsz;
+    if (ioctl(vdev->fd, VFIO_DEVICE_GET_IRQ_INFO,
+              irq_info) < 0 || irq_info->count < 1) {
+        error_setg_errno(errp, errno, "vfio: Error getting irq info");
+        goto out_free_info;
+    }
+
+    if (event_notifier_init(&vcdev->io_notifier, 0)) {
+        error_setg_errno(errp, errno,
+                         "vfio: Unable to init event notifier for IO");
+        goto out_free_info;
+    }
+
+    argsz = sizeof(*irq_set) + sizeof(*pfd);
+    irq_set = g_malloc0(argsz);
+    irq_set->argsz = argsz;
+    irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD |
+                     VFIO_IRQ_SET_ACTION_TRIGGER;
+    irq_set->index = VFIO_CCW_IO_IRQ_INDEX;
+    irq_set->start = 0;
+    irq_set->count = 1;
+    pfd = (int32_t *) &irq_set->data;
+
+    *pfd = event_notifier_get_fd(&vcdev->io_notifier);
+    qemu_set_fd_handler(*pfd, vfio_ccw_io_notifier_handler, NULL, vcdev);
+    if (ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, irq_set)) {
+        error_setg(errp, "vfio: Failed to set up io notification");
+        qemu_set_fd_handler(*pfd, NULL, NULL, vcdev);
+        event_notifier_cleanup(&vcdev->io_notifier);
+    }
+
+    g_free(irq_set);
+
+out_free_info:
+    g_free(irq_info);
+}
+
+static void vfio_ccw_unregister_io_notifier(VFIOCCWDevice *vcdev)
+{
+    struct vfio_irq_set *irq_set;
+    size_t argsz;
+    int32_t *pfd;
+
+    argsz = sizeof(*irq_set) + sizeof(*pfd);
+    irq_set = g_malloc0(argsz);
+    irq_set->argsz = argsz;
+    irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD |
+                     VFIO_IRQ_SET_ACTION_TRIGGER;
+    irq_set->index = VFIO_CCW_IO_IRQ_INDEX;
+    irq_set->start = 0;
+    irq_set->count = 1;
+    pfd = (int32_t *) &irq_set->data;
+    *pfd = -1;
+
+    if (ioctl(vcdev->vdev.fd, VFIO_DEVICE_SET_IRQS, irq_set)) {
+        error_report("vfio: Failed to de-assign device io fd: %m");
+    }
+
+    qemu_set_fd_handler(event_notifier_get_fd(&vcdev->io_notifier),
+                        NULL, NULL, vcdev);
+    event_notifier_cleanup(&vcdev->io_notifier);
+
+    g_free(irq_set);
+}
+
+static void vfio_ccw_get_region(VFIOCCWDevice *vcdev, Error **errp)
+{
+    VFIODevice *vdev = &vcdev->vdev;
+    struct vfio_region_info *info;
+    int ret;
+
+    /* Sanity check device */
+    if (!(vdev->flags & VFIO_DEVICE_FLAGS_CCW)) {
+        error_setg(errp, "vfio: Um, this isn't a vfio-ccw device");
+        return;
+    }
+
+    if (vdev->num_regions < VFIO_CCW_CONFIG_REGION_INDEX + 1) {
+        error_setg(errp, "vfio: Unexpected number of the I/O region %u",
+                   vdev->num_regions);
+        return;
+    }
+
+    ret = vfio_get_region_info(vdev, VFIO_CCW_CONFIG_REGION_INDEX, &info);
+    if (ret) {
+        error_setg_errno(errp, -ret, "vfio: Error getting config info");
+        return;
+    }
+
+    vcdev->io_region_size = info->size;
+    if (sizeof(*vcdev->io_region) != vcdev->io_region_size) {
+        error_setg(errp, "vfio: Unexpected size of the I/O region");
+        g_free(info);
+        return;
+    }
+
+    vcdev->io_region_offset = info->offset;
+    vcdev->io_region = g_malloc0(info->size);
+
+    g_free(info);
+}
+
+static void vfio_ccw_put_region(VFIOCCWDevice *vcdev)
+{
+    g_free(vcdev->io_region);
+}
+
+static void vfio_put_device(VFIOCCWDevice *vcdev)
+{
+    g_free(vcdev->vdev.name);
+    vfio_put_base_device(&vcdev->vdev);
+}
+
+static VFIOGroup *vfio_ccw_get_group(S390CCWDevice *cdev, Error **errp)
+{
+    char *tmp, group_path[PATH_MAX];
+    ssize_t len;
+    int groupid;
+
+    tmp = g_strdup_printf("/sys/bus/css/devices/%x.%x.%04x/%s/iommu_group",
+                          cdev->hostid.cssid, cdev->hostid.ssid,
+                          cdev->hostid.devid, cdev->mdevid);
+    len = readlink(tmp, group_path, sizeof(group_path));
+    g_free(tmp);
+
+    if (len <= 0 || len >= sizeof(group_path)) {
+        error_setg(errp, "vfio: no iommu_group found");
+        return NULL;
+    }
+
+    group_path[len] = 0;
+
+    if (sscanf(basename(group_path), "%d", &groupid) != 1) {
+        error_setg(errp, "vfio: failed to read %s", group_path);
+        return NULL;
+    }
+
+    return vfio_get_group(groupid, &address_space_memory, errp);
+}
+
+static void vfio_ccw_realize(DeviceState *dev, Error **errp)
+{
+    VFIODevice *vbasedev;
+    VFIOGroup *group;
+    CcwDevice *ccw_dev = DO_UPCAST(CcwDevice, parent_obj, dev);
+    S390CCWDevice *cdev = DO_UPCAST(S390CCWDevice, parent_obj, ccw_dev);
+    VFIOCCWDevice *vcdev = DO_UPCAST(VFIOCCWDevice, cdev, cdev);
+    S390CCWDeviceClass *cdc = S390_CCW_DEVICE_GET_CLASS(cdev);
+    Error *err = NULL;
+
+    /* Call the class init function for subchannel. */
+    if (cdc->realize) {
+        cdc->realize(cdev, vcdev->vdev.sysfsdev, &err);
+        if (err) {
+            goto out_err_propagate;
+        }
+    }
+
+    group = vfio_ccw_get_group(cdev, &err);
+    if (!group) {
+        goto out_group_err;
+    }
+
+    vcdev->vdev.ops = &vfio_ccw_ops;
+    vcdev->vdev.type = VFIO_DEVICE_TYPE_CCW;
+    vcdev->vdev.name = g_strdup_printf("%x.%x.%04x", cdev->hostid.cssid,
+                                       cdev->hostid.ssid, cdev->hostid.devid);
+    QLIST_FOREACH(vbasedev, &group->device_list, next) {
+        if (strcmp(vbasedev->name, vcdev->vdev.name) == 0) {
+            error_setg(&err, "vfio: subchannel %s has already been attached",
+                       vcdev->vdev.name);
+            goto out_device_err;
+        }
+    }
+
+    if (vfio_get_device(group, cdev->mdevid, &vcdev->vdev, &err)) {
+        goto out_device_err;
+    }
+
+    vfio_ccw_get_region(vcdev, &err);
+    if (err) {
+        goto out_region_err;
+    }
+
+    vfio_ccw_register_io_notifier(vcdev, &err);
+    if (err) {
+        goto out_notifier_err;
+    }
+
+    return;
+
+out_notifier_err:
+    vfio_ccw_put_region(vcdev);
+out_region_err:
+    vfio_put_device(vcdev);
+out_device_err:
+    vfio_put_group(group);
+out_group_err:
+    if (cdc->unrealize) {
+        cdc->unrealize(cdev, NULL);
+    }
+out_err_propagate:
+    error_propagate(errp, err);
+}
+
+static void vfio_ccw_unrealize(DeviceState *dev, Error **errp)
+{
+    CcwDevice *ccw_dev = DO_UPCAST(CcwDevice, parent_obj, dev);
+    S390CCWDevice *cdev = DO_UPCAST(S390CCWDevice, parent_obj, ccw_dev);
+    VFIOCCWDevice *vcdev = DO_UPCAST(VFIOCCWDevice, cdev, cdev);
+    S390CCWDeviceClass *cdc = S390_CCW_DEVICE_GET_CLASS(cdev);
+    VFIOGroup *group = vcdev->vdev.group;
+
+    vfio_ccw_unregister_io_notifier(vcdev);
+    vfio_ccw_put_region(vcdev);
+    vfio_put_device(vcdev);
+    vfio_put_group(group);
+
+    if (cdc->unrealize) {
+        cdc->unrealize(cdev, errp);
+    }
+}
+
+static Property vfio_ccw_properties[] = {
+    DEFINE_PROP_STRING("sysfsdev", VFIOCCWDevice, vdev.sysfsdev),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static const VMStateDescription vfio_ccw_vmstate = {
+    .name = TYPE_VFIO_CCW,
+    .unmigratable = 1,
+};
+
+static void vfio_ccw_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    S390CCWDeviceClass *cdc = S390_CCW_DEVICE_CLASS(klass);
+
+    dc->props = vfio_ccw_properties;
+    dc->vmsd = &vfio_ccw_vmstate;
+    dc->desc = "VFIO-based subchannel assignment";
+    dc->realize = vfio_ccw_realize;
+    dc->unrealize = vfio_ccw_unrealize;
+    dc->reset = vfio_ccw_reset;
+
+    cdc->handle_request = vfio_ccw_handle_request;
+}
+
+static const TypeInfo vfio_ccw_info = {
+    .name = TYPE_VFIO_CCW,
+    .parent = TYPE_S390_CCW,
+    .instance_size = sizeof(VFIOCCWDevice),
+    .class_init = vfio_ccw_class_init,
+};
+
+static void register_vfio_ccw_type(void)
+{
+    type_register_static(&vfio_ccw_info);
+}
+
+type_init(register_vfio_ccw_type)
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index a8f12eeb35..b9abe77f5a 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -502,7 +502,7 @@ static void vfio_listener_region_add(MemoryListener *listener,
         QLIST_INSERT_HEAD(&container->giommu_list, giommu, giommu_next);
 
         memory_region_register_iommu_notifier(giommu->iommu, &giommu->n);
-        memory_region_iommu_replay(giommu->iommu, &giommu->n, false);
+        memory_region_iommu_replay(giommu->iommu, &giommu->n);
 
         return;
     }
diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
index b87a176770..dde094abb4 100644
--- a/hw/virtio/vhost-user.c
+++ b/hw/virtio/vhost-user.c
@@ -162,11 +162,11 @@ fail:
 }
 
 static int process_message_reply(struct vhost_dev *dev,
-                                 VhostUserMsg msg)
+                                 const VhostUserMsg *msg)
 {
     VhostUserMsg msg_reply;
 
-    if ((msg.flags & VHOST_USER_NEED_REPLY_MASK) == 0) {
+    if ((msg->flags & VHOST_USER_NEED_REPLY_MASK) == 0) {
         return 0;
     }
 
@@ -174,10 +174,10 @@ static int process_message_reply(struct vhost_dev *dev,
         return -1;
     }
 
-    if (msg_reply.request != msg.request) {
+    if (msg_reply.request != msg->request) {
         error_report("Received unexpected msg type."
                      "Expected %d received %d",
-                     msg.request, msg_reply.request);
+                     msg->request, msg_reply.request);
         return -1;
     }
 
@@ -324,7 +324,7 @@ static int vhost_user_set_mem_table(struct vhost_dev *dev,
     }
 
     if (reply_supported) {
-        return process_message_reply(dev, msg);
+        return process_message_reply(dev, &msg);
     }
 
     return 0;
@@ -716,7 +716,7 @@ static int vhost_user_net_set_mtu(struct vhost_dev *dev, uint16_t mtu)
 
     /* If reply_ack supported, slave has to ack specified MTU is valid */
     if (reply_supported) {
-        return process_message_reply(dev, msg);
+        return process_message_reply(dev, &msg);
     }
 
     return 0;
diff --git a/hw/watchdog/watchdog.c b/hw/watchdog/watchdog.c
index 2aeaf1fbc9..0c5c9cde1c 100644
--- a/hw/watchdog/watchdog.c
+++ b/hw/watchdog/watchdog.c
@@ -110,7 +110,7 @@ void watchdog_perform_action(void)
     switch (watchdog_action) {
     case WDT_RESET:             /* same as 'system_reset' in monitor */
         qapi_event_send_watchdog(WATCHDOG_EXPIRATION_ACTION_RESET, &error_abort);
-        qemu_system_reset_request();
+        qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
         break;
 
     case WDT_SHUTDOWN:          /* same as 'system_powerdown' in monitor */
diff --git a/hw/xenpv/xen_domainbuild.c b/hw/xenpv/xen_domainbuild.c
index 457a8976c3..c89ced2e88 100644
--- a/hw/xenpv/xen_domainbuild.c
+++ b/hw/xenpv/xen_domainbuild.c
@@ -148,7 +148,7 @@ static void xen_domain_poll(void *opaque)
     return;
 
 quit:
-    qemu_system_shutdown_request();
+    qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
 }
 
 static int xen_domain_watcher(void)
diff --git a/hw/xtensa/xtfpga.c b/hw/xtensa/xtfpga.c
index 11176e26bd..4636f8e934 100644
--- a/hw/xtensa/xtfpga.c
+++ b/hw/xtensa/xtfpga.c
@@ -100,7 +100,7 @@ static void lx60_fpga_write(void *opaque, hwaddr addr,
 
     case 0x10: /*board reset*/
         if (val == 0xdead) {
-            qemu_system_reset_request();
+            qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
         }
         break;
     }