summary refs log tree commit diff stats
path: root/hw
diff options
context:
space:
mode:
Diffstat (limited to 'hw')
-rw-r--r--hw/intc/xics.c14
-rw-r--r--hw/pci-host/Makefile.objs2
-rw-r--r--hw/pci-host/pnv_phb3.c1197
-rw-r--r--hw/pci-host/pnv_phb3_msi.c349
-rw-r--r--hw/pci-host/pnv_phb3_pbcq.c358
-rw-r--r--hw/pci-host/pnv_phb4.c1439
-rw-r--r--hw/pci-host/pnv_phb4_pec.c595
-rw-r--r--hw/ppc/Kconfig2
-rw-r--r--hw/ppc/pnv.c204
-rw-r--r--hw/ppc/pnv_core.c33
-rw-r--r--hw/ppc/pnv_homer.c1
-rw-r--r--hw/ppc/pnv_lpc.c6
-rw-r--r--hw/ppc/pnv_occ.c1
-rw-r--r--hw/ppc/pnv_pnor.c6
-rw-r--r--hw/ppc/ppc.c18
-rw-r--r--hw/ppc/prep.c384
-rw-r--r--hw/ppc/spapr.c63
-rw-r--r--hw/ppc/spapr_caps.c49
-rw-r--r--hw/ppc/spapr_events.c269
-rw-r--r--hw/ppc/spapr_hcall.c20
-rw-r--r--hw/ppc/spapr_rtas.c87
-rw-r--r--hw/ppc/spapr_vio.c11
-rw-r--r--hw/ppc/virtex_ml507.c7
-rw-r--r--hw/tpm/Kconfig6
-rw-r--r--hw/tpm/Makefile.objs1
-rw-r--r--hw/tpm/tpm_spapr.c429
-rw-r--r--hw/tpm/tpm_tis.c32
-rw-r--r--hw/tpm/tpm_util.c25
-rw-r--r--hw/tpm/tpm_util.h3
-rw-r--r--hw/tpm/trace-events16
30 files changed, 5163 insertions, 464 deletions
diff --git a/hw/intc/xics.c b/hw/intc/xics.c
index 785b607528..c5d507e707 100644
--- a/hw/intc/xics.c
+++ b/hw/intc/xics.c
@@ -217,7 +217,7 @@ void icp_eoi(ICPState *icp, uint32_t xirr)
     }
 }
 
-static void icp_irq(ICSState *ics, int server, int nr, uint8_t priority)
+void icp_irq(ICSState *ics, int server, int nr, uint8_t priority)
 {
     ICPState *icp = xics_icp_get(ics->xics, server);
 
@@ -512,8 +512,14 @@ void ics_write_xive(ICSState *ics, int srcno, int server,
 
 static void ics_reject(ICSState *ics, uint32_t nr)
 {
+    ICSStateClass *isc = ICS_GET_CLASS(ics);
     ICSIRQState *irq = ics->irqs + nr - ics->offset;
 
+    if (isc->reject) {
+        isc->reject(ics, nr);
+        return;
+    }
+
     trace_xics_ics_reject(nr, nr - ics->offset);
     if (irq->flags & XICS_FLAGS_IRQ_MSI) {
         irq->status |= XICS_STATUS_REJECTED;
@@ -524,8 +530,14 @@ static void ics_reject(ICSState *ics, uint32_t nr)
 
 void ics_resend(ICSState *ics)
 {
+    ICSStateClass *isc = ICS_GET_CLASS(ics);
     int i;
 
+    if (isc->resend) {
+        isc->resend(ics);
+        return;
+    }
+
     for (i = 0; i < ics->nr_irqs; i++) {
         /* FIXME: filter by server#? */
         if (ics->irqs[i].flags & XICS_FLAGS_IRQ_LSI) {
diff --git a/hw/pci-host/Makefile.objs b/hw/pci-host/Makefile.objs
index 9c466fab01..8c87e8494d 100644
--- a/hw/pci-host/Makefile.objs
+++ b/hw/pci-host/Makefile.objs
@@ -20,3 +20,5 @@ common-obj-$(CONFIG_PCI_EXPRESS_GENERIC_BRIDGE) += gpex.o
 common-obj-$(CONFIG_PCI_EXPRESS_XILINX) += xilinx-pcie.o
 
 common-obj-$(CONFIG_PCI_EXPRESS_DESIGNWARE) += designware.o
+obj-$(CONFIG_POWERNV) += pnv_phb4.o pnv_phb4_pec.o
+obj-$(CONFIG_POWERNV) += pnv_phb3.o pnv_phb3_msi.o pnv_phb3_pbcq.o
diff --git a/hw/pci-host/pnv_phb3.c b/hw/pci-host/pnv_phb3.c
new file mode 100644
index 0000000000..74618fadf0
--- /dev/null
+++ b/hw/pci-host/pnv_phb3.c
@@ -0,0 +1,1197 @@
+/*
+ * QEMU PowerPC PowerNV (POWER8) PHB3 model
+ *
+ * Copyright (c) 2014-2020, IBM Corporation.
+ *
+ * This code is licensed under the GPL version 2 or later. See the
+ * COPYING file in the top-level directory.
+ */
+#include "qemu/osdep.h"
+#include "qemu/log.h"
+#include "qapi/visitor.h"
+#include "qapi/error.h"
+#include "qemu-common.h"
+#include "hw/pci-host/pnv_phb3_regs.h"
+#include "hw/pci-host/pnv_phb3.h"
+#include "hw/pci/pcie_host.h"
+#include "hw/pci/pcie_port.h"
+#include "hw/ppc/pnv.h"
+#include "hw/irq.h"
+#include "hw/qdev-properties.h"
+
+#define phb3_error(phb, fmt, ...)                                       \
+    qemu_log_mask(LOG_GUEST_ERROR, "phb3[%d:%d]: " fmt "\n",            \
+                  (phb)->chip_id, (phb)->phb_id, ## __VA_ARGS__)
+
+static PCIDevice *pnv_phb3_find_cfg_dev(PnvPHB3 *phb)
+{
+    PCIHostState *pci = PCI_HOST_BRIDGE(phb);
+    uint64_t addr = phb->regs[PHB_CONFIG_ADDRESS >> 3];
+    uint8_t bus, devfn;
+
+    if (!(addr >> 63)) {
+        return NULL;
+    }
+    bus = (addr >> 52) & 0xff;
+    devfn = (addr >> 44) & 0xff;
+
+    return pci_find_device(pci->bus, bus, devfn);
+}
+
+/*
+ * The CONFIG_DATA register expects little endian accesses, but as the
+ * region is big endian, we have to swap the value.
+ */
+static void pnv_phb3_config_write(PnvPHB3 *phb, unsigned off,
+                                  unsigned size, uint64_t val)
+{
+    uint32_t cfg_addr, limit;
+    PCIDevice *pdev;
+
+    pdev = pnv_phb3_find_cfg_dev(phb);
+    if (!pdev) {
+        return;
+    }
+    cfg_addr = (phb->regs[PHB_CONFIG_ADDRESS >> 3] >> 32) & 0xffc;
+    cfg_addr |= off;
+    limit = pci_config_size(pdev);
+    if (limit <= cfg_addr) {
+        /*
+         * conventional pci device can be behind pcie-to-pci bridge.
+         * 256 <= addr < 4K has no effects.
+         */
+        return;
+    }
+    switch (size) {
+    case 1:
+        break;
+    case 2:
+        val = bswap16(val);
+        break;
+    case 4:
+        val = bswap32(val);
+        break;
+    default:
+        g_assert_not_reached();
+    }
+    pci_host_config_write_common(pdev, cfg_addr, limit, val, size);
+}
+
+static uint64_t pnv_phb3_config_read(PnvPHB3 *phb, unsigned off,
+                                     unsigned size)
+{
+    uint32_t cfg_addr, limit;
+    PCIDevice *pdev;
+    uint64_t val;
+
+    pdev = pnv_phb3_find_cfg_dev(phb);
+    if (!pdev) {
+        return ~0ull;
+    }
+    cfg_addr = (phb->regs[PHB_CONFIG_ADDRESS >> 3] >> 32) & 0xffc;
+    cfg_addr |= off;
+    limit = pci_config_size(pdev);
+    if (limit <= cfg_addr) {
+        /*
+         * conventional pci device can be behind pcie-to-pci bridge.
+         * 256 <= addr < 4K has no effects.
+         */
+        return ~0ull;
+    }
+    val = pci_host_config_read_common(pdev, cfg_addr, limit, size);
+    switch (size) {
+    case 1:
+        return val;
+    case 2:
+        return bswap16(val);
+    case 4:
+        return bswap32(val);
+    default:
+        g_assert_not_reached();
+    }
+}
+
+static void pnv_phb3_check_m32(PnvPHB3 *phb)
+{
+    uint64_t base, start, size;
+    MemoryRegion *parent;
+    PnvPBCQState *pbcq = &phb->pbcq;
+
+    if (memory_region_is_mapped(&phb->mr_m32)) {
+        memory_region_del_subregion(phb->mr_m32.container, &phb->mr_m32);
+    }
+
+    if (!(phb->regs[PHB_PHB3_CONFIG >> 3] & PHB_PHB3C_M32_EN)) {
+        return;
+    }
+
+    /* Grab geometry from registers */
+    base = phb->regs[PHB_M32_BASE_ADDR >> 3];
+    start = phb->regs[PHB_M32_START_ADDR >> 3];
+    size = ~(phb->regs[PHB_M32_BASE_MASK >> 3] | 0xfffc000000000000ull) + 1;
+
+    /* Check if it matches an enabled MMIO region in the PBCQ */
+    if (memory_region_is_mapped(&pbcq->mmbar0) &&
+        base >= pbcq->mmio0_base &&
+        (base + size) <= (pbcq->mmio0_base + pbcq->mmio0_size)) {
+        parent = &pbcq->mmbar0;
+        base -= pbcq->mmio0_base;
+    } else if (memory_region_is_mapped(&pbcq->mmbar1) &&
+               base >= pbcq->mmio1_base &&
+               (base + size) <= (pbcq->mmio1_base + pbcq->mmio1_size)) {
+        parent = &pbcq->mmbar1;
+        base -= pbcq->mmio1_base;
+    } else {
+        return;
+    }
+
+    /* Create alias */
+    memory_region_init_alias(&phb->mr_m32, OBJECT(phb), "phb3-m32",
+                             &phb->pci_mmio, start, size);
+    memory_region_add_subregion(parent, base, &phb->mr_m32);
+}
+
+static void pnv_phb3_check_m64(PnvPHB3 *phb, uint32_t index)
+{
+    uint64_t base, start, size, m64;
+    MemoryRegion *parent;
+    PnvPBCQState *pbcq = &phb->pbcq;
+
+    if (memory_region_is_mapped(&phb->mr_m64[index])) {
+        /* Should we destroy it in RCU friendly way... ? */
+        memory_region_del_subregion(phb->mr_m64[index].container,
+                                    &phb->mr_m64[index]);
+    }
+
+    /* Get table entry */
+    m64 = phb->ioda_M64BT[index];
+
+    if (!(m64 & IODA2_M64BT_ENABLE)) {
+        return;
+    }
+
+    /* Grab geometry from registers */
+    base = GETFIELD(IODA2_M64BT_BASE, m64) << 20;
+    if (m64 & IODA2_M64BT_SINGLE_PE) {
+        base &= ~0x1ffffffull;
+    }
+    size = GETFIELD(IODA2_M64BT_MASK, m64) << 20;
+    size |= 0xfffc000000000000ull;
+    size = ~size + 1;
+    start = base | (phb->regs[PHB_M64_UPPER_BITS >> 3]);
+
+    /* Check if it matches an enabled MMIO region in the PBCQ */
+    if (memory_region_is_mapped(&pbcq->mmbar0) &&
+        base >= pbcq->mmio0_base &&
+        (base + size) <= (pbcq->mmio0_base + pbcq->mmio0_size)) {
+        parent = &pbcq->mmbar0;
+        base -= pbcq->mmio0_base;
+    } else if (memory_region_is_mapped(&pbcq->mmbar1) &&
+               base >= pbcq->mmio1_base &&
+               (base + size) <= (pbcq->mmio1_base + pbcq->mmio1_size)) {
+        parent = &pbcq->mmbar1;
+        base -= pbcq->mmio1_base;
+    } else {
+        return;
+    }
+
+    /* Create alias */
+    memory_region_init_alias(&phb->mr_m64[index], OBJECT(phb), "phb3-m64",
+                             &phb->pci_mmio, start, size);
+    memory_region_add_subregion(parent, base, &phb->mr_m64[index]);
+}
+
+static void pnv_phb3_check_all_m64s(PnvPHB3 *phb)
+{
+    uint64_t i;
+
+    for (i = 0; i < PNV_PHB3_NUM_M64; i++) {
+        pnv_phb3_check_m64(phb, i);
+    }
+}
+
+static void pnv_phb3_lxivt_write(PnvPHB3 *phb, unsigned idx, uint64_t val)
+{
+    uint8_t server, prio;
+
+    phb->ioda_LXIVT[idx] = val & (IODA2_LXIVT_SERVER |
+                                  IODA2_LXIVT_PRIORITY |
+                                  IODA2_LXIVT_NODE_ID);
+    server = GETFIELD(IODA2_LXIVT_SERVER, val);
+    prio = GETFIELD(IODA2_LXIVT_PRIORITY, val);
+
+    /*
+     * The low order 2 bits are the link pointer (Type II interrupts).
+     * Shift back to get a valid IRQ server.
+     */
+    server >>= 2;
+
+    ics_write_xive(&phb->lsis, idx, server, prio, prio);
+}
+
+static uint64_t *pnv_phb3_ioda_access(PnvPHB3 *phb,
+                                      unsigned *out_table, unsigned *out_idx)
+{
+    uint64_t adreg = phb->regs[PHB_IODA_ADDR >> 3];
+    unsigned int index = GETFIELD(PHB_IODA_AD_TADR, adreg);
+    unsigned int table = GETFIELD(PHB_IODA_AD_TSEL, adreg);
+    unsigned int mask;
+    uint64_t *tptr = NULL;
+
+    switch (table) {
+    case IODA2_TBL_LIST:
+        tptr = phb->ioda_LIST;
+        mask = 7;
+        break;
+    case IODA2_TBL_LXIVT:
+        tptr = phb->ioda_LXIVT;
+        mask = 7;
+        break;
+    case IODA2_TBL_IVC_CAM:
+    case IODA2_TBL_RBA:
+        mask = 31;
+        break;
+    case IODA2_TBL_RCAM:
+        mask = 63;
+        break;
+    case IODA2_TBL_MRT:
+        mask = 7;
+        break;
+    case IODA2_TBL_PESTA:
+    case IODA2_TBL_PESTB:
+        mask = 255;
+        break;
+    case IODA2_TBL_TVT:
+        tptr = phb->ioda_TVT;
+        mask = 511;
+        break;
+    case IODA2_TBL_TCAM:
+    case IODA2_TBL_TDR:
+        mask = 63;
+        break;
+    case IODA2_TBL_M64BT:
+        tptr = phb->ioda_M64BT;
+        mask = 15;
+        break;
+    case IODA2_TBL_M32DT:
+        tptr = phb->ioda_MDT;
+        mask = 255;
+        break;
+    case IODA2_TBL_PEEV:
+        tptr = phb->ioda_PEEV;
+        mask = 3;
+        break;
+    default:
+        phb3_error(phb, "invalid IODA table %d", table);
+        return NULL;
+    }
+    index &= mask;
+    if (out_idx) {
+        *out_idx = index;
+    }
+    if (out_table) {
+        *out_table = table;
+    }
+    if (tptr) {
+        tptr += index;
+    }
+    if (adreg & PHB_IODA_AD_AUTOINC) {
+        index = (index + 1) & mask;
+        adreg = SETFIELD(PHB_IODA_AD_TADR, adreg, index);
+    }
+    phb->regs[PHB_IODA_ADDR >> 3] = adreg;
+    return tptr;
+}
+
+static uint64_t pnv_phb3_ioda_read(PnvPHB3 *phb)
+{
+        unsigned table;
+        uint64_t *tptr;
+
+        tptr = pnv_phb3_ioda_access(phb, &table, NULL);
+        if (!tptr) {
+            /* Return 0 on unsupported tables, not ff's */
+            return 0;
+        }
+        return *tptr;
+}
+
+static void pnv_phb3_ioda_write(PnvPHB3 *phb, uint64_t val)
+{
+        unsigned table, idx;
+        uint64_t *tptr;
+
+        tptr = pnv_phb3_ioda_access(phb, &table, &idx);
+        if (!tptr) {
+            return;
+        }
+
+        /* Handle side effects */
+        switch (table) {
+        case IODA2_TBL_LXIVT:
+            pnv_phb3_lxivt_write(phb, idx, val);
+            break;
+        case IODA2_TBL_M64BT:
+            *tptr = val;
+            pnv_phb3_check_m64(phb, idx);
+            break;
+        default:
+            *tptr = val;
+        }
+}
+
+/*
+ * This is called whenever the PHB LSI, MSI source ID register or
+ * the PBCQ irq filters are written.
+ */
+void pnv_phb3_remap_irqs(PnvPHB3 *phb)
+{
+    ICSState *ics = &phb->lsis;
+    uint32_t local, global, count, mask, comp;
+    uint64_t baren;
+    PnvPBCQState *pbcq = &phb->pbcq;
+
+    /*
+     * First check if we are enabled. Unlike real HW we don't separate
+     * TX and RX so we enable if both are set
+     */
+    baren = pbcq->nest_regs[PBCQ_NEST_BAR_EN];
+    if (!(baren & PBCQ_NEST_BAR_EN_IRSN_RX) ||
+        !(baren & PBCQ_NEST_BAR_EN_IRSN_TX)) {
+        ics->offset = 0;
+        return;
+    }
+
+    /* Grab local LSI source ID */
+    local = GETFIELD(PHB_LSI_SRC_ID, phb->regs[PHB_LSI_SOURCE_ID >> 3]) << 3;
+
+    /* Grab global one and compare */
+    global = GETFIELD(PBCQ_NEST_LSI_SRC,
+                      pbcq->nest_regs[PBCQ_NEST_LSI_SRC_ID]) << 3;
+    if (global != local) {
+        /*
+         * This happens during initialization, let's come back when we
+         * are properly configured
+         */
+        ics->offset = 0;
+        return;
+    }
+
+    /* Get the base on the powerbus */
+    comp = GETFIELD(PBCQ_NEST_IRSN_COMP,
+                    pbcq->nest_regs[PBCQ_NEST_IRSN_COMPARE]);
+    mask = GETFIELD(PBCQ_NEST_IRSN_COMP,
+                    pbcq->nest_regs[PBCQ_NEST_IRSN_MASK]);
+    count = ((~mask) + 1) & 0x7ffff;
+    phb->total_irq = count;
+
+    /* Sanity checks */
+    if ((global + PNV_PHB3_NUM_LSI) > count) {
+        phb3_error(phb, "LSIs out of reach: LSI base=%d total irq=%d", global,
+                   count);
+    }
+
+    if (count > 2048) {
+        phb3_error(phb, "More interrupts than supported: %d", count);
+    }
+
+    if ((comp & mask) != comp) {
+        phb3_error(phb, "IRQ compare bits not in mask: comp=0x%x mask=0x%x",
+                   comp, mask);
+        comp &= mask;
+    }
+    /* Setup LSI offset */
+    ics->offset = comp + global;
+
+    /* Setup MSI offset */
+    pnv_phb3_msi_update_config(&phb->msis, comp, count - PNV_PHB3_NUM_LSI);
+}
+
+static void pnv_phb3_lsi_src_id_write(PnvPHB3 *phb, uint64_t val)
+{
+    /* Sanitize content */
+    val &= PHB_LSI_SRC_ID;
+    phb->regs[PHB_LSI_SOURCE_ID >> 3] = val;
+    pnv_phb3_remap_irqs(phb);
+}
+
+static void pnv_phb3_rtc_invalidate(PnvPHB3 *phb, uint64_t val)
+{
+    PnvPhb3DMASpace *ds;
+
+    /* Always invalidate all for now ... */
+    QLIST_FOREACH(ds, &phb->dma_spaces, list) {
+        ds->pe_num = PHB_INVALID_PE;
+    }
+}
+
+
+static void pnv_phb3_update_msi_regions(PnvPhb3DMASpace *ds)
+{
+    uint64_t cfg = ds->phb->regs[PHB_PHB3_CONFIG >> 3];
+
+    if (cfg & PHB_PHB3C_32BIT_MSI_EN) {
+        if (!memory_region_is_mapped(&ds->msi32_mr)) {
+            memory_region_add_subregion(MEMORY_REGION(&ds->dma_mr),
+                                        0xffff0000, &ds->msi32_mr);
+        }
+    } else {
+        if (memory_region_is_mapped(&ds->msi32_mr)) {
+            memory_region_del_subregion(MEMORY_REGION(&ds->dma_mr),
+                                        &ds->msi32_mr);
+        }
+    }
+
+    if (cfg & PHB_PHB3C_64BIT_MSI_EN) {
+        if (!memory_region_is_mapped(&ds->msi64_mr)) {
+            memory_region_add_subregion(MEMORY_REGION(&ds->dma_mr),
+                                        (1ull << 60), &ds->msi64_mr);
+        }
+    } else {
+        if (memory_region_is_mapped(&ds->msi64_mr)) {
+            memory_region_del_subregion(MEMORY_REGION(&ds->dma_mr),
+                                        &ds->msi64_mr);
+        }
+    }
+}
+
+static void pnv_phb3_update_all_msi_regions(PnvPHB3 *phb)
+{
+    PnvPhb3DMASpace *ds;
+
+    QLIST_FOREACH(ds, &phb->dma_spaces, list) {
+        pnv_phb3_update_msi_regions(ds);
+    }
+}
+
+void pnv_phb3_reg_write(void *opaque, hwaddr off, uint64_t val, unsigned size)
+{
+    PnvPHB3 *phb = opaque;
+    bool changed;
+
+    /* Special case configuration data */
+    if ((off & 0xfffc) == PHB_CONFIG_DATA) {
+        pnv_phb3_config_write(phb, off & 0x3, size, val);
+        return;
+    }
+
+    /* Other registers are 64-bit only */
+    if (size != 8 || off & 0x7) {
+        phb3_error(phb, "Invalid register access, offset: 0x%"PRIx64" size: %d",
+                   off, size);
+        return;
+    }
+
+    /* Handle masking & filtering */
+    switch (off) {
+    case PHB_M64_UPPER_BITS:
+        val &= 0xfffc000000000000ull;
+        break;
+    case PHB_Q_DMA_R:
+        /*
+         * This is enough logic to make SW happy but we aren't actually
+         * quiescing the DMAs
+         */
+        if (val & PHB_Q_DMA_R_AUTORESET) {
+            val = 0;
+        } else {
+            val &= PHB_Q_DMA_R_QUIESCE_DMA;
+        }
+        break;
+    /* LEM stuff */
+    case PHB_LEM_FIR_AND_MASK:
+        phb->regs[PHB_LEM_FIR_ACCUM >> 3] &= val;
+        return;
+    case PHB_LEM_FIR_OR_MASK:
+        phb->regs[PHB_LEM_FIR_ACCUM >> 3] |= val;
+        return;
+    case PHB_LEM_ERROR_AND_MASK:
+        phb->regs[PHB_LEM_ERROR_MASK >> 3] &= val;
+        return;
+    case PHB_LEM_ERROR_OR_MASK:
+        phb->regs[PHB_LEM_ERROR_MASK >> 3] |= val;
+        return;
+    case PHB_LEM_WOF:
+        val = 0;
+        break;
+    }
+
+    /* Record whether it changed */
+    changed = phb->regs[off >> 3] != val;
+
+    /* Store in register cache first */
+    phb->regs[off >> 3] = val;
+
+    /* Handle side effects */
+    switch (off) {
+    case PHB_PHB3_CONFIG:
+        if (changed) {
+            pnv_phb3_update_all_msi_regions(phb);
+        }
+        /* fall through */
+    case PHB_M32_BASE_ADDR:
+    case PHB_M32_BASE_MASK:
+    case PHB_M32_START_ADDR:
+        if (changed) {
+            pnv_phb3_check_m32(phb);
+        }
+        break;
+    case PHB_M64_UPPER_BITS:
+        if (changed) {
+            pnv_phb3_check_all_m64s(phb);
+        }
+        break;
+    case PHB_LSI_SOURCE_ID:
+        if (changed) {
+            pnv_phb3_lsi_src_id_write(phb, val);
+        }
+        break;
+
+    /* IODA table accesses */
+    case PHB_IODA_DATA0:
+        pnv_phb3_ioda_write(phb, val);
+        break;
+
+    /* RTC invalidation */
+    case PHB_RTC_INVALIDATE:
+        pnv_phb3_rtc_invalidate(phb, val);
+        break;
+
+    /* FFI request */
+    case PHB_FFI_REQUEST:
+        pnv_phb3_msi_ffi(&phb->msis, val);
+        break;
+
+    /* Silent simple writes */
+    case PHB_CONFIG_ADDRESS:
+    case PHB_IODA_ADDR:
+    case PHB_TCE_KILL:
+    case PHB_TCE_SPEC_CTL:
+    case PHB_PEST_BAR:
+    case PHB_PELTV_BAR:
+    case PHB_RTT_BAR:
+    case PHB_RBA_BAR:
+    case PHB_IVT_BAR:
+    case PHB_FFI_LOCK:
+    case PHB_LEM_FIR_ACCUM:
+    case PHB_LEM_ERROR_MASK:
+    case PHB_LEM_ACTION0:
+    case PHB_LEM_ACTION1:
+        break;
+
+    /* Noise on anything else */
+    default:
+        qemu_log_mask(LOG_UNIMP, "phb3: reg_write 0x%"PRIx64"=%"PRIx64"\n",
+                      off, val);
+    }
+}
+
+uint64_t pnv_phb3_reg_read(void *opaque, hwaddr off, unsigned size)
+{
+    PnvPHB3 *phb = opaque;
+    PCIHostState *pci = PCI_HOST_BRIDGE(phb);
+    uint64_t val;
+
+    if ((off & 0xfffc) == PHB_CONFIG_DATA) {
+        return pnv_phb3_config_read(phb, off & 0x3, size);
+    }
+
+    /* Other registers are 64-bit only */
+    if (size != 8 || off & 0x7) {
+        phb3_error(phb, "Invalid register access, offset: 0x%"PRIx64" size: %d",
+                   off, size);
+        return ~0ull;
+    }
+
+    /* Default read from cache */
+    val = phb->regs[off >> 3];
+
+    switch (off) {
+    /* Simulate venice DD2.0 */
+    case PHB_VERSION:
+        return 0x000000a300000005ull;
+    case PHB_PCIE_SYSTEM_CONFIG:
+        return 0x441100fc30000000;
+
+    /* IODA table accesses */
+    case PHB_IODA_DATA0:
+        return pnv_phb3_ioda_read(phb);
+
+    /* Link training always appears trained */
+    case PHB_PCIE_DLP_TRAIN_CTL:
+        if (!pci_find_device(pci->bus, 1, 0)) {
+            return 0;
+        }
+        return PHB_PCIE_DLP_INBAND_PRESENCE | PHB_PCIE_DLP_TC_DL_LINKACT;
+
+    /* FFI Lock */
+    case PHB_FFI_LOCK:
+        /* Set lock and return previous value */
+        phb->regs[off >> 3] |= PHB_FFI_LOCK_STATE;
+        return val;
+
+    /* DMA read sync: make it look like it's complete */
+    case PHB_DMARD_SYNC:
+        return PHB_DMARD_SYNC_COMPLETE;
+
+    /* Silent simple reads */
+    case PHB_PHB3_CONFIG:
+    case PHB_M32_BASE_ADDR:
+    case PHB_M32_BASE_MASK:
+    case PHB_M32_START_ADDR:
+    case PHB_CONFIG_ADDRESS:
+    case PHB_IODA_ADDR:
+    case PHB_RTC_INVALIDATE:
+    case PHB_TCE_KILL:
+    case PHB_TCE_SPEC_CTL:
+    case PHB_PEST_BAR:
+    case PHB_PELTV_BAR:
+    case PHB_RTT_BAR:
+    case PHB_RBA_BAR:
+    case PHB_IVT_BAR:
+    case PHB_M64_UPPER_BITS:
+    case PHB_LEM_FIR_ACCUM:
+    case PHB_LEM_ERROR_MASK:
+    case PHB_LEM_ACTION0:
+    case PHB_LEM_ACTION1:
+        break;
+
+    /* Noise on anything else */
+    default:
+        qemu_log_mask(LOG_UNIMP, "phb3: reg_read 0x%"PRIx64"=%"PRIx64"\n",
+                      off, val);
+    }
+    return val;
+}
+
+static const MemoryRegionOps pnv_phb3_reg_ops = {
+    .read = pnv_phb3_reg_read,
+    .write = pnv_phb3_reg_write,
+    .valid.min_access_size = 1,
+    .valid.max_access_size = 8,
+    .impl.min_access_size = 1,
+    .impl.max_access_size = 8,
+    .endianness = DEVICE_BIG_ENDIAN,
+};
+
+static int pnv_phb3_map_irq(PCIDevice *pci_dev, int irq_num)
+{
+    /* Check that out properly ... */
+    return irq_num & 3;
+}
+
+static void pnv_phb3_set_irq(void *opaque, int irq_num, int level)
+{
+    PnvPHB3 *phb = opaque;
+
+    /* LSI only ... */
+    if (irq_num > 3) {
+        phb3_error(phb, "Unknown IRQ to set %d", irq_num);
+    }
+    qemu_set_irq(phb->qirqs[irq_num], level);
+}
+
+static bool pnv_phb3_resolve_pe(PnvPhb3DMASpace *ds)
+{
+    uint64_t rtt, addr;
+    uint16_t rte;
+    int bus_num;
+
+    /* Already resolved ? */
+    if (ds->pe_num != PHB_INVALID_PE) {
+        return true;
+    }
+
+    /* We need to lookup the RTT */
+    rtt = ds->phb->regs[PHB_RTT_BAR >> 3];
+    if (!(rtt & PHB_RTT_BAR_ENABLE)) {
+        phb3_error(ds->phb, "DMA with RTT BAR disabled !");
+        /* Set error bits ? fence ? ... */
+        return false;
+    }
+
+    /* Read RTE */
+    bus_num = pci_bus_num(ds->bus);
+    addr = rtt & PHB_RTT_BASE_ADDRESS_MASK;
+    addr += 2 * ((bus_num << 8) | ds->devfn);
+    if (dma_memory_read(&address_space_memory, addr, &rte, sizeof(rte))) {
+        phb3_error(ds->phb, "Failed to read RTT entry at 0x%"PRIx64, addr);
+        /* Set error bits ? fence ? ... */
+        return false;
+    }
+    rte = be16_to_cpu(rte);
+
+    /* Fail upon reading of invalid PE# */
+    if (rte >= PNV_PHB3_NUM_PE) {
+        phb3_error(ds->phb, "RTE for RID 0x%x invalid (%04x", ds->devfn, rte);
+        /* Set error bits ? fence ? ... */
+        return false;
+    }
+    ds->pe_num = rte;
+    return true;
+}
+
+static void pnv_phb3_translate_tve(PnvPhb3DMASpace *ds, hwaddr addr,
+                                   bool is_write, uint64_t tve,
+                                   IOMMUTLBEntry *tlb)
+{
+    uint64_t tta = GETFIELD(IODA2_TVT_TABLE_ADDR, tve);
+    int32_t  lev = GETFIELD(IODA2_TVT_NUM_LEVELS, tve);
+    uint32_t tts = GETFIELD(IODA2_TVT_TCE_TABLE_SIZE, tve);
+    uint32_t tps = GETFIELD(IODA2_TVT_IO_PSIZE, tve);
+    PnvPHB3 *phb = ds->phb;
+
+    /* Invalid levels */
+    if (lev > 4) {
+        phb3_error(phb, "Invalid #levels in TVE %d", lev);
+        return;
+    }
+
+    /* IO Page Size of 0 means untranslated, else use TCEs */
+    if (tps == 0) {
+        /*
+         * We only support non-translate in top window.
+         *
+         * TODO: Venice/Murano support it on bottom window above 4G and
+         * Naples suports it on everything
+         */
+        if (!(tve & PPC_BIT(51))) {
+            phb3_error(phb, "xlate for invalid non-translate TVE");
+            return;
+        }
+        /* TODO: Handle boundaries */
+
+        /* Use 4k pages like q35 ... for now */
+        tlb->iova = addr & 0xfffffffffffff000ull;
+        tlb->translated_addr = addr & 0x0003fffffffff000ull;
+        tlb->addr_mask = 0xfffull;
+        tlb->perm = IOMMU_RW;
+    } else {
+        uint32_t tce_shift, tbl_shift, sh;
+        uint64_t base, taddr, tce, tce_mask;
+
+        /* TVE disabled ? */
+        if (tts == 0) {
+            phb3_error(phb, "xlate for invalid translated TVE");
+            return;
+        }
+
+        /* Address bits per bottom level TCE entry */
+        tce_shift = tps + 11;
+
+        /* Address bits per table level */
+        tbl_shift = tts + 8;
+
+        /* Top level table base address */
+        base = tta << 12;
+
+        /* Total shift to first level */
+        sh = tbl_shift * lev + tce_shift;
+
+        /* TODO: Multi-level untested */
+        while ((lev--) >= 0) {
+            /* Grab the TCE address */
+            taddr = base | (((addr >> sh) & ((1ul << tbl_shift) - 1)) << 3);
+            if (dma_memory_read(&address_space_memory, taddr, &tce,
+                                sizeof(tce))) {
+                phb3_error(phb, "Failed to read TCE at 0x%"PRIx64, taddr);
+                return;
+            }
+            tce = be64_to_cpu(tce);
+
+            /* Check permission for indirect TCE */
+            if ((lev >= 0) && !(tce & 3)) {
+                phb3_error(phb, "Invalid indirect TCE at 0x%"PRIx64, taddr);
+                phb3_error(phb, " xlate %"PRIx64":%c TVE=%"PRIx64, addr,
+                           is_write ? 'W' : 'R', tve);
+                phb3_error(phb, " tta=%"PRIx64" lev=%d tts=%d tps=%d",
+                           tta, lev, tts, tps);
+                return;
+            }
+            sh -= tbl_shift;
+            base = tce & ~0xfffull;
+        }
+
+        /* We exit the loop with TCE being the final TCE */
+        tce_mask = ~((1ull << tce_shift) - 1);
+        tlb->iova = addr & tce_mask;
+        tlb->translated_addr = tce & tce_mask;
+        tlb->addr_mask = ~tce_mask;
+        tlb->perm = tce & 3;
+        if ((is_write & !(tce & 2)) || ((!is_write) && !(tce & 1))) {
+            phb3_error(phb, "TCE access fault at 0x%"PRIx64, taddr);
+            phb3_error(phb, " xlate %"PRIx64":%c TVE=%"PRIx64, addr,
+                       is_write ? 'W' : 'R', tve);
+            phb3_error(phb, " tta=%"PRIx64" lev=%d tts=%d tps=%d",
+                       tta, lev, tts, tps);
+        }
+    }
+}
+
+static IOMMUTLBEntry pnv_phb3_translate_iommu(IOMMUMemoryRegion *iommu,
+                                              hwaddr addr,
+                                              IOMMUAccessFlags flag,
+                                              int iommu_idx)
+{
+    PnvPhb3DMASpace *ds = container_of(iommu, PnvPhb3DMASpace, dma_mr);
+    int tve_sel;
+    uint64_t tve, cfg;
+    IOMMUTLBEntry ret = {
+        .target_as = &address_space_memory,
+        .iova = addr,
+        .translated_addr = 0,
+        .addr_mask = ~(hwaddr)0,
+        .perm = IOMMU_NONE,
+    };
+    PnvPHB3 *phb = ds->phb;
+
+    /* Resolve PE# */
+    if (!pnv_phb3_resolve_pe(ds)) {
+        phb3_error(phb, "Failed to resolve PE# for bus @%p (%d) devfn 0x%x",
+                   ds->bus, pci_bus_num(ds->bus), ds->devfn);
+        return ret;
+    }
+
+    /* Check top bits */
+    switch (addr >> 60) {
+    case 00:
+        /* DMA or 32-bit MSI ? */
+        cfg = ds->phb->regs[PHB_PHB3_CONFIG >> 3];
+        if ((cfg & PHB_PHB3C_32BIT_MSI_EN) &&
+            ((addr & 0xffffffffffff0000ull) == 0xffff0000ull)) {
+            phb3_error(phb, "xlate on 32-bit MSI region");
+            return ret;
+        }
+        /* Choose TVE XXX Use PHB3 Control Register */
+        tve_sel = (addr >> 59) & 1;
+        tve = ds->phb->ioda_TVT[ds->pe_num * 2 + tve_sel];
+        pnv_phb3_translate_tve(ds, addr, flag & IOMMU_WO, tve, &ret);
+        break;
+    case 01:
+        phb3_error(phb, "xlate on 64-bit MSI region");
+        break;
+    default:
+        phb3_error(phb, "xlate on unsupported address 0x%"PRIx64, addr);
+    }
+    return ret;
+}
+
+#define TYPE_PNV_PHB3_IOMMU_MEMORY_REGION "pnv-phb3-iommu-memory-region"
+#define PNV_PHB3_IOMMU_MEMORY_REGION(obj) \
+    OBJECT_CHECK(IOMMUMemoryRegion, (obj), TYPE_PNV_PHB3_IOMMU_MEMORY_REGION)
+
+static void pnv_phb3_iommu_memory_region_class_init(ObjectClass *klass,
+                                                    void *data)
+{
+    IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_CLASS(klass);
+
+    imrc->translate = pnv_phb3_translate_iommu;
+}
+
+static const TypeInfo pnv_phb3_iommu_memory_region_info = {
+    .parent = TYPE_IOMMU_MEMORY_REGION,
+    .name = TYPE_PNV_PHB3_IOMMU_MEMORY_REGION,
+    .class_init = pnv_phb3_iommu_memory_region_class_init,
+};
+
+/*
+ * MSI/MSIX memory region implementation.
+ * The handler handles both MSI and MSIX.
+ */
+static void pnv_phb3_msi_write(void *opaque, hwaddr addr,
+                               uint64_t data, unsigned size)
+{
+    PnvPhb3DMASpace *ds = opaque;
+
+    /* Resolve PE# */
+    if (!pnv_phb3_resolve_pe(ds)) {
+        phb3_error(ds->phb, "Failed to resolve PE# for bus @%p (%d) devfn 0x%x",
+                   ds->bus, pci_bus_num(ds->bus), ds->devfn);
+        return;
+    }
+
+    pnv_phb3_msi_send(&ds->phb->msis, addr, data, ds->pe_num);
+}
+
+/* There is no .read as the read result is undefined by PCI spec */
+static uint64_t pnv_phb3_msi_read(void *opaque, hwaddr addr, unsigned size)
+{
+    PnvPhb3DMASpace *ds = opaque;
+
+    phb3_error(ds->phb, "invalid read @ 0x%" HWADDR_PRIx, addr);
+    return -1;
+}
+
+static const MemoryRegionOps pnv_phb3_msi_ops = {
+    .read = pnv_phb3_msi_read,
+    .write = pnv_phb3_msi_write,
+    .endianness = DEVICE_LITTLE_ENDIAN
+};
+
+static AddressSpace *pnv_phb3_dma_iommu(PCIBus *bus, void *opaque, int devfn)
+{
+    PnvPHB3 *phb = opaque;
+    PnvPhb3DMASpace *ds;
+
+    QLIST_FOREACH(ds, &phb->dma_spaces, list) {
+        if (ds->bus == bus && ds->devfn == devfn) {
+            break;
+        }
+    }
+
+    if (ds == NULL) {
+        ds = g_malloc0(sizeof(PnvPhb3DMASpace));
+        ds->bus = bus;
+        ds->devfn = devfn;
+        ds->pe_num = PHB_INVALID_PE;
+        ds->phb = phb;
+        memory_region_init_iommu(&ds->dma_mr, sizeof(ds->dma_mr),
+                                 TYPE_PNV_PHB3_IOMMU_MEMORY_REGION,
+                                 OBJECT(phb), "phb3_iommu", UINT64_MAX);
+        address_space_init(&ds->dma_as, MEMORY_REGION(&ds->dma_mr),
+                           "phb3_iommu");
+        memory_region_init_io(&ds->msi32_mr, OBJECT(phb), &pnv_phb3_msi_ops,
+                              ds, "msi32", 0x10000);
+        memory_region_init_io(&ds->msi64_mr, OBJECT(phb), &pnv_phb3_msi_ops,
+                              ds, "msi64", 0x100000);
+        pnv_phb3_update_msi_regions(ds);
+
+        QLIST_INSERT_HEAD(&phb->dma_spaces, ds, list);
+    }
+    return &ds->dma_as;
+}
+
+static void pnv_phb3_instance_init(Object *obj)
+{
+    PnvPHB3 *phb = PNV_PHB3(obj);
+
+    QLIST_INIT(&phb->dma_spaces);
+
+    /* LSI sources */
+    object_initialize_child(obj, "lsi", &phb->lsis, sizeof(phb->lsis),
+                             TYPE_ICS, &error_abort, NULL);
+
+    /* Default init ... will be fixed by HW inits */
+    phb->lsis.offset = 0;
+
+    /* MSI sources */
+    object_initialize_child(obj, "msi", &phb->msis, sizeof(phb->msis),
+                            TYPE_PHB3_MSI, &error_abort, NULL);
+
+    /* Power Bus Common Queue */
+    object_initialize_child(obj, "pbcq", &phb->pbcq, sizeof(phb->pbcq),
+                            TYPE_PNV_PBCQ, &error_abort, NULL);
+
+    /* Root Port */
+    object_initialize_child(obj, "root", &phb->root, sizeof(phb->root),
+                            TYPE_PNV_PHB3_ROOT_PORT, &error_abort, NULL);
+    qdev_prop_set_int32(DEVICE(&phb->root), "addr", PCI_DEVFN(0, 0));
+    qdev_prop_set_bit(DEVICE(&phb->root), "multifunction", false);
+}
+
+static void pnv_phb3_realize(DeviceState *dev, Error **errp)
+{
+    PnvPHB3 *phb = PNV_PHB3(dev);
+    PCIHostState *pci = PCI_HOST_BRIDGE(dev);
+    PnvMachineState *pnv = PNV_MACHINE(qdev_get_machine());
+    Error *local_err = NULL;
+    int i;
+
+    if (phb->phb_id >= PNV8_CHIP_PHB3_MAX) {
+        error_setg(errp, "invalid PHB index: %d", phb->phb_id);
+        return;
+    }
+
+    /* LSI sources */
+    object_property_set_link(OBJECT(&phb->lsis), OBJECT(pnv), "xics",
+                                   &error_abort);
+    object_property_set_int(OBJECT(&phb->lsis), PNV_PHB3_NUM_LSI, "nr-irqs",
+                            &error_abort);
+    object_property_set_bool(OBJECT(&phb->lsis), true, "realized", &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
+
+    for (i = 0; i < phb->lsis.nr_irqs; i++) {
+        ics_set_irq_type(&phb->lsis, i, true);
+    }
+
+    phb->qirqs = qemu_allocate_irqs(ics_set_irq, &phb->lsis, phb->lsis.nr_irqs);
+
+    /* MSI sources */
+    object_property_set_link(OBJECT(&phb->msis), OBJECT(phb), "phb",
+                                   &error_abort);
+    object_property_set_link(OBJECT(&phb->msis), OBJECT(pnv), "xics",
+                                   &error_abort);
+    object_property_set_int(OBJECT(&phb->msis), PHB3_MAX_MSI, "nr-irqs",
+                            &error_abort);
+    object_property_set_bool(OBJECT(&phb->msis), true, "realized", &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
+
+    /* Power Bus Common Queue */
+    object_property_set_link(OBJECT(&phb->pbcq), OBJECT(phb), "phb",
+                                   &error_abort);
+    object_property_set_bool(OBJECT(&phb->pbcq), true, "realized", &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
+
+    /* Controller Registers */
+    memory_region_init_io(&phb->mr_regs, OBJECT(phb), &pnv_phb3_reg_ops, phb,
+                          "phb3-regs", 0x1000);
+
+    /*
+     * PHB3 doesn't support IO space. However, qemu gets very upset if
+     * we don't have an IO region to anchor IO BARs onto so we just
+     * initialize one which we never hook up to anything
+     */
+    memory_region_init(&phb->pci_io, OBJECT(phb), "pci-io", 0x10000);
+    memory_region_init(&phb->pci_mmio, OBJECT(phb), "pci-mmio",
+                       PCI_MMIO_TOTAL_SIZE);
+
+    pci->bus = pci_register_root_bus(dev, "root-bus",
+                                     pnv_phb3_set_irq, pnv_phb3_map_irq, phb,
+                                     &phb->pci_mmio, &phb->pci_io,
+                                     0, 4, TYPE_PNV_PHB3_ROOT_BUS);
+
+    pci_setup_iommu(pci->bus, pnv_phb3_dma_iommu, phb);
+
+    /* Add a single Root port */
+    qdev_prop_set_uint8(DEVICE(&phb->root), "chassis", phb->chip_id);
+    qdev_prop_set_uint16(DEVICE(&phb->root), "slot", phb->phb_id);
+    qdev_set_parent_bus(DEVICE(&phb->root), BUS(pci->bus));
+    qdev_init_nofail(DEVICE(&phb->root));
+}
+
+void pnv_phb3_update_regions(PnvPHB3 *phb)
+{
+    PnvPBCQState *pbcq = &phb->pbcq;
+
+    /* Unmap first always */
+    if (memory_region_is_mapped(&phb->mr_regs)) {
+        memory_region_del_subregion(&pbcq->phbbar, &phb->mr_regs);
+    }
+
+    /* Map registers if enabled */
+    if (memory_region_is_mapped(&pbcq->phbbar)) {
+        /* TODO: We should use the PHB BAR 2 register but we don't ... */
+        memory_region_add_subregion(&pbcq->phbbar, 0, &phb->mr_regs);
+    }
+
+    /* Check/update m32 */
+    if (memory_region_is_mapped(&phb->mr_m32)) {
+        pnv_phb3_check_m32(phb);
+    }
+    pnv_phb3_check_all_m64s(phb);
+}
+
+static const char *pnv_phb3_root_bus_path(PCIHostState *host_bridge,
+                                          PCIBus *rootbus)
+{
+    PnvPHB3 *phb = PNV_PHB3(host_bridge);
+
+    snprintf(phb->bus_path, sizeof(phb->bus_path), "00%02x:%02x",
+             phb->chip_id, phb->phb_id);
+    return phb->bus_path;
+}
+
+static Property pnv_phb3_properties[] = {
+        DEFINE_PROP_UINT32("index", PnvPHB3, phb_id, 0),
+        DEFINE_PROP_UINT32("chip-id", PnvPHB3, chip_id, 0),
+        DEFINE_PROP_END_OF_LIST(),
+};
+
+static void pnv_phb3_class_init(ObjectClass *klass, void *data)
+{
+    PCIHostBridgeClass *hc = PCI_HOST_BRIDGE_CLASS(klass);
+    DeviceClass *dc = DEVICE_CLASS(klass);
+
+    hc->root_bus_path = pnv_phb3_root_bus_path;
+    dc->realize = pnv_phb3_realize;
+    device_class_set_props(dc, pnv_phb3_properties);
+    set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
+    dc->user_creatable = false;
+}
+
+static const TypeInfo pnv_phb3_type_info = {
+    .name          = TYPE_PNV_PHB3,
+    .parent        = TYPE_PCIE_HOST_BRIDGE,
+    .instance_size = sizeof(PnvPHB3),
+    .class_init    = pnv_phb3_class_init,
+    .instance_init = pnv_phb3_instance_init,
+};
+
+static void pnv_phb3_root_bus_class_init(ObjectClass *klass, void *data)
+{
+    BusClass *k = BUS_CLASS(klass);
+
+    /*
+     * PHB3 has only a single root complex. Enforce the limit on the
+     * parent bus
+     */
+    k->max_dev = 1;
+}
+
+static const TypeInfo pnv_phb3_root_bus_info = {
+    .name = TYPE_PNV_PHB3_ROOT_BUS,
+    .parent = TYPE_PCIE_BUS,
+    .class_init = pnv_phb3_root_bus_class_init,
+    .interfaces = (InterfaceInfo[]) {
+        { INTERFACE_PCIE_DEVICE },
+        { }
+    },
+};
+
+static void pnv_phb3_root_port_realize(DeviceState *dev, Error **errp)
+{
+    PCIERootPortClass *rpc = PCIE_ROOT_PORT_GET_CLASS(dev);
+    Error *local_err = NULL;
+
+    rpc->parent_realize(dev, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
+}
+
+static void pnv_phb3_root_port_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
+    PCIERootPortClass *rpc = PCIE_ROOT_PORT_CLASS(klass);
+
+    dc->desc     = "IBM PHB3 PCIE Root Port";
+
+    device_class_set_parent_realize(dc, pnv_phb3_root_port_realize,
+                                    &rpc->parent_realize);
+    dc->user_creatable = false;
+
+    k->vendor_id = PCI_VENDOR_ID_IBM;
+    k->device_id = 0x03dc;
+    k->revision  = 0;
+
+    rpc->exp_offset = 0x48;
+    rpc->aer_offset = 0x100;
+}
+
+static const TypeInfo pnv_phb3_root_port_info = {
+    .name          = TYPE_PNV_PHB3_ROOT_PORT,
+    .parent        = TYPE_PCIE_ROOT_PORT,
+    .instance_size = sizeof(PnvPHB3RootPort),
+    .class_init    = pnv_phb3_root_port_class_init,
+};
+
+static void pnv_phb3_register_types(void)
+{
+    type_register_static(&pnv_phb3_root_bus_info);
+    type_register_static(&pnv_phb3_root_port_info);
+    type_register_static(&pnv_phb3_type_info);
+    type_register_static(&pnv_phb3_iommu_memory_region_info);
+}
+
+type_init(pnv_phb3_register_types)
diff --git a/hw/pci-host/pnv_phb3_msi.c b/hw/pci-host/pnv_phb3_msi.c
new file mode 100644
index 0000000000..ecfc1b2c4e
--- /dev/null
+++ b/hw/pci-host/pnv_phb3_msi.c
@@ -0,0 +1,349 @@
+/*
+ * QEMU PowerPC PowerNV (POWER8) PHB3 model
+ *
+ * Copyright (c) 2014-2020, IBM Corporation.
+ *
+ * This code is licensed under the GPL version 2 or later. See the
+ * COPYING file in the top-level directory.
+ */
+#include "qemu/osdep.h"
+#include "qemu/log.h"
+#include "qapi/error.h"
+#include "qemu-common.h"
+#include "hw/pci-host/pnv_phb3_regs.h"
+#include "hw/pci-host/pnv_phb3.h"
+#include "hw/ppc/pnv.h"
+#include "hw/pci/msi.h"
+#include "monitor/monitor.h"
+#include "hw/irq.h"
+#include "hw/qdev-properties.h"
+#include "sysemu/reset.h"
+
+static uint64_t phb3_msi_ive_addr(PnvPHB3 *phb, int srcno)
+{
+    uint64_t ivtbar = phb->regs[PHB_IVT_BAR >> 3];
+    uint64_t phbctl = phb->regs[PHB_CONTROL >> 3];
+
+    if (!(ivtbar & PHB_IVT_BAR_ENABLE)) {
+        qemu_log_mask(LOG_GUEST_ERROR, "Failed access to disable IVT BAR !");
+        return 0;
+    }
+
+    if (srcno >= (ivtbar & PHB_IVT_LENGTH_MASK)) {
+        qemu_log_mask(LOG_GUEST_ERROR, "MSI out of bounds (%d vs  0x%"PRIx64")",
+                      srcno, (uint64_t) (ivtbar & PHB_IVT_LENGTH_MASK));
+        return 0;
+    }
+
+    ivtbar &= PHB_IVT_BASE_ADDRESS_MASK;
+
+    if (phbctl & PHB_CTRL_IVE_128_BYTES) {
+        return ivtbar + 128 * srcno;
+    } else {
+        return ivtbar + 16 * srcno;
+    }
+}
+
+static bool phb3_msi_read_ive(PnvPHB3 *phb, int srcno, uint64_t *out_ive)
+{
+    uint64_t ive_addr, ive;
+
+    ive_addr = phb3_msi_ive_addr(phb, srcno);
+    if (!ive_addr) {
+        return false;
+    }
+
+    if (dma_memory_read(&address_space_memory, ive_addr, &ive, sizeof(ive))) {
+        qemu_log_mask(LOG_GUEST_ERROR, "Failed to read IVE at 0x%" PRIx64,
+                      ive_addr);
+        return false;
+    }
+    *out_ive = be64_to_cpu(ive);
+
+    return true;
+}
+
+static void phb3_msi_set_p(Phb3MsiState *msi, int srcno, uint8_t gen)
+{
+    uint64_t ive_addr;
+    uint8_t p = 0x01 | (gen << 1);
+
+    ive_addr = phb3_msi_ive_addr(msi->phb, srcno);
+    if (!ive_addr) {
+        return;
+    }
+
+    if (dma_memory_write(&address_space_memory, ive_addr + 4, &p, 1)) {
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "Failed to write IVE (set P) at 0x%" PRIx64, ive_addr);
+    }
+}
+
+static void phb3_msi_set_q(Phb3MsiState *msi, int srcno)
+{
+    uint64_t ive_addr;
+    uint8_t q = 0x01;
+
+    ive_addr = phb3_msi_ive_addr(msi->phb, srcno);
+    if (!ive_addr) {
+        return;
+    }
+
+    if (dma_memory_write(&address_space_memory, ive_addr + 5, &q, 1)) {
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "Failed to write IVE (set Q) at 0x%" PRIx64, ive_addr);
+    }
+}
+
+static void phb3_msi_try_send(Phb3MsiState *msi, int srcno, bool force)
+{
+    ICSState *ics = ICS(msi);
+    uint64_t ive;
+    uint64_t server, prio, pq, gen;
+
+    if (!phb3_msi_read_ive(msi->phb, srcno, &ive)) {
+        return;
+    }
+
+    server = GETFIELD(IODA2_IVT_SERVER, ive);
+    prio = GETFIELD(IODA2_IVT_PRIORITY, ive);
+    if (!force) {
+        pq = GETFIELD(IODA2_IVT_Q, ive) | (GETFIELD(IODA2_IVT_P, ive) << 1);
+    } else {
+        pq = 0;
+    }
+    gen = GETFIELD(IODA2_IVT_GEN, ive);
+
+    /*
+     * The low order 2 bits are the link pointer (Type II interrupts).
+     * Shift back to get a valid IRQ server.
+     */
+    server >>= 2;
+
+    switch (pq) {
+    case 0: /* 00 */
+        if (prio == 0xff) {
+            /* Masked, set Q */
+            phb3_msi_set_q(msi, srcno);
+        } else {
+            /* Enabled, set P and send */
+            phb3_msi_set_p(msi, srcno, gen);
+            icp_irq(ics, server, srcno + ics->offset, prio);
+        }
+        break;
+    case 2: /* 10 */
+        /* Already pending, set Q */
+        phb3_msi_set_q(msi, srcno);
+        break;
+    case 1: /* 01 */
+    case 3: /* 11 */
+    default:
+        /* Just drop stuff if Q already set */
+        break;
+    }
+}
+
+static void phb3_msi_set_irq(void *opaque, int srcno, int val)
+{
+    Phb3MsiState *msi = PHB3_MSI(opaque);
+
+    if (val) {
+        phb3_msi_try_send(msi, srcno, false);
+    }
+}
+
+
+void pnv_phb3_msi_send(Phb3MsiState *msi, uint64_t addr, uint16_t data,
+                       int32_t dev_pe)
+{
+    ICSState *ics = ICS(msi);
+    uint64_t ive;
+    uint16_t pe;
+    uint32_t src = ((addr >> 4) & 0xffff) | (data & 0x1f);
+
+    if (src >= ics->nr_irqs) {
+        qemu_log_mask(LOG_GUEST_ERROR, "MSI %d out of bounds", src);
+        return;
+    }
+    if (dev_pe >= 0) {
+        if (!phb3_msi_read_ive(msi->phb, src, &ive)) {
+            return;
+        }
+        pe = GETFIELD(IODA2_IVT_PE, ive);
+        if (pe != dev_pe) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "MSI %d send by PE#%d but assigned to PE#%d",
+                          src, dev_pe, pe);
+            return;
+        }
+    }
+    qemu_irq_pulse(msi->qirqs[src]);
+}
+
+void pnv_phb3_msi_ffi(Phb3MsiState *msi, uint64_t val)
+{
+    /* Emit interrupt */
+    pnv_phb3_msi_send(msi, val, 0, -1);
+
+    /* Clear FFI lock */
+    msi->phb->regs[PHB_FFI_LOCK >> 3] = 0;
+}
+
+static void phb3_msi_reject(ICSState *ics, uint32_t nr)
+{
+    Phb3MsiState *msi = PHB3_MSI(ics);
+    unsigned int srcno = nr - ics->offset;
+    unsigned int idx = srcno >> 6;
+    unsigned int bit = 1ull << (srcno & 0x3f);
+
+    assert(srcno < PHB3_MAX_MSI);
+
+    msi->rba[idx] |= bit;
+    msi->rba_sum |= (1u << idx);
+}
+
+static void phb3_msi_resend(ICSState *ics)
+{
+    Phb3MsiState *msi = PHB3_MSI(ics);
+    unsigned int i, j;
+
+    if (msi->rba_sum == 0) {
+        return;
+    }
+
+    for (i = 0; i < 32; i++) {
+        if ((msi->rba_sum & (1u << i)) == 0) {
+            continue;
+        }
+        msi->rba_sum &= ~(1u << i);
+        for (j = 0; j < 64; j++) {
+            if ((msi->rba[i] & (1ull << j)) == 0) {
+                continue;
+            }
+            msi->rba[i] &= ~(1u << j);
+            phb3_msi_try_send(msi, i * 64 + j, true);
+        }
+    }
+}
+
+static void phb3_msi_reset(DeviceState *dev)
+{
+    Phb3MsiState *msi = PHB3_MSI(dev);
+    ICSStateClass *icsc = ICS_GET_CLASS(dev);
+
+    icsc->parent_reset(dev);
+
+    memset(msi->rba, 0, sizeof(msi->rba));
+    msi->rba_sum = 0;
+}
+
+static void phb3_msi_reset_handler(void *dev)
+{
+    phb3_msi_reset(dev);
+}
+
+void pnv_phb3_msi_update_config(Phb3MsiState *msi, uint32_t base,
+                                uint32_t count)
+{
+    ICSState *ics = ICS(msi);
+
+    if (count > PHB3_MAX_MSI) {
+        count = PHB3_MAX_MSI;
+    }
+    ics->nr_irqs = count;
+    ics->offset = base;
+}
+
+static void phb3_msi_realize(DeviceState *dev, Error **errp)
+{
+    Phb3MsiState *msi = PHB3_MSI(dev);
+    ICSState *ics = ICS(msi);
+    ICSStateClass *icsc = ICS_GET_CLASS(ics);
+    Error *local_err = NULL;
+
+    assert(msi->phb);
+
+    icsc->parent_realize(dev, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
+
+    msi->qirqs = qemu_allocate_irqs(phb3_msi_set_irq, msi, ics->nr_irqs);
+
+    qemu_register_reset(phb3_msi_reset_handler, dev);
+}
+
+static void phb3_msi_instance_init(Object *obj)
+{
+    Phb3MsiState *msi = PHB3_MSI(obj);
+    ICSState *ics = ICS(obj);
+
+    object_property_add_link(obj, "phb", TYPE_PNV_PHB3,
+                             (Object **)&msi->phb,
+                             object_property_allow_set_link,
+                             OBJ_PROP_LINK_STRONG,
+                             &error_abort);
+
+    /* Will be overriden later */
+    ics->offset = 0;
+}
+
+static void phb3_msi_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    ICSStateClass *isc = ICS_CLASS(klass);
+
+    device_class_set_parent_realize(dc, phb3_msi_realize,
+                                    &isc->parent_realize);
+    device_class_set_parent_reset(dc, phb3_msi_reset,
+                                  &isc->parent_reset);
+
+    isc->reject = phb3_msi_reject;
+    isc->resend = phb3_msi_resend;
+}
+
+static const TypeInfo phb3_msi_info = {
+    .name = TYPE_PHB3_MSI,
+    .parent = TYPE_ICS,
+    .instance_size = sizeof(Phb3MsiState),
+    .class_init = phb3_msi_class_init,
+    .class_size = sizeof(ICSStateClass),
+    .instance_init = phb3_msi_instance_init,
+};
+
+static void pnv_phb3_msi_register_types(void)
+{
+    type_register_static(&phb3_msi_info);
+}
+
+type_init(pnv_phb3_msi_register_types);
+
+void pnv_phb3_msi_pic_print_info(Phb3MsiState *msi, Monitor *mon)
+{
+    ICSState *ics = ICS(msi);
+    int i;
+
+    monitor_printf(mon, "ICS %4x..%4x %p\n",
+                   ics->offset, ics->offset + ics->nr_irqs - 1, ics);
+
+    for (i = 0; i < ics->nr_irqs; i++) {
+        uint64_t ive;
+
+        if (!phb3_msi_read_ive(msi->phb, i, &ive)) {
+            return;
+        }
+
+        if (GETFIELD(IODA2_IVT_PRIORITY, ive) == 0xff) {
+            continue;
+        }
+
+        monitor_printf(mon, "  %4x %c%c server=%04x prio=%02x gen=%d\n",
+                       ics->offset + i,
+                       GETFIELD(IODA2_IVT_P, ive) ? 'P' : '-',
+                       GETFIELD(IODA2_IVT_Q, ive) ? 'Q' : '-',
+                       (uint32_t) GETFIELD(IODA2_IVT_SERVER, ive) >> 2,
+                       (uint32_t) GETFIELD(IODA2_IVT_PRIORITY, ive),
+                       (uint32_t) GETFIELD(IODA2_IVT_GEN, ive));
+    }
+}
diff --git a/hw/pci-host/pnv_phb3_pbcq.c b/hw/pci-host/pnv_phb3_pbcq.c
new file mode 100644
index 0000000000..f232228b0e
--- /dev/null
+++ b/hw/pci-host/pnv_phb3_pbcq.c
@@ -0,0 +1,358 @@
+/*
+ * QEMU PowerPC PowerNV (POWER8) PHB3 model
+ *
+ * Copyright (c) 2014-2020, IBM Corporation.
+ *
+ * This code is licensed under the GPL version 2 or later. See the
+ * COPYING file in the top-level directory.
+ */
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qemu-common.h"
+#include "qemu/log.h"
+#include "target/ppc/cpu.h"
+#include "hw/ppc/fdt.h"
+#include "hw/pci-host/pnv_phb3_regs.h"
+#include "hw/pci-host/pnv_phb3.h"
+#include "hw/ppc/pnv.h"
+#include "hw/ppc/pnv_xscom.h"
+#include "hw/pci/pci_bridge.h"
+#include "hw/pci/pci_bus.h"
+
+#include <libfdt.h>
+
+#define phb3_pbcq_error(pbcq, fmt, ...)                                 \
+    qemu_log_mask(LOG_GUEST_ERROR, "phb3_pbcq[%d:%d]: " fmt "\n",       \
+                  (pbcq)->phb->chip_id, (pbcq)->phb->phb_id, ## __VA_ARGS__)
+
+static uint64_t pnv_pbcq_nest_xscom_read(void *opaque, hwaddr addr,
+                                         unsigned size)
+{
+    PnvPBCQState *pbcq = PNV_PBCQ(opaque);
+    uint32_t offset = addr >> 3;
+
+    return pbcq->nest_regs[offset];
+}
+
+static uint64_t pnv_pbcq_pci_xscom_read(void *opaque, hwaddr addr,
+                                        unsigned size)
+{
+    PnvPBCQState *pbcq = PNV_PBCQ(opaque);
+    uint32_t offset = addr >> 3;
+
+    return pbcq->pci_regs[offset];
+}
+
+static uint64_t pnv_pbcq_spci_xscom_read(void *opaque, hwaddr addr,
+                                         unsigned size)
+{
+    PnvPBCQState *pbcq = PNV_PBCQ(opaque);
+    uint32_t offset = addr >> 3;
+
+    if (offset == PBCQ_SPCI_ASB_DATA) {
+        return pnv_phb3_reg_read(pbcq->phb,
+                                 pbcq->spci_regs[PBCQ_SPCI_ASB_ADDR], 8);
+    }
+    return pbcq->spci_regs[offset];
+}
+
+static void pnv_pbcq_update_map(PnvPBCQState *pbcq)
+{
+    uint64_t bar_en = pbcq->nest_regs[PBCQ_NEST_BAR_EN];
+    uint64_t bar, mask, size;
+
+    /*
+     * NOTE: This will really not work well if those are remapped
+     * after the PHB has created its sub regions. We could do better
+     * if we had a way to resize regions but we don't really care
+     * that much in practice as the stuff below really only happens
+     * once early during boot
+     */
+
+    /* Handle unmaps */
+    if (memory_region_is_mapped(&pbcq->mmbar0) &&
+        !(bar_en & PBCQ_NEST_BAR_EN_MMIO0)) {
+        memory_region_del_subregion(get_system_memory(), &pbcq->mmbar0);
+    }
+    if (memory_region_is_mapped(&pbcq->mmbar1) &&
+        !(bar_en & PBCQ_NEST_BAR_EN_MMIO1)) {
+        memory_region_del_subregion(get_system_memory(), &pbcq->mmbar1);
+    }
+    if (memory_region_is_mapped(&pbcq->phbbar) &&
+        !(bar_en & PBCQ_NEST_BAR_EN_PHB)) {
+        memory_region_del_subregion(get_system_memory(), &pbcq->phbbar);
+    }
+
+    /* Update PHB */
+    pnv_phb3_update_regions(pbcq->phb);
+
+    /* Handle maps */
+    if (!memory_region_is_mapped(&pbcq->mmbar0) &&
+        (bar_en & PBCQ_NEST_BAR_EN_MMIO0)) {
+        bar = pbcq->nest_regs[PBCQ_NEST_MMIO_BAR0] >> 14;
+        mask = pbcq->nest_regs[PBCQ_NEST_MMIO_MASK0];
+        size = ((~mask) >> 14) + 1;
+        memory_region_init(&pbcq->mmbar0, OBJECT(pbcq), "pbcq-mmio0", size);
+        memory_region_add_subregion(get_system_memory(), bar, &pbcq->mmbar0);
+        pbcq->mmio0_base = bar;
+        pbcq->mmio0_size = size;
+    }
+    if (!memory_region_is_mapped(&pbcq->mmbar1) &&
+        (bar_en & PBCQ_NEST_BAR_EN_MMIO1)) {
+        bar = pbcq->nest_regs[PBCQ_NEST_MMIO_BAR1] >> 14;
+        mask = pbcq->nest_regs[PBCQ_NEST_MMIO_MASK1];
+        size = ((~mask) >> 14) + 1;
+        memory_region_init(&pbcq->mmbar1, OBJECT(pbcq), "pbcq-mmio1", size);
+        memory_region_add_subregion(get_system_memory(), bar, &pbcq->mmbar1);
+        pbcq->mmio1_base = bar;
+        pbcq->mmio1_size = size;
+    }
+    if (!memory_region_is_mapped(&pbcq->phbbar)
+        && (bar_en & PBCQ_NEST_BAR_EN_PHB)) {
+        bar = pbcq->nest_regs[PBCQ_NEST_PHB_BAR] >> 14;
+        size = 0x1000;
+        memory_region_init(&pbcq->phbbar, OBJECT(pbcq), "pbcq-phb", size);
+        memory_region_add_subregion(get_system_memory(), bar, &pbcq->phbbar);
+    }
+
+    /* Update PHB */
+    pnv_phb3_update_regions(pbcq->phb);
+}
+
+static void pnv_pbcq_nest_xscom_write(void *opaque, hwaddr addr,
+                                uint64_t val, unsigned size)
+{
+    PnvPBCQState *pbcq = PNV_PBCQ(opaque);
+    uint32_t reg = addr >> 3;
+
+    switch (reg) {
+    case PBCQ_NEST_MMIO_BAR0:
+    case PBCQ_NEST_MMIO_BAR1:
+    case PBCQ_NEST_MMIO_MASK0:
+    case PBCQ_NEST_MMIO_MASK1:
+        if (pbcq->nest_regs[PBCQ_NEST_BAR_EN] &
+            (PBCQ_NEST_BAR_EN_MMIO0 |
+             PBCQ_NEST_BAR_EN_MMIO1)) {
+            phb3_pbcq_error(pbcq, "Changing enabled BAR unsupported");
+        }
+        pbcq->nest_regs[reg] = val & 0xffffffffc0000000ull;
+        break;
+    case PBCQ_NEST_PHB_BAR:
+        if (pbcq->nest_regs[PBCQ_NEST_BAR_EN] & PBCQ_NEST_BAR_EN_PHB) {
+            phb3_pbcq_error(pbcq, "Changing enabled BAR unsupported");
+        }
+        pbcq->nest_regs[reg] = val & 0xfffffffffc000000ull;
+        break;
+    case PBCQ_NEST_BAR_EN:
+        pbcq->nest_regs[reg] = val & 0xf800000000000000ull;
+        pnv_pbcq_update_map(pbcq);
+        pnv_phb3_remap_irqs(pbcq->phb);
+        break;
+    case PBCQ_NEST_IRSN_COMPARE:
+    case PBCQ_NEST_IRSN_MASK:
+        pbcq->nest_regs[reg] = val & PBCQ_NEST_IRSN_COMP;
+        pnv_phb3_remap_irqs(pbcq->phb);
+        break;
+    case PBCQ_NEST_LSI_SRC_ID:
+        pbcq->nest_regs[reg] = val & PBCQ_NEST_LSI_SRC;
+        pnv_phb3_remap_irqs(pbcq->phb);
+        break;
+    default:
+        phb3_pbcq_error(pbcq, "%s @0x%"HWADDR_PRIx"=%"PRIx64, __func__,
+                        addr, val);
+    }
+}
+
+static void pnv_pbcq_pci_xscom_write(void *opaque, hwaddr addr,
+                                     uint64_t val, unsigned size)
+{
+    PnvPBCQState *pbcq = PNV_PBCQ(opaque);
+    uint32_t reg = addr >> 3;
+
+    switch (reg) {
+    case PBCQ_PCI_BAR2:
+        pbcq->pci_regs[reg] = val & 0xfffffffffc000000ull;
+        pnv_pbcq_update_map(pbcq);
+    default:
+        phb3_pbcq_error(pbcq, "%s @0x%"HWADDR_PRIx"=%"PRIx64, __func__,
+                        addr, val);
+    }
+}
+
+static void pnv_pbcq_spci_xscom_write(void *opaque, hwaddr addr,
+                                uint64_t val, unsigned size)
+{
+    PnvPBCQState *pbcq = PNV_PBCQ(opaque);
+    uint32_t reg = addr >> 3;
+
+    switch (reg) {
+    case PBCQ_SPCI_ASB_ADDR:
+        pbcq->spci_regs[reg] = val & 0xfff;
+        break;
+    case PBCQ_SPCI_ASB_STATUS:
+        pbcq->spci_regs[reg] &= ~val;
+        break;
+    case PBCQ_SPCI_ASB_DATA:
+        pnv_phb3_reg_write(pbcq->phb, pbcq->spci_regs[PBCQ_SPCI_ASB_ADDR],
+                           val, 8);
+        break;
+    case PBCQ_SPCI_AIB_CAPP_EN:
+    case PBCQ_SPCI_CAPP_SEC_TMR:
+        break;
+    default:
+        phb3_pbcq_error(pbcq, "%s @0x%"HWADDR_PRIx"=%"PRIx64, __func__,
+                        addr, val);
+    }
+}
+
+static const MemoryRegionOps pnv_pbcq_nest_xscom_ops = {
+    .read = pnv_pbcq_nest_xscom_read,
+    .write = pnv_pbcq_nest_xscom_write,
+    .valid.min_access_size = 8,
+    .valid.max_access_size = 8,
+    .impl.min_access_size = 8,
+    .impl.max_access_size = 8,
+    .endianness = DEVICE_BIG_ENDIAN,
+};
+
+static const MemoryRegionOps pnv_pbcq_pci_xscom_ops = {
+    .read = pnv_pbcq_pci_xscom_read,
+    .write = pnv_pbcq_pci_xscom_write,
+    .valid.min_access_size = 8,
+    .valid.max_access_size = 8,
+    .impl.min_access_size = 8,
+    .impl.max_access_size = 8,
+    .endianness = DEVICE_BIG_ENDIAN,
+};
+
+static const MemoryRegionOps pnv_pbcq_spci_xscom_ops = {
+    .read = pnv_pbcq_spci_xscom_read,
+    .write = pnv_pbcq_spci_xscom_write,
+    .valid.min_access_size = 8,
+    .valid.max_access_size = 8,
+    .impl.min_access_size = 8,
+    .impl.max_access_size = 8,
+    .endianness = DEVICE_BIG_ENDIAN,
+};
+
+static void pnv_pbcq_default_bars(PnvPBCQState *pbcq)
+{
+    uint64_t mm0, mm1, reg;
+    PnvPHB3 *phb = pbcq->phb;
+
+    mm0 = 0x3d00000000000ull + 0x4000000000ull * phb->chip_id +
+            0x1000000000ull * phb->phb_id;
+    mm1 = 0x3ff8000000000ull + 0x0200000000ull * phb->chip_id +
+            0x0080000000ull * phb->phb_id;
+    reg = 0x3fffe40000000ull + 0x0000400000ull * phb->chip_id +
+            0x0000100000ull * phb->phb_id;
+
+    pbcq->nest_regs[PBCQ_NEST_MMIO_BAR0] = mm0 << 14;
+    pbcq->nest_regs[PBCQ_NEST_MMIO_BAR1] = mm1 << 14;
+    pbcq->nest_regs[PBCQ_NEST_PHB_BAR] = reg << 14;
+    pbcq->nest_regs[PBCQ_NEST_MMIO_MASK0] = 0x3fff000000000ull << 14;
+    pbcq->nest_regs[PBCQ_NEST_MMIO_MASK1] = 0x3ffff80000000ull << 14;
+    pbcq->pci_regs[PBCQ_PCI_BAR2] = reg << 14;
+}
+
+static void pnv_pbcq_realize(DeviceState *dev, Error **errp)
+{
+    PnvPBCQState *pbcq = PNV_PBCQ(dev);
+    PnvPHB3 *phb;
+    char name[32];
+
+    assert(pbcq->phb);
+    phb = pbcq->phb;
+
+    /* TODO: Fix OPAL to do that: establish default BAR values */
+    pnv_pbcq_default_bars(pbcq);
+
+    /* Initialize the XSCOM region for the PBCQ registers */
+    snprintf(name, sizeof(name), "xscom-pbcq-nest-%d.%d",
+             phb->chip_id, phb->phb_id);
+    pnv_xscom_region_init(&pbcq->xscom_nest_regs, OBJECT(dev),
+                          &pnv_pbcq_nest_xscom_ops, pbcq, name,
+                          PNV_XSCOM_PBCQ_NEST_SIZE);
+    snprintf(name, sizeof(name), "xscom-pbcq-pci-%d.%d",
+             phb->chip_id, phb->phb_id);
+    pnv_xscom_region_init(&pbcq->xscom_pci_regs, OBJECT(dev),
+                          &pnv_pbcq_pci_xscom_ops, pbcq, name,
+                          PNV_XSCOM_PBCQ_PCI_SIZE);
+    snprintf(name, sizeof(name), "xscom-pbcq-spci-%d.%d",
+             phb->chip_id, phb->phb_id);
+    pnv_xscom_region_init(&pbcq->xscom_spci_regs, OBJECT(dev),
+                          &pnv_pbcq_spci_xscom_ops, pbcq, name,
+                          PNV_XSCOM_PBCQ_SPCI_SIZE);
+}
+
+static int pnv_pbcq_dt_xscom(PnvXScomInterface *dev, void *fdt,
+                             int xscom_offset)
+{
+    const char compat[] = "ibm,power8-pbcq";
+    PnvPHB3 *phb = PNV_PBCQ(dev)->phb;
+    char *name;
+    int offset;
+    uint32_t lpc_pcba = PNV_XSCOM_PBCQ_NEST_BASE + 0x400 * phb->phb_id;
+    uint32_t reg[] = {
+        cpu_to_be32(lpc_pcba),
+        cpu_to_be32(PNV_XSCOM_PBCQ_NEST_SIZE),
+        cpu_to_be32(PNV_XSCOM_PBCQ_PCI_BASE + 0x400 * phb->phb_id),
+        cpu_to_be32(PNV_XSCOM_PBCQ_PCI_SIZE),
+        cpu_to_be32(PNV_XSCOM_PBCQ_SPCI_BASE + 0x040 * phb->phb_id),
+        cpu_to_be32(PNV_XSCOM_PBCQ_SPCI_SIZE)
+    };
+
+    name = g_strdup_printf("pbcq@%x", lpc_pcba);
+    offset = fdt_add_subnode(fdt, xscom_offset, name);
+    _FDT(offset);
+    g_free(name);
+
+    _FDT((fdt_setprop(fdt, offset, "reg", reg, sizeof(reg))));
+
+    _FDT((fdt_setprop_cell(fdt, offset, "ibm,phb-index", phb->phb_id)));
+    _FDT((fdt_setprop_cell(fdt, offset, "ibm,chip-id", phb->chip_id)));
+    _FDT((fdt_setprop(fdt, offset, "compatible", compat,
+                      sizeof(compat))));
+    return 0;
+}
+
+static void phb3_pbcq_instance_init(Object *obj)
+{
+    PnvPBCQState *pbcq = PNV_PBCQ(obj);
+
+    object_property_add_link(obj, "phb", TYPE_PNV_PHB3,
+                             (Object **)&pbcq->phb,
+                             object_property_allow_set_link,
+                             OBJ_PROP_LINK_STRONG,
+                             &error_abort);
+}
+
+static void pnv_pbcq_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    PnvXScomInterfaceClass *xdc = PNV_XSCOM_INTERFACE_CLASS(klass);
+
+    xdc->dt_xscom = pnv_pbcq_dt_xscom;
+
+    dc->realize = pnv_pbcq_realize;
+    dc->user_creatable = false;
+}
+
+static const TypeInfo pnv_pbcq_type_info = {
+    .name          = TYPE_PNV_PBCQ,
+    .parent        = TYPE_DEVICE,
+    .instance_size = sizeof(PnvPBCQState),
+    .instance_init = phb3_pbcq_instance_init,
+    .class_init    = pnv_pbcq_class_init,
+    .interfaces    = (InterfaceInfo[]) {
+        { TYPE_PNV_XSCOM_INTERFACE },
+        { }
+    }
+};
+
+static void pnv_pbcq_register_types(void)
+{
+    type_register_static(&pnv_pbcq_type_info);
+}
+
+type_init(pnv_pbcq_register_types)
diff --git a/hw/pci-host/pnv_phb4.c b/hw/pci-host/pnv_phb4.c
new file mode 100644
index 0000000000..23cf093928
--- /dev/null
+++ b/hw/pci-host/pnv_phb4.c
@@ -0,0 +1,1439 @@
+/*
+ * QEMU PowerPC PowerNV (POWER9) PHB4 model
+ *
+ * Copyright (c) 2018-2020, IBM Corporation.
+ *
+ * This code is licensed under the GPL version 2 or later. See the
+ * COPYING file in the top-level directory.
+ */
+#include "qemu/osdep.h"
+#include "qemu/log.h"
+#include "qapi/visitor.h"
+#include "qapi/error.h"
+#include "qemu-common.h"
+#include "monitor/monitor.h"
+#include "target/ppc/cpu.h"
+#include "hw/pci-host/pnv_phb4_regs.h"
+#include "hw/pci-host/pnv_phb4.h"
+#include "hw/pci/pcie_host.h"
+#include "hw/pci/pcie_port.h"
+#include "hw/ppc/pnv.h"
+#include "hw/ppc/pnv_xscom.h"
+#include "hw/irq.h"
+#include "hw/qdev-properties.h"
+
+#define phb_error(phb, fmt, ...)                                        \
+    qemu_log_mask(LOG_GUEST_ERROR, "phb4[%d:%d]: " fmt "\n",            \
+                  (phb)->chip_id, (phb)->phb_id, ## __VA_ARGS__)
+
+/*
+ * QEMU version of the GETFIELD/SETFIELD macros
+ *
+ * These are common with the PnvXive model.
+ */
+static inline uint64_t GETFIELD(uint64_t mask, uint64_t word)
+{
+    return (word & mask) >> ctz64(mask);
+}
+
+static inline uint64_t SETFIELD(uint64_t mask, uint64_t word,
+                                uint64_t value)
+{
+    return (word & ~mask) | ((value << ctz64(mask)) & mask);
+}
+
+static PCIDevice *pnv_phb4_find_cfg_dev(PnvPHB4 *phb)
+{
+    PCIHostState *pci = PCI_HOST_BRIDGE(phb);
+    uint64_t addr = phb->regs[PHB_CONFIG_ADDRESS >> 3];
+    uint8_t bus, devfn;
+
+    if (!(addr >> 63)) {
+        return NULL;
+    }
+    bus = (addr >> 52) & 0xff;
+    devfn = (addr >> 44) & 0xff;
+
+    /* We don't access the root complex this way */
+    if (bus == 0 && devfn == 0) {
+        return NULL;
+    }
+    return pci_find_device(pci->bus, bus, devfn);
+}
+
+/*
+ * The CONFIG_DATA register expects little endian accesses, but as the
+ * region is big endian, we have to swap the value.
+ */
+static void pnv_phb4_config_write(PnvPHB4 *phb, unsigned off,
+                                  unsigned size, uint64_t val)
+{
+    uint32_t cfg_addr, limit;
+    PCIDevice *pdev;
+
+    pdev = pnv_phb4_find_cfg_dev(phb);
+    if (!pdev) {
+        return;
+    }
+    cfg_addr = (phb->regs[PHB_CONFIG_ADDRESS >> 3] >> 32) & 0xffc;
+    cfg_addr |= off;
+    limit = pci_config_size(pdev);
+    if (limit <= cfg_addr) {
+        /*
+         * conventional pci device can be behind pcie-to-pci bridge.
+         * 256 <= addr < 4K has no effects.
+         */
+        return;
+    }
+    switch (size) {
+    case 1:
+        break;
+    case 2:
+        val = bswap16(val);
+        break;
+    case 4:
+        val = bswap32(val);
+        break;
+    default:
+        g_assert_not_reached();
+    }
+    pci_host_config_write_common(pdev, cfg_addr, limit, val, size);
+}
+
+static uint64_t pnv_phb4_config_read(PnvPHB4 *phb, unsigned off,
+                                     unsigned size)
+{
+    uint32_t cfg_addr, limit;
+    PCIDevice *pdev;
+    uint64_t val;
+
+    pdev = pnv_phb4_find_cfg_dev(phb);
+    if (!pdev) {
+        return ~0ull;
+    }
+    cfg_addr = (phb->regs[PHB_CONFIG_ADDRESS >> 3] >> 32) & 0xffc;
+    cfg_addr |= off;
+    limit = pci_config_size(pdev);
+    if (limit <= cfg_addr) {
+        /*
+         * conventional pci device can be behind pcie-to-pci bridge.
+         * 256 <= addr < 4K has no effects.
+         */
+        return ~0ull;
+    }
+    val = pci_host_config_read_common(pdev, cfg_addr, limit, size);
+    switch (size) {
+    case 1:
+        return val;
+    case 2:
+        return bswap16(val);
+    case 4:
+        return bswap32(val);
+    default:
+        g_assert_not_reached();
+    }
+}
+
+/*
+ * Root complex register accesses are memory mapped.
+ */
+static void pnv_phb4_rc_config_write(PnvPHB4 *phb, unsigned off,
+                                     unsigned size, uint64_t val)
+{
+    PCIHostState *pci = PCI_HOST_BRIDGE(phb);
+    PCIDevice *pdev;
+
+    if (size != 4) {
+        phb_error(phb, "rc_config_write invalid size %d\n", size);
+        return;
+    }
+
+    pdev = pci_find_device(pci->bus, 0, 0);
+    assert(pdev);
+
+    pci_host_config_write_common(pdev, off, PHB_RC_CONFIG_SIZE,
+                                 bswap32(val), 4);
+}
+
+static uint64_t pnv_phb4_rc_config_read(PnvPHB4 *phb, unsigned off,
+                                        unsigned size)
+{
+    PCIHostState *pci = PCI_HOST_BRIDGE(phb);
+    PCIDevice *pdev;
+    uint64_t val;
+
+    if (size != 4) {
+        phb_error(phb, "rc_config_read invalid size %d\n", size);
+        return ~0ull;
+    }
+
+    pdev = pci_find_device(pci->bus, 0, 0);
+    assert(pdev);
+
+    val = pci_host_config_read_common(pdev, off, PHB_RC_CONFIG_SIZE, 4);
+    return bswap32(val);
+}
+
+static void pnv_phb4_check_mbt(PnvPHB4 *phb, uint32_t index)
+{
+    uint64_t base, start, size, mbe0, mbe1;
+    MemoryRegion *parent;
+    char name[64];
+
+    /* Unmap first */
+    if (memory_region_is_mapped(&phb->mr_mmio[index])) {
+        /* Should we destroy it in RCU friendly way... ? */
+        memory_region_del_subregion(phb->mr_mmio[index].container,
+                                    &phb->mr_mmio[index]);
+    }
+
+    /* Get table entry */
+    mbe0 = phb->ioda_MBT[(index << 1)];
+    mbe1 = phb->ioda_MBT[(index << 1) + 1];
+
+    if (!(mbe0 & IODA3_MBT0_ENABLE)) {
+        return;
+    }
+
+    /* Grab geometry from registers */
+    base = GETFIELD(IODA3_MBT0_BASE_ADDR, mbe0) << 12;
+    size = GETFIELD(IODA3_MBT1_MASK, mbe1) << 12;
+    size |= 0xff00000000000000ull;
+    size = ~size + 1;
+
+    /* Calculate PCI side start address based on M32/M64 window type */
+    if (mbe0 & IODA3_MBT0_TYPE_M32) {
+        start = phb->regs[PHB_M32_START_ADDR >> 3];
+        if ((start + size) > 0x100000000ull) {
+            phb_error(phb, "M32 set beyond 4GB boundary !");
+            size = 0x100000000 - start;
+        }
+    } else {
+        start = base | (phb->regs[PHB_M64_UPPER_BITS >> 3]);
+    }
+
+    /* TODO: Figure out how to implemet/decode AOMASK */
+
+    /* Check if it matches an enabled MMIO region in the PEC stack */
+    if (memory_region_is_mapped(&phb->stack->mmbar0) &&
+        base >= phb->stack->mmio0_base &&
+        (base + size) <= (phb->stack->mmio0_base + phb->stack->mmio0_size)) {
+        parent = &phb->stack->mmbar0;
+        base -= phb->stack->mmio0_base;
+    } else if (memory_region_is_mapped(&phb->stack->mmbar1) &&
+        base >= phb->stack->mmio1_base &&
+        (base + size) <= (phb->stack->mmio1_base + phb->stack->mmio1_size)) {
+        parent = &phb->stack->mmbar1;
+        base -= phb->stack->mmio1_base;
+    } else {
+        phb_error(phb, "PHB MBAR %d out of parent bounds", index);
+        return;
+    }
+
+    /* Create alias (better name ?) */
+    snprintf(name, sizeof(name), "phb4-mbar%d", index);
+    memory_region_init_alias(&phb->mr_mmio[index], OBJECT(phb), name,
+                             &phb->pci_mmio, start, size);
+    memory_region_add_subregion(parent, base, &phb->mr_mmio[index]);
+}
+
+static void pnv_phb4_check_all_mbt(PnvPHB4 *phb)
+{
+    uint64_t i;
+    uint32_t num_windows = phb->big_phb ? PNV_PHB4_MAX_MMIO_WINDOWS :
+        PNV_PHB4_MIN_MMIO_WINDOWS;
+
+    for (i = 0; i < num_windows; i++) {
+        pnv_phb4_check_mbt(phb, i);
+    }
+}
+
+static uint64_t *pnv_phb4_ioda_access(PnvPHB4 *phb,
+                                      unsigned *out_table, unsigned *out_idx)
+{
+    uint64_t adreg = phb->regs[PHB_IODA_ADDR >> 3];
+    unsigned int index = GETFIELD(PHB_IODA_AD_TADR, adreg);
+    unsigned int table = GETFIELD(PHB_IODA_AD_TSEL, adreg);
+    unsigned int mask;
+    uint64_t *tptr = NULL;
+
+    switch (table) {
+    case IODA3_TBL_LIST:
+        tptr = phb->ioda_LIST;
+        mask = 7;
+        break;
+    case IODA3_TBL_MIST:
+        tptr = phb->ioda_MIST;
+        mask = phb->big_phb ? PNV_PHB4_MAX_MIST : (PNV_PHB4_MAX_MIST >> 1);
+        mask -= 1;
+        break;
+    case IODA3_TBL_RCAM:
+        mask = phb->big_phb ? 127 : 63;
+        break;
+    case IODA3_TBL_MRT:
+        mask = phb->big_phb ? 15 : 7;
+        break;
+    case IODA3_TBL_PESTA:
+    case IODA3_TBL_PESTB:
+        mask = phb->big_phb ? PNV_PHB4_MAX_PEs : (PNV_PHB4_MAX_PEs >> 1);
+        mask -= 1;
+        break;
+    case IODA3_TBL_TVT:
+        tptr = phb->ioda_TVT;
+        mask = phb->big_phb ? PNV_PHB4_MAX_TVEs : (PNV_PHB4_MAX_TVEs >> 1);
+        mask -= 1;
+        break;
+    case IODA3_TBL_TCR:
+    case IODA3_TBL_TDR:
+        mask = phb->big_phb ? 1023 : 511;
+        break;
+    case IODA3_TBL_MBT:
+        tptr = phb->ioda_MBT;
+        mask = phb->big_phb ? PNV_PHB4_MAX_MBEs : (PNV_PHB4_MAX_MBEs >> 1);
+        mask -= 1;
+        break;
+    case IODA3_TBL_MDT:
+        tptr = phb->ioda_MDT;
+        mask = phb->big_phb ? PNV_PHB4_MAX_PEs : (PNV_PHB4_MAX_PEs >> 1);
+        mask -= 1;
+        break;
+    case IODA3_TBL_PEEV:
+        tptr = phb->ioda_PEEV;
+        mask = phb->big_phb ? PNV_PHB4_MAX_PEEVs : (PNV_PHB4_MAX_PEEVs >> 1);
+        mask -= 1;
+        break;
+    default:
+        phb_error(phb, "invalid IODA table %d", table);
+        return NULL;
+    }
+    index &= mask;
+    if (out_idx) {
+        *out_idx = index;
+    }
+    if (out_table) {
+        *out_table = table;
+    }
+    if (tptr) {
+        tptr += index;
+    }
+    if (adreg & PHB_IODA_AD_AUTOINC) {
+        index = (index + 1) & mask;
+        adreg = SETFIELD(PHB_IODA_AD_TADR, adreg, index);
+    }
+
+    phb->regs[PHB_IODA_ADDR >> 3] = adreg;
+    return tptr;
+}
+
+static uint64_t pnv_phb4_ioda_read(PnvPHB4 *phb)
+{
+    unsigned table, idx;
+    uint64_t *tptr;
+
+    tptr = pnv_phb4_ioda_access(phb, &table, &idx);
+    if (!tptr) {
+        /* Special PESTA case */
+        if (table == IODA3_TBL_PESTA) {
+            return ((uint64_t)(phb->ioda_PEST_AB[idx] & 1)) << 63;
+        } else if (table == IODA3_TBL_PESTB) {
+            return ((uint64_t)(phb->ioda_PEST_AB[idx] & 2)) << 62;
+        }
+        /* Return 0 on unsupported tables, not ff's */
+        return 0;
+    }
+    return *tptr;
+}
+
+static void pnv_phb4_ioda_write(PnvPHB4 *phb, uint64_t val)
+{
+    unsigned table, idx;
+    uint64_t *tptr;
+
+    tptr = pnv_phb4_ioda_access(phb, &table, &idx);
+    if (!tptr) {
+        /* Special PESTA case */
+        if (table == IODA3_TBL_PESTA) {
+            phb->ioda_PEST_AB[idx] &= ~1;
+            phb->ioda_PEST_AB[idx] |= (val >> 63) & 1;
+        } else if (table == IODA3_TBL_PESTB) {
+            phb->ioda_PEST_AB[idx] &= ~2;
+            phb->ioda_PEST_AB[idx] |= (val >> 62) & 2;
+        }
+        return;
+    }
+
+    /* Handle side effects */
+    switch (table) {
+    case IODA3_TBL_LIST:
+        break;
+    case IODA3_TBL_MIST: {
+        /* Special mask for MIST partial write */
+        uint64_t adreg = phb->regs[PHB_IODA_ADDR >> 3];
+        uint32_t mmask = GETFIELD(PHB_IODA_AD_MIST_PWV, adreg);
+        uint64_t v = *tptr;
+        if (mmask == 0) {
+            mmask = 0xf;
+        }
+        if (mmask & 8) {
+            v &= 0x0000ffffffffffffull;
+            v |= 0xcfff000000000000ull & val;
+        }
+        if (mmask & 4) {
+            v &= 0xffff0000ffffffffull;
+            v |= 0x0000cfff00000000ull & val;
+        }
+        if (mmask & 2) {
+            v &= 0xffffffff0000ffffull;
+            v |= 0x00000000cfff0000ull & val;
+        }
+        if (mmask & 1) {
+            v &= 0xffffffffffff0000ull;
+            v |= 0x000000000000cfffull & val;
+        }
+        *tptr = val;
+        break;
+    }
+    case IODA3_TBL_MBT:
+        *tptr = val;
+
+        /* Copy accross the valid bit to the other half */
+        phb->ioda_MBT[idx ^ 1] &= 0x7fffffffffffffffull;
+        phb->ioda_MBT[idx ^ 1] |= 0x8000000000000000ull & val;
+
+        /* Update mappings */
+        pnv_phb4_check_mbt(phb, idx >> 1);
+        break;
+    default:
+        *tptr = val;
+    }
+}
+
+static void pnv_phb4_rtc_invalidate(PnvPHB4 *phb, uint64_t val)
+{
+    PnvPhb4DMASpace *ds;
+
+    /* Always invalidate all for now ... */
+    QLIST_FOREACH(ds, &phb->dma_spaces, list) {
+        ds->pe_num = PHB_INVALID_PE;
+    }
+}
+
+static void pnv_phb4_update_msi_regions(PnvPhb4DMASpace *ds)
+{
+    uint64_t cfg = ds->phb->regs[PHB_PHB4_CONFIG >> 3];
+
+    if (cfg & PHB_PHB4C_32BIT_MSI_EN) {
+        if (!memory_region_is_mapped(MEMORY_REGION(&ds->msi32_mr))) {
+            memory_region_add_subregion(MEMORY_REGION(&ds->dma_mr),
+                                        0xffff0000, &ds->msi32_mr);
+        }
+    } else {
+        if (memory_region_is_mapped(MEMORY_REGION(&ds->msi32_mr))) {
+            memory_region_del_subregion(MEMORY_REGION(&ds->dma_mr),
+                                        &ds->msi32_mr);
+        }
+    }
+
+    if (cfg & PHB_PHB4C_64BIT_MSI_EN) {
+        if (!memory_region_is_mapped(MEMORY_REGION(&ds->msi64_mr))) {
+            memory_region_add_subregion(MEMORY_REGION(&ds->dma_mr),
+                                        (1ull << 60), &ds->msi64_mr);
+        }
+    } else {
+        if (memory_region_is_mapped(MEMORY_REGION(&ds->msi64_mr))) {
+            memory_region_del_subregion(MEMORY_REGION(&ds->dma_mr),
+                                        &ds->msi64_mr);
+        }
+    }
+}
+
+static void pnv_phb4_update_all_msi_regions(PnvPHB4 *phb)
+{
+    PnvPhb4DMASpace *ds;
+
+    QLIST_FOREACH(ds, &phb->dma_spaces, list) {
+        pnv_phb4_update_msi_regions(ds);
+    }
+}
+
+static void pnv_phb4_update_xsrc(PnvPHB4 *phb)
+{
+    int shift, flags, i, lsi_base;
+    XiveSource *xsrc = &phb->xsrc;
+
+    /* The XIVE source characteristics can be set at run time */
+    if (phb->regs[PHB_CTRLR >> 3] & PHB_CTRLR_IRQ_PGSZ_64K) {
+        shift = XIVE_ESB_64K;
+    } else {
+        shift = XIVE_ESB_4K;
+    }
+    if (phb->regs[PHB_CTRLR >> 3] & PHB_CTRLR_IRQ_STORE_EOI) {
+        flags = XIVE_SRC_STORE_EOI;
+    } else {
+        flags = 0;
+    }
+
+    phb->xsrc.esb_shift = shift;
+    phb->xsrc.esb_flags = flags;
+
+    lsi_base = GETFIELD(PHB_LSI_SRC_ID, phb->regs[PHB_LSI_SOURCE_ID >> 3]);
+    lsi_base <<= 3;
+
+    /* TODO: handle reset values of PHB_LSI_SRC_ID */
+    if (!lsi_base) {
+        return;
+    }
+
+    /* TODO: need a xive_source_irq_reset_lsi() */
+    bitmap_zero(xsrc->lsi_map, xsrc->nr_irqs);
+
+    for (i = 0; i < xsrc->nr_irqs; i++) {
+        bool msi = (i < lsi_base || i >= (lsi_base + 8));
+        if (!msi) {
+            xive_source_irq_set_lsi(xsrc, i);
+        }
+    }
+}
+
+static void pnv_phb4_reg_write(void *opaque, hwaddr off, uint64_t val,
+                               unsigned size)
+{
+    PnvPHB4 *phb = PNV_PHB4(opaque);
+    bool changed;
+
+    /* Special case outbound configuration data */
+    if ((off & 0xfffc) == PHB_CONFIG_DATA) {
+        pnv_phb4_config_write(phb, off & 0x3, size, val);
+        return;
+    }
+
+    /* Special case RC configuration space */
+    if ((off & 0xf800) == PHB_RC_CONFIG_BASE) {
+        pnv_phb4_rc_config_write(phb, off & 0x7ff, size, val);
+        return;
+    }
+
+    /* Other registers are 64-bit only */
+    if (size != 8 || off & 0x7) {
+        phb_error(phb, "Invalid register access, offset: 0x%"PRIx64" size: %d",
+                   off, size);
+        return;
+    }
+
+    /* Handle masking */
+    switch (off) {
+    case PHB_LSI_SOURCE_ID:
+        val &= PHB_LSI_SRC_ID;
+        break;
+    case PHB_M64_UPPER_BITS:
+        val &= 0xff00000000000000ull;
+        break;
+    /* TCE Kill */
+    case PHB_TCE_KILL:
+        /* Clear top 3 bits which HW does to indicate successful queuing */
+        val &= ~(PHB_TCE_KILL_ALL | PHB_TCE_KILL_PE | PHB_TCE_KILL_ONE);
+        break;
+    case PHB_Q_DMA_R:
+        /*
+         * This is enough logic to make SW happy but we aren't
+         * actually quiescing the DMAs
+         */
+        if (val & PHB_Q_DMA_R_AUTORESET) {
+            val = 0;
+        } else {
+            val &= PHB_Q_DMA_R_QUIESCE_DMA;
+        }
+        break;
+    /* LEM stuff */
+    case PHB_LEM_FIR_AND_MASK:
+        phb->regs[PHB_LEM_FIR_ACCUM >> 3] &= val;
+        return;
+    case PHB_LEM_FIR_OR_MASK:
+        phb->regs[PHB_LEM_FIR_ACCUM >> 3] |= val;
+        return;
+    case PHB_LEM_ERROR_AND_MASK:
+        phb->regs[PHB_LEM_ERROR_MASK >> 3] &= val;
+        return;
+    case PHB_LEM_ERROR_OR_MASK:
+        phb->regs[PHB_LEM_ERROR_MASK >> 3] |= val;
+        return;
+    case PHB_LEM_WOF:
+        val = 0;
+        break;
+    /* TODO: More regs ..., maybe create a table with masks... */
+
+    /* Read only registers */
+    case PHB_CPU_LOADSTORE_STATUS:
+    case PHB_ETU_ERR_SUMMARY:
+    case PHB_PHB4_GEN_CAP:
+    case PHB_PHB4_TCE_CAP:
+    case PHB_PHB4_IRQ_CAP:
+    case PHB_PHB4_EEH_CAP:
+        return;
+    }
+
+    /* Record whether it changed */
+    changed = phb->regs[off >> 3] != val;
+
+    /* Store in register cache first */
+    phb->regs[off >> 3] = val;
+
+    /* Handle side effects */
+    switch (off) {
+    case PHB_PHB4_CONFIG:
+        if (changed) {
+            pnv_phb4_update_all_msi_regions(phb);
+        }
+        break;
+    case PHB_M32_START_ADDR:
+    case PHB_M64_UPPER_BITS:
+        if (changed) {
+            pnv_phb4_check_all_mbt(phb);
+        }
+        break;
+
+    /* IODA table accesses */
+    case PHB_IODA_DATA0:
+        pnv_phb4_ioda_write(phb, val);
+        break;
+
+    /* RTC invalidation */
+    case PHB_RTC_INVALIDATE:
+        pnv_phb4_rtc_invalidate(phb, val);
+        break;
+
+    /* PHB Control (Affects XIVE source) */
+    case PHB_CTRLR:
+    case PHB_LSI_SOURCE_ID:
+        pnv_phb4_update_xsrc(phb);
+        break;
+
+    /* Silent simple writes */
+    case PHB_ASN_CMPM:
+    case PHB_CONFIG_ADDRESS:
+    case PHB_IODA_ADDR:
+    case PHB_TCE_KILL:
+    case PHB_TCE_SPEC_CTL:
+    case PHB_PEST_BAR:
+    case PHB_PELTV_BAR:
+    case PHB_RTT_BAR:
+    case PHB_LEM_FIR_ACCUM:
+    case PHB_LEM_ERROR_MASK:
+    case PHB_LEM_ACTION0:
+    case PHB_LEM_ACTION1:
+    case PHB_TCE_TAG_ENABLE:
+    case PHB_INT_NOTIFY_ADDR:
+    case PHB_INT_NOTIFY_INDEX:
+    case PHB_DMARD_SYNC:
+       break;
+
+    /* Noise on anything else */
+    default:
+        qemu_log_mask(LOG_UNIMP, "phb4: reg_write 0x%"PRIx64"=%"PRIx64"\n",
+                      off, val);
+    }
+}
+
+static uint64_t pnv_phb4_reg_read(void *opaque, hwaddr off, unsigned size)
+{
+    PnvPHB4 *phb = PNV_PHB4(opaque);
+    uint64_t val;
+
+    if ((off & 0xfffc) == PHB_CONFIG_DATA) {
+        return pnv_phb4_config_read(phb, off & 0x3, size);
+    }
+
+    /* Special case RC configuration space */
+    if ((off & 0xf800) == PHB_RC_CONFIG_BASE) {
+        return pnv_phb4_rc_config_read(phb, off & 0x7ff, size);
+    }
+
+    /* Other registers are 64-bit only */
+    if (size != 8 || off & 0x7) {
+        phb_error(phb, "Invalid register access, offset: 0x%"PRIx64" size: %d",
+                   off, size);
+        return ~0ull;
+    }
+
+    /* Default read from cache */
+    val = phb->regs[off >> 3];
+
+    switch (off) {
+    case PHB_VERSION:
+        return phb->version;
+
+        /* Read-only */
+    case PHB_PHB4_GEN_CAP:
+        return 0xe4b8000000000000ull;
+    case PHB_PHB4_TCE_CAP:
+        return phb->big_phb ? 0x4008440000000400ull : 0x2008440000000200ull;
+    case PHB_PHB4_IRQ_CAP:
+        return phb->big_phb ? 0x0800000000001000ull : 0x0800000000000800ull;
+    case PHB_PHB4_EEH_CAP:
+        return phb->big_phb ? 0x2000000000000000ull : 0x1000000000000000ull;
+
+    /* IODA table accesses */
+    case PHB_IODA_DATA0:
+        return pnv_phb4_ioda_read(phb);
+
+    /* Link training always appears trained */
+    case PHB_PCIE_DLP_TRAIN_CTL:
+        /* TODO: Do something sensible with speed ? */
+        return PHB_PCIE_DLP_INBAND_PRESENCE | PHB_PCIE_DLP_TL_LINKACT;
+
+    /* DMA read sync: make it look like it's complete */
+    case PHB_DMARD_SYNC:
+        return PHB_DMARD_SYNC_COMPLETE;
+
+    /* Silent simple reads */
+    case PHB_LSI_SOURCE_ID:
+    case PHB_CPU_LOADSTORE_STATUS:
+    case PHB_ASN_CMPM:
+    case PHB_PHB4_CONFIG:
+    case PHB_M32_START_ADDR:
+    case PHB_CONFIG_ADDRESS:
+    case PHB_IODA_ADDR:
+    case PHB_RTC_INVALIDATE:
+    case PHB_TCE_KILL:
+    case PHB_TCE_SPEC_CTL:
+    case PHB_PEST_BAR:
+    case PHB_PELTV_BAR:
+    case PHB_RTT_BAR:
+    case PHB_M64_UPPER_BITS:
+    case PHB_CTRLR:
+    case PHB_LEM_FIR_ACCUM:
+    case PHB_LEM_ERROR_MASK:
+    case PHB_LEM_ACTION0:
+    case PHB_LEM_ACTION1:
+    case PHB_TCE_TAG_ENABLE:
+    case PHB_INT_NOTIFY_ADDR:
+    case PHB_INT_NOTIFY_INDEX:
+    case PHB_Q_DMA_R:
+    case PHB_ETU_ERR_SUMMARY:
+        break;
+
+    /* Noise on anything else */
+    default:
+        qemu_log_mask(LOG_UNIMP, "phb4: reg_read 0x%"PRIx64"=%"PRIx64"\n",
+                      off, val);
+    }
+    return val;
+}
+
+static const MemoryRegionOps pnv_phb4_reg_ops = {
+    .read = pnv_phb4_reg_read,
+    .write = pnv_phb4_reg_write,
+    .valid.min_access_size = 1,
+    .valid.max_access_size = 8,
+    .impl.min_access_size = 1,
+    .impl.max_access_size = 8,
+    .endianness = DEVICE_BIG_ENDIAN,
+};
+
+static uint64_t pnv_phb4_xscom_read(void *opaque, hwaddr addr, unsigned size)
+{
+    PnvPHB4 *phb = PNV_PHB4(opaque);
+    uint32_t reg = addr >> 3;
+    uint64_t val;
+    hwaddr offset;
+
+    switch (reg) {
+    case PHB_SCOM_HV_IND_ADDR:
+        return phb->scom_hv_ind_addr_reg;
+
+    case PHB_SCOM_HV_IND_DATA:
+        if (!(phb->scom_hv_ind_addr_reg & PHB_SCOM_HV_IND_ADDR_VALID)) {
+            phb_error(phb, "Invalid indirect address");
+            return ~0ull;
+        }
+        size = (phb->scom_hv_ind_addr_reg & PHB_SCOM_HV_IND_ADDR_4B) ? 4 : 8;
+        offset = GETFIELD(PHB_SCOM_HV_IND_ADDR_ADDR, phb->scom_hv_ind_addr_reg);
+        val = pnv_phb4_reg_read(phb, offset, size);
+        if (phb->scom_hv_ind_addr_reg & PHB_SCOM_HV_IND_ADDR_AUTOINC) {
+            offset += size;
+            offset &= 0x3fff;
+            phb->scom_hv_ind_addr_reg = SETFIELD(PHB_SCOM_HV_IND_ADDR_ADDR,
+                                                 phb->scom_hv_ind_addr_reg,
+                                                 offset);
+        }
+        return val;
+    case PHB_SCOM_ETU_LEM_FIR:
+    case PHB_SCOM_ETU_LEM_FIR_AND:
+    case PHB_SCOM_ETU_LEM_FIR_OR:
+    case PHB_SCOM_ETU_LEM_FIR_MSK:
+    case PHB_SCOM_ETU_LEM_ERR_MSK_AND:
+    case PHB_SCOM_ETU_LEM_ERR_MSK_OR:
+    case PHB_SCOM_ETU_LEM_ACT0:
+    case PHB_SCOM_ETU_LEM_ACT1:
+    case PHB_SCOM_ETU_LEM_WOF:
+        offset = ((reg - PHB_SCOM_ETU_LEM_FIR) << 3) + PHB_LEM_FIR_ACCUM;
+        return pnv_phb4_reg_read(phb, offset, size);
+    case PHB_SCOM_ETU_PMON_CONFIG:
+    case PHB_SCOM_ETU_PMON_CTR0:
+    case PHB_SCOM_ETU_PMON_CTR1:
+    case PHB_SCOM_ETU_PMON_CTR2:
+    case PHB_SCOM_ETU_PMON_CTR3:
+        offset = ((reg - PHB_SCOM_ETU_PMON_CONFIG) << 3) + PHB_PERFMON_CONFIG;
+        return pnv_phb4_reg_read(phb, offset, size);
+
+    default:
+        qemu_log_mask(LOG_UNIMP, "phb4: xscom_read 0x%"HWADDR_PRIx"\n", addr);
+        return ~0ull;
+    }
+}
+
+static void pnv_phb4_xscom_write(void *opaque, hwaddr addr,
+                                 uint64_t val, unsigned size)
+{
+    PnvPHB4 *phb = PNV_PHB4(opaque);
+    uint32_t reg = addr >> 3;
+    hwaddr offset;
+
+    switch (reg) {
+    case PHB_SCOM_HV_IND_ADDR:
+        phb->scom_hv_ind_addr_reg = val & 0xe000000000001fff;
+        break;
+    case PHB_SCOM_HV_IND_DATA:
+        if (!(phb->scom_hv_ind_addr_reg & PHB_SCOM_HV_IND_ADDR_VALID)) {
+            phb_error(phb, "Invalid indirect address");
+            break;
+        }
+        size = (phb->scom_hv_ind_addr_reg & PHB_SCOM_HV_IND_ADDR_4B) ? 4 : 8;
+        offset = GETFIELD(PHB_SCOM_HV_IND_ADDR_ADDR, phb->scom_hv_ind_addr_reg);
+        pnv_phb4_reg_write(phb, offset, val, size);
+        if (phb->scom_hv_ind_addr_reg & PHB_SCOM_HV_IND_ADDR_AUTOINC) {
+            offset += size;
+            offset &= 0x3fff;
+            phb->scom_hv_ind_addr_reg = SETFIELD(PHB_SCOM_HV_IND_ADDR_ADDR,
+                                                 phb->scom_hv_ind_addr_reg,
+                                                 offset);
+        }
+        break;
+    case PHB_SCOM_ETU_LEM_FIR:
+    case PHB_SCOM_ETU_LEM_FIR_AND:
+    case PHB_SCOM_ETU_LEM_FIR_OR:
+    case PHB_SCOM_ETU_LEM_FIR_MSK:
+    case PHB_SCOM_ETU_LEM_ERR_MSK_AND:
+    case PHB_SCOM_ETU_LEM_ERR_MSK_OR:
+    case PHB_SCOM_ETU_LEM_ACT0:
+    case PHB_SCOM_ETU_LEM_ACT1:
+    case PHB_SCOM_ETU_LEM_WOF:
+        offset = ((reg - PHB_SCOM_ETU_LEM_FIR) << 3) + PHB_LEM_FIR_ACCUM;
+        pnv_phb4_reg_write(phb, offset, val, size);
+        break;
+    case PHB_SCOM_ETU_PMON_CONFIG:
+    case PHB_SCOM_ETU_PMON_CTR0:
+    case PHB_SCOM_ETU_PMON_CTR1:
+    case PHB_SCOM_ETU_PMON_CTR2:
+    case PHB_SCOM_ETU_PMON_CTR3:
+        offset = ((reg - PHB_SCOM_ETU_PMON_CONFIG) << 3) + PHB_PERFMON_CONFIG;
+        pnv_phb4_reg_write(phb, offset, val, size);
+        break;
+    default:
+        qemu_log_mask(LOG_UNIMP, "phb4: xscom_write 0x%"HWADDR_PRIx
+                      "=%"PRIx64"\n", addr, val);
+    }
+}
+
+const MemoryRegionOps pnv_phb4_xscom_ops = {
+    .read = pnv_phb4_xscom_read,
+    .write = pnv_phb4_xscom_write,
+    .valid.min_access_size = 8,
+    .valid.max_access_size = 8,
+    .impl.min_access_size = 8,
+    .impl.max_access_size = 8,
+    .endianness = DEVICE_BIG_ENDIAN,
+};
+
+static int pnv_phb4_map_irq(PCIDevice *pci_dev, int irq_num)
+{
+    /* Check that out properly ... */
+    return irq_num & 3;
+}
+
+static void pnv_phb4_set_irq(void *opaque, int irq_num, int level)
+{
+    PnvPHB4 *phb = PNV_PHB4(opaque);
+    uint32_t lsi_base;
+
+    /* LSI only ... */
+    if (irq_num > 3) {
+        phb_error(phb, "IRQ %x is not an LSI", irq_num);
+    }
+    lsi_base = GETFIELD(PHB_LSI_SRC_ID, phb->regs[PHB_LSI_SOURCE_ID >> 3]);
+    lsi_base <<= 3;
+    qemu_set_irq(phb->qirqs[lsi_base + irq_num], level);
+}
+
+static bool pnv_phb4_resolve_pe(PnvPhb4DMASpace *ds)
+{
+    uint64_t rtt, addr;
+    uint16_t rte;
+    int bus_num;
+    int num_PEs;
+
+    /* Already resolved ? */
+    if (ds->pe_num != PHB_INVALID_PE) {
+        return true;
+    }
+
+    /* We need to lookup the RTT */
+    rtt = ds->phb->regs[PHB_RTT_BAR >> 3];
+    if (!(rtt & PHB_RTT_BAR_ENABLE)) {
+        phb_error(ds->phb, "DMA with RTT BAR disabled !");
+        /* Set error bits ? fence ? ... */
+        return false;
+    }
+
+    /* Read RTE */
+    bus_num = pci_bus_num(ds->bus);
+    addr = rtt & PHB_RTT_BASE_ADDRESS_MASK;
+    addr += 2 * ((bus_num << 8) | ds->devfn);
+    if (dma_memory_read(&address_space_memory, addr, &rte, sizeof(rte))) {
+        phb_error(ds->phb, "Failed to read RTT entry at 0x%"PRIx64, addr);
+        /* Set error bits ? fence ? ... */
+        return false;
+    }
+    rte = be16_to_cpu(rte);
+
+    /* Fail upon reading of invalid PE# */
+    num_PEs = ds->phb->big_phb ? PNV_PHB4_MAX_PEs : (PNV_PHB4_MAX_PEs >> 1);
+    if (rte >= num_PEs) {
+        phb_error(ds->phb, "RTE for RID 0x%x invalid (%04x", ds->devfn, rte);
+        rte &= num_PEs - 1;
+    }
+    ds->pe_num = rte;
+    return true;
+}
+
+static void pnv_phb4_translate_tve(PnvPhb4DMASpace *ds, hwaddr addr,
+                                   bool is_write, uint64_t tve,
+                                   IOMMUTLBEntry *tlb)
+{
+    uint64_t tta = GETFIELD(IODA3_TVT_TABLE_ADDR, tve);
+    int32_t  lev = GETFIELD(IODA3_TVT_NUM_LEVELS, tve);
+    uint32_t tts = GETFIELD(IODA3_TVT_TCE_TABLE_SIZE, tve);
+    uint32_t tps = GETFIELD(IODA3_TVT_IO_PSIZE, tve);
+
+    /* Invalid levels */
+    if (lev > 4) {
+        phb_error(ds->phb, "Invalid #levels in TVE %d", lev);
+        return;
+    }
+
+    /* Invalid entry */
+    if (tts == 0) {
+        phb_error(ds->phb, "Access to invalid TVE");
+        return;
+    }
+
+    /* IO Page Size of 0 means untranslated, else use TCEs */
+    if (tps == 0) {
+        /* TODO: Handle boundaries */
+
+        /* Use 4k pages like q35 ... for now */
+        tlb->iova = addr & 0xfffffffffffff000ull;
+        tlb->translated_addr = addr & 0x0003fffffffff000ull;
+        tlb->addr_mask = 0xfffull;
+        tlb->perm = IOMMU_RW;
+    } else {
+        uint32_t tce_shift, tbl_shift, sh;
+        uint64_t base, taddr, tce, tce_mask;
+
+        /* Address bits per bottom level TCE entry */
+        tce_shift = tps + 11;
+
+        /* Address bits per table level */
+        tbl_shift = tts + 8;
+
+        /* Top level table base address */
+        base = tta << 12;
+
+        /* Total shift to first level */
+        sh = tbl_shift * lev + tce_shift;
+
+        /* TODO: Limit to support IO page sizes */
+
+        /* TODO: Multi-level untested */
+        while ((lev--) >= 0) {
+            /* Grab the TCE address */
+            taddr = base | (((addr >> sh) & ((1ul << tbl_shift) - 1)) << 3);
+            if (dma_memory_read(&address_space_memory, taddr, &tce,
+                                sizeof(tce))) {
+                phb_error(ds->phb, "Failed to read TCE at 0x%"PRIx64, taddr);
+                return;
+            }
+            tce = be64_to_cpu(tce);
+
+            /* Check permission for indirect TCE */
+            if ((lev >= 0) && !(tce & 3)) {
+                phb_error(ds->phb, "Invalid indirect TCE at 0x%"PRIx64, taddr);
+                phb_error(ds->phb, " xlate %"PRIx64":%c TVE=%"PRIx64, addr,
+                           is_write ? 'W' : 'R', tve);
+                phb_error(ds->phb, " tta=%"PRIx64" lev=%d tts=%d tps=%d",
+                           tta, lev, tts, tps);
+                return;
+            }
+            sh -= tbl_shift;
+            base = tce & ~0xfffull;
+        }
+
+        /* We exit the loop with TCE being the final TCE */
+        tce_mask = ~((1ull << tce_shift) - 1);
+        tlb->iova = addr & tce_mask;
+        tlb->translated_addr = tce & tce_mask;
+        tlb->addr_mask = ~tce_mask;
+        tlb->perm = tce & 3;
+        if ((is_write & !(tce & 2)) || ((!is_write) && !(tce & 1))) {
+            phb_error(ds->phb, "TCE access fault at 0x%"PRIx64, taddr);
+            phb_error(ds->phb, " xlate %"PRIx64":%c TVE=%"PRIx64, addr,
+                       is_write ? 'W' : 'R', tve);
+            phb_error(ds->phb, " tta=%"PRIx64" lev=%d tts=%d tps=%d",
+                       tta, lev, tts, tps);
+        }
+    }
+}
+
+static IOMMUTLBEntry pnv_phb4_translate_iommu(IOMMUMemoryRegion *iommu,
+                                              hwaddr addr,
+                                              IOMMUAccessFlags flag,
+                                              int iommu_idx)
+{
+    PnvPhb4DMASpace *ds = container_of(iommu, PnvPhb4DMASpace, dma_mr);
+    int tve_sel;
+    uint64_t tve, cfg;
+    IOMMUTLBEntry ret = {
+        .target_as = &address_space_memory,
+        .iova = addr,
+        .translated_addr = 0,
+        .addr_mask = ~(hwaddr)0,
+        .perm = IOMMU_NONE,
+    };
+
+    /* Resolve PE# */
+    if (!pnv_phb4_resolve_pe(ds)) {
+        phb_error(ds->phb, "Failed to resolve PE# for bus @%p (%d) devfn 0x%x",
+                   ds->bus, pci_bus_num(ds->bus), ds->devfn);
+        return ret;
+    }
+
+    /* Check top bits */
+    switch (addr >> 60) {
+    case 00:
+        /* DMA or 32-bit MSI ? */
+        cfg = ds->phb->regs[PHB_PHB4_CONFIG >> 3];
+        if ((cfg & PHB_PHB4C_32BIT_MSI_EN) &&
+            ((addr & 0xffffffffffff0000ull) == 0xffff0000ull)) {
+            phb_error(ds->phb, "xlate on 32-bit MSI region");
+            return ret;
+        }
+        /* Choose TVE XXX Use PHB4 Control Register */
+        tve_sel = (addr >> 59) & 1;
+        tve = ds->phb->ioda_TVT[ds->pe_num * 2 + tve_sel];
+        pnv_phb4_translate_tve(ds, addr, flag & IOMMU_WO, tve, &ret);
+        break;
+    case 01:
+        phb_error(ds->phb, "xlate on 64-bit MSI region");
+        break;
+    default:
+        phb_error(ds->phb, "xlate on unsupported address 0x%"PRIx64, addr);
+    }
+    return ret;
+}
+
+#define TYPE_PNV_PHB4_IOMMU_MEMORY_REGION "pnv-phb4-iommu-memory-region"
+#define PNV_PHB4_IOMMU_MEMORY_REGION(obj) \
+    OBJECT_CHECK(IOMMUMemoryRegion, (obj), TYPE_PNV_PHB4_IOMMU_MEMORY_REGION)
+
+static void pnv_phb4_iommu_memory_region_class_init(ObjectClass *klass,
+                                                    void *data)
+{
+    IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_CLASS(klass);
+
+    imrc->translate = pnv_phb4_translate_iommu;
+}
+
+static const TypeInfo pnv_phb4_iommu_memory_region_info = {
+    .parent = TYPE_IOMMU_MEMORY_REGION,
+    .name = TYPE_PNV_PHB4_IOMMU_MEMORY_REGION,
+    .class_init = pnv_phb4_iommu_memory_region_class_init,
+};
+
+/*
+ * MSI/MSIX memory region implementation.
+ * The handler handles both MSI and MSIX.
+ */
+static void pnv_phb4_msi_write(void *opaque, hwaddr addr,
+                               uint64_t data, unsigned size)
+{
+    PnvPhb4DMASpace *ds = opaque;
+    PnvPHB4 *phb = ds->phb;
+
+    uint32_t src = ((addr >> 4) & 0xffff) | (data & 0x1f);
+
+    /* Resolve PE# */
+    if (!pnv_phb4_resolve_pe(ds)) {
+        phb_error(phb, "Failed to resolve PE# for bus @%p (%d) devfn 0x%x",
+                   ds->bus, pci_bus_num(ds->bus), ds->devfn);
+        return;
+    }
+
+    /* TODO: Check it doesn't collide with LSIs */
+    if (src >= phb->xsrc.nr_irqs) {
+        phb_error(phb, "MSI %d out of bounds", src);
+        return;
+    }
+
+    /* TODO: check PE/MSI assignement */
+
+    qemu_irq_pulse(phb->qirqs[src]);
+}
+
+/* There is no .read as the read result is undefined by PCI spec */
+static uint64_t pnv_phb4_msi_read(void *opaque, hwaddr addr, unsigned size)
+{
+    PnvPhb4DMASpace *ds = opaque;
+
+    phb_error(ds->phb, "Invalid MSI read @ 0x%" HWADDR_PRIx, addr);
+    return -1;
+}
+
+static const MemoryRegionOps pnv_phb4_msi_ops = {
+    .read = pnv_phb4_msi_read,
+    .write = pnv_phb4_msi_write,
+    .endianness = DEVICE_LITTLE_ENDIAN
+};
+
+static PnvPhb4DMASpace *pnv_phb4_dma_find(PnvPHB4 *phb, PCIBus *bus, int devfn)
+{
+    PnvPhb4DMASpace *ds;
+
+    QLIST_FOREACH(ds, &phb->dma_spaces, list) {
+        if (ds->bus == bus && ds->devfn == devfn) {
+            break;
+        }
+    }
+    return ds;
+}
+
+static AddressSpace *pnv_phb4_dma_iommu(PCIBus *bus, void *opaque, int devfn)
+{
+    PnvPHB4 *phb = opaque;
+    PnvPhb4DMASpace *ds;
+    char name[32];
+
+    ds = pnv_phb4_dma_find(phb, bus, devfn);
+
+    if (ds == NULL) {
+        ds = g_malloc0(sizeof(PnvPhb4DMASpace));
+        ds->bus = bus;
+        ds->devfn = devfn;
+        ds->pe_num = PHB_INVALID_PE;
+        ds->phb = phb;
+        snprintf(name, sizeof(name), "phb4-%d.%d-iommu", phb->chip_id,
+                 phb->phb_id);
+        memory_region_init_iommu(&ds->dma_mr, sizeof(ds->dma_mr),
+                                 TYPE_PNV_PHB4_IOMMU_MEMORY_REGION,
+                                 OBJECT(phb), name, UINT64_MAX);
+        address_space_init(&ds->dma_as, MEMORY_REGION(&ds->dma_mr),
+                           name);
+        memory_region_init_io(&ds->msi32_mr, OBJECT(phb), &pnv_phb4_msi_ops,
+                              ds, "msi32", 0x10000);
+        memory_region_init_io(&ds->msi64_mr, OBJECT(phb), &pnv_phb4_msi_ops,
+                              ds, "msi64", 0x100000);
+        pnv_phb4_update_msi_regions(ds);
+
+        QLIST_INSERT_HEAD(&phb->dma_spaces, ds, list);
+    }
+    return &ds->dma_as;
+}
+
+static void pnv_phb4_instance_init(Object *obj)
+{
+    PnvPHB4 *phb = PNV_PHB4(obj);
+
+    QLIST_INIT(&phb->dma_spaces);
+
+    /* XIVE interrupt source object */
+    object_initialize_child(obj, "source", &phb->xsrc, sizeof(XiveSource),
+                            TYPE_XIVE_SOURCE, &error_abort, NULL);
+
+    /* Root Port */
+    object_initialize_child(obj, "root", &phb->root, sizeof(phb->root),
+                            TYPE_PNV_PHB4_ROOT_PORT, &error_abort, NULL);
+
+    qdev_prop_set_int32(DEVICE(&phb->root), "addr", PCI_DEVFN(0, 0));
+    qdev_prop_set_bit(DEVICE(&phb->root), "multifunction", false);
+}
+
+static void pnv_phb4_realize(DeviceState *dev, Error **errp)
+{
+    PnvPHB4 *phb = PNV_PHB4(dev);
+    PCIHostState *pci = PCI_HOST_BRIDGE(dev);
+    XiveSource *xsrc = &phb->xsrc;
+    Error *local_err = NULL;
+    int nr_irqs;
+    char name[32];
+
+    assert(phb->stack);
+
+    /* Set the "big_phb" flag */
+    phb->big_phb = phb->phb_id == 0 || phb->phb_id == 3;
+
+    /* Controller Registers */
+    snprintf(name, sizeof(name), "phb4-%d.%d-regs", phb->chip_id,
+             phb->phb_id);
+    memory_region_init_io(&phb->mr_regs, OBJECT(phb), &pnv_phb4_reg_ops, phb,
+                          name, 0x2000);
+
+    /*
+     * PHB4 doesn't support IO space. However, qemu gets very upset if
+     * we don't have an IO region to anchor IO BARs onto so we just
+     * initialize one which we never hook up to anything
+     */
+
+    snprintf(name, sizeof(name), "phb4-%d.%d-pci-io", phb->chip_id,
+             phb->phb_id);
+    memory_region_init(&phb->pci_io, OBJECT(phb), name, 0x10000);
+
+    snprintf(name, sizeof(name), "phb4-%d.%d-pci-mmio", phb->chip_id,
+             phb->phb_id);
+    memory_region_init(&phb->pci_mmio, OBJECT(phb), name,
+                       PCI_MMIO_TOTAL_SIZE);
+
+    pci->bus = pci_register_root_bus(dev, "root-bus",
+                                     pnv_phb4_set_irq, pnv_phb4_map_irq, phb,
+                                     &phb->pci_mmio, &phb->pci_io,
+                                     0, 4, TYPE_PNV_PHB4_ROOT_BUS);
+    pci_setup_iommu(pci->bus, pnv_phb4_dma_iommu, phb);
+
+    /* Add a single Root port */
+    qdev_prop_set_uint8(DEVICE(&phb->root), "chassis", phb->chip_id);
+    qdev_prop_set_uint16(DEVICE(&phb->root), "slot", phb->phb_id);
+    qdev_set_parent_bus(DEVICE(&phb->root), BUS(pci->bus));
+    qdev_init_nofail(DEVICE(&phb->root));
+
+    /* Setup XIVE Source */
+    if (phb->big_phb) {
+        nr_irqs = PNV_PHB4_MAX_INTs;
+    } else {
+        nr_irqs = PNV_PHB4_MAX_INTs >> 1;
+    }
+    object_property_set_int(OBJECT(xsrc), nr_irqs, "nr-irqs", &error_fatal);
+    object_property_set_link(OBJECT(xsrc), OBJECT(phb), "xive", &error_fatal);
+    object_property_set_bool(OBJECT(xsrc), true, "realized", &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
+
+    pnv_phb4_update_xsrc(phb);
+
+    phb->qirqs = qemu_allocate_irqs(xive_source_set_irq, xsrc, xsrc->nr_irqs);
+}
+
+static void pnv_phb4_reset(DeviceState *dev)
+{
+    PnvPHB4 *phb = PNV_PHB4(dev);
+    PCIDevice *root_dev = PCI_DEVICE(&phb->root);
+
+    /*
+     * Configure PCI device id at reset using a property.
+     */
+    pci_config_set_vendor_id(root_dev->config, PCI_VENDOR_ID_IBM);
+    pci_config_set_device_id(root_dev->config, phb->device_id);
+}
+
+static const char *pnv_phb4_root_bus_path(PCIHostState *host_bridge,
+                                          PCIBus *rootbus)
+{
+    PnvPHB4 *phb = PNV_PHB4(host_bridge);
+
+    snprintf(phb->bus_path, sizeof(phb->bus_path), "00%02x:%02x",
+             phb->chip_id, phb->phb_id);
+    return phb->bus_path;
+}
+
+static void pnv_phb4_xive_notify(XiveNotifier *xf, uint32_t srcno)
+{
+    PnvPHB4 *phb = PNV_PHB4(xf);
+    uint64_t notif_port = phb->regs[PHB_INT_NOTIFY_ADDR >> 3];
+    uint32_t offset = phb->regs[PHB_INT_NOTIFY_INDEX >> 3];
+    uint64_t data = XIVE_TRIGGER_PQ | offset | srcno;
+    MemTxResult result;
+
+    address_space_stq_be(&address_space_memory, notif_port, data,
+                         MEMTXATTRS_UNSPECIFIED, &result);
+    if (result != MEMTX_OK) {
+        phb_error(phb, "trigger failed @%"HWADDR_PRIx "\n", notif_port);
+        return;
+    }
+}
+
+static Property pnv_phb4_properties[] = {
+        DEFINE_PROP_UINT32("index", PnvPHB4, phb_id, 0),
+        DEFINE_PROP_UINT32("chip-id", PnvPHB4, chip_id, 0),
+        DEFINE_PROP_UINT64("version", PnvPHB4, version, 0),
+        DEFINE_PROP_UINT16("device-id", PnvPHB4, device_id, 0),
+        DEFINE_PROP_LINK("stack", PnvPHB4, stack, TYPE_PNV_PHB4_PEC_STACK,
+                         PnvPhb4PecStack *),
+        DEFINE_PROP_END_OF_LIST(),
+};
+
+static void pnv_phb4_class_init(ObjectClass *klass, void *data)
+{
+    PCIHostBridgeClass *hc = PCI_HOST_BRIDGE_CLASS(klass);
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    XiveNotifierClass *xfc = XIVE_NOTIFIER_CLASS(klass);
+
+    hc->root_bus_path   = pnv_phb4_root_bus_path;
+    dc->realize         = pnv_phb4_realize;
+    device_class_set_props(dc, pnv_phb4_properties);
+    set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
+    dc->user_creatable  = false;
+    dc->reset           = pnv_phb4_reset;
+
+    xfc->notify         = pnv_phb4_xive_notify;
+}
+
+static const TypeInfo pnv_phb4_type_info = {
+    .name          = TYPE_PNV_PHB4,
+    .parent        = TYPE_PCIE_HOST_BRIDGE,
+    .instance_init = pnv_phb4_instance_init,
+    .instance_size = sizeof(PnvPHB4),
+    .class_init    = pnv_phb4_class_init,
+    .interfaces = (InterfaceInfo[]) {
+            { TYPE_XIVE_NOTIFIER },
+            { },
+    }
+};
+
+static void pnv_phb4_root_bus_class_init(ObjectClass *klass, void *data)
+{
+    BusClass *k = BUS_CLASS(klass);
+
+    /*
+     * PHB4 has only a single root complex. Enforce the limit on the
+     * parent bus
+     */
+    k->max_dev = 1;
+}
+
+static const TypeInfo pnv_phb4_root_bus_info = {
+    .name = TYPE_PNV_PHB4_ROOT_BUS,
+    .parent = TYPE_PCIE_BUS,
+    .class_init = pnv_phb4_root_bus_class_init,
+    .interfaces = (InterfaceInfo[]) {
+        { INTERFACE_PCIE_DEVICE },
+        { }
+    },
+};
+
+static void pnv_phb4_root_port_reset(DeviceState *dev)
+{
+    PCIERootPortClass *rpc = PCIE_ROOT_PORT_GET_CLASS(dev);
+    PCIDevice *d = PCI_DEVICE(dev);
+    uint8_t *conf = d->config;
+
+    rpc->parent_reset(dev);
+
+    pci_byte_test_and_set_mask(conf + PCI_IO_BASE,
+                               PCI_IO_RANGE_MASK & 0xff);
+    pci_byte_test_and_clear_mask(conf + PCI_IO_LIMIT,
+                                 PCI_IO_RANGE_MASK & 0xff);
+    pci_set_word(conf + PCI_MEMORY_BASE, 0);
+    pci_set_word(conf + PCI_MEMORY_LIMIT, 0xfff0);
+    pci_set_word(conf + PCI_PREF_MEMORY_BASE, 0x1);
+    pci_set_word(conf + PCI_PREF_MEMORY_LIMIT, 0xfff1);
+    pci_set_long(conf + PCI_PREF_BASE_UPPER32, 0x1); /* Hack */
+    pci_set_long(conf + PCI_PREF_LIMIT_UPPER32, 0xffffffff);
+}
+
+static void pnv_phb4_root_port_realize(DeviceState *dev, Error **errp)
+{
+    PCIERootPortClass *rpc = PCIE_ROOT_PORT_GET_CLASS(dev);
+    Error *local_err = NULL;
+
+    rpc->parent_realize(dev, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
+}
+
+static void pnv_phb4_root_port_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
+    PCIERootPortClass *rpc = PCIE_ROOT_PORT_CLASS(klass);
+
+    dc->desc     = "IBM PHB4 PCIE Root Port";
+    dc->user_creatable = false;
+
+    device_class_set_parent_realize(dc, pnv_phb4_root_port_realize,
+                                    &rpc->parent_realize);
+    device_class_set_parent_reset(dc, pnv_phb4_root_port_reset,
+                                  &rpc->parent_reset);
+
+    k->vendor_id = PCI_VENDOR_ID_IBM;
+    k->device_id = PNV_PHB4_DEVICE_ID;
+    k->revision  = 0;
+
+    rpc->exp_offset = 0x48;
+    rpc->aer_offset = 0x100;
+
+    dc->reset = &pnv_phb4_root_port_reset;
+}
+
+static const TypeInfo pnv_phb4_root_port_info = {
+    .name          = TYPE_PNV_PHB4_ROOT_PORT,
+    .parent        = TYPE_PCIE_ROOT_PORT,
+    .instance_size = sizeof(PnvPHB4RootPort),
+    .class_init    = pnv_phb4_root_port_class_init,
+};
+
+static void pnv_phb4_register_types(void)
+{
+    type_register_static(&pnv_phb4_root_bus_info);
+    type_register_static(&pnv_phb4_root_port_info);
+    type_register_static(&pnv_phb4_type_info);
+    type_register_static(&pnv_phb4_iommu_memory_region_info);
+}
+
+type_init(pnv_phb4_register_types);
+
+void pnv_phb4_update_regions(PnvPhb4PecStack *stack)
+{
+    PnvPHB4 *phb = &stack->phb;
+
+    /* Unmap first always */
+    if (memory_region_is_mapped(&phb->mr_regs)) {
+        memory_region_del_subregion(&stack->phbbar, &phb->mr_regs);
+    }
+    if (memory_region_is_mapped(&phb->xsrc.esb_mmio)) {
+        memory_region_del_subregion(&stack->intbar, &phb->xsrc.esb_mmio);
+    }
+
+    /* Map registers if enabled */
+    if (memory_region_is_mapped(&stack->phbbar)) {
+        memory_region_add_subregion(&stack->phbbar, 0, &phb->mr_regs);
+    }
+
+    /* Map ESB if enabled */
+    if (memory_region_is_mapped(&stack->intbar)) {
+        memory_region_add_subregion(&stack->intbar, 0, &phb->xsrc.esb_mmio);
+    }
+
+    /* Check/update m32 */
+    pnv_phb4_check_all_mbt(phb);
+}
+
+void pnv_phb4_pic_print_info(PnvPHB4 *phb, Monitor *mon)
+{
+    uint32_t offset = phb->regs[PHB_INT_NOTIFY_INDEX >> 3];
+
+    monitor_printf(mon, "PHB4[%x:%x] Source %08x .. %08x\n",
+                   phb->chip_id, phb->phb_id,
+                   offset, offset + phb->xsrc.nr_irqs - 1);
+    xive_source_pic_print_info(&phb->xsrc, 0, mon);
+}
diff --git a/hw/pci-host/pnv_phb4_pec.c b/hw/pci-host/pnv_phb4_pec.c
new file mode 100644
index 0000000000..68e1db3eac
--- /dev/null
+++ b/hw/pci-host/pnv_phb4_pec.c
@@ -0,0 +1,595 @@
+/*
+ * QEMU PowerPC PowerNV (POWER9) PHB4 model
+ *
+ * Copyright (c) 2018-2020, IBM Corporation.
+ *
+ * This code is licensed under the GPL version 2 or later. See the
+ * COPYING file in the top-level directory.
+ */
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qemu-common.h"
+#include "qemu/log.h"
+#include "target/ppc/cpu.h"
+#include "hw/ppc/fdt.h"
+#include "hw/pci-host/pnv_phb4_regs.h"
+#include "hw/pci-host/pnv_phb4.h"
+#include "hw/ppc/pnv_xscom.h"
+#include "hw/pci/pci_bridge.h"
+#include "hw/pci/pci_bus.h"
+#include "hw/ppc/pnv.h"
+#include "hw/qdev-properties.h"
+
+#include <libfdt.h>
+
+#define phb_pec_error(pec, fmt, ...)                                    \
+    qemu_log_mask(LOG_GUEST_ERROR, "phb4_pec[%d:%d]: " fmt "\n",        \
+                  (pec)->chip_id, (pec)->index, ## __VA_ARGS__)
+
+
+static uint64_t pnv_pec_nest_xscom_read(void *opaque, hwaddr addr,
+                                        unsigned size)
+{
+    PnvPhb4PecState *pec = PNV_PHB4_PEC(opaque);
+    uint32_t reg = addr >> 3;
+
+    /* TODO: add list of allowed registers and error out if not */
+    return pec->nest_regs[reg];
+}
+
+static void pnv_pec_nest_xscom_write(void *opaque, hwaddr addr,
+                                     uint64_t val, unsigned size)
+{
+    PnvPhb4PecState *pec = PNV_PHB4_PEC(opaque);
+    uint32_t reg = addr >> 3;
+
+    switch (reg) {
+    case PEC_NEST_PBCQ_HW_CONFIG:
+    case PEC_NEST_DROP_PRIO_CTRL:
+    case PEC_NEST_PBCQ_ERR_INJECT:
+    case PEC_NEST_PCI_NEST_CLK_TRACE_CTL:
+    case PEC_NEST_PBCQ_PMON_CTRL:
+    case PEC_NEST_PBCQ_PBUS_ADDR_EXT:
+    case PEC_NEST_PBCQ_PRED_VEC_TIMEOUT:
+    case PEC_NEST_CAPP_CTRL:
+    case PEC_NEST_PBCQ_READ_STK_OVR:
+    case PEC_NEST_PBCQ_WRITE_STK_OVR:
+    case PEC_NEST_PBCQ_STORE_STK_OVR:
+    case PEC_NEST_PBCQ_RETRY_BKOFF_CTRL:
+        pec->nest_regs[reg] = val;
+        break;
+    default:
+        phb_pec_error(pec, "%s @0x%"HWADDR_PRIx"=%"PRIx64"\n", __func__,
+                      addr, val);
+    }
+}
+
+static const MemoryRegionOps pnv_pec_nest_xscom_ops = {
+    .read = pnv_pec_nest_xscom_read,
+    .write = pnv_pec_nest_xscom_write,
+    .valid.min_access_size = 8,
+    .valid.max_access_size = 8,
+    .impl.min_access_size = 8,
+    .impl.max_access_size = 8,
+    .endianness = DEVICE_BIG_ENDIAN,
+};
+
+static uint64_t pnv_pec_pci_xscom_read(void *opaque, hwaddr addr,
+                                       unsigned size)
+{
+    PnvPhb4PecState *pec = PNV_PHB4_PEC(opaque);
+    uint32_t reg = addr >> 3;
+
+    /* TODO: add list of allowed registers and error out if not */
+    return pec->pci_regs[reg];
+}
+
+static void pnv_pec_pci_xscom_write(void *opaque, hwaddr addr,
+                                    uint64_t val, unsigned size)
+{
+    PnvPhb4PecState *pec = PNV_PHB4_PEC(opaque);
+    uint32_t reg = addr >> 3;
+
+    switch (reg) {
+    case PEC_PCI_PBAIB_HW_CONFIG:
+    case PEC_PCI_PBAIB_READ_STK_OVR:
+        pec->pci_regs[reg] = val;
+        break;
+    default:
+        phb_pec_error(pec, "%s @0x%"HWADDR_PRIx"=%"PRIx64"\n", __func__,
+                      addr, val);
+    }
+}
+
+static const MemoryRegionOps pnv_pec_pci_xscom_ops = {
+    .read = pnv_pec_pci_xscom_read,
+    .write = pnv_pec_pci_xscom_write,
+    .valid.min_access_size = 8,
+    .valid.max_access_size = 8,
+    .impl.min_access_size = 8,
+    .impl.max_access_size = 8,
+    .endianness = DEVICE_BIG_ENDIAN,
+};
+
+static uint64_t pnv_pec_stk_nest_xscom_read(void *opaque, hwaddr addr,
+                                            unsigned size)
+{
+    PnvPhb4PecStack *stack = PNV_PHB4_PEC_STACK(opaque);
+    uint32_t reg = addr >> 3;
+
+    /* TODO: add list of allowed registers and error out if not */
+    return stack->nest_regs[reg];
+}
+
+static void pnv_pec_stk_update_map(PnvPhb4PecStack *stack)
+{
+    PnvPhb4PecState *pec = stack->pec;
+    MemoryRegion *sysmem = pec->system_memory;
+    uint64_t bar_en = stack->nest_regs[PEC_NEST_STK_BAR_EN];
+    uint64_t bar, mask, size;
+    char name[64];
+
+    /*
+     * NOTE: This will really not work well if those are remapped
+     * after the PHB has created its sub regions. We could do better
+     * if we had a way to resize regions but we don't really care
+     * that much in practice as the stuff below really only happens
+     * once early during boot
+     */
+
+    /* Handle unmaps */
+    if (memory_region_is_mapped(&stack->mmbar0) &&
+        !(bar_en & PEC_NEST_STK_BAR_EN_MMIO0)) {
+        memory_region_del_subregion(sysmem, &stack->mmbar0);
+    }
+    if (memory_region_is_mapped(&stack->mmbar1) &&
+        !(bar_en & PEC_NEST_STK_BAR_EN_MMIO1)) {
+        memory_region_del_subregion(sysmem, &stack->mmbar1);
+    }
+    if (memory_region_is_mapped(&stack->phbbar) &&
+        !(bar_en & PEC_NEST_STK_BAR_EN_PHB)) {
+        memory_region_del_subregion(sysmem, &stack->phbbar);
+    }
+    if (memory_region_is_mapped(&stack->intbar) &&
+        !(bar_en & PEC_NEST_STK_BAR_EN_INT)) {
+        memory_region_del_subregion(sysmem, &stack->intbar);
+    }
+
+    /* Update PHB */
+    pnv_phb4_update_regions(stack);
+
+    /* Handle maps */
+    if (!memory_region_is_mapped(&stack->mmbar0) &&
+        (bar_en & PEC_NEST_STK_BAR_EN_MMIO0)) {
+        bar = stack->nest_regs[PEC_NEST_STK_MMIO_BAR0] >> 8;
+        mask = stack->nest_regs[PEC_NEST_STK_MMIO_BAR0_MASK];
+        size = ((~mask) >> 8) + 1;
+        snprintf(name, sizeof(name), "pec-%d.%d-stack-%d-mmio0",
+                 pec->chip_id, pec->index, stack->stack_no);
+        memory_region_init(&stack->mmbar0, OBJECT(stack), name, size);
+        memory_region_add_subregion(sysmem, bar, &stack->mmbar0);
+        stack->mmio0_base = bar;
+        stack->mmio0_size = size;
+    }
+    if (!memory_region_is_mapped(&stack->mmbar1) &&
+        (bar_en & PEC_NEST_STK_BAR_EN_MMIO1)) {
+        bar = stack->nest_regs[PEC_NEST_STK_MMIO_BAR1] >> 8;
+        mask = stack->nest_regs[PEC_NEST_STK_MMIO_BAR1_MASK];
+        size = ((~mask) >> 8) + 1;
+        snprintf(name, sizeof(name), "pec-%d.%d-stack-%d-mmio1",
+                 pec->chip_id, pec->index, stack->stack_no);
+        memory_region_init(&stack->mmbar1, OBJECT(stack), name, size);
+        memory_region_add_subregion(sysmem, bar, &stack->mmbar1);
+        stack->mmio1_base = bar;
+        stack->mmio1_size = size;
+    }
+    if (!memory_region_is_mapped(&stack->phbbar) &&
+        (bar_en & PEC_NEST_STK_BAR_EN_PHB)) {
+        bar = stack->nest_regs[PEC_NEST_STK_PHB_REGS_BAR] >> 8;
+        size = PNV_PHB4_NUM_REGS << 3;
+        snprintf(name, sizeof(name), "pec-%d.%d-stack-%d-phb",
+                 pec->chip_id, pec->index, stack->stack_no);
+        memory_region_init(&stack->phbbar, OBJECT(stack), name, size);
+        memory_region_add_subregion(sysmem, bar, &stack->phbbar);
+    }
+    if (!memory_region_is_mapped(&stack->intbar) &&
+        (bar_en & PEC_NEST_STK_BAR_EN_INT)) {
+        bar = stack->nest_regs[PEC_NEST_STK_INT_BAR] >> 8;
+        size = PNV_PHB4_MAX_INTs << 16;
+        snprintf(name, sizeof(name), "pec-%d.%d-stack-%d-int",
+                 stack->pec->chip_id, stack->pec->index, stack->stack_no);
+        memory_region_init(&stack->intbar, OBJECT(stack), name, size);
+        memory_region_add_subregion(sysmem, bar, &stack->intbar);
+    }
+
+    /* Update PHB */
+    pnv_phb4_update_regions(stack);
+}
+
+static void pnv_pec_stk_nest_xscom_write(void *opaque, hwaddr addr,
+                                         uint64_t val, unsigned size)
+{
+    PnvPhb4PecStack *stack = PNV_PHB4_PEC_STACK(opaque);
+    PnvPhb4PecState *pec = stack->pec;
+    uint32_t reg = addr >> 3;
+
+    switch (reg) {
+    case PEC_NEST_STK_PCI_NEST_FIR:
+        stack->nest_regs[PEC_NEST_STK_PCI_NEST_FIR] = val;
+        break;
+    case PEC_NEST_STK_PCI_NEST_FIR_CLR:
+        stack->nest_regs[PEC_NEST_STK_PCI_NEST_FIR] &= val;
+        break;
+    case PEC_NEST_STK_PCI_NEST_FIR_SET:
+        stack->nest_regs[PEC_NEST_STK_PCI_NEST_FIR] |= val;
+        break;
+    case PEC_NEST_STK_PCI_NEST_FIR_MSK:
+        stack->nest_regs[PEC_NEST_STK_PCI_NEST_FIR_MSK] = val;
+        break;
+    case PEC_NEST_STK_PCI_NEST_FIR_MSKC:
+        stack->nest_regs[PEC_NEST_STK_PCI_NEST_FIR_MSK] &= val;
+        break;
+    case PEC_NEST_STK_PCI_NEST_FIR_MSKS:
+        stack->nest_regs[PEC_NEST_STK_PCI_NEST_FIR_MSK] |= val;
+        break;
+    case PEC_NEST_STK_PCI_NEST_FIR_ACT0:
+    case PEC_NEST_STK_PCI_NEST_FIR_ACT1:
+        stack->nest_regs[reg] = val;
+        break;
+    case PEC_NEST_STK_PCI_NEST_FIR_WOF:
+        stack->nest_regs[reg] = 0;
+        break;
+    case PEC_NEST_STK_ERR_REPORT_0:
+    case PEC_NEST_STK_ERR_REPORT_1:
+    case PEC_NEST_STK_PBCQ_GNRL_STATUS:
+        /* Flag error ? */
+        break;
+    case PEC_NEST_STK_PBCQ_MODE:
+        stack->nest_regs[reg] = val & 0xff00000000000000ull;
+        break;
+    case PEC_NEST_STK_MMIO_BAR0:
+    case PEC_NEST_STK_MMIO_BAR0_MASK:
+    case PEC_NEST_STK_MMIO_BAR1:
+    case PEC_NEST_STK_MMIO_BAR1_MASK:
+        if (stack->nest_regs[PEC_NEST_STK_BAR_EN] &
+            (PEC_NEST_STK_BAR_EN_MMIO0 |
+             PEC_NEST_STK_BAR_EN_MMIO1)) {
+            phb_pec_error(pec, "Changing enabled BAR unsupported\n");
+        }
+        stack->nest_regs[reg] = val & 0xffffffffff000000ull;
+        break;
+    case PEC_NEST_STK_PHB_REGS_BAR:
+        if (stack->nest_regs[PEC_NEST_STK_BAR_EN] & PEC_NEST_STK_BAR_EN_PHB) {
+            phb_pec_error(pec, "Changing enabled BAR unsupported\n");
+        }
+        stack->nest_regs[reg] = val & 0xffffffffffc00000ull;
+        break;
+    case PEC_NEST_STK_INT_BAR:
+        if (stack->nest_regs[PEC_NEST_STK_BAR_EN] & PEC_NEST_STK_BAR_EN_INT) {
+            phb_pec_error(pec, "Changing enabled BAR unsupported\n");
+        }
+        stack->nest_regs[reg] = val & 0xfffffff000000000ull;
+        break;
+    case PEC_NEST_STK_BAR_EN:
+        stack->nest_regs[reg] = val & 0xf000000000000000ull;
+        pnv_pec_stk_update_map(stack);
+        break;
+    case PEC_NEST_STK_DATA_FRZ_TYPE:
+    case PEC_NEST_STK_PBCQ_TUN_BAR:
+        /* Not used for now */
+        stack->nest_regs[reg] = val;
+        break;
+    default:
+        qemu_log_mask(LOG_UNIMP, "phb4_pec: nest_xscom_write 0x%"HWADDR_PRIx
+                      "=%"PRIx64"\n", addr, val);
+    }
+}
+
+static const MemoryRegionOps pnv_pec_stk_nest_xscom_ops = {
+    .read = pnv_pec_stk_nest_xscom_read,
+    .write = pnv_pec_stk_nest_xscom_write,
+    .valid.min_access_size = 8,
+    .valid.max_access_size = 8,
+    .impl.min_access_size = 8,
+    .impl.max_access_size = 8,
+    .endianness = DEVICE_BIG_ENDIAN,
+};
+
+static uint64_t pnv_pec_stk_pci_xscom_read(void *opaque, hwaddr addr,
+                                           unsigned size)
+{
+    PnvPhb4PecStack *stack = PNV_PHB4_PEC_STACK(opaque);
+    uint32_t reg = addr >> 3;
+
+    /* TODO: add list of allowed registers and error out if not */
+    return stack->pci_regs[reg];
+}
+
+static void pnv_pec_stk_pci_xscom_write(void *opaque, hwaddr addr,
+                                        uint64_t val, unsigned size)
+{
+    PnvPhb4PecStack *stack = PNV_PHB4_PEC_STACK(opaque);
+    uint32_t reg = addr >> 3;
+
+    switch (reg) {
+    case PEC_PCI_STK_PCI_FIR:
+        stack->nest_regs[reg] = val;
+        break;
+    case PEC_PCI_STK_PCI_FIR_CLR:
+        stack->nest_regs[PEC_PCI_STK_PCI_FIR] &= val;
+        break;
+    case PEC_PCI_STK_PCI_FIR_SET:
+        stack->nest_regs[PEC_PCI_STK_PCI_FIR] |= val;
+        break;
+    case PEC_PCI_STK_PCI_FIR_MSK:
+        stack->nest_regs[reg] = val;
+        break;
+    case PEC_PCI_STK_PCI_FIR_MSKC:
+        stack->nest_regs[PEC_PCI_STK_PCI_FIR_MSK] &= val;
+        break;
+    case PEC_PCI_STK_PCI_FIR_MSKS:
+        stack->nest_regs[PEC_PCI_STK_PCI_FIR_MSK] |= val;
+        break;
+    case PEC_PCI_STK_PCI_FIR_ACT0:
+    case PEC_PCI_STK_PCI_FIR_ACT1:
+        stack->nest_regs[reg] = val;
+        break;
+    case PEC_PCI_STK_PCI_FIR_WOF:
+        stack->nest_regs[reg] = 0;
+        break;
+    case PEC_PCI_STK_ETU_RESET:
+        stack->nest_regs[reg] = val & 0x8000000000000000ull;
+        /* TODO: Implement reset */
+        break;
+    case PEC_PCI_STK_PBAIB_ERR_REPORT:
+        break;
+    case PEC_PCI_STK_PBAIB_TX_CMD_CRED:
+    case PEC_PCI_STK_PBAIB_TX_DAT_CRED:
+        stack->nest_regs[reg] = val;
+        break;
+    default:
+        qemu_log_mask(LOG_UNIMP, "phb4_pec_stk: pci_xscom_write 0x%"HWADDR_PRIx
+                      "=%"PRIx64"\n", addr, val);
+    }
+}
+
+static const MemoryRegionOps pnv_pec_stk_pci_xscom_ops = {
+    .read = pnv_pec_stk_pci_xscom_read,
+    .write = pnv_pec_stk_pci_xscom_write,
+    .valid.min_access_size = 8,
+    .valid.max_access_size = 8,
+    .impl.min_access_size = 8,
+    .impl.max_access_size = 8,
+    .endianness = DEVICE_BIG_ENDIAN,
+};
+
+static void pnv_pec_instance_init(Object *obj)
+{
+    PnvPhb4PecState *pec = PNV_PHB4_PEC(obj);
+    int i;
+
+    for (i = 0; i < PHB4_PEC_MAX_STACKS; i++) {
+        object_initialize_child(obj, "stack[*]", &pec->stacks[i],
+                                sizeof(pec->stacks[i]), TYPE_PNV_PHB4_PEC_STACK,
+                                &error_abort, NULL);
+    }
+}
+
+static void pnv_pec_realize(DeviceState *dev, Error **errp)
+{
+    PnvPhb4PecState *pec = PNV_PHB4_PEC(dev);
+    Error *local_err = NULL;
+    char name[64];
+    int i;
+
+    assert(pec->system_memory);
+
+    /* Create stacks */
+    for (i = 0; i < pec->num_stacks; i++) {
+        PnvPhb4PecStack *stack = &pec->stacks[i];
+        Object *stk_obj = OBJECT(stack);
+
+        object_property_set_int(stk_obj, i, "stack-no", &error_abort);
+        object_property_set_link(stk_obj, OBJECT(pec), "pec", &error_abort);
+        object_property_set_bool(stk_obj, true, "realized", errp);
+        if (local_err) {
+            error_propagate(errp, local_err);
+            return;
+        }
+    }
+
+    /* Initialize the XSCOM regions for the PEC registers */
+    snprintf(name, sizeof(name), "xscom-pec-%d.%d-nest", pec->chip_id,
+             pec->index);
+    pnv_xscom_region_init(&pec->nest_regs_mr, OBJECT(dev),
+                          &pnv_pec_nest_xscom_ops, pec, name,
+                          PHB4_PEC_NEST_REGS_COUNT);
+
+    snprintf(name, sizeof(name), "xscom-pec-%d.%d-pci", pec->chip_id,
+             pec->index);
+    pnv_xscom_region_init(&pec->pci_regs_mr, OBJECT(dev),
+                          &pnv_pec_pci_xscom_ops, pec, name,
+                          PHB4_PEC_PCI_REGS_COUNT);
+}
+
+static int pnv_pec_dt_xscom(PnvXScomInterface *dev, void *fdt,
+                            int xscom_offset)
+{
+    PnvPhb4PecState *pec = PNV_PHB4_PEC(dev);
+    PnvPhb4PecClass *pecc = PNV_PHB4_PEC_GET_CLASS(dev);
+    uint32_t nbase = pecc->xscom_nest_base(pec);
+    uint32_t pbase = pecc->xscom_pci_base(pec);
+    int offset, i;
+    char *name;
+    uint32_t reg[] = {
+        cpu_to_be32(nbase),
+        cpu_to_be32(pecc->xscom_nest_size),
+        cpu_to_be32(pbase),
+        cpu_to_be32(pecc->xscom_pci_size),
+    };
+
+    name = g_strdup_printf("pbcq@%x", nbase);
+    offset = fdt_add_subnode(fdt, xscom_offset, name);
+    _FDT(offset);
+    g_free(name);
+
+    _FDT((fdt_setprop(fdt, offset, "reg", reg, sizeof(reg))));
+
+    _FDT((fdt_setprop_cell(fdt, offset, "ibm,pec-index", pec->index)));
+    _FDT((fdt_setprop_cell(fdt, offset, "#address-cells", 1)));
+    _FDT((fdt_setprop_cell(fdt, offset, "#size-cells", 0)));
+    _FDT((fdt_setprop(fdt, offset, "compatible", pecc->compat,
+                      pecc->compat_size)));
+
+    for (i = 0; i < pec->num_stacks; i++) {
+        PnvPhb4PecStack *stack = &pec->stacks[i];
+        PnvPHB4 *phb = &stack->phb;
+        int stk_offset;
+
+        name = g_strdup_printf("stack@%x", i);
+        stk_offset = fdt_add_subnode(fdt, offset, name);
+        _FDT(stk_offset);
+        g_free(name);
+        _FDT((fdt_setprop(fdt, stk_offset, "compatible", pecc->stk_compat,
+                          pecc->stk_compat_size)));
+        _FDT((fdt_setprop_cell(fdt, stk_offset, "reg", i)));
+        _FDT((fdt_setprop_cell(fdt, stk_offset, "ibm,phb-index", phb->phb_id)));
+    }
+
+    return 0;
+}
+
+static Property pnv_pec_properties[] = {
+        DEFINE_PROP_UINT32("index", PnvPhb4PecState, index, 0),
+        DEFINE_PROP_UINT32("num-stacks", PnvPhb4PecState, num_stacks, 0),
+        DEFINE_PROP_UINT32("chip-id", PnvPhb4PecState, chip_id, 0),
+        DEFINE_PROP_LINK("system-memory", PnvPhb4PecState, system_memory,
+                     TYPE_MEMORY_REGION, MemoryRegion *),
+        DEFINE_PROP_END_OF_LIST(),
+};
+
+static uint32_t pnv_pec_xscom_pci_base(PnvPhb4PecState *pec)
+{
+    return PNV9_XSCOM_PEC_PCI_BASE + 0x1000000 * pec->index;
+}
+
+static uint32_t pnv_pec_xscom_nest_base(PnvPhb4PecState *pec)
+{
+    return PNV9_XSCOM_PEC_NEST_BASE + 0x400 * pec->index;
+}
+
+static void pnv_pec_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    PnvXScomInterfaceClass *xdc = PNV_XSCOM_INTERFACE_CLASS(klass);
+    PnvPhb4PecClass *pecc = PNV_PHB4_PEC_CLASS(klass);
+    static const char compat[] = "ibm,power9-pbcq";
+    static const char stk_compat[] = "ibm,power9-phb-stack";
+
+    xdc->dt_xscom = pnv_pec_dt_xscom;
+
+    dc->realize = pnv_pec_realize;
+    device_class_set_props(dc, pnv_pec_properties);
+    dc->user_creatable = false;
+
+    pecc->xscom_nest_base = pnv_pec_xscom_nest_base;
+    pecc->xscom_pci_base  = pnv_pec_xscom_pci_base;
+    pecc->xscom_nest_size = PNV9_XSCOM_PEC_NEST_SIZE;
+    pecc->xscom_pci_size  = PNV9_XSCOM_PEC_PCI_SIZE;
+    pecc->compat = compat;
+    pecc->compat_size = sizeof(compat);
+    pecc->stk_compat = stk_compat;
+    pecc->stk_compat_size = sizeof(stk_compat);
+}
+
+static const TypeInfo pnv_pec_type_info = {
+    .name          = TYPE_PNV_PHB4_PEC,
+    .parent        = TYPE_DEVICE,
+    .instance_size = sizeof(PnvPhb4PecState),
+    .instance_init = pnv_pec_instance_init,
+    .class_init    = pnv_pec_class_init,
+    .class_size    = sizeof(PnvPhb4PecClass),
+    .interfaces    = (InterfaceInfo[]) {
+        { TYPE_PNV_XSCOM_INTERFACE },
+        { }
+    }
+};
+
+static void pnv_pec_stk_instance_init(Object *obj)
+{
+    PnvPhb4PecStack *stack = PNV_PHB4_PEC_STACK(obj);
+
+    object_initialize_child(obj, "phb", &stack->phb, sizeof(stack->phb),
+                            TYPE_PNV_PHB4, &error_abort, NULL);
+}
+
+static void pnv_pec_stk_realize(DeviceState *dev, Error **errp)
+{
+    PnvPhb4PecStack *stack = PNV_PHB4_PEC_STACK(dev);
+    PnvPhb4PecState *pec = stack->pec;
+    char name[64];
+
+    assert(pec);
+
+    /* Initialize the XSCOM regions for the stack registers */
+    snprintf(name, sizeof(name), "xscom-pec-%d.%d-nest-stack-%d",
+             pec->chip_id, pec->index, stack->stack_no);
+    pnv_xscom_region_init(&stack->nest_regs_mr, OBJECT(stack),
+                          &pnv_pec_stk_nest_xscom_ops, stack, name,
+                          PHB4_PEC_NEST_STK_REGS_COUNT);
+
+    snprintf(name, sizeof(name), "xscom-pec-%d.%d-pci-stack-%d",
+             pec->chip_id, pec->index, stack->stack_no);
+    pnv_xscom_region_init(&stack->pci_regs_mr, OBJECT(stack),
+                          &pnv_pec_stk_pci_xscom_ops, stack, name,
+                          PHB4_PEC_PCI_STK_REGS_COUNT);
+
+    /* PHB pass-through */
+    snprintf(name, sizeof(name), "xscom-pec-%d.%d-pci-stack-%d-phb",
+             pec->chip_id, pec->index, stack->stack_no);
+    pnv_xscom_region_init(&stack->phb_regs_mr, OBJECT(&stack->phb),
+                          &pnv_phb4_xscom_ops, &stack->phb, name, 0x40);
+
+    /*
+     * Let the machine/chip realize the PHB object to customize more
+     * easily some fields
+     */
+}
+
+static Property pnv_pec_stk_properties[] = {
+        DEFINE_PROP_UINT32("stack-no", PnvPhb4PecStack, stack_no, 0),
+        DEFINE_PROP_LINK("pec", PnvPhb4PecStack, pec, TYPE_PNV_PHB4_PEC,
+                         PnvPhb4PecState *),
+        DEFINE_PROP_END_OF_LIST(),
+};
+
+static void pnv_pec_stk_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+
+    device_class_set_props(dc, pnv_pec_stk_properties);
+    dc->realize = pnv_pec_stk_realize;
+    dc->user_creatable = false;
+
+    /* TODO: reset regs ? */
+}
+
+static const TypeInfo pnv_pec_stk_type_info = {
+    .name          = TYPE_PNV_PHB4_PEC_STACK,
+    .parent        = TYPE_DEVICE,
+    .instance_size = sizeof(PnvPhb4PecStack),
+    .instance_init = pnv_pec_stk_instance_init,
+    .class_init    = pnv_pec_stk_class_init,
+    .interfaces    = (InterfaceInfo[]) {
+        { TYPE_PNV_XSCOM_INTERFACE },
+        { }
+    }
+};
+
+static void pnv_pec_register_types(void)
+{
+    type_register_static(&pnv_pec_type_info);
+    type_register_static(&pnv_pec_stk_type_info);
+}
+
+type_init(pnv_pec_register_types);
diff --git a/hw/ppc/Kconfig b/hw/ppc/Kconfig
index e27efe9a24..354828bf13 100644
--- a/hw/ppc/Kconfig
+++ b/hw/ppc/Kconfig
@@ -135,6 +135,8 @@ config XIVE_SPAPR
     default y
     depends on PSERIES
     select XIVE
+    select PCI
+    select PCIE_PORT
 
 config XIVE_KVM
     bool
diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
index e61994cf5a..139c857b1e 100644
--- a/hw/ppc/pnv.c
+++ b/hw/ppc/pnv.c
@@ -40,6 +40,7 @@
 #include "hw/intc/intc.h"
 #include "hw/ipmi/ipmi.h"
 #include "target/ppc/mmu-hash64.h"
+#include "hw/pci/msi.h"
 
 #include "hw/ppc/xics.h"
 #include "hw/qdev-properties.h"
@@ -615,16 +616,29 @@ static ISABus *pnv_isa_create(PnvChip *chip, Error **errp)
 static void pnv_chip_power8_pic_print_info(PnvChip *chip, Monitor *mon)
 {
     Pnv8Chip *chip8 = PNV8_CHIP(chip);
+    int i;
 
     ics_pic_print_info(&chip8->psi.ics, mon);
+    for (i = 0; i < chip->num_phbs; i++) {
+        pnv_phb3_msi_pic_print_info(&chip8->phbs[i].msis, mon);
+        ics_pic_print_info(&chip8->phbs[i].lsis, mon);
+    }
 }
 
 static void pnv_chip_power9_pic_print_info(PnvChip *chip, Monitor *mon)
 {
     Pnv9Chip *chip9 = PNV9_CHIP(chip);
+    int i, j;
 
     pnv_xive_pic_print_info(&chip9->xive, mon);
     pnv_psi_pic_print_info(&chip9->psi, mon);
+
+    for (i = 0; i < PNV9_CHIP_MAX_PEC; i++) {
+        PnvPhb4PecState *pec = &chip9->pecs[i];
+        for (j = 0; j < pec->num_stacks; j++) {
+            pnv_phb4_pic_print_info(&pec->stacks[j].phb, mon);
+        }
+    }
 }
 
 static uint64_t pnv_chip_power8_xscom_core_base(PnvChip *chip,
@@ -716,7 +730,7 @@ static void pnv_init(MachineState *machine)
         exit(1);
     }
 
-    fw_size = load_image_targphys(fw_filename, FW_LOAD_ADDR, FW_MAX_SIZE);
+    fw_size = load_image_targphys(fw_filename, pnv->fw_load_addr, FW_MAX_SIZE);
     if (fw_size < 0) {
         error_report("Could not load OPAL firmware '%s'", fw_filename);
         exit(1);
@@ -748,6 +762,9 @@ static void pnv_init(MachineState *machine)
         }
     }
 
+    /* MSIs are supported on this platform */
+    msi_nonbroken = true;
+
     /*
      * Check compatibility of the specified CPU with the machine
      * default.
@@ -1014,7 +1031,10 @@ static void pnv_chip_power10_intc_print_info(PnvChip *chip, PowerPCCPU *cpu,
 
 static void pnv_chip_power8_instance_init(Object *obj)
 {
+    PnvChip *chip = PNV_CHIP(obj);
     Pnv8Chip *chip8 = PNV8_CHIP(obj);
+    PnvChipClass *pcc = PNV_CHIP_GET_CLASS(obj);
+    int i;
 
     object_property_add_link(obj, "xics", TYPE_XICS_FABRIC,
                              (Object **)&chip8->xics,
@@ -1033,6 +1053,17 @@ static void pnv_chip_power8_instance_init(Object *obj)
 
     object_initialize_child(obj, "homer",  &chip8->homer, sizeof(chip8->homer),
                             TYPE_PNV8_HOMER, &error_abort, NULL);
+
+    for (i = 0; i < pcc->num_phbs; i++) {
+        object_initialize_child(obj, "phb[*]", &chip8->phbs[i],
+                                sizeof(chip8->phbs[i]), TYPE_PNV_PHB3,
+                                &error_abort, NULL);
+    }
+
+    /*
+     * Number of PHBs is the chip default
+     */
+    chip->num_phbs = pcc->num_phbs;
 }
 
 static void pnv_chip_icp_realize(Pnv8Chip *chip8, Error **errp)
@@ -1071,6 +1102,7 @@ static void pnv_chip_power8_realize(DeviceState *dev, Error **errp)
     Pnv8Chip *chip8 = PNV8_CHIP(dev);
     Pnv8Psi *psi8 = &chip8->psi;
     Error *local_err = NULL;
+    int i;
 
     assert(chip8->xics);
 
@@ -1151,6 +1183,33 @@ static void pnv_chip_power8_realize(DeviceState *dev, Error **errp)
     /* Homer mmio region */
     memory_region_add_subregion(get_system_memory(), PNV_HOMER_BASE(chip),
                                 &chip8->homer.regs);
+
+    /* PHB3 controllers */
+    for (i = 0; i < chip->num_phbs; i++) {
+        PnvPHB3 *phb = &chip8->phbs[i];
+        PnvPBCQState *pbcq = &phb->pbcq;
+
+        object_property_set_int(OBJECT(phb), i, "index", &error_fatal);
+        object_property_set_int(OBJECT(phb), chip->chip_id, "chip-id",
+                                &error_fatal);
+        object_property_set_bool(OBJECT(phb), true, "realized", &local_err);
+        if (local_err) {
+            error_propagate(errp, local_err);
+            return;
+        }
+        qdev_set_parent_bus(DEVICE(phb), sysbus_get_default());
+
+        /* Populate the XSCOM address space. */
+        pnv_xscom_add_subregion(chip,
+                                PNV_XSCOM_PBCQ_NEST_BASE + 0x400 * phb->phb_id,
+                                &pbcq->xscom_nest_regs);
+        pnv_xscom_add_subregion(chip,
+                                PNV_XSCOM_PBCQ_PCI_BASE + 0x400 * phb->phb_id,
+                                &pbcq->xscom_pci_regs);
+        pnv_xscom_add_subregion(chip,
+                                PNV_XSCOM_PBCQ_SPCI_BASE + 0x040 * phb->phb_id,
+                                &pbcq->xscom_spci_regs);
+    }
 }
 
 static uint32_t pnv_chip_power8_xscom_pcba(PnvChip *chip, uint64_t addr)
@@ -1166,6 +1225,7 @@ static void pnv_chip_power8e_class_init(ObjectClass *klass, void *data)
 
     k->chip_cfam_id = 0x221ef04980000000ull;  /* P8 Murano DD2.1 */
     k->cores_mask = POWER8E_CORE_MASK;
+    k->num_phbs = 3;
     k->core_pir = pnv_chip_core_pir_p8;
     k->intc_create = pnv_chip_power8_intc_create;
     k->intc_reset = pnv_chip_power8_intc_reset;
@@ -1189,6 +1249,7 @@ static void pnv_chip_power8_class_init(ObjectClass *klass, void *data)
 
     k->chip_cfam_id = 0x220ea04980000000ull; /* P8 Venice DD2.0 */
     k->cores_mask = POWER8_CORE_MASK;
+    k->num_phbs = 3;
     k->core_pir = pnv_chip_core_pir_p8;
     k->intc_create = pnv_chip_power8_intc_create;
     k->intc_reset = pnv_chip_power8_intc_reset;
@@ -1212,6 +1273,7 @@ static void pnv_chip_power8nvl_class_init(ObjectClass *klass, void *data)
 
     k->chip_cfam_id = 0x120d304980000000ull;  /* P8 Naples DD1.0 */
     k->cores_mask = POWER8_CORE_MASK;
+    k->num_phbs = 3;
     k->core_pir = pnv_chip_core_pir_p8;
     k->intc_create = pnv_chip_power8_intc_create;
     k->intc_reset = pnv_chip_power8_intc_reset;
@@ -1230,7 +1292,10 @@ static void pnv_chip_power8nvl_class_init(ObjectClass *klass, void *data)
 
 static void pnv_chip_power9_instance_init(Object *obj)
 {
+    PnvChip *chip = PNV_CHIP(obj);
     Pnv9Chip *chip9 = PNV9_CHIP(obj);
+    PnvChipClass *pcc = PNV_CHIP_GET_CLASS(obj);
+    int i;
 
     object_initialize_child(obj, "xive", &chip9->xive, sizeof(chip9->xive),
                             TYPE_PNV_XIVE, &error_abort, NULL);
@@ -1248,6 +1313,17 @@ static void pnv_chip_power9_instance_init(Object *obj)
 
     object_initialize_child(obj, "homer",  &chip9->homer, sizeof(chip9->homer),
                             TYPE_PNV9_HOMER, &error_abort, NULL);
+
+    for (i = 0; i < PNV9_CHIP_MAX_PEC; i++) {
+        object_initialize_child(obj, "pec[*]", &chip9->pecs[i],
+                                sizeof(chip9->pecs[i]), TYPE_PNV_PHB4_PEC,
+                                &error_abort, NULL);
+    }
+
+    /*
+     * Number of PHBs is the chip default
+     */
+    chip->num_phbs = pcc->num_phbs;
 }
 
 static void pnv_chip_quad_realize(Pnv9Chip *chip9, Error **errp)
@@ -1276,6 +1352,78 @@ static void pnv_chip_quad_realize(Pnv9Chip *chip9, Error **errp)
     }
 }
 
+static void pnv_chip_power9_phb_realize(PnvChip *chip, Error **errp)
+{
+    Pnv9Chip *chip9 = PNV9_CHIP(chip);
+    Error *local_err = NULL;
+    int i, j;
+    int phb_id = 0;
+
+    for (i = 0; i < PNV9_CHIP_MAX_PEC; i++) {
+        PnvPhb4PecState *pec = &chip9->pecs[i];
+        PnvPhb4PecClass *pecc = PNV_PHB4_PEC_GET_CLASS(pec);
+        uint32_t pec_nest_base;
+        uint32_t pec_pci_base;
+
+        object_property_set_int(OBJECT(pec), i, "index", &error_fatal);
+        /*
+         * PEC0 -> 1 stack
+         * PEC1 -> 2 stacks
+         * PEC2 -> 3 stacks
+         */
+        object_property_set_int(OBJECT(pec), i + 1, "num-stacks",
+                                &error_fatal);
+        object_property_set_int(OBJECT(pec), chip->chip_id, "chip-id",
+                                 &error_fatal);
+        object_property_set_link(OBJECT(pec), OBJECT(get_system_memory()),
+                                 "system-memory", &error_abort);
+        object_property_set_bool(OBJECT(pec), true, "realized", &local_err);
+        if (local_err) {
+            error_propagate(errp, local_err);
+            return;
+        }
+
+        pec_nest_base = pecc->xscom_nest_base(pec);
+        pec_pci_base = pecc->xscom_pci_base(pec);
+
+        pnv_xscom_add_subregion(chip, pec_nest_base, &pec->nest_regs_mr);
+        pnv_xscom_add_subregion(chip, pec_pci_base, &pec->pci_regs_mr);
+
+        for (j = 0; j < pec->num_stacks && phb_id < chip->num_phbs;
+             j++, phb_id++) {
+            PnvPhb4PecStack *stack = &pec->stacks[j];
+            Object *obj = OBJECT(&stack->phb);
+
+            object_property_set_int(obj, phb_id, "index", &error_fatal);
+            object_property_set_int(obj, chip->chip_id, "chip-id",
+                                    &error_fatal);
+            object_property_set_int(obj, PNV_PHB4_VERSION, "version",
+                                    &error_fatal);
+            object_property_set_int(obj, PNV_PHB4_DEVICE_ID, "device-id",
+                                    &error_fatal);
+            object_property_set_link(obj, OBJECT(stack), "stack", &error_abort);
+            object_property_set_bool(obj, true, "realized", &local_err);
+            if (local_err) {
+                error_propagate(errp, local_err);
+                return;
+            }
+            qdev_set_parent_bus(DEVICE(obj), sysbus_get_default());
+
+            /* Populate the XSCOM address space. */
+            pnv_xscom_add_subregion(chip,
+                                   pec_nest_base + 0x40 * (stack->stack_no + 1),
+                                   &stack->nest_regs_mr);
+            pnv_xscom_add_subregion(chip,
+                                    pec_pci_base + 0x40 * (stack->stack_no + 1),
+                                    &stack->pci_regs_mr);
+            pnv_xscom_add_subregion(chip,
+                                    pec_pci_base + PNV9_XSCOM_PEC_PCI_STK0 +
+                                    0x40 * stack->stack_no,
+                                    &stack->phb_regs_mr);
+        }
+    }
+}
+
 static void pnv_chip_power9_realize(DeviceState *dev, Error **errp)
 {
     PnvChipClass *pcc = PNV_CHIP_GET_CLASS(dev);
@@ -1378,6 +1526,13 @@ static void pnv_chip_power9_realize(DeviceState *dev, Error **errp)
     /* Homer mmio region */
     memory_region_add_subregion(get_system_memory(), PNV9_HOMER_BASE(chip),
                                 &chip9->homer.regs);
+
+    /* PHBs */
+    pnv_chip_power9_phb_realize(chip, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
 }
 
 static uint32_t pnv_chip_power9_xscom_pcba(PnvChip *chip, uint64_t addr)
@@ -1404,6 +1559,7 @@ static void pnv_chip_power9_class_init(ObjectClass *klass, void *data)
     k->xscom_core_base = pnv_chip_power9_xscom_core_base;
     k->xscom_pcba = pnv_chip_power9_xscom_pcba;
     dc->desc = "PowerNV Chip POWER9";
+    k->num_phbs = 6;
 
     device_class_set_parent_realize(dc, pnv_chip_power9_realize,
                                     &k->parent_realize);
@@ -1533,6 +1689,7 @@ static void pnv_chip_core_realize(PnvChip *chip, Error **errp)
     PnvChipClass *pcc = PNV_CHIP_GET_CLASS(chip);
     const char *typename = pnv_chip_core_typename(chip);
     int i, core_hwid;
+    PnvMachineState *pnv = PNV_MACHINE(qdev_get_machine());
 
     if (!object_class_by_name(typename)) {
         error_setg(errp, "Unable to find PowerNV CPU Core '%s'", typename);
@@ -1571,6 +1728,8 @@ static void pnv_chip_core_realize(PnvChip *chip, Error **errp)
         object_property_set_int(OBJECT(pnv_core),
                                 pcc->core_pir(chip, core_hwid),
                                 "pir", &error_fatal);
+        object_property_set_int(OBJECT(pnv_core), pnv->fw_load_addr,
+                                "hrmor", &error_fatal);
         object_property_set_link(OBJECT(pnv_core), OBJECT(chip), "chip",
                                  &error_abort);
         object_property_set_bool(OBJECT(pnv_core), true, "realized",
@@ -1605,6 +1764,7 @@ static Property pnv_chip_properties[] = {
     DEFINE_PROP_UINT32("nr-cores", PnvChip, nr_cores, 1),
     DEFINE_PROP_UINT64("cores-mask", PnvChip, cores_mask, 0x0),
     DEFINE_PROP_UINT32("nr-threads", PnvChip, nr_threads, 1),
+    DEFINE_PROP_UINT32("num-phbs", PnvChip, num_phbs, 0),
     DEFINE_PROP_END_OF_LIST(),
 };
 
@@ -1638,14 +1798,23 @@ PowerPCCPU *pnv_chip_find_cpu(PnvChip *chip, uint32_t pir)
 static ICSState *pnv_ics_get(XICSFabric *xi, int irq)
 {
     PnvMachineState *pnv = PNV_MACHINE(xi);
-    int i;
+    int i, j;
 
     for (i = 0; i < pnv->num_chips; i++) {
+        PnvChip *chip = pnv->chips[i];
         Pnv8Chip *chip8 = PNV8_CHIP(pnv->chips[i]);
 
         if (ics_valid_irq(&chip8->psi.ics, irq)) {
             return &chip8->psi.ics;
         }
+        for (j = 0; j < chip->num_phbs; j++) {
+            if (ics_valid_irq(&chip8->phbs[j].lsis, irq)) {
+                return &chip8->phbs[j].lsis;
+            }
+            if (ics_valid_irq(ICS(&chip8->phbs[j].msis), irq)) {
+                return ICS(&chip8->phbs[j].msis);
+            }
+        }
     }
     return NULL;
 }
@@ -1653,11 +1822,17 @@ static ICSState *pnv_ics_get(XICSFabric *xi, int irq)
 static void pnv_ics_resend(XICSFabric *xi)
 {
     PnvMachineState *pnv = PNV_MACHINE(xi);
-    int i;
+    int i, j;
 
     for (i = 0; i < pnv->num_chips; i++) {
+        PnvChip *chip = pnv->chips[i];
         Pnv8Chip *chip8 = PNV8_CHIP(pnv->chips[i]);
+
         ics_resend(&chip8->psi.ics);
+        for (j = 0; j < chip->num_phbs; j++) {
+            ics_resend(&chip8->phbs[j].lsis);
+            ics_resend(ICS(&chip8->phbs[j].msis));
+        }
     }
 }
 
@@ -1767,6 +1942,22 @@ static void pnv_machine_power10_class_init(ObjectClass *oc, void *data)
     pmc->dt_power_mgt = pnv_dt_power_mgt;
 }
 
+static bool pnv_machine_get_hb(Object *obj, Error **errp)
+{
+    PnvMachineState *pnv = PNV_MACHINE(obj);
+
+    return !!pnv->fw_load_addr;
+}
+
+static void pnv_machine_set_hb(Object *obj, bool value, Error **errp)
+{
+    PnvMachineState *pnv = PNV_MACHINE(obj);
+
+    if (value) {
+        pnv->fw_load_addr = 0x8000000;
+    }
+}
+
 static void pnv_machine_class_init(ObjectClass *oc, void *data)
 {
     MachineClass *mc = MACHINE_CLASS(oc);
@@ -1786,6 +1977,13 @@ static void pnv_machine_class_init(ObjectClass *oc, void *data)
      */
     mc->default_ram_size = INITRD_LOAD_ADDR + INITRD_MAX_SIZE;
     ispc->print_info = pnv_pic_print_info;
+
+    object_class_property_add_bool(oc, "hb-mode",
+                                   pnv_machine_get_hb, pnv_machine_set_hb,
+                                   &error_abort);
+    object_class_property_set_description(oc, "hb-mode",
+                              "Use a hostboot like boot loader",
+                              NULL);
 }
 
 #define DEFINE_PNV8_CHIP_TYPE(type, class_initfn) \
diff --git a/hw/ppc/pnv_core.c b/hw/ppc/pnv_core.c
index 8ca5fbd1a9..234562040d 100644
--- a/hw/ppc/pnv_core.c
+++ b/hw/ppc/pnv_core.c
@@ -40,11 +40,11 @@ static const char *pnv_core_cpu_typename(PnvCore *pc)
     return cpu_type;
 }
 
-static void pnv_core_cpu_reset(PowerPCCPU *cpu, PnvChip *chip)
+static void pnv_core_cpu_reset(PnvCore *pc, PowerPCCPU *cpu)
 {
     CPUState *cs = CPU(cpu);
     CPUPPCState *env = &cpu->env;
-    PnvChipClass *pcc = PNV_CHIP_GET_CLASS(chip);
+    PnvChipClass *pcc = PNV_CHIP_GET_CLASS(pc->chip);
 
     cpu_reset(cs);
 
@@ -56,7 +56,9 @@ static void pnv_core_cpu_reset(PowerPCCPU *cpu, PnvChip *chip)
     env->nip = 0x10;
     env->msr |= MSR_HVB; /* Hypervisor mode */
 
-    pcc->intc_reset(chip, cpu);
+    env->spr[SPR_HRMOR] = pc->hrmor;
+
+    pcc->intc_reset(pc->chip, cpu);
 }
 
 /*
@@ -162,14 +164,14 @@ static const MemoryRegionOps pnv_core_power9_xscom_ops = {
     .endianness = DEVICE_BIG_ENDIAN,
 };
 
-static void pnv_core_cpu_realize(PowerPCCPU *cpu, PnvChip *chip, Error **errp)
+static void pnv_core_cpu_realize(PnvCore *pc, PowerPCCPU *cpu, Error **errp)
 {
     CPUPPCState *env = &cpu->env;
     int core_pir;
     int thread_index = 0; /* TODO: TCG supports only one thread */
     ppc_spr_t *pir = &env->spr_cb[SPR_PIR];
     Error *local_err = NULL;
-    PnvChipClass *pcc = PNV_CHIP_GET_CLASS(chip);
+    PnvChipClass *pcc = PNV_CHIP_GET_CLASS(pc->chip);
 
     object_property_set_bool(OBJECT(cpu), true, "realized", &local_err);
     if (local_err) {
@@ -177,13 +179,13 @@ static void pnv_core_cpu_realize(PowerPCCPU *cpu, PnvChip *chip, Error **errp)
         return;
     }
 
-    pcc->intc_create(chip, cpu, &local_err);
+    pcc->intc_create(pc->chip, cpu, &local_err);
     if (local_err) {
         error_propagate(errp, local_err);
         return;
     }
 
-    core_pir = object_property_get_uint(OBJECT(cpu), "core-pir", &error_abort);
+    core_pir = object_property_get_uint(OBJECT(pc), "pir", &error_abort);
 
     /*
      * The PIR of a thread is the core PIR + the thread index. We will
@@ -203,7 +205,7 @@ static void pnv_core_reset(void *dev)
     int i;
 
     for (i = 0; i < cc->nr_threads; i++) {
-        pnv_core_cpu_reset(pc->threads[i], pc->chip);
+        pnv_core_cpu_reset(pc, pc->threads[i]);
     }
 }
 
@@ -231,8 +233,6 @@ static void pnv_core_realize(DeviceState *dev, Error **errp)
 
         snprintf(name, sizeof(name), "thread[%d]", i);
         object_property_add_child(OBJECT(pc), name, obj, &error_abort);
-        object_property_add_alias(obj, "core-pir", OBJECT(pc),
-                                  "pir", &error_abort);
 
         cpu->machine_data = g_new0(PnvCPUState, 1);
 
@@ -240,7 +240,7 @@ static void pnv_core_realize(DeviceState *dev, Error **errp)
     }
 
     for (j = 0; j < cc->nr_threads; j++) {
-        pnv_core_cpu_realize(pc->threads[j], pc->chip, &local_err);
+        pnv_core_cpu_realize(pc, pc->threads[j], &local_err);
         if (local_err) {
             goto err;
         }
@@ -263,12 +263,12 @@ err:
     error_propagate(errp, local_err);
 }
 
-static void pnv_core_cpu_unrealize(PowerPCCPU *cpu, PnvChip *chip)
+static void pnv_core_cpu_unrealize(PnvCore *pc, PowerPCCPU *cpu)
 {
     PnvCPUState *pnv_cpu = pnv_cpu_state(cpu);
-    PnvChipClass *pcc = PNV_CHIP_GET_CLASS(chip);
+    PnvChipClass *pcc = PNV_CHIP_GET_CLASS(pc->chip);
 
-    pcc->intc_destroy(chip, cpu);
+    pcc->intc_destroy(pc->chip, cpu);
     cpu_remove_sync(CPU(cpu));
     cpu->machine_data = NULL;
     g_free(pnv_cpu);
@@ -284,13 +284,14 @@ static void pnv_core_unrealize(DeviceState *dev, Error **errp)
     qemu_unregister_reset(pnv_core_reset, pc);
 
     for (i = 0; i < cc->nr_threads; i++) {
-        pnv_core_cpu_unrealize(pc->threads[i], pc->chip);
+        pnv_core_cpu_unrealize(pc, pc->threads[i]);
     }
     g_free(pc->threads);
 }
 
 static Property pnv_core_properties[] = {
     DEFINE_PROP_UINT32("pir", PnvCore, pir, 0),
+    DEFINE_PROP_UINT64("hrmor", PnvCore, hrmor, 0),
     DEFINE_PROP_LINK("chip", PnvCore, chip, TYPE_PNV_CHIP, PnvChip *),
     DEFINE_PROP_END_OF_LIST(),
 };
@@ -324,6 +325,7 @@ static void pnv_core_class_init(ObjectClass *oc, void *data)
     dc->realize = pnv_core_realize;
     dc->unrealize = pnv_core_unrealize;
     device_class_set_props(dc, pnv_core_properties);
+    dc->user_creatable = false;
 }
 
 #define DEFINE_PNV_CORE_TYPE(family, cpu_model) \
@@ -422,6 +424,7 @@ static void pnv_quad_class_init(ObjectClass *oc, void *data)
 
     dc->realize = pnv_quad_realize;
     device_class_set_props(dc, pnv_quad_properties);
+    dc->user_creatable = false;
 }
 
 static const TypeInfo pnv_quad_info = {
diff --git a/hw/ppc/pnv_homer.c b/hw/ppc/pnv_homer.c
index 93ae42f7e4..9a262629b7 100644
--- a/hw/ppc/pnv_homer.c
+++ b/hw/ppc/pnv_homer.c
@@ -360,6 +360,7 @@ static void pnv_homer_class_init(ObjectClass *klass, void *data)
     dc->realize = pnv_homer_realize;
     dc->desc = "PowerNV HOMER Memory";
     device_class_set_props(dc, pnv_homer_properties);
+    dc->user_creatable = false;
 }
 
 static const TypeInfo pnv_homer_type_info = {
diff --git a/hw/ppc/pnv_lpc.c b/hw/ppc/pnv_lpc.c
index 22b205532b..5989d723c5 100644
--- a/hw/ppc/pnv_lpc.c
+++ b/hw/ppc/pnv_lpc.c
@@ -762,6 +762,7 @@ static void pnv_lpc_class_init(ObjectClass *klass, void *data)
     dc->realize = pnv_lpc_realize;
     dc->desc = "PowerNV LPC Controller";
     device_class_set_props(dc, pnv_lpc_properties);
+    dc->user_creatable = false;
 }
 
 static const TypeInfo pnv_lpc_info = {
@@ -825,6 +826,7 @@ ISABus *pnv_lpc_isa_create(PnvLpcController *lpc, bool use_cpld, Error **errp)
     qemu_irq *irqs;
     qemu_irq_handler handler;
     PnvMachineState *pnv = PNV_MACHINE(qdev_get_machine());
+    bool hostboot_mode = !!pnv->fw_load_addr;
 
     /* let isa_bus_new() create its own bridge on SysBus otherwise
      * devices speficied on the command line won't find the bus and
@@ -859,7 +861,9 @@ ISABus *pnv_lpc_isa_create(PnvLpcController *lpc, bool use_cpld, Error **errp)
      * Start disabled. The HIOMAP protocol will activate the mapping
      * with HIOMAP_C_CREATE_WRITE_WINDOW
      */
-    memory_region_set_enabled(&pnv->pnor->mmio, false);
+    if (!hostboot_mode) {
+        memory_region_set_enabled(&pnv->pnor->mmio, false);
+    }
 
     return isa_bus;
 }
diff --git a/hw/ppc/pnv_occ.c b/hw/ppc/pnv_occ.c
index 2173fac0e7..5a716c256e 100644
--- a/hw/ppc/pnv_occ.c
+++ b/hw/ppc/pnv_occ.c
@@ -280,6 +280,7 @@ static void pnv_occ_class_init(ObjectClass *klass, void *data)
     dc->realize = pnv_occ_realize;
     dc->desc = "PowerNV OCC Controller";
     device_class_set_props(dc, pnv_occ_properties);
+    dc->user_creatable = false;
 }
 
 static const TypeInfo pnv_occ_type_info = {
diff --git a/hw/ppc/pnv_pnor.c b/hw/ppc/pnv_pnor.c
index f761d8dc26..c365ee58b8 100644
--- a/hw/ppc/pnv_pnor.c
+++ b/hw/ppc/pnv_pnor.c
@@ -11,6 +11,7 @@
 #include "qapi/error.h"
 #include "qemu/error-report.h"
 #include "qemu/log.h"
+#include "qemu/units.h"
 #include "sysemu/block-backend.h"
 #include "sysemu/blockdev.h"
 #include "hw/loader.h"
@@ -46,7 +47,8 @@ static void pnv_pnor_update(PnvPnor *s, int offset, int size)
     ret = blk_pwrite(s->blk, offset, s->storage + offset,
                      offset_end - offset, 0);
     if (ret < 0) {
-        error_report("Could not update PNOR: %s", strerror(-ret));
+        error_report("Could not update PNOR offset=0x%" PRIx32" : %s", offset,
+                     strerror(-ret));
     }
 }
 
@@ -111,7 +113,7 @@ static void pnv_pnor_realize(DeviceState *dev, Error **errp)
 }
 
 static Property pnv_pnor_properties[] = {
-    DEFINE_PROP_INT64("size", PnvPnor, size, 128 << 20),
+    DEFINE_PROP_INT64("size", PnvPnor, size, 128 * MiB),
     DEFINE_PROP_DRIVE("drive", PnvPnor, blk),
     DEFINE_PROP_END_OF_LIST(),
 };
diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c
index 4c5fa29399..4a11fb1640 100644
--- a/hw/ppc/ppc.c
+++ b/hw/ppc/ppc.c
@@ -1490,24 +1490,6 @@ int ppc_dcr_init (CPUPPCState *env, int (*read_error)(int dcrn),
 }
 
 /*****************************************************************************/
-/* Debug port */
-void PPC_debug_write (void *opaque, uint32_t addr, uint32_t val)
-{
-    addr &= 0xF;
-    switch (addr) {
-    case 0:
-        printf("%c", val);
-        break;
-    case 1:
-        printf("\n");
-        fflush(stdout);
-        break;
-    case 2:
-        printf("Set loglevel to %04" PRIx32 "\n", val);
-        qemu_set_log(val | 0x100);
-        break;
-    }
-}
 
 int ppc_cpu_pir(PowerPCCPU *cpu)
 {
diff --git a/hw/ppc/prep.c b/hw/ppc/prep.c
index 862345c2ac..111cc80867 100644
--- a/hw/ppc/prep.c
+++ b/hw/ppc/prep.c
@@ -42,7 +42,7 @@
 #include "hw/loader.h"
 #include "hw/rtc/mc146818rtc.h"
 #include "hw/isa/pc87312.h"
-#include "hw/net/ne2000-isa.h"
+#include "hw/qdev-properties.h"
 #include "sysemu/arch_init.h"
 #include "sysemu/kvm.h"
 #include "sysemu/qtest.h"
@@ -60,178 +60,9 @@
 
 #define CFG_ADDR 0xf0000510
 
-#define BIOS_SIZE (1 * MiB)
-#define BIOS_FILENAME "ppc_rom.bin"
 #define KERNEL_LOAD_ADDR 0x01000000
 #define INITRD_LOAD_ADDR 0x01800000
 
-/* Constants for devices init */
-static const int ide_iobase[2] = { 0x1f0, 0x170 };
-static const int ide_iobase2[2] = { 0x3f6, 0x376 };
-static const int ide_irq[2] = { 13, 13 };
-
-#define NE2000_NB_MAX 6
-
-static uint32_t ne2000_io[NE2000_NB_MAX] = { 0x300, 0x320, 0x340, 0x360, 0x280, 0x380 };
-static int ne2000_irq[NE2000_NB_MAX] = { 9, 10, 11, 3, 4, 5 };
-
-/* ISA IO ports bridge */
-#define PPC_IO_BASE 0x80000000
-
-/* Fake super-io ports for PREP platform (Intel 82378ZB) */
-typedef struct sysctrl_t {
-    qemu_irq reset_irq;
-    Nvram *nvram;
-    uint8_t state;
-    uint8_t syscontrol;
-    int contiguous_map;
-    qemu_irq contiguous_map_irq;
-    int endian;
-} sysctrl_t;
-
-enum {
-    STATE_HARDFILE = 0x01,
-};
-
-static sysctrl_t *sysctrl;
-
-static void PREP_io_800_writeb (void *opaque, uint32_t addr, uint32_t val)
-{
-    sysctrl_t *sysctrl = opaque;
-
-    trace_prep_io_800_writeb(addr - PPC_IO_BASE, val);
-    switch (addr) {
-    case 0x0092:
-        /* Special port 92 */
-        /* Check soft reset asked */
-        if (val & 0x01) {
-            qemu_irq_raise(sysctrl->reset_irq);
-        } else {
-            qemu_irq_lower(sysctrl->reset_irq);
-        }
-        /* Check LE mode */
-        if (val & 0x02) {
-            sysctrl->endian = 1;
-        } else {
-            sysctrl->endian = 0;
-        }
-        break;
-    case 0x0800:
-        /* Motorola CPU configuration register : read-only */
-        break;
-    case 0x0802:
-        /* Motorola base module feature register : read-only */
-        break;
-    case 0x0803:
-        /* Motorola base module status register : read-only */
-        break;
-    case 0x0808:
-        /* Hardfile light register */
-        if (val & 1)
-            sysctrl->state |= STATE_HARDFILE;
-        else
-            sysctrl->state &= ~STATE_HARDFILE;
-        break;
-    case 0x0810:
-        /* Password protect 1 register */
-        if (sysctrl->nvram != NULL) {
-            NvramClass *k = NVRAM_GET_CLASS(sysctrl->nvram);
-            (k->toggle_lock)(sysctrl->nvram, 1);
-        }
-        break;
-    case 0x0812:
-        /* Password protect 2 register */
-        if (sysctrl->nvram != NULL) {
-            NvramClass *k = NVRAM_GET_CLASS(sysctrl->nvram);
-            (k->toggle_lock)(sysctrl->nvram, 2);
-        }
-        break;
-    case 0x0814:
-        /* L2 invalidate register */
-        //        tlb_flush(first_cpu, 1);
-        break;
-    case 0x081C:
-        /* system control register */
-        sysctrl->syscontrol = val & 0x0F;
-        break;
-    case 0x0850:
-        /* I/O map type register */
-        sysctrl->contiguous_map = val & 0x01;
-        qemu_set_irq(sysctrl->contiguous_map_irq, sysctrl->contiguous_map);
-        break;
-    default:
-        printf("ERROR: unaffected IO port write: %04" PRIx32
-               " => %02" PRIx32"\n", addr, val);
-        break;
-    }
-}
-
-static uint32_t PREP_io_800_readb (void *opaque, uint32_t addr)
-{
-    sysctrl_t *sysctrl = opaque;
-    uint32_t retval = 0xFF;
-
-    switch (addr) {
-    case 0x0092:
-        /* Special port 92 */
-        retval = sysctrl->endian << 1;
-        break;
-    case 0x0800:
-        /* Motorola CPU configuration register */
-        retval = 0xEF; /* MPC750 */
-        break;
-    case 0x0802:
-        /* Motorola Base module feature register */
-        retval = 0xAD; /* No ESCC, PMC slot neither ethernet */
-        break;
-    case 0x0803:
-        /* Motorola base module status register */
-        retval = 0xE0; /* Standard MPC750 */
-        break;
-    case 0x080C:
-        /* Equipment present register:
-         *  no L2 cache
-         *  no upgrade processor
-         *  no cards in PCI slots
-         *  SCSI fuse is bad
-         */
-        retval = 0x3C;
-        break;
-    case 0x0810:
-        /* Motorola base module extended feature register */
-        retval = 0x39; /* No USB, CF and PCI bridge. NVRAM present */
-        break;
-    case 0x0814:
-        /* L2 invalidate: don't care */
-        break;
-    case 0x0818:
-        /* Keylock */
-        retval = 0x00;
-        break;
-    case 0x081C:
-        /* system control register
-         * 7 - 6 / 1 - 0: L2 cache enable
-         */
-        retval = sysctrl->syscontrol;
-        break;
-    case 0x0823:
-        /* */
-        retval = 0x03; /* no L2 cache */
-        break;
-    case 0x0850:
-        /* I/O map type register */
-        retval = sysctrl->contiguous_map;
-        break;
-    default:
-        printf("ERROR: unaffected IO port: %04" PRIx32 " read\n", addr);
-        break;
-    }
-    trace_prep_io_800_readb(addr - PPC_IO_BASE, retval);
-
-    return retval;
-}
-
-
 #define NVRAM_SIZE        0x2000
 
 static void fw_cfg_boot_set(void *opaque, const char *boot_device,
@@ -247,17 +78,6 @@ static void ppc_prep_reset(void *opaque)
     cpu_reset(CPU(cpu));
 }
 
-static const MemoryRegionPortio prep_portio_list[] = {
-    /* System control ports */
-    { 0x0092, 1, 1, .read = PREP_io_800_readb, .write = PREP_io_800_writeb, },
-    { 0x0800, 0x52, 1,
-      .read = PREP_io_800_readb, .write = PREP_io_800_writeb, },
-    /* Special port to get debug messages from Open-Firmware */
-    { 0x0F00, 4, 1, .write = PPC_debug_write, },
-    PORTIO_END_OF_LIST(),
-};
-
-static PortioList prep_port_list;
 
 /*****************************************************************************/
 /* NVRAM helpers */
@@ -397,207 +217,6 @@ static int PPC_NVRAM_set_params (Nvram *nvram, uint16_t NVRAM_size,
     return 0;
 }
 
-/* PowerPC PREP hardware initialisation */
-static void ppc_prep_init(MachineState *machine)
-{
-    ram_addr_t ram_size = machine->ram_size;
-    const char *kernel_filename = machine->kernel_filename;
-    const char *kernel_cmdline = machine->kernel_cmdline;
-    const char *initrd_filename = machine->initrd_filename;
-    const char *boot_device = machine->boot_order;
-    MemoryRegion *sysmem = get_system_memory();
-    PowerPCCPU *cpu = NULL;
-    CPUPPCState *env = NULL;
-    Nvram *m48t59;
-#if 0
-    MemoryRegion *xcsr = g_new(MemoryRegion, 1);
-#endif
-    int linux_boot, i, nb_nics1;
-    MemoryRegion *ram = g_new(MemoryRegion, 1);
-    uint32_t kernel_base, initrd_base;
-    long kernel_size, initrd_size;
-    DeviceState *dev;
-    PCIHostState *pcihost;
-    PCIBus *pci_bus;
-    PCIDevice *pci;
-    ISABus *isa_bus;
-    ISADevice *isa;
-    int ppc_boot_device;
-    DriveInfo *hd[MAX_IDE_BUS * MAX_IDE_DEVS];
-
-    sysctrl = g_malloc0(sizeof(sysctrl_t));
-
-    linux_boot = (kernel_filename != NULL);
-
-    /* init CPUs */
-    for (i = 0; i < machine->smp.cpus; i++) {
-        cpu = POWERPC_CPU(cpu_create(machine->cpu_type));
-        env = &cpu->env;
-
-        if (env->flags & POWERPC_FLAG_RTC_CLK) {
-            /* POWER / PowerPC 601 RTC clock frequency is 7.8125 MHz */
-            cpu_ppc_tb_init(env, 7812500UL);
-        } else {
-            /* Set time-base frequency to 100 Mhz */
-            cpu_ppc_tb_init(env, 100UL * 1000UL * 1000UL);
-        }
-        qemu_register_reset(ppc_prep_reset, cpu);
-    }
-
-    /* allocate RAM */
-    memory_region_allocate_system_memory(ram, NULL, "ppc_prep.ram", ram_size);
-    memory_region_add_subregion(sysmem, 0, ram);
-
-    if (linux_boot) {
-        kernel_base = KERNEL_LOAD_ADDR;
-        /* now we can load the kernel */
-        kernel_size = load_image_targphys(kernel_filename, kernel_base,
-                                          ram_size - kernel_base);
-        if (kernel_size < 0) {
-            error_report("could not load kernel '%s'", kernel_filename);
-            exit(1);
-        }
-        /* load initrd */
-        if (initrd_filename) {
-            initrd_base = INITRD_LOAD_ADDR;
-            initrd_size = load_image_targphys(initrd_filename, initrd_base,
-                                              ram_size - initrd_base);
-            if (initrd_size < 0) {
-                error_report("could not load initial ram disk '%s'",
-                             initrd_filename);
-                exit(1);
-            }
-        } else {
-            initrd_base = 0;
-            initrd_size = 0;
-        }
-        ppc_boot_device = 'm';
-    } else {
-        kernel_base = 0;
-        kernel_size = 0;
-        initrd_base = 0;
-        initrd_size = 0;
-        ppc_boot_device = '\0';
-        /* For now, OHW cannot boot from the network. */
-        for (i = 0; boot_device[i] != '\0'; i++) {
-            if (boot_device[i] >= 'a' && boot_device[i] <= 'f') {
-                ppc_boot_device = boot_device[i];
-                break;
-            }
-        }
-        if (ppc_boot_device == '\0') {
-            error_report("No valid boot device for Mac99 machine");
-            exit(1);
-        }
-    }
-
-    if (PPC_INPUT(env) != PPC_FLAGS_INPUT_6xx) {
-        error_report("Only 6xx bus is supported on PREP machine");
-        exit(1);
-    }
-
-    dev = qdev_create(NULL, "raven-pcihost");
-    if (bios_name == NULL) {
-        bios_name = BIOS_FILENAME;
-    }
-    qdev_prop_set_string(dev, "bios-name", bios_name);
-    qdev_prop_set_uint32(dev, "elf-machine", PPC_ELF_MACHINE);
-    qdev_prop_set_bit(dev, "is-legacy-prep", true);
-    pcihost = PCI_HOST_BRIDGE(dev);
-    object_property_add_child(qdev_get_machine(), "raven", OBJECT(dev), NULL);
-    qdev_init_nofail(dev);
-    pci_bus = (PCIBus *)qdev_get_child_bus(dev, "pci.0");
-    if (pci_bus == NULL) {
-        error_report("Couldn't create PCI host controller");
-        exit(1);
-    }
-    sysctrl->contiguous_map_irq = qdev_get_gpio_in(dev, 0);
-
-    /* PCI -> ISA bridge */
-    pci = pci_create_simple(pci_bus, PCI_DEVFN(1, 0), "i82378");
-    cpu = POWERPC_CPU(first_cpu);
-    qdev_connect_gpio_out(&pci->qdev, 0,
-                          cpu->env.irq_inputs[PPC6xx_INPUT_INT]);
-    sysbus_connect_irq(&pcihost->busdev, 0, qdev_get_gpio_in(&pci->qdev, 9));
-    sysbus_connect_irq(&pcihost->busdev, 1, qdev_get_gpio_in(&pci->qdev, 11));
-    sysbus_connect_irq(&pcihost->busdev, 2, qdev_get_gpio_in(&pci->qdev, 9));
-    sysbus_connect_irq(&pcihost->busdev, 3, qdev_get_gpio_in(&pci->qdev, 11));
-    isa_bus = ISA_BUS(qdev_get_child_bus(DEVICE(pci), "isa.0"));
-
-    /* Super I/O (parallel + serial ports) */
-    isa = isa_create(isa_bus, TYPE_PC87312_SUPERIO);
-    dev = DEVICE(isa);
-    qdev_prop_set_uint8(dev, "config", 13); /* fdc, ser0, ser1, par0 */
-    qdev_init_nofail(dev);
-
-    /* init basic PC hardware */
-    pci_vga_init(pci_bus);
-
-    nb_nics1 = nb_nics;
-    if (nb_nics1 > NE2000_NB_MAX)
-        nb_nics1 = NE2000_NB_MAX;
-    for(i = 0; i < nb_nics1; i++) {
-        if (nd_table[i].model == NULL) {
-            nd_table[i].model = g_strdup("ne2k_isa");
-        }
-        if (strcmp(nd_table[i].model, "ne2k_isa") == 0) {
-            isa_ne2000_init(isa_bus, ne2000_io[i], ne2000_irq[i],
-                            &nd_table[i]);
-        } else {
-            pci_nic_init_nofail(&nd_table[i], pci_bus, "ne2k_pci", NULL);
-        }
-    }
-
-    ide_drive_get(hd, ARRAY_SIZE(hd));
-    for(i = 0; i < MAX_IDE_BUS; i++) {
-        isa_ide_init(isa_bus, ide_iobase[i], ide_iobase2[i], ide_irq[i],
-                     hd[2 * i],
-                     hd[2 * i + 1]);
-    }
-
-    cpu = POWERPC_CPU(first_cpu);
-    sysctrl->reset_irq = cpu->env.irq_inputs[PPC6xx_INPUT_HRESET];
-
-    portio_list_init(&prep_port_list, NULL, prep_portio_list, sysctrl, "prep");
-    portio_list_add(&prep_port_list, isa_address_space_io(isa), 0x0);
-
-    /*
-     * PowerPC control and status register group: unimplemented,
-     * would be at address 0xFEFF0000.
-     */
-
-    if (machine_usb(machine)) {
-        pci_create_simple(pci_bus, -1, "pci-ohci");
-    }
-
-    m48t59 = m48t59_init_isa(isa_bus, 0x0074, NVRAM_SIZE, 2000, 59);
-    if (m48t59 == NULL)
-        return;
-    sysctrl->nvram = m48t59;
-
-    /* Initialise NVRAM */
-    PPC_NVRAM_set_params(m48t59, NVRAM_SIZE, "PREP", ram_size,
-                         ppc_boot_device,
-                         kernel_base, kernel_size,
-                         kernel_cmdline,
-                         initrd_base, initrd_size,
-                         /* XXX: need an option to load a NVRAM image */
-                         0,
-                         graphic_width, graphic_height, graphic_depth);
-}
-
-static void prep_machine_init(MachineClass *mc)
-{
-    mc->deprecation_reason = "use 40p machine type instead";
-    mc->desc = "PowerPC PREP platform";
-    mc->init = ppc_prep_init;
-    mc->block_default_type = IF_IDE;
-    mc->max_cpus = MAX_CPUS;
-    mc->default_boot_order = "cad";
-    mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("602");
-    mc->default_display = "std";
-}
-
 static int prep_set_cmos_checksum(DeviceState *dev, void *opaque)
 {
     uint16_t checksum = *(uint16_t *)opaque;
@@ -821,4 +440,3 @@ static void ibm_40p_machine_init(MachineClass *mc)
 }
 
 DEFINE_MACHINE("40p", ibm_40p_machine_init)
-DEFINE_MACHINE("prep", prep_machine_init)
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index a0076e5fbd..c9b2e0a5e0 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -46,6 +46,7 @@
 #include "migration/qemu-file-types.h"
 #include "migration/global_state.h"
 #include "migration/register.h"
+#include "migration/blocker.h"
 #include "mmu-hash64.h"
 #include "mmu-book3s-v3.h"
 #include "cpu-models.h"
@@ -1677,6 +1678,14 @@ static void spapr_machine_reset(MachineState *machine)
     first_ppc_cpu->env.gpr[5] = 0;
 
     spapr->cas_reboot = false;
+
+    spapr->mc_status = -1;
+    spapr->guest_machine_check_addr = -1;
+
+    /* Signal all vCPUs waiting on this condition */
+    qemu_cond_broadcast(&spapr->mc_delivery_cond);
+
+    migrate_del_blocker(spapr->fwnmi_migration_blocker);
 }
 
 static void spapr_create_nvram(SpaprMachineState *spapr)
@@ -1959,6 +1968,42 @@ static const VMStateDescription vmstate_spapr_dtb = {
     },
 };
 
+static bool spapr_fwnmi_needed(void *opaque)
+{
+    SpaprMachineState *spapr = (SpaprMachineState *)opaque;
+
+    return spapr->guest_machine_check_addr != -1;
+}
+
+static int spapr_fwnmi_pre_save(void *opaque)
+{
+    SpaprMachineState *spapr = (SpaprMachineState *)opaque;
+
+    /*
+     * Check if machine check handling is in progress and print a
+     * warning message.
+     */
+    if (spapr->mc_status != -1) {
+        warn_report("A machine check is being handled during migration. The"
+                "handler may run and log hardware error on the destination");
+    }
+
+    return 0;
+}
+
+static const VMStateDescription vmstate_spapr_machine_check = {
+    .name = "spapr_machine_check",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = spapr_fwnmi_needed,
+    .pre_save = spapr_fwnmi_pre_save,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT64(guest_machine_check_addr, SpaprMachineState),
+        VMSTATE_INT32(mc_status, SpaprMachineState),
+        VMSTATE_END_OF_LIST()
+    },
+};
+
 static const VMStateDescription vmstate_spapr = {
     .name = "spapr",
     .version_id = 3,
@@ -1992,6 +2037,8 @@ static const VMStateDescription vmstate_spapr = {
         &vmstate_spapr_dtb,
         &vmstate_spapr_cap_large_decr,
         &vmstate_spapr_cap_ccf_assist,
+        &vmstate_spapr_cap_fwnmi,
+        &vmstate_spapr_machine_check,
         NULL
     }
 };
@@ -2807,6 +2854,13 @@ static void spapr_machine_init(MachineState *machine)
         spapr_create_lmb_dr_connectors(spapr);
     }
 
+    if (spapr_get_cap(spapr, SPAPR_CAP_FWNMI_MCE) == SPAPR_CAP_ON) {
+        /* Create the error string for live migration blocker */
+        error_setg(&spapr->fwnmi_migration_blocker,
+            "A machine check is being handled during migration. The handler"
+            "may run and log hardware error on the destination");
+    }
+
     /* Set up RTAS event infrastructure */
     spapr_events_init(spapr);
 
@@ -2970,6 +3024,8 @@ static void spapr_machine_init(MachineState *machine)
 
         kvmppc_spapr_enable_inkernel_multitce();
     }
+
+    qemu_cond_init(&spapr->mc_delivery_cond);
 }
 
 static int spapr_kvm_type(MachineState *machine, const char *vm_type)
@@ -4397,7 +4453,8 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
     smc->default_caps.caps[SPAPR_CAP_HPT_MAXPAGESIZE] = 16; /* 64kiB */
     smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] = SPAPR_CAP_OFF;
     smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON;
-    smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF;
+    smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_ON;
+    smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_ON;
     spapr_caps_add_properties(smc, &error_abort);
     smc->irq = &spapr_irq_dual;
     smc->dr_phb_enabled = true;
@@ -4465,8 +4522,12 @@ DEFINE_SPAPR_MACHINE(5_0, "5.0", true);
  */
 static void spapr_machine_4_2_class_options(MachineClass *mc)
 {
+    SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
+
     spapr_machine_5_0_class_options(mc);
     compat_props_add(mc->compat_props, hw_compat_4_2, hw_compat_4_2_len);
+    smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF;
+    smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_OFF;
 }
 
 DEFINE_SPAPR_MACHINE(4_2, "4.2", false);
diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
index 481dfd2a27..8b27d3ac09 100644
--- a/hw/ppc/spapr_caps.c
+++ b/hw/ppc/spapr_caps.c
@@ -485,17 +485,48 @@ static void cap_ccf_assist_apply(SpaprMachineState *spapr, uint8_t val,
     uint8_t kvm_val = kvmppc_get_cap_count_cache_flush_assist();
 
     if (tcg_enabled() && val) {
-        /* TODO - for now only allow broken for TCG */
-        error_setg(errp,
-"Requested count cache flush assist capability level not supported by tcg,"
-                   " try appending -machine cap-ccf-assist=off");
+        /* TCG doesn't implement anything here, but allow with a warning */
+        warn_report("TCG doesn't support requested feature, cap-ccf-assist=on");
     } else if (kvm_enabled() && (val > kvm_val)) {
+        uint8_t kvm_ibs = kvmppc_get_cap_safe_indirect_branch();
+
+        if (kvm_ibs == SPAPR_CAP_FIXED_CCD) {
+            /*
+             * If we don't have CCF assist on the host, the assist
+             * instruction is a harmless no-op.  It won't correctly
+             * implement the cache count flush *but* if we have
+             * count-cache-disabled in the host, that flush is
+             * unnnecessary.  So, specifically allow this case.  This
+             * allows us to have better performance on POWER9 DD2.3,
+             * while still working on POWER9 DD2.2 and POWER8 host
+             * cpus.
+             */
+            return;
+        }
         error_setg(errp,
 "Requested count cache flush assist capability level not supported by kvm,"
                    " try appending -machine cap-ccf-assist=off");
     }
 }
 
+static void cap_fwnmi_mce_apply(SpaprMachineState *spapr, uint8_t val,
+                                Error **errp)
+{
+    if (!val) {
+        return; /* Disabled by default */
+    }
+
+    if (tcg_enabled()) {
+        warn_report("Firmware Assisted Non-Maskable Interrupts(FWNMI) not "
+                    "supported in TCG");
+    } else if (kvm_enabled()) {
+        if (kvmppc_set_fwnmi() < 0) {
+            error_setg(errp, "Firmware Assisted Non-Maskable Interrupts(FWNMI) "
+                             "not supported by KVM");
+        }
+    }
+}
+
 SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
     [SPAPR_CAP_HTM] = {
         .name = "htm",
@@ -595,6 +626,15 @@ SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
         .type = "bool",
         .apply = cap_ccf_assist_apply,
     },
+    [SPAPR_CAP_FWNMI_MCE] = {
+        .name = "fwnmi-mce",
+        .description = "Handle fwnmi machine check exceptions",
+        .index = SPAPR_CAP_FWNMI_MCE,
+        .get = spapr_cap_get_bool,
+        .set = spapr_cap_set_bool,
+        .type = "bool",
+        .apply = cap_fwnmi_mce_apply,
+    },
 };
 
 static SpaprCapabilities default_caps_with_cpu(SpaprMachineState *spapr,
@@ -734,6 +774,7 @@ SPAPR_CAP_MIG_STATE(hpt_maxpagesize, SPAPR_CAP_HPT_MAXPAGESIZE);
 SPAPR_CAP_MIG_STATE(nested_kvm_hv, SPAPR_CAP_NESTED_KVM_HV);
 SPAPR_CAP_MIG_STATE(large_decr, SPAPR_CAP_LARGE_DECREMENTER);
 SPAPR_CAP_MIG_STATE(ccf_assist, SPAPR_CAP_CCF_ASSIST);
+SPAPR_CAP_MIG_STATE(fwnmi, SPAPR_CAP_FWNMI_MCE);
 
 void spapr_caps_init(SpaprMachineState *spapr)
 {
diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
index e355e000d0..884e455f02 100644
--- a/hw/ppc/spapr_events.c
+++ b/hw/ppc/spapr_events.c
@@ -40,8 +40,10 @@
 #include "hw/ppc/spapr_drc.h"
 #include "qemu/help_option.h"
 #include "qemu/bcd.h"
+#include "qemu/main-loop.h"
 #include "hw/ppc/spapr_ovec.h"
 #include <libfdt.h>
+#include "migration/blocker.h"
 
 #define RTAS_LOG_VERSION_MASK                   0xff000000
 #define   RTAS_LOG_VERSION_6                    0x06000000
@@ -213,6 +215,104 @@ struct hp_extended_log {
     struct rtas_event_log_v6_hp hp;
 } QEMU_PACKED;
 
+struct rtas_event_log_v6_mc {
+#define RTAS_LOG_V6_SECTION_ID_MC                   0x4D43 /* MC */
+    struct rtas_event_log_v6_section_header hdr;
+    uint32_t fru_id;
+    uint32_t proc_id;
+    uint8_t error_type;
+#define RTAS_LOG_V6_MC_TYPE_UE                           0
+#define RTAS_LOG_V6_MC_TYPE_SLB                          1
+#define RTAS_LOG_V6_MC_TYPE_ERAT                         2
+#define RTAS_LOG_V6_MC_TYPE_TLB                          4
+#define RTAS_LOG_V6_MC_TYPE_D_CACHE                      5
+#define RTAS_LOG_V6_MC_TYPE_I_CACHE                      7
+    uint8_t sub_err_type;
+#define RTAS_LOG_V6_MC_UE_INDETERMINATE                  0
+#define RTAS_LOG_V6_MC_UE_IFETCH                         1
+#define RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_IFETCH         2
+#define RTAS_LOG_V6_MC_UE_LOAD_STORE                     3
+#define RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_LOAD_STORE     4
+#define RTAS_LOG_V6_MC_SLB_PARITY                        0
+#define RTAS_LOG_V6_MC_SLB_MULTIHIT                      1
+#define RTAS_LOG_V6_MC_SLB_INDETERMINATE                 2
+#define RTAS_LOG_V6_MC_ERAT_PARITY                       1
+#define RTAS_LOG_V6_MC_ERAT_MULTIHIT                     2
+#define RTAS_LOG_V6_MC_ERAT_INDETERMINATE                3
+#define RTAS_LOG_V6_MC_TLB_PARITY                        1
+#define RTAS_LOG_V6_MC_TLB_MULTIHIT                      2
+#define RTAS_LOG_V6_MC_TLB_INDETERMINATE                 3
+    uint8_t reserved_1[6];
+    uint64_t effective_address;
+    uint64_t logical_address;
+} QEMU_PACKED;
+
+struct mc_extended_log {
+    struct rtas_event_log_v6 v6hdr;
+    struct rtas_event_log_v6_mc mc;
+} QEMU_PACKED;
+
+struct MC_ierror_table {
+    unsigned long srr1_mask;
+    unsigned long srr1_value;
+    bool nip_valid; /* nip is a valid indicator of faulting address */
+    uint8_t error_type;
+    uint8_t error_subtype;
+    unsigned int initiator;
+    unsigned int severity;
+};
+
+static const struct MC_ierror_table mc_ierror_table[] = {
+{ 0x00000000081c0000, 0x0000000000040000, true,
+  RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_IFETCH,
+  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x00000000081c0000, 0x0000000000080000, true,
+  RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_PARITY,
+  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x00000000081c0000, 0x00000000000c0000, true,
+  RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_MULTIHIT,
+  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x00000000081c0000, 0x0000000000100000, true,
+  RTAS_LOG_V6_MC_TYPE_ERAT, RTAS_LOG_V6_MC_ERAT_MULTIHIT,
+  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x00000000081c0000, 0x0000000000140000, true,
+  RTAS_LOG_V6_MC_TYPE_TLB, RTAS_LOG_V6_MC_TLB_MULTIHIT,
+  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x00000000081c0000, 0x0000000000180000, true,
+  RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_IFETCH,
+  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, } };
+
+struct MC_derror_table {
+    unsigned long dsisr_value;
+    bool dar_valid; /* dar is a valid indicator of faulting address */
+    uint8_t error_type;
+    uint8_t error_subtype;
+    unsigned int initiator;
+    unsigned int severity;
+};
+
+static const struct MC_derror_table mc_derror_table[] = {
+{ 0x00008000, false,
+  RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_LOAD_STORE,
+  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x00004000, true,
+  RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_LOAD_STORE,
+  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x00000800, true,
+  RTAS_LOG_V6_MC_TYPE_ERAT, RTAS_LOG_V6_MC_ERAT_MULTIHIT,
+  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x00000400, true,
+  RTAS_LOG_V6_MC_TYPE_TLB, RTAS_LOG_V6_MC_TLB_MULTIHIT,
+  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x00000080, true,
+  RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_MULTIHIT,  /* Before PARITY */
+  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x00000100, true,
+  RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_PARITY,
+  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, } };
+
+#define SRR1_MC_LOADSTORE(srr1) ((srr1) & PPC_BIT(42))
+
 typedef enum EventClass {
     EVENT_CLASS_INTERNAL_ERRORS     = 0,
     EVENT_CLASS_EPOW                = 1,
@@ -622,6 +722,175 @@ void spapr_hotplug_req_remove_by_count_indexed(SpaprDrcType drc_type,
                             RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, &drc_id);
 }
 
+static uint32_t spapr_mce_get_elog_type(PowerPCCPU *cpu, bool recovered,
+                                        struct mc_extended_log *ext_elog)
+{
+    int i;
+    CPUPPCState *env = &cpu->env;
+    uint32_t summary;
+    uint64_t dsisr = env->spr[SPR_DSISR];
+
+    summary = RTAS_LOG_VERSION_6 | RTAS_LOG_OPTIONAL_PART_PRESENT;
+    if (recovered) {
+        summary |= RTAS_LOG_DISPOSITION_FULLY_RECOVERED;
+    } else {
+        summary |= RTAS_LOG_DISPOSITION_NOT_RECOVERED;
+    }
+
+    if (SRR1_MC_LOADSTORE(env->spr[SPR_SRR1])) {
+        for (i = 0; i < ARRAY_SIZE(mc_derror_table); i++) {
+            if (!(dsisr & mc_derror_table[i].dsisr_value)) {
+                continue;
+            }
+
+            ext_elog->mc.error_type = mc_derror_table[i].error_type;
+            ext_elog->mc.sub_err_type = mc_derror_table[i].error_subtype;
+            if (mc_derror_table[i].dar_valid) {
+                ext_elog->mc.effective_address = cpu_to_be64(env->spr[SPR_DAR]);
+            }
+
+            summary |= mc_derror_table[i].initiator
+                        | mc_derror_table[i].severity;
+
+            return summary;
+        }
+    } else {
+        for (i = 0; i < ARRAY_SIZE(mc_ierror_table); i++) {
+            if ((env->spr[SPR_SRR1] & mc_ierror_table[i].srr1_mask) !=
+                    mc_ierror_table[i].srr1_value) {
+                continue;
+            }
+
+            ext_elog->mc.error_type = mc_ierror_table[i].error_type;
+            ext_elog->mc.sub_err_type = mc_ierror_table[i].error_subtype;
+            if (mc_ierror_table[i].nip_valid) {
+                ext_elog->mc.effective_address = cpu_to_be64(env->nip);
+            }
+
+            summary |= mc_ierror_table[i].initiator
+                        | mc_ierror_table[i].severity;
+
+            return summary;
+        }
+    }
+
+    summary |= RTAS_LOG_INITIATOR_CPU;
+    return summary;
+}
+
+static void spapr_mce_dispatch_elog(PowerPCCPU *cpu, bool recovered)
+{
+    SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
+    CPUState *cs = CPU(cpu);
+    uint64_t rtas_addr;
+    CPUPPCState *env = &cpu->env;
+    PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
+    target_ulong msr = 0;
+    struct rtas_error_log log;
+    struct mc_extended_log *ext_elog;
+    uint32_t summary;
+
+    /*
+     * Properly set bits in MSR before we invoke the handler.
+     * SRR0/1, DAR and DSISR are properly set by KVM
+     */
+    if (!(*pcc->interrupts_big_endian)(cpu)) {
+        msr |= (1ULL << MSR_LE);
+    }
+
+    if (env->msr & (1ULL << MSR_SF)) {
+        msr |= (1ULL << MSR_SF);
+    }
+
+    msr |= (1ULL << MSR_ME);
+
+    ext_elog = g_malloc0(sizeof(*ext_elog));
+    summary = spapr_mce_get_elog_type(cpu, recovered, ext_elog);
+
+    log.summary = cpu_to_be32(summary);
+    log.extended_length = cpu_to_be32(sizeof(*ext_elog));
+
+    spapr_init_v6hdr(&ext_elog->v6hdr);
+    ext_elog->mc.hdr.section_id = cpu_to_be16(RTAS_LOG_V6_SECTION_ID_MC);
+    ext_elog->mc.hdr.section_length =
+                    cpu_to_be16(sizeof(struct rtas_event_log_v6_mc));
+    ext_elog->mc.hdr.section_version = 1;
+
+    /* get rtas addr from fdt */
+    rtas_addr = spapr_get_rtas_addr();
+    if (!rtas_addr) {
+        /* Unable to fetch rtas_addr. Hence reset the guest */
+        ppc_cpu_do_system_reset(cs);
+        g_free(ext_elog);
+        return;
+    }
+
+    stq_be_phys(&address_space_memory, rtas_addr + RTAS_ERROR_LOG_OFFSET,
+                env->gpr[3]);
+    cpu_physical_memory_write(rtas_addr + RTAS_ERROR_LOG_OFFSET +
+                              sizeof(env->gpr[3]), &log, sizeof(log));
+    cpu_physical_memory_write(rtas_addr + RTAS_ERROR_LOG_OFFSET +
+                              sizeof(env->gpr[3]) + sizeof(log), ext_elog,
+                              sizeof(*ext_elog));
+
+    env->gpr[3] = rtas_addr + RTAS_ERROR_LOG_OFFSET;
+    env->msr = msr;
+    env->nip = spapr->guest_machine_check_addr;
+
+    g_free(ext_elog);
+}
+
+void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered)
+{
+    SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
+    CPUState *cs = CPU(cpu);
+    int ret;
+    Error *local_err = NULL;
+
+    if (spapr->guest_machine_check_addr == -1) {
+        /*
+         * This implies that we have hit a machine check either when the
+         * guest has not registered FWNMI (i.e., "ibm,nmi-register" not
+         * called) or between system reset and "ibm,nmi-register".
+         * Fall back to the old machine check behavior in such cases.
+         */
+        cs->exception_index = POWERPC_EXCP_MCHECK;
+        ppc_cpu_do_interrupt(cs);
+        return;
+    }
+
+    while (spapr->mc_status != -1) {
+        /*
+         * Check whether the same CPU got machine check error
+         * while still handling the mc error (i.e., before
+         * that CPU called "ibm,nmi-interlock")
+         */
+        if (spapr->mc_status == cpu->vcpu_id) {
+            qemu_system_guest_panicked(NULL);
+            return;
+        }
+        qemu_cond_wait_iothread(&spapr->mc_delivery_cond);
+        /* Meanwhile if the system is reset, then just return */
+        if (spapr->guest_machine_check_addr == -1) {
+            return;
+        }
+    }
+
+    ret = migrate_add_blocker(spapr->fwnmi_migration_blocker, &local_err);
+    if (ret == -EBUSY) {
+        /*
+         * We don't want to abort so we let the migration to continue.
+         * In a rare case, the machine check handler will run on the target.
+         * Though this is not preferable, it is better than aborting
+         * the migration or killing the VM.
+         */
+        warn_report("Received a fwnmi while migration was in progress");
+    }
+
+    spapr->mc_status = cpu->vcpu_id;
+    spapr_mce_dispatch_elog(cpu, recovered);
+}
+
 static void check_exception(PowerPCCPU *cpu, SpaprMachineState *spapr,
                             uint32_t token, uint32_t nargs,
                             target_ulong args,
diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
index f1799b1b70..b8bb66b5c0 100644
--- a/hw/ppc/spapr_hcall.c
+++ b/hw/ppc/spapr_hcall.c
@@ -1676,6 +1676,18 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu,
     Error *local_err = NULL;
     bool raw_mode_supported = false;
     bool guest_xive;
+    CPUState *cs;
+
+    /* CAS is supposed to be called early when only the boot vCPU is active. */
+    CPU_FOREACH(cs) {
+        if (cs == CPU(cpu)) {
+            continue;
+        }
+        if (!cs->halted) {
+            warn_report("guest has multiple active vCPUs at CAS, which is not allowed");
+            return H_MULTI_THREADS_ACTIVE;
+        }
+    }
 
     cas_pvr = cas_check_pvr(spapr, cpu, &addr, &raw_mode_supported, &local_err);
     if (local_err) {
@@ -1703,7 +1715,15 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu,
     ov_table = addr;
 
     ov1_guest = spapr_ovec_parse_vector(ov_table, 1);
+    if (!ov1_guest) {
+        warn_report("guest didn't provide option vector 1");
+        return H_PARAMETER;
+    }
     ov5_guest = spapr_ovec_parse_vector(ov_table, 5);
+    if (!ov5_guest) {
+        warn_report("guest didn't provide option vector 5");
+        return H_PARAMETER;
+    }
     if (spapr_ovec_test(ov5_guest, OV5_MMU_BOTH)) {
         error_report("guest requested hash and radix MMU, which is invalid.");
         exit(EXIT_FAILURE);
diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
index 85135e0e1a..883fe28465 100644
--- a/hw/ppc/spapr_rtas.c
+++ b/hw/ppc/spapr_rtas.c
@@ -50,6 +50,7 @@
 #include "hw/ppc/fdt.h"
 #include "target/ppc/mmu-hash64.h"
 #include "target/ppc/mmu-book3s-v3.h"
+#include "migration/blocker.h"
 
 static void rtas_display_character(PowerPCCPU *cpu, SpaprMachineState *spapr,
                                    uint32_t token, uint32_t nargs,
@@ -399,6 +400,62 @@ static void rtas_get_power_level(PowerPCCPU *cpu, SpaprMachineState *spapr,
     rtas_st(rets, 1, 100);
 }
 
+static void rtas_ibm_nmi_register(PowerPCCPU *cpu,
+                                  SpaprMachineState *spapr,
+                                  uint32_t token, uint32_t nargs,
+                                  target_ulong args,
+                                  uint32_t nret, target_ulong rets)
+{
+    hwaddr rtas_addr;
+
+    if (spapr_get_cap(spapr, SPAPR_CAP_FWNMI_MCE) == SPAPR_CAP_OFF) {
+        rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
+        return;
+    }
+
+    rtas_addr = spapr_get_rtas_addr();
+    if (!rtas_addr) {
+        rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
+        return;
+    }
+
+    spapr->guest_machine_check_addr = rtas_ld(args, 1);
+    rtas_st(rets, 0, RTAS_OUT_SUCCESS);
+}
+
+static void rtas_ibm_nmi_interlock(PowerPCCPU *cpu,
+                                   SpaprMachineState *spapr,
+                                   uint32_t token, uint32_t nargs,
+                                   target_ulong args,
+                                   uint32_t nret, target_ulong rets)
+{
+    if (spapr_get_cap(spapr, SPAPR_CAP_FWNMI_MCE) == SPAPR_CAP_OFF) {
+        rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
+        return;
+    }
+
+    if (spapr->guest_machine_check_addr == -1) {
+        /* NMI register not called */
+        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+        return;
+    }
+
+    if (spapr->mc_status != cpu->vcpu_id) {
+        /* The vCPU that hit the NMI should invoke "ibm,nmi-interlock" */
+        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+        return;
+    }
+
+    /*
+     * vCPU issuing "ibm,nmi-interlock" is done with NMI handling,
+     * hence unset mc_status.
+     */
+    spapr->mc_status = -1;
+    qemu_cond_signal(&spapr->mc_delivery_cond);
+    rtas_st(rets, 0, RTAS_OUT_SUCCESS);
+    migrate_del_blocker(spapr->fwnmi_migration_blocker);
+}
+
 static struct rtas_call {
     const char *name;
     spapr_rtas_fn fn;
@@ -476,6 +533,32 @@ void spapr_dt_rtas_tokens(void *fdt, int rtas)
     }
 }
 
+hwaddr spapr_get_rtas_addr(void)
+{
+    SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
+    int rtas_node;
+    const fdt32_t *rtas_data;
+    void *fdt = spapr->fdt_blob;
+
+    /* fetch rtas addr from fdt */
+    rtas_node = fdt_path_offset(fdt, "/rtas");
+    if (rtas_node < 0) {
+        return 0;
+    }
+
+    rtas_data = fdt_getprop(fdt, rtas_node, "linux,rtas-base", NULL);
+    if (!rtas_data) {
+        return 0;
+    }
+
+    /*
+     * We assume that the OS called RTAS instantiate-rtas, but some other
+     * OS might call RTAS instantiate-rtas-64 instead. This fine as of now
+     * as SLOF only supports 32-bit variant.
+     */
+    return (hwaddr)fdt32_to_cpu(*rtas_data);
+}
+
 static void core_rtas_register_types(void)
 {
     spapr_rtas_register(RTAS_DISPLAY_CHARACTER, "display-character",
@@ -501,6 +584,10 @@ static void core_rtas_register_types(void)
                         rtas_set_power_level);
     spapr_rtas_register(RTAS_GET_POWER_LEVEL, "get-power-level",
                         rtas_get_power_level);
+    spapr_rtas_register(RTAS_IBM_NMI_REGISTER, "ibm,nmi-register",
+                        rtas_ibm_nmi_register);
+    spapr_rtas_register(RTAS_IBM_NMI_INTERLOCK, "ibm,nmi-interlock",
+                        rtas_ibm_nmi_interlock);
 }
 
 type_init(core_rtas_register_types)
diff --git a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c
index f14944e900..0b085eabe4 100644
--- a/hw/ppc/spapr_vio.c
+++ b/hw/ppc/spapr_vio.c
@@ -87,6 +87,7 @@ static int vio_make_devnode(SpaprVioDevice *dev,
     SpaprVioDeviceClass *pc = VIO_SPAPR_DEVICE_GET_CLASS(dev);
     int vdevice_off, node_off, ret;
     char *dt_name;
+    const char *dt_compatible;
 
     vdevice_off = fdt_path_offset(fdt, "/vdevice");
     if (vdevice_off < 0) {
@@ -113,9 +114,15 @@ static int vio_make_devnode(SpaprVioDevice *dev,
         }
     }
 
-    if (pc->dt_compatible) {
+    if (pc->get_dt_compatible) {
+        dt_compatible = pc->get_dt_compatible(dev);
+    } else {
+        dt_compatible = pc->dt_compatible;
+    }
+
+    if (dt_compatible) {
         ret = fdt_setprop_string(fdt, node_off, "compatible",
-                                 pc->dt_compatible);
+                                 dt_compatible);
         if (ret < 0) {
             return ret;
         }
diff --git a/hw/ppc/virtex_ml507.c b/hw/ppc/virtex_ml507.c
index 7526947ea7..91dd00ee91 100644
--- a/hw/ppc/virtex_ml507.c
+++ b/hw/ppc/virtex_ml507.c
@@ -89,10 +89,7 @@ static void mmubooke_create_initial_mapping(CPUPPCState *env,
     tlb->PID = 0;
 }
 
-static PowerPCCPU *ppc440_init_xilinx(ram_addr_t *ram_size,
-                                      int do_init,
-                                      const char *cpu_type,
-                                      uint32_t sysclk)
+static PowerPCCPU *ppc440_init_xilinx(const char *cpu_type, uint32_t sysclk)
 {
     PowerPCCPU *cpu;
     CPUPPCState *env;
@@ -213,7 +210,7 @@ static void virtex_init(MachineState *machine)
     int i;
 
     /* init CPUs */
-    cpu = ppc440_init_xilinx(&ram_size, 1, machine->cpu_type, 400000000);
+    cpu = ppc440_init_xilinx(machine->cpu_type, 400000000);
     env = &cpu->env;
 
     if (env->mmu_model != POWERPC_MMU_BOOKE) {
diff --git a/hw/tpm/Kconfig b/hw/tpm/Kconfig
index 4c8ee87d67..9e67d990e8 100644
--- a/hw/tpm/Kconfig
+++ b/hw/tpm/Kconfig
@@ -22,3 +22,9 @@ config TPM_EMULATOR
     bool
     default y
     depends on TPMDEV
+
+config TPM_SPAPR
+    bool
+    default y
+    depends on TPM && PSERIES
+    select TPMDEV
diff --git a/hw/tpm/Makefile.objs b/hw/tpm/Makefile.objs
index de0b85d02a..85eb99ae05 100644
--- a/hw/tpm/Makefile.objs
+++ b/hw/tpm/Makefile.objs
@@ -4,3 +4,4 @@ common-obj-$(CONFIG_TPM_TIS) += tpm_tis.o
 common-obj-$(CONFIG_TPM_CRB) += tpm_crb.o
 common-obj-$(CONFIG_TPM_PASSTHROUGH) += tpm_passthrough.o
 common-obj-$(CONFIG_TPM_EMULATOR) += tpm_emulator.o
+obj-$(CONFIG_TPM_SPAPR) += tpm_spapr.o
diff --git a/hw/tpm/tpm_spapr.c b/hw/tpm/tpm_spapr.c
new file mode 100644
index 0000000000..ce65eb2e45
--- /dev/null
+++ b/hw/tpm/tpm_spapr.c
@@ -0,0 +1,429 @@
+/*
+ * QEMU PowerPC pSeries Logical Partition (aka sPAPR) hardware System Emulator
+ *
+ * PAPR Virtual TPM
+ *
+ * Copyright (c) 2015, 2017, 2019 IBM Corporation.
+ *
+ * Authors:
+ *    Stefan Berger <stefanb@linux.vnet.ibm.com>
+ *
+ * This code is licensed under the GPL version 2 or later. See the
+ * COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/error-report.h"
+#include "qapi/error.h"
+#include "hw/qdev-properties.h"
+#include "migration/vmstate.h"
+
+#include "sysemu/tpm_backend.h"
+#include "tpm_int.h"
+#include "tpm_util.h"
+
+#include "hw/ppc/spapr.h"
+#include "hw/ppc/spapr_vio.h"
+#include "trace.h"
+
+#define DEBUG_SPAPR 0
+
+#define VIO_SPAPR_VTPM(obj) \
+     OBJECT_CHECK(SpaprTpmState, (obj), TYPE_TPM_SPAPR)
+
+typedef struct TpmCrq {
+    uint8_t valid;  /* 0x80: cmd; 0xc0: init crq */
+                    /* 0x81-0x83: CRQ message response */
+    uint8_t msg;    /* see below */
+    uint16_t len;   /* len of TPM request; len of TPM response */
+    uint32_t data;  /* rtce_dma_handle when sending TPM request */
+    uint64_t reserved;
+} TpmCrq;
+
+#define SPAPR_VTPM_VALID_INIT_CRQ_COMMAND  0xC0
+#define SPAPR_VTPM_VALID_COMMAND           0x80
+#define SPAPR_VTPM_MSG_RESULT              0x80
+
+/* msg types for valid = SPAPR_VTPM_VALID_INIT_CRQ */
+#define SPAPR_VTPM_INIT_CRQ_RESULT           0x1
+#define SPAPR_VTPM_INIT_CRQ_COMPLETE_RESULT  0x2
+
+/* msg types for valid = SPAPR_VTPM_VALID_CMD */
+#define SPAPR_VTPM_GET_VERSION               0x1
+#define SPAPR_VTPM_TPM_COMMAND               0x2
+#define SPAPR_VTPM_GET_RTCE_BUFFER_SIZE      0x3
+#define SPAPR_VTPM_PREPARE_TO_SUSPEND        0x4
+
+/* response error messages */
+#define SPAPR_VTPM_VTPM_ERROR                0xff
+
+/* error codes */
+#define SPAPR_VTPM_ERR_COPY_IN_FAILED        0x3
+#define SPAPR_VTPM_ERR_COPY_OUT_FAILED       0x4
+
+#define TPM_SPAPR_BUFFER_MAX                 4096
+
+typedef struct {
+    SpaprVioDevice vdev;
+
+    TpmCrq crq; /* track single TPM command */
+
+    uint8_t state;
+#define SPAPR_VTPM_STATE_NONE         0
+#define SPAPR_VTPM_STATE_EXECUTION    1
+#define SPAPR_VTPM_STATE_COMPLETION   2
+
+    unsigned char *buffer;
+
+    uint32_t numbytes; /* number of bytes to deliver on resume */
+
+    TPMBackendCmd cmd;
+
+    TPMBackend *be_driver;
+    TPMVersion be_tpm_version;
+
+    size_t be_buffer_size;
+} SpaprTpmState;
+
+/*
+ * Send a request to the TPM.
+ */
+static void tpm_spapr_tpm_send(SpaprTpmState *s)
+{
+    if (trace_event_get_state_backends(TRACE_TPM_SPAPR_SHOW_BUFFER)) {
+        tpm_util_show_buffer(s->buffer, s->be_buffer_size, "To TPM");
+    }
+
+    s->state = SPAPR_VTPM_STATE_EXECUTION;
+    s->cmd = (TPMBackendCmd) {
+        .locty = 0,
+        .in = s->buffer,
+        .in_len = MIN(tpm_cmd_get_size(s->buffer), s->be_buffer_size),
+        .out = s->buffer,
+        .out_len = s->be_buffer_size,
+    };
+
+    tpm_backend_deliver_request(s->be_driver, &s->cmd);
+}
+
+static int tpm_spapr_process_cmd(SpaprTpmState *s, uint64_t dataptr)
+{
+    long rc;
+
+    /* a max. of be_buffer_size bytes can be transported */
+    rc = spapr_vio_dma_read(&s->vdev, dataptr,
+                            s->buffer, s->be_buffer_size);
+    if (rc) {
+        error_report("tpm_spapr_got_payload: DMA read failure");
+    }
+    /* let vTPM handle any malformed request */
+    tpm_spapr_tpm_send(s);
+
+    return rc;
+}
+
+static inline int spapr_tpm_send_crq(struct SpaprVioDevice *dev, TpmCrq *crq)
+{
+    return spapr_vio_send_crq(dev, (uint8_t *)crq);
+}
+
+static int tpm_spapr_do_crq(struct SpaprVioDevice *dev, uint8_t *crq_data)
+{
+    SpaprTpmState *s = VIO_SPAPR_VTPM(dev);
+    TpmCrq local_crq;
+    TpmCrq *crq = &s->crq; /* requests only */
+    int rc;
+    uint8_t valid = crq_data[0];
+    uint8_t msg = crq_data[1];
+
+    trace_tpm_spapr_do_crq(valid, msg);
+
+    switch (valid) {
+    case SPAPR_VTPM_VALID_INIT_CRQ_COMMAND: /* Init command/response */
+
+        /* Respond to initialization request */
+        switch (msg) {
+        case SPAPR_VTPM_INIT_CRQ_RESULT:
+            trace_tpm_spapr_do_crq_crq_result();
+            memset(&local_crq, 0, sizeof(local_crq));
+            local_crq.valid = SPAPR_VTPM_VALID_INIT_CRQ_COMMAND;
+            local_crq.msg = SPAPR_VTPM_INIT_CRQ_RESULT;
+            spapr_tpm_send_crq(dev, &local_crq);
+            break;
+
+        case SPAPR_VTPM_INIT_CRQ_COMPLETE_RESULT:
+            trace_tpm_spapr_do_crq_crq_complete_result();
+            memset(&local_crq, 0, sizeof(local_crq));
+            local_crq.valid = SPAPR_VTPM_VALID_INIT_CRQ_COMMAND;
+            local_crq.msg = SPAPR_VTPM_INIT_CRQ_COMPLETE_RESULT;
+            spapr_tpm_send_crq(dev, &local_crq);
+            break;
+        }
+
+        break;
+    case SPAPR_VTPM_VALID_COMMAND: /* Payloads */
+        switch (msg) {
+        case SPAPR_VTPM_TPM_COMMAND:
+            trace_tpm_spapr_do_crq_tpm_command();
+            if (s->state == SPAPR_VTPM_STATE_EXECUTION) {
+                return H_BUSY;
+            }
+            memcpy(crq, crq_data, sizeof(*crq));
+
+            rc = tpm_spapr_process_cmd(s, be32_to_cpu(crq->data));
+
+            if (rc == H_SUCCESS) {
+                crq->valid = be16_to_cpu(0);
+            } else {
+                local_crq.valid = SPAPR_VTPM_MSG_RESULT;
+                local_crq.msg = SPAPR_VTPM_VTPM_ERROR;
+                local_crq.len = cpu_to_be16(0);
+                local_crq.data = cpu_to_be32(SPAPR_VTPM_ERR_COPY_IN_FAILED);
+                spapr_tpm_send_crq(dev, &local_crq);
+            }
+            break;
+
+        case SPAPR_VTPM_GET_RTCE_BUFFER_SIZE:
+            trace_tpm_spapr_do_crq_tpm_get_rtce_buffer_size(s->be_buffer_size);
+            local_crq.valid = SPAPR_VTPM_VALID_COMMAND;
+            local_crq.msg = SPAPR_VTPM_GET_RTCE_BUFFER_SIZE |
+                            SPAPR_VTPM_MSG_RESULT;
+            local_crq.len = cpu_to_be16(s->be_buffer_size);
+            spapr_tpm_send_crq(dev, &local_crq);
+            break;
+
+        case SPAPR_VTPM_GET_VERSION:
+            local_crq.valid = SPAPR_VTPM_VALID_COMMAND;
+            local_crq.msg = SPAPR_VTPM_GET_VERSION | SPAPR_VTPM_MSG_RESULT;
+            local_crq.len = cpu_to_be16(0);
+            switch (s->be_tpm_version) {
+            case TPM_VERSION_1_2:
+                local_crq.data = cpu_to_be32(1);
+                break;
+            case TPM_VERSION_2_0:
+                local_crq.data = cpu_to_be32(2);
+                break;
+            default:
+                g_assert_not_reached();
+                break;
+            }
+            trace_tpm_spapr_do_crq_get_version(be32_to_cpu(local_crq.data));
+            spapr_tpm_send_crq(dev, &local_crq);
+            break;
+
+        case SPAPR_VTPM_PREPARE_TO_SUSPEND:
+            trace_tpm_spapr_do_crq_prepare_to_suspend();
+            local_crq.valid = SPAPR_VTPM_VALID_COMMAND;
+            local_crq.msg = SPAPR_VTPM_PREPARE_TO_SUSPEND |
+                            SPAPR_VTPM_MSG_RESULT;
+            spapr_tpm_send_crq(dev, &local_crq);
+            break;
+
+        default:
+            trace_tpm_spapr_do_crq_unknown_msg_type(crq->msg);
+        }
+        break;
+    default:
+        trace_tpm_spapr_do_crq_unknown_crq(valid, msg);
+    };
+
+    return H_SUCCESS;
+}
+
+static void tpm_spapr_request_completed(TPMIf *ti, int ret)
+{
+    SpaprTpmState *s = VIO_SPAPR_VTPM(ti);
+    TpmCrq *crq = &s->crq;
+    uint32_t len;
+    int rc;
+
+    s->state = SPAPR_VTPM_STATE_COMPLETION;
+
+    /* a max. of be_buffer_size bytes can be transported */
+    len = MIN(tpm_cmd_get_size(s->buffer), s->be_buffer_size);
+
+    if (runstate_check(RUN_STATE_FINISH_MIGRATE)) {
+        trace_tpm_spapr_caught_response(len);
+        /* defer delivery of response until .post_load */
+        s->numbytes = len;
+        return;
+    }
+
+    rc = spapr_vio_dma_write(&s->vdev, be32_to_cpu(crq->data),
+                             s->buffer, len);
+
+    if (trace_event_get_state_backends(TRACE_TPM_SPAPR_SHOW_BUFFER)) {
+        tpm_util_show_buffer(s->buffer, len, "From TPM");
+    }
+
+    crq->valid = SPAPR_VTPM_MSG_RESULT;
+    if (rc == H_SUCCESS) {
+        crq->msg = SPAPR_VTPM_TPM_COMMAND | SPAPR_VTPM_MSG_RESULT;
+        crq->len = cpu_to_be16(len);
+    } else {
+        error_report("%s: DMA write failure", __func__);
+        crq->msg = SPAPR_VTPM_VTPM_ERROR;
+        crq->len = cpu_to_be16(0);
+        crq->data = cpu_to_be32(SPAPR_VTPM_ERR_COPY_OUT_FAILED);
+    }
+
+    rc = spapr_tpm_send_crq(&s->vdev, crq);
+    if (rc) {
+        error_report("%s: Error sending response", __func__);
+    }
+}
+
+static int tpm_spapr_do_startup_tpm(SpaprTpmState *s, size_t buffersize)
+{
+    return tpm_backend_startup_tpm(s->be_driver, buffersize);
+}
+
+static const char *tpm_spapr_get_dt_compatible(SpaprVioDevice *dev)
+{
+    SpaprTpmState *s = VIO_SPAPR_VTPM(dev);
+
+    switch (s->be_tpm_version) {
+    case TPM_VERSION_1_2:
+        return "IBM,vtpm";
+    case TPM_VERSION_2_0:
+        return "IBM,vtpm20";
+    default:
+        g_assert_not_reached();
+    }
+}
+
+static void tpm_spapr_reset(SpaprVioDevice *dev)
+{
+    SpaprTpmState *s = VIO_SPAPR_VTPM(dev);
+
+    s->state = SPAPR_VTPM_STATE_NONE;
+    s->numbytes = 0;
+
+    s->be_tpm_version = tpm_backend_get_tpm_version(s->be_driver);
+
+    s->be_buffer_size = MIN(tpm_backend_get_buffer_size(s->be_driver),
+                            TPM_SPAPR_BUFFER_MAX);
+
+    tpm_backend_reset(s->be_driver);
+    tpm_spapr_do_startup_tpm(s, s->be_buffer_size);
+}
+
+static enum TPMVersion tpm_spapr_get_version(TPMIf *ti)
+{
+    SpaprTpmState *s = VIO_SPAPR_VTPM(ti);
+
+    if (tpm_backend_had_startup_error(s->be_driver)) {
+        return TPM_VERSION_UNSPEC;
+    }
+
+    return tpm_backend_get_tpm_version(s->be_driver);
+}
+
+/* persistent state handling */
+
+static int tpm_spapr_pre_save(void *opaque)
+{
+    SpaprTpmState *s = opaque;
+
+    tpm_backend_finish_sync(s->be_driver);
+    /*
+     * we cannot deliver the results to the VM since DMA would touch VM memory
+     */
+
+    return 0;
+}
+
+static int tpm_spapr_post_load(void *opaque, int version_id)
+{
+    SpaprTpmState *s = opaque;
+
+    if (s->numbytes) {
+        trace_tpm_spapr_post_load();
+        /* deliver the results to the VM via DMA */
+        tpm_spapr_request_completed(TPM_IF(s), 0);
+        s->numbytes = 0;
+    }
+
+    return 0;
+}
+
+static const VMStateDescription vmstate_spapr_vtpm = {
+    .name = "tpm-spapr",
+    .pre_save = tpm_spapr_pre_save,
+    .post_load = tpm_spapr_post_load,
+    .fields = (VMStateField[]) {
+        VMSTATE_SPAPR_VIO(vdev, SpaprTpmState),
+
+        VMSTATE_UINT8(state, SpaprTpmState),
+        VMSTATE_UINT32(numbytes, SpaprTpmState),
+        VMSTATE_VBUFFER_UINT32(buffer, SpaprTpmState, 0, NULL, numbytes),
+        /* remember DMA address */
+        VMSTATE_UINT32(crq.data, SpaprTpmState),
+        VMSTATE_END_OF_LIST(),
+    }
+};
+
+static Property tpm_spapr_properties[] = {
+    DEFINE_SPAPR_PROPERTIES(SpaprTpmState, vdev),
+    DEFINE_PROP_TPMBE("tpmdev", SpaprTpmState, be_driver),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static void tpm_spapr_realizefn(SpaprVioDevice *dev, Error **errp)
+{
+    SpaprTpmState *s = VIO_SPAPR_VTPM(dev);
+
+    if (!tpm_find()) {
+        error_setg(errp, "at most one TPM device is permitted");
+        return;
+    }
+
+    dev->crq.SendFunc = tpm_spapr_do_crq;
+
+    if (!s->be_driver) {
+        error_setg(errp, "'tpmdev' property is required");
+        return;
+    }
+    s->buffer = g_malloc(TPM_SPAPR_BUFFER_MAX);
+}
+
+static void tpm_spapr_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    SpaprVioDeviceClass *k = VIO_SPAPR_DEVICE_CLASS(klass);
+    TPMIfClass *tc = TPM_IF_CLASS(klass);
+
+    k->realize = tpm_spapr_realizefn;
+    k->reset = tpm_spapr_reset;
+    k->dt_name = "vtpm";
+    k->dt_type = "IBM,vtpm";
+    k->get_dt_compatible = tpm_spapr_get_dt_compatible;
+    k->signal_mask = 0x00000001;
+    set_bit(DEVICE_CATEGORY_MISC, dc->categories);
+    device_class_set_props(dc, tpm_spapr_properties);
+    k->rtce_window_size = 0x10000000;
+    dc->vmsd = &vmstate_spapr_vtpm;
+
+    tc->model = TPM_MODEL_TPM_SPAPR;
+    tc->get_version = tpm_spapr_get_version;
+    tc->request_completed = tpm_spapr_request_completed;
+}
+
+static const TypeInfo tpm_spapr_info = {
+    .name          = TYPE_TPM_SPAPR,
+    .parent        = TYPE_VIO_SPAPR_DEVICE,
+    .instance_size = sizeof(SpaprTpmState),
+    .class_init    = tpm_spapr_class_init,
+    .interfaces = (InterfaceInfo[]) {
+        { TYPE_TPM_IF },
+        { }
+    }
+};
+
+static void tpm_spapr_register_types(void)
+{
+    type_register_static(&tpm_spapr_info);
+}
+
+type_init(tpm_spapr_register_types)
diff --git a/hw/tpm/tpm_tis.c b/hw/tpm/tpm_tis.c
index 5362df2711..31facb896d 100644
--- a/hw/tpm/tpm_tis.c
+++ b/hw/tpm/tpm_tis.c
@@ -107,30 +107,6 @@ static uint8_t tpm_tis_locality_from_addr(hwaddr addr)
     return (uint8_t)((addr >> TPM_TIS_LOCALITY_SHIFT) & 0x7);
 }
 
-static void tpm_tis_show_buffer(const unsigned char *buffer,
-                                size_t buffer_size, const char *string)
-{
-    size_t len, i;
-    char *line_buffer, *p;
-
-    len = MIN(tpm_cmd_get_size(buffer), buffer_size);
-
-    /*
-     * allocate enough room for 3 chars per buffer entry plus a
-     * newline after every 16 chars and a final null terminator.
-     */
-    line_buffer = g_malloc(len * 3 + (len / 16) + 1);
-
-    for (i = 0, p = line_buffer; i < len; i++) {
-        if (i && !(i % 16)) {
-            p += sprintf(p, "\n");
-        }
-        p += sprintf(p, "%.2X ", buffer[i]);
-    }
-    trace_tpm_tis_show_buffer(string, len, line_buffer);
-
-    g_free(line_buffer);
-}
 
 /*
  * Set the given flags in the STS register by clearing the register but
@@ -156,8 +132,8 @@ static void tpm_tis_sts_set(TPMLocality *l, uint32_t flags)
  */
 static void tpm_tis_tpm_send(TPMState *s, uint8_t locty)
 {
-    if (trace_event_get_state_backends(TRACE_TPM_TIS_SHOW_BUFFER)) {
-        tpm_tis_show_buffer(s->buffer, s->be_buffer_size, "To TPM");
+    if (trace_event_get_state_backends(TRACE_TPM_UTIL_SHOW_BUFFER)) {
+        tpm_util_show_buffer(s->buffer, s->be_buffer_size, "To TPM");
     }
 
     /*
@@ -325,8 +301,8 @@ static void tpm_tis_request_completed(TPMIf *ti, int ret)
     s->loc[locty].state = TPM_TIS_STATE_COMPLETION;
     s->rw_offset = 0;
 
-    if (trace_event_get_state_backends(TRACE_TPM_TIS_SHOW_BUFFER)) {
-        tpm_tis_show_buffer(s->buffer, s->be_buffer_size, "From TPM");
+    if (trace_event_get_state_backends(TRACE_TPM_UTIL_SHOW_BUFFER)) {
+        tpm_util_show_buffer(s->buffer, s->be_buffer_size, "From TPM");
     }
 
     if (TPM_TIS_IS_VALID_LOCTY(s->next_locty)) {
diff --git a/hw/tpm/tpm_util.c b/hw/tpm/tpm_util.c
index 62b091f0c0..c0a0f3d71f 100644
--- a/hw/tpm/tpm_util.c
+++ b/hw/tpm/tpm_util.c
@@ -350,3 +350,28 @@ void tpm_sized_buffer_reset(TPMSizedBuffer *tsb)
     tsb->buffer = NULL;
     tsb->size = 0;
 }
+
+void tpm_util_show_buffer(const unsigned char *buffer,
+                          size_t buffer_size, const char *string)
+{
+    size_t len, i;
+    char *line_buffer, *p;
+
+    len = MIN(tpm_cmd_get_size(buffer), buffer_size);
+
+    /*
+     * allocate enough room for 3 chars per buffer entry plus a
+     * newline after every 16 chars and a final null terminator.
+     */
+    line_buffer = g_malloc(len * 3 + (len / 16) + 1);
+
+    for (i = 0, p = line_buffer; i < len; i++) {
+        if (i && !(i % 16)) {
+            p += sprintf(p, "\n");
+        }
+        p += sprintf(p, "%.2X ", buffer[i]);
+    }
+    trace_tpm_util_show_buffer(string, len, line_buffer);
+
+    g_free(line_buffer);
+}
diff --git a/hw/tpm/tpm_util.h b/hw/tpm/tpm_util.h
index f397ac21b8..7889081fba 100644
--- a/hw/tpm/tpm_util.h
+++ b/hw/tpm/tpm_util.h
@@ -79,4 +79,7 @@ typedef struct TPMSizedBuffer {
 
 void tpm_sized_buffer_reset(TPMSizedBuffer *tsb);
 
+void tpm_util_show_buffer(const unsigned char *buffer,
+                          size_t buffer_size, const char *string);
+
 #endif /* TPM_TPM_UTIL_H */
diff --git a/hw/tpm/trace-events b/hw/tpm/trace-events
index 89804bcd64..439e514787 100644
--- a/hw/tpm/trace-events
+++ b/hw/tpm/trace-events
@@ -14,6 +14,7 @@ tpm_util_get_buffer_size_len(uint32_t len, size_t expected) "tpm_resp->len = %u,
 tpm_util_get_buffer_size_hdr_len2(uint32_t len, size_t expected) "tpm2_resp->hdr.len = %u, expected = %zu"
 tpm_util_get_buffer_size_len2(uint32_t len, size_t expected) "tpm2_resp->len = %u, expected = %zu"
 tpm_util_get_buffer_size(size_t len) "buffersize of device: %zu"
+tpm_util_show_buffer(const char *direction, size_t len, const char *buf) "direction: %s len: %zu\n%s"
 
 # tpm_emulator.c
 tpm_emulator_set_locality(uint8_t locty) "setting locality to %d"
@@ -36,7 +37,6 @@ tpm_emulator_pre_save(void) ""
 tpm_emulator_inst_init(void) ""
 
 # tpm_tis.c
-tpm_tis_show_buffer(const char *direction, size_t len, const char *buf) "direction: %s len: %zu\nbuf: %s"
 tpm_tis_raise_irq(uint32_t irqmask) "Raising IRQ for flag 0x%08x"
 tpm_tis_new_active_locality(uint8_t locty) "Active locality is now %d"
 tpm_tis_abort(uint8_t locty) "New active locality is %d"
@@ -55,3 +55,17 @@ tpm_tis_pre_save(uint8_t locty, uint32_t rw_offset) "locty: %d, rw_offset = %u"
 
 # tpm_ppi.c
 tpm_ppi_memset(uint8_t *ptr, size_t size) "memset: %p %zu"
+
+# hw/tpm/tpm_spapr.c
+tpm_spapr_show_buffer(const char *direction, size_t len, const char *buf) "direction: %s len: %zu\n%s"
+tpm_spapr_do_crq(uint8_t raw1, uint8_t raw2) "1st 2 bytes in CRQ: 0x%02x 0x%02x"
+tpm_spapr_do_crq_crq_result(void) "SPAPR_VTPM_INIT_CRQ_RESULT"
+tpm_spapr_do_crq_crq_complete_result(void) "SPAPR_VTPM_INIT_CRQ_COMP_RESULT"
+tpm_spapr_do_crq_tpm_command(void) "got TPM command payload"
+tpm_spapr_do_crq_tpm_get_rtce_buffer_size(size_t buffersize) "response: buffer size is %zu"
+tpm_spapr_do_crq_get_version(uint32_t version) "response: version %u"
+tpm_spapr_do_crq_prepare_to_suspend(void) "response: preparing to suspend"
+tpm_spapr_do_crq_unknown_msg_type(uint8_t type) "Unknown message type 0x%02x"
+tpm_spapr_do_crq_unknown_crq(uint8_t raw1, uint8_t raw2) "unknown CRQ 0x%02x 0x%02x ..."
+tpm_spapr_post_load(void) "Delivering TPM response after resume"
+tpm_spapr_caught_response(uint32_t v) "Caught response to deliver after resume: %u bytes"