summary refs log tree commit diff stats
path: root/hw
diff options
context:
space:
mode:
Diffstat (limited to 'hw')
-rw-r--r--hw/ide/macio.c71
-rw-r--r--hw/intc/openpic.c99
-rw-r--r--hw/intc/openpic_kvm.c15
-rw-r--r--hw/misc/macio/mac_dbdma.c6
-rw-r--r--hw/misc/macio/macio.c2
-rw-r--r--hw/net/fsl_etsec/rings.c2
-rw-r--r--hw/nvram/spapr_nvram.c2
-rw-r--r--hw/pci-host/ppce500.c41
-rw-r--r--hw/ppc/e500.c147
-rw-r--r--hw/ppc/mac_newworld.c5
-rw-r--r--hw/ppc/mac_oldworld.c5
-rw-r--r--hw/ppc/ppc.c79
-rw-r--r--hw/ppc/spapr.c234
-rw-r--r--hw/ppc/spapr_hcall.c233
-rw-r--r--hw/ppc/spapr_iommu.c181
-rw-r--r--hw/ppc/spapr_pci.c129
-rw-r--r--hw/ppc/spapr_vio.c6
17 files changed, 994 insertions, 263 deletions
diff --git a/hw/ide/macio.c b/hw/ide/macio.c
index 1c20616f5a..c14a1ddddb 100644
--- a/hw/ide/macio.c
+++ b/hw/ide/macio.c
@@ -193,6 +193,11 @@ static void pmac_ide_transfer_cb(void *opaque, int ret)
         goto done;
     }
 
+    if (--io->requests) {
+        /* More requests still in flight */
+        return;
+    }
+
     if (!m->dma_active) {
         MACIO_DPRINTF("waiting for data (%#x - %#x - %x)\n",
                       s->nsector, io->len, s->status);
@@ -212,6 +217,13 @@ static void pmac_ide_transfer_cb(void *opaque, int ret)
         s->nsector -= n;
     }
 
+    if (io->finish_remain_read) {
+        /* Finish a stale read from the last iteration */
+        io->finish_remain_read = false;
+        cpu_physical_memory_write(io->finish_addr, io->remainder,
+                                  io->finish_len);
+    }
+
     MACIO_DPRINTF("remainder: %d io->len: %d nsector: %d "
                   "sector_num: %" PRId64 "\n",
                   io->remainder_len, io->len, s->nsector, sector_num);
@@ -229,7 +241,6 @@ static void pmac_ide_transfer_cb(void *opaque, int ret)
             break;
         case IDE_DMA_WRITE:
             cpu_physical_memory_read(io->addr, p, remainder_len);
-            bdrv_write(s->bs, sector_num - 1, io->remainder, 1);
             break;
         case IDE_DMA_TRIM:
             break;
@@ -237,6 +248,15 @@ static void pmac_ide_transfer_cb(void *opaque, int ret)
         io->addr += remainder_len;
         io->len -= remainder_len;
         io->remainder_len -= remainder_len;
+
+        if (s->dma_cmd == IDE_DMA_WRITE && !io->remainder_len) {
+            io->requests++;
+            qemu_iovec_reset(&io->iov);
+            qemu_iovec_add(&io->iov, io->remainder, 0x200);
+
+            m->aiocb = bdrv_aio_writev(s->bs, sector_num - 1, &io->iov, 1,
+                                       pmac_ide_transfer_cb, io);
+        }
     }
 
     if (s->nsector == 0 && !io->remainder_len) {
@@ -267,20 +287,25 @@ static void pmac_ide_transfer_cb(void *opaque, int ret)
 
         switch (s->dma_cmd) {
         case IDE_DMA_READ:
-            bdrv_read(s->bs, sector_num + nsector, io->remainder, 1);
-            cpu_physical_memory_write(io->addr + io->len - unaligned,
-                                      io->remainder, unaligned);
+            io->requests++;
+            io->finish_addr = io->addr + io->len - unaligned;
+            io->finish_len = unaligned;
+            io->finish_remain_read = true;
+            qemu_iovec_reset(&io->iov);
+            qemu_iovec_add(&io->iov, io->remainder, 0x200);
+
+            m->aiocb = bdrv_aio_readv(s->bs, sector_num + nsector, &io->iov, 1,
+                                      pmac_ide_transfer_cb, io);
             break;
         case IDE_DMA_WRITE:
             /* cache the contents in our io struct */
             cpu_physical_memory_read(io->addr + io->len - unaligned,
-                                     io->remainder, unaligned);
+                                     io->remainder + io->remainder_len,
+                                     unaligned);
             break;
         case IDE_DMA_TRIM:
             break;
         }
-
-        io->len -= unaligned;
     }
 
     MACIO_DPRINTF("io->len = %#x\n", io->len);
@@ -292,10 +317,12 @@ static void pmac_ide_transfer_cb(void *opaque, int ret)
     io->remainder_len = (0x200 - unaligned) & 0x1ff;
     MACIO_DPRINTF("set remainder to: %d\n", io->remainder_len);
 
-    /* We would read no data from the block layer, thus not get a callback.
-       Just fake completion manually. */
+    /* Only subsector reads happening */
     if (!io->len) {
-        pmac_ide_transfer_cb(opaque, 0);
+        if (!io->requests) {
+            io->requests++;
+            pmac_ide_transfer_cb(opaque, ret);
+        }
         return;
     }
 
@@ -319,6 +346,8 @@ static void pmac_ide_transfer_cb(void *opaque, int ret)
                                DMA_DIRECTION_TO_DEVICE);
         break;
     }
+
+    io->requests++;
     return;
 
 done:
@@ -337,6 +366,27 @@ static void pmac_ide_transfer(DBDMA_io *io)
 
     s->io_buffer_size = 0;
     if (s->drive_kind == IDE_CD) {
+
+        /* Handle non-block ATAPI DMA transfers */
+        if (s->lba == -1) {
+            s->io_buffer_size = MIN(io->len, s->packet_transfer_size);
+            bdrv_acct_start(s->bs, &s->acct, s->io_buffer_size,
+                            BDRV_ACCT_READ);
+            MACIO_DPRINTF("non-block ATAPI DMA transfer size: %d\n",
+                          s->io_buffer_size);
+
+            /* Copy ATAPI buffer directly to RAM and finish */
+            cpu_physical_memory_write(io->addr, s->io_buffer,
+                                      s->io_buffer_size);
+            ide_atapi_cmd_ok(s);
+            m->dma_active = false;
+
+            MACIO_DPRINTF("end of non-block ATAPI DMA transfer\n");
+            bdrv_acct_done(s->bs, &s->acct);
+            io->dma_end(io);
+            return;
+        }
+
         bdrv_acct_start(s->bs, &s->acct, io->len, BDRV_ACCT_READ);
         pmac_ide_atapi_transfer_cb(io, 0);
         return;
@@ -353,6 +403,7 @@ static void pmac_ide_transfer(DBDMA_io *io)
         break;
     }
 
+    io->requests++;
     pmac_ide_transfer_cb(io, 0);
 }
 
diff --git a/hw/intc/openpic.c b/hw/intc/openpic.c
index 08e0e19c59..028529e13d 100644
--- a/hw/intc/openpic.c
+++ b/hw/intc/openpic.c
@@ -192,6 +192,7 @@ static uint32_t openpic_cpu_read_internal(void *opaque, hwaddr addr,
                                           int idx);
 static void openpic_cpu_write_internal(void *opaque, hwaddr addr,
                                        uint32_t val, int idx);
+static void openpic_reset(DeviceState *d);
 
 typedef enum IRQType {
     IRQ_TYPE_NORMAL = 0,
@@ -529,55 +530,6 @@ static void openpic_set_irq(void *opaque, int n_IRQ, int level)
     }
 }
 
-static void openpic_reset(DeviceState *d)
-{
-    OpenPICState *opp = OPENPIC(d);
-    int i;
-
-    opp->gcr = GCR_RESET;
-    /* Initialise controller registers */
-    opp->frr = ((opp->nb_irqs - 1) << FRR_NIRQ_SHIFT) |
-               ((opp->nb_cpus - 1) << FRR_NCPU_SHIFT) |
-               (opp->vid << FRR_VID_SHIFT);
-
-    opp->pir = 0;
-    opp->spve = -1 & opp->vector_mask;
-    opp->tfrr = opp->tfrr_reset;
-    /* Initialise IRQ sources */
-    for (i = 0; i < opp->max_irq; i++) {
-        opp->src[i].ivpr = opp->ivpr_reset;
-        opp->src[i].idr  = opp->idr_reset;
-
-        switch (opp->src[i].type) {
-        case IRQ_TYPE_NORMAL:
-            opp->src[i].level = !!(opp->ivpr_reset & IVPR_SENSE_MASK);
-            break;
-
-        case IRQ_TYPE_FSLINT:
-            opp->src[i].ivpr |= IVPR_POLARITY_MASK;
-            break;
-
-        case IRQ_TYPE_FSLSPECIAL:
-            break;
-        }
-    }
-    /* Initialise IRQ destinations */
-    for (i = 0; i < MAX_CPU; i++) {
-        opp->dst[i].ctpr      = 15;
-        memset(&opp->dst[i].raised, 0, sizeof(IRQQueue));
-        opp->dst[i].raised.next = -1;
-        memset(&opp->dst[i].servicing, 0, sizeof(IRQQueue));
-        opp->dst[i].servicing.next = -1;
-    }
-    /* Initialise timers */
-    for (i = 0; i < OPENPIC_MAX_TMR; i++) {
-        opp->timers[i].tccr = 0;
-        opp->timers[i].tbcr = TBCR_CI;
-    }
-    /* Go out of RESET state */
-    opp->gcr = 0;
-}
-
 static inline uint32_t read_IRQreg_idr(OpenPICState *opp, int n_IRQ)
 {
     return opp->src[n_IRQ].idr;
@@ -1461,6 +1413,55 @@ static int openpic_load(QEMUFile* f, void *opaque, int version_id)
     return 0;
 }
 
+static void openpic_reset(DeviceState *d)
+{
+    OpenPICState *opp = OPENPIC(d);
+    int i;
+
+    opp->gcr = GCR_RESET;
+    /* Initialise controller registers */
+    opp->frr = ((opp->nb_irqs - 1) << FRR_NIRQ_SHIFT) |
+               ((opp->nb_cpus - 1) << FRR_NCPU_SHIFT) |
+               (opp->vid << FRR_VID_SHIFT);
+
+    opp->pir = 0;
+    opp->spve = -1 & opp->vector_mask;
+    opp->tfrr = opp->tfrr_reset;
+    /* Initialise IRQ sources */
+    for (i = 0; i < opp->max_irq; i++) {
+        opp->src[i].ivpr = opp->ivpr_reset;
+        switch (opp->src[i].type) {
+        case IRQ_TYPE_NORMAL:
+            opp->src[i].level = !!(opp->ivpr_reset & IVPR_SENSE_MASK);
+            break;
+
+        case IRQ_TYPE_FSLINT:
+            opp->src[i].ivpr |= IVPR_POLARITY_MASK;
+            break;
+
+        case IRQ_TYPE_FSLSPECIAL:
+            break;
+        }
+
+        write_IRQreg_idr(opp, i, opp->idr_reset);
+    }
+    /* Initialise IRQ destinations */
+    for (i = 0; i < MAX_CPU; i++) {
+        opp->dst[i].ctpr      = 15;
+        memset(&opp->dst[i].raised, 0, sizeof(IRQQueue));
+        opp->dst[i].raised.next = -1;
+        memset(&opp->dst[i].servicing, 0, sizeof(IRQQueue));
+        opp->dst[i].servicing.next = -1;
+    }
+    /* Initialise timers */
+    for (i = 0; i < OPENPIC_MAX_TMR; i++) {
+        opp->timers[i].tccr = 0;
+        opp->timers[i].tbcr = TBCR_CI;
+    }
+    /* Go out of RESET state */
+    opp->gcr = 0;
+}
+
 typedef struct MemReg {
     const char             *name;
     MemoryRegionOps const  *ops;
diff --git a/hw/intc/openpic_kvm.c b/hw/intc/openpic_kvm.c
index 585ab4ff20..e3bce043a3 100644
--- a/hw/intc/openpic_kvm.c
+++ b/hw/intc/openpic_kvm.c
@@ -31,6 +31,8 @@
 #include "sysemu/kvm.h"
 #include "qemu/log.h"
 
+#define GCR_RESET        0x80000000
+
 #define KVM_OPENPIC(obj) \
     OBJECT_CHECK(KVMOpenPICState, (obj), TYPE_KVM_OPENPIC)
 
@@ -50,11 +52,6 @@ static void kvm_openpic_set_irq(void *opaque, int n_IRQ, int level)
     kvm_set_irq(kvm_state, n_IRQ, level);
 }
 
-static void kvm_openpic_reset(DeviceState *d)
-{
-    qemu_log_mask(LOG_UNIMP, "%s: unimplemented\n", __func__);
-}
-
 static void kvm_openpic_write(void *opaque, hwaddr addr, uint64_t val,
                               unsigned size)
 {
@@ -74,6 +71,14 @@ static void kvm_openpic_write(void *opaque, hwaddr addr, uint64_t val,
     }
 }
 
+static void kvm_openpic_reset(DeviceState *d)
+{
+    KVMOpenPICState *opp = KVM_OPENPIC(d);
+
+    /* Trigger the GCR.RESET bit to reset the PIC */
+    kvm_openpic_write(opp, 0x1020, GCR_RESET, sizeof(uint32_t));
+}
+
 static uint64_t kvm_openpic_read(void *opaque, hwaddr addr, unsigned size)
 {
     KVMOpenPICState *opp = opaque;
diff --git a/hw/misc/macio/mac_dbdma.c b/hw/misc/macio/mac_dbdma.c
index 3335476c29..b25e8511b2 100644
--- a/hw/misc/macio/mac_dbdma.c
+++ b/hw/misc/macio/mac_dbdma.c
@@ -748,9 +748,15 @@ static void dbdma_reset(void *opaque)
 void* DBDMA_init (MemoryRegion **dbdma_mem)
 {
     DBDMAState *s;
+    int i;
 
     s = g_malloc0(sizeof(DBDMAState));
 
+    for (i = 0; i < DBDMA_CHANNELS; i++) {
+        DBDMA_io *io = &s->channels[i].io;
+        qemu_iovec_init(&io->iov, 1);
+    }
+
     memory_region_init_io(&s->mem, NULL, &dbdma_ops, s, "dbdma", 0x1000);
     *dbdma_mem = &s->mem;
     vmstate_register(NULL, -1, &vmstate_dbdma, s);
diff --git a/hw/misc/macio/macio.c b/hw/misc/macio/macio.c
index 7f99aa0d5c..47f45f5d2c 100644
--- a/hw/misc/macio/macio.c
+++ b/hw/misc/macio/macio.c
@@ -259,7 +259,7 @@ static uint64_t timer_read(void *opaque, hwaddr addr, unsigned size)
 static const MemoryRegionOps timer_ops = {
     .read = timer_read,
     .write = timer_write,
-    .endianness = DEVICE_NATIVE_ENDIAN,
+    .endianness = DEVICE_LITTLE_ENDIAN,
 };
 
 static int macio_newworld_initfn(PCIDevice *d)
diff --git a/hw/net/fsl_etsec/rings.c b/hw/net/fsl_etsec/rings.c
index e36cfbe76d..d4a494f6a3 100644
--- a/hw/net/fsl_etsec/rings.c
+++ b/hw/net/fsl_etsec/rings.c
@@ -159,7 +159,7 @@ static void ievent_set(eTSEC    *etsec,
 
     if ((flags & IEVENT_RXB && etsec->regs[IMASK].value & IMASK_RXBEN)
         || (flags & IEVENT_RXF && etsec->regs[IMASK].value & IMASK_RXFEN)) {
-        qemu_irq_pulse(etsec->rx_irq);
+        qemu_irq_raise(etsec->rx_irq);
         RING_DEBUG("%s Raise Rx IRQ\n", __func__);
     }
 }
diff --git a/hw/nvram/spapr_nvram.c b/hw/nvram/spapr_nvram.c
index 635713e766..af49c4667d 100644
--- a/hw/nvram/spapr_nvram.c
+++ b/hw/nvram/spapr_nvram.c
@@ -42,7 +42,7 @@ typedef struct sPAPRNVRAM {
 
 #define MIN_NVRAM_SIZE 8192
 #define DEFAULT_NVRAM_SIZE 65536
-#define MAX_NVRAM_SIZE (UINT16_MAX * 16)
+#define MAX_NVRAM_SIZE 1048576
 
 static void rtas_nvram_fetch(PowerPCCPU *cpu, sPAPREnvironment *spapr,
                              uint32_t token, uint32_t nargs,
diff --git a/hw/pci-host/ppce500.c b/hw/pci-host/ppce500.c
index e12d731ce8..1b4c0f0023 100644
--- a/hw/pci-host/ppce500.c
+++ b/hw/pci-host/ppce500.c
@@ -87,8 +87,10 @@ struct PPCE500PCIState {
     struct pci_outbound pob[PPCE500_PCI_NR_POBS];
     struct pci_inbound pib[PPCE500_PCI_NR_PIBS];
     uint32_t gasket_time;
-    qemu_irq irq[4];
+    qemu_irq irq[PCI_NUM_PINS];
+    uint32_t irq_num[PCI_NUM_PINS];
     uint32_t first_slot;
+    uint32_t first_pin_irq;
     /* mmio maps */
     MemoryRegion container;
     MemoryRegion iomem;
@@ -252,26 +254,39 @@ static const MemoryRegionOps e500_pci_reg_ops = {
     .endianness = DEVICE_BIG_ENDIAN,
 };
 
-static int mpc85xx_pci_map_irq(PCIDevice *pci_dev, int irq_num)
+static int mpc85xx_pci_map_irq(PCIDevice *pci_dev, int pin)
 {
     int devno = pci_dev->devfn >> 3;
     int ret;
 
-    ret = ppce500_pci_map_irq_slot(devno, irq_num);
+    ret = ppce500_pci_map_irq_slot(devno, pin);
 
     pci_debug("%s: devfn %x irq %d -> %d  devno:%x\n", __func__,
-           pci_dev->devfn, irq_num, ret, devno);
+           pci_dev->devfn, pin, ret, devno);
 
     return ret;
 }
 
-static void mpc85xx_pci_set_irq(void *opaque, int irq_num, int level)
+static void mpc85xx_pci_set_irq(void *opaque, int pin, int level)
 {
-    qemu_irq *pic = opaque;
+    PPCE500PCIState *s = opaque;
+    qemu_irq *pic = s->irq;
 
-    pci_debug("%s: PCI irq %d, level:%d\n", __func__, irq_num, level);
+    pci_debug("%s: PCI irq %d, level:%d\n", __func__, pin , level);
 
-    qemu_set_irq(pic[irq_num], level);
+    qemu_set_irq(pic[pin], level);
+}
+
+static PCIINTxRoute e500_route_intx_pin_to_irq(void *opaque, int pin)
+{
+    PCIINTxRoute route;
+    PPCE500PCIState *s = opaque;
+
+    route.mode = PCI_INTX_ENABLED;
+    route.irq = s->irq_num[pin];
+
+    pci_debug("%s: PCI irq-pin = %d, irq_num= %d\n", __func__, pin, route.irq);
+    return route;
 }
 
 static const VMStateDescription vmstate_pci_outbound = {
@@ -308,7 +323,7 @@ static const VMStateDescription vmstate_ppce500_pci = {
         VMSTATE_STRUCT_ARRAY(pob, PPCE500PCIState, PPCE500_PCI_NR_POBS, 1,
                              vmstate_pci_outbound, struct pci_outbound),
         VMSTATE_STRUCT_ARRAY(pib, PPCE500PCIState, PPCE500_PCI_NR_PIBS, 1,
-                             vmstate_pci_outbound, struct pci_inbound),
+                             vmstate_pci_inbound, struct pci_inbound),
         VMSTATE_UINT32(gasket_time, PPCE500PCIState),
         VMSTATE_END_OF_LIST()
     }
@@ -349,10 +364,14 @@ static int e500_pcihost_initfn(SysBusDevice *dev)
         sysbus_init_irq(dev, &s->irq[i]);
     }
 
+    for (i = 0; i < PCI_NUM_PINS; i++) {
+        s->irq_num[i] = s->first_pin_irq + i;
+    }
+
     memory_region_init(&s->pio, OBJECT(s), "pci-pio", PCIE500_PCI_IOLEN);
 
     b = pci_register_bus(DEVICE(dev), NULL, mpc85xx_pci_set_irq,
-                         mpc85xx_pci_map_irq, s->irq, address_space_mem,
+                         mpc85xx_pci_map_irq, s, address_space_mem,
                          &s->pio, PCI_DEVFN(s->first_slot, 0), 4, TYPE_PCI_BUS);
     h->bus = b;
 
@@ -370,6 +389,7 @@ static int e500_pcihost_initfn(SysBusDevice *dev)
     memory_region_add_subregion(&s->container, PCIE500_REG_BASE, &s->iomem);
     sysbus_init_mmio(dev, &s->container);
     sysbus_init_mmio(dev, &s->pio);
+    pci_bus_set_route_irq_fn(b, e500_route_intx_pin_to_irq);
 
     return 0;
 }
@@ -400,6 +420,7 @@ static const TypeInfo e500_host_bridge_info = {
 
 static Property pcihost_properties[] = {
     DEFINE_PROP_UINT32("first_slot", PPCE500PCIState, first_slot, 0x11),
+    DEFINE_PROP_UINT32("first_pin_irq", PPCE500PCIState, first_pin_irq, 0x1),
     DEFINE_PROP_END_OF_LIST(),
 };
 
diff --git a/hw/ppc/e500.c b/hw/ppc/e500.c
index 223bab9eea..a973c18d88 100644
--- a/hw/ppc/e500.c
+++ b/hw/ppc/e500.c
@@ -39,7 +39,6 @@
 
 #define EPAPR_MAGIC                (0x45504150)
 #define BINARY_DEVICE_TREE_FILE    "mpc8544ds.dtb"
-#define UIMAGE_LOAD_BASE           0
 #define DTC_LOAD_PAD               0x1800000
 #define DTC_PAD_MASK               0xFFFFF
 #define DTB_MAX_SIZE               (8 * 1024 * 1024)
@@ -128,6 +127,8 @@ static int ppce500_load_device_tree(MachineState *machine,
                                     hwaddr addr,
                                     hwaddr initrd_base,
                                     hwaddr initrd_size,
+                                    hwaddr kernel_base,
+                                    hwaddr kernel_size,
                                     bool dry_run)
 {
     CPUPPCState *env = first_cpu->env_ptr;
@@ -204,6 +205,13 @@ static int ppce500_load_device_tree(MachineState *machine,
         if (ret < 0) {
             fprintf(stderr, "couldn't set /chosen/linux,initrd-end\n");
         }
+
+    }
+
+    if (kernel_base != -1ULL) {
+        qemu_fdt_setprop_cells(fdt, "/chosen", "qemu,boot-kernel",
+                                     kernel_base >> 32, kernel_base,
+                                     kernel_size >> 32, kernel_size);
     }
 
     ret = qemu_fdt_setprop_string(fdt, "/chosen", "bootargs",
@@ -392,20 +400,25 @@ typedef struct DeviceTreeParams {
     hwaddr addr;
     hwaddr initrd_base;
     hwaddr initrd_size;
+    hwaddr kernel_base;
+    hwaddr kernel_size;
 } DeviceTreeParams;
 
 static void ppce500_reset_device_tree(void *opaque)
 {
     DeviceTreeParams *p = opaque;
     ppce500_load_device_tree(p->machine, &p->params, p->addr, p->initrd_base,
-                             p->initrd_size, false);
+                             p->initrd_size, p->kernel_base, p->kernel_size,
+                             false);
 }
 
 static int ppce500_prep_device_tree(MachineState *machine,
                                     PPCE500Params *params,
                                     hwaddr addr,
                                     hwaddr initrd_base,
-                                    hwaddr initrd_size)
+                                    hwaddr initrd_size,
+                                    hwaddr kernel_base,
+                                    hwaddr kernel_size)
 {
     DeviceTreeParams *p = g_new(DeviceTreeParams, 1);
     p->machine = machine;
@@ -413,12 +426,15 @@ static int ppce500_prep_device_tree(MachineState *machine,
     p->addr = addr;
     p->initrd_base = initrd_base;
     p->initrd_size = initrd_size;
+    p->kernel_base = kernel_base;
+    p->kernel_size = kernel_size;
 
     qemu_register_reset(ppce500_reset_device_tree, p);
 
     /* Issue the device tree loader once, so that we get the size of the blob */
     return ppce500_load_device_tree(machine, params, addr, initrd_base,
-                                    initrd_size, true);
+                                    initrd_size, kernel_base, kernel_size,
+                                    true);
 }
 
 /* Create -kernel TLB entries for BookE.  */
@@ -603,17 +619,22 @@ void ppce500_init(MachineState *machine, PPCE500Params *params)
     MemoryRegion *ram = g_new(MemoryRegion, 1);
     PCIBus *pci_bus;
     CPUPPCState *env = NULL;
-    uint64_t elf_entry;
-    uint64_t elf_lowaddr;
-    hwaddr entry=0;
-    hwaddr loadaddr=UIMAGE_LOAD_BASE;
-    target_long kernel_size=0;
-    target_ulong dt_base = 0;
-    target_ulong initrd_base = 0;
-    target_long initrd_size = 0;
-    target_ulong cur_base = 0;
+    uint64_t loadaddr;
+    hwaddr kernel_base = -1LL;
+    int kernel_size = 0;
+    hwaddr dt_base = 0;
+    hwaddr initrd_base = 0;
+    int initrd_size = 0;
+    hwaddr cur_base = 0;
+    char *filename;
+    hwaddr bios_entry = 0;
+    target_long bios_size;
+    struct boot_info *boot_info;
+    int dt_size;
     int i;
-    unsigned int pci_irq_nrs[4] = {1, 2, 3, 4};
+    /* irq num for pin INTA, INTB, INTC and INTD is 1, 2, 3 and
+     * 4 respectively */
+    unsigned int pci_irq_nrs[PCI_NUM_PINS] = {1, 2, 3, 4};
     qemu_irq **irqs, *mpic;
     DeviceState *dev;
     CPUPPCState *firstenv = NULL;
@@ -713,12 +734,13 @@ void ppce500_init(MachineState *machine, PPCE500Params *params)
     /* PCI */
     dev = qdev_create(NULL, "e500-pcihost");
     qdev_prop_set_uint32(dev, "first_slot", params->pci_first_slot);
+    qdev_prop_set_uint32(dev, "first_pin_irq", pci_irq_nrs[0]);
     qdev_init_nofail(dev);
     s = SYS_BUS_DEVICE(dev);
-    sysbus_connect_irq(s, 0, mpic[pci_irq_nrs[0]]);
-    sysbus_connect_irq(s, 1, mpic[pci_irq_nrs[1]]);
-    sysbus_connect_irq(s, 2, mpic[pci_irq_nrs[2]]);
-    sysbus_connect_irq(s, 3, mpic[pci_irq_nrs[3]]);
+    for (i = 0; i < PCI_NUM_PINS; i++) {
+        sysbus_connect_irq(s, i, mpic[pci_irq_nrs[i]]);
+    }
+
     memory_region_add_subregion(ccsr_addr_space, MPC8544_PCI_REGS_OFFSET,
                                 sysbus_mmio_get_region(s, 0));
 
@@ -738,29 +760,24 @@ void ppce500_init(MachineState *machine, PPCE500Params *params)
     /* Register spinning region */
     sysbus_create_simple("e500-spin", MPC8544_SPIN_BASE, NULL);
 
+    if (cur_base < (32 * 1024 * 1024)) {
+        /* u-boot occupies memory up to 32MB, so load blobs above */
+        cur_base = (32 * 1024 * 1024);
+    }
+
     /* Load kernel. */
     if (machine->kernel_filename) {
-        kernel_size = load_uimage(machine->kernel_filename, &entry,
-                                  &loadaddr, NULL);
-        if (kernel_size < 0) {
-            kernel_size = load_elf(machine->kernel_filename, NULL, NULL,
-                                   &elf_entry, &elf_lowaddr, NULL, 1,
-                                   ELF_MACHINE, 0);
-            entry = elf_entry;
-            loadaddr = elf_lowaddr;
-        }
-        /* XXX try again as binary */
+        kernel_base = cur_base;
+        kernel_size = load_image_targphys(machine->kernel_filename,
+                                          cur_base,
+                                          ram_size - cur_base);
         if (kernel_size < 0) {
             fprintf(stderr, "qemu: could not load kernel '%s'\n",
                     machine->kernel_filename);
             exit(1);
         }
 
-        cur_base = loadaddr + kernel_size;
-
-        /* Reserve space for dtb */
-        dt_base = (cur_base + DTC_LOAD_PAD) & ~DTC_PAD_MASK;
-        cur_base += DTB_MAX_SIZE;
+        cur_base += kernel_size;
     }
 
     /* Load initrd. */
@@ -778,24 +795,60 @@ void ppce500_init(MachineState *machine, PPCE500Params *params)
         cur_base = initrd_base + initrd_size;
     }
 
-    /* If we're loading a kernel directly, we must load the device tree too. */
-    if (machine->kernel_filename) {
-        struct boot_info *boot_info;
-        int dt_size;
-
-        dt_size = ppce500_prep_device_tree(machine, params, dt_base,
-                                           initrd_base, initrd_size);
-        if (dt_size < 0) {
-            fprintf(stderr, "couldn't load device tree\n");
+    /*
+     * Smart firmware defaults ahead!
+     *
+     * We follow the following table to select which payload we execute.
+     *
+     *  -kernel | -bios | payload
+     * ---------+-------+---------
+     *     N    |   Y   | u-boot
+     *     N    |   N   | u-boot
+     *     Y    |   Y   | u-boot
+     *     Y    |   N   | kernel
+     *
+     * This ensures backwards compatibility with how we used to expose
+     * -kernel to users but allows them to run through u-boot as well.
+     */
+    if (bios_name == NULL) {
+        if (machine->kernel_filename) {
+            bios_name = machine->kernel_filename;
+        } else {
+            bios_name = "u-boot.e500";
+        }
+    }
+    filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name);
+
+    bios_size = load_elf(filename, NULL, NULL, &bios_entry, &loadaddr, NULL,
+                         1, ELF_MACHINE, 0);
+    if (bios_size < 0) {
+        /*
+         * Hrm. No ELF image? Try a uImage, maybe someone is giving us an
+         * ePAPR compliant kernel
+         */
+        kernel_size = load_uimage(filename, &bios_entry, &loadaddr, NULL);
+        if (kernel_size < 0) {
+            fprintf(stderr, "qemu: could not load firmware '%s'\n", filename);
             exit(1);
         }
-        assert(dt_size < DTB_MAX_SIZE);
+    }
 
-        boot_info = env->load_info;
-        boot_info->entry = entry;
-        boot_info->dt_base = dt_base;
-        boot_info->dt_size = dt_size;
+    /* Reserve space for dtb */
+    dt_base = (loadaddr + bios_size + DTC_LOAD_PAD) & ~DTC_PAD_MASK;
+
+    dt_size = ppce500_prep_device_tree(machine, params, dt_base,
+                                       initrd_base, initrd_size,
+                                       kernel_base, kernel_size);
+    if (dt_size < 0) {
+        fprintf(stderr, "couldn't load device tree\n");
+        exit(1);
     }
+    assert(dt_size < DTB_MAX_SIZE);
+
+    boot_info = env->load_info;
+    boot_info->entry = bios_entry;
+    boot_info->dt_base = dt_base;
+    boot_info->dt_size = dt_size;
 
     if (kvm_enabled()) {
         kvmppc_init();
diff --git a/hw/ppc/mac_newworld.c b/hw/ppc/mac_newworld.c
index 4bdaa8d398..e493dc1b4b 100644
--- a/hw/ppc/mac_newworld.c
+++ b/hw/ppc/mac_newworld.c
@@ -72,6 +72,8 @@
 #define MAX_IDE_BUS 2
 #define CFG_ADDR 0xf0000510
 #define TBFREQ (100UL * 1000UL * 1000UL)
+#define CLOCKFREQ (266UL * 1000UL * 1000UL)
+#define BUSFREQ (100UL * 1000UL * 1000UL)
 
 /* debug UniNorth */
 //#define DEBUG_UNIN
@@ -467,7 +469,8 @@ static void ppc_core99_init(MachineState *machine)
         fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_TBFREQ, TBFREQ);
     }
     /* Mac OS X requires a "known good" clock-frequency value; pass it one. */
-    fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_CLOCKFREQ, 266000000);
+    fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_CLOCKFREQ, CLOCKFREQ);
+    fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_BUSFREQ, BUSFREQ);
 
     qemu_register_boot_set(fw_cfg_boot_set, fw_cfg);
 }
diff --git a/hw/ppc/mac_oldworld.c b/hw/ppc/mac_oldworld.c
index 77598e44cc..4b5e905fc2 100644
--- a/hw/ppc/mac_oldworld.c
+++ b/hw/ppc/mac_oldworld.c
@@ -46,6 +46,8 @@
 #define MAX_IDE_BUS 2
 #define CFG_ADDR 0xf0000510
 #define TBFREQ 16600000UL
+#define CLOCKFREQ 266000000UL
+#define BUSFREQ 66000000UL
 
 static int fw_cfg_boot_set(void *opaque, const char *boot_device)
 {
@@ -337,7 +339,8 @@ static void ppc_heathrow_init(MachineState *machine)
         fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_TBFREQ, TBFREQ);
     }
     /* Mac OS X requires a "known good" clock-frequency value; pass it one. */
-    fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_CLOCKFREQ, 266000000);
+    fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_CLOCKFREQ, CLOCKFREQ);
+    fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_BUSFREQ, BUSFREQ);
 
     qemu_register_boot_set(fw_cfg_boot_set, fw_cfg);
 }
diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c
index 71df471746..bec82cd7a9 100644
--- a/hw/ppc/ppc.c
+++ b/hw/ppc/ppc.c
@@ -29,9 +29,11 @@
 #include "sysemu/cpus.h"
 #include "hw/timer/m48t59.h"
 #include "qemu/log.h"
+#include "qemu/error-report.h"
 #include "hw/loader.h"
 #include "sysemu/kvm.h"
 #include "kvm_ppc.h"
+#include "trace.h"
 
 //#define PPC_DEBUG_IRQ
 //#define PPC_DEBUG_TB
@@ -49,6 +51,8 @@
 #  define LOG_TB(...) do { } while (0)
 #endif
 
+#define NSEC_PER_SEC    1000000000LL
+
 static void cpu_ppc_tb_stop (CPUPPCState *env);
 static void cpu_ppc_tb_start (CPUPPCState *env);
 
@@ -829,6 +833,81 @@ static void cpu_ppc_set_tb_clk (void *opaque, uint32_t freq)
     cpu_ppc_store_purr(cpu, 0x0000000000000000ULL);
 }
 
+static void timebase_pre_save(void *opaque)
+{
+    PPCTimebase *tb = opaque;
+    uint64_t ticks = cpu_get_real_ticks();
+    PowerPCCPU *first_ppc_cpu = POWERPC_CPU(first_cpu);
+
+    if (!first_ppc_cpu->env.tb_env) {
+        error_report("No timebase object");
+        return;
+    }
+
+    tb->time_of_the_day_ns = get_clock_realtime();
+    /*
+     * tb_offset is only expected to be changed by migration so
+     * there is no need to update it from KVM here
+     */
+    tb->guest_timebase = ticks + first_ppc_cpu->env.tb_env->tb_offset;
+}
+
+static int timebase_post_load(void *opaque, int version_id)
+{
+    PPCTimebase *tb_remote = opaque;
+    CPUState *cpu;
+    PowerPCCPU *first_ppc_cpu = POWERPC_CPU(first_cpu);
+    int64_t tb_off_adj, tb_off, ns_diff;
+    int64_t migration_duration_ns, migration_duration_tb, guest_tb, host_ns;
+    unsigned long freq;
+
+    if (!first_ppc_cpu->env.tb_env) {
+        error_report("No timebase object");
+        return -1;
+    }
+
+    freq = first_ppc_cpu->env.tb_env->tb_freq;
+    /*
+     * Calculate timebase on the destination side of migration.
+     * The destination timebase must be not less than the source timebase.
+     * We try to adjust timebase by downtime if host clocks are not
+     * too much out of sync (1 second for now).
+     */
+    host_ns = get_clock_realtime();
+    ns_diff = MAX(0, host_ns - tb_remote->time_of_the_day_ns);
+    migration_duration_ns = MIN(NSEC_PER_SEC, ns_diff);
+    migration_duration_tb = muldiv64(migration_duration_ns, freq, NSEC_PER_SEC);
+    guest_tb = tb_remote->guest_timebase + MIN(0, migration_duration_tb);
+
+    tb_off_adj = guest_tb - cpu_get_real_ticks();
+
+    tb_off = first_ppc_cpu->env.tb_env->tb_offset;
+    trace_ppc_tb_adjust(tb_off, tb_off_adj, tb_off_adj - tb_off,
+                        (tb_off_adj - tb_off) / freq);
+
+    /* Set new offset to all CPUs */
+    CPU_FOREACH(cpu) {
+        PowerPCCPU *pcpu = POWERPC_CPU(cpu);
+        pcpu->env.tb_env->tb_offset = tb_off_adj;
+    }
+
+    return 0;
+}
+
+const VMStateDescription vmstate_ppc_timebase = {
+    .name = "timebase",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .minimum_version_id_old = 1,
+    .pre_save = timebase_pre_save,
+    .post_load = timebase_post_load,
+    .fields      = (VMStateField []) {
+        VMSTATE_UINT64(guest_timebase, PPCTimebase),
+        VMSTATE_INT64(time_of_the_day_ns, PPCTimebase),
+        VMSTATE_END_OF_LIST()
+    },
+};
+
 /* Set up (once) timebase frequency (in Hz) */
 clk_setup_cb cpu_ppc_tb_init (CPUPPCState *env, uint32_t freq)
 {
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index adac5ffbfb..e06321cf15 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -34,6 +34,7 @@
 #include "sysemu/kvm.h"
 #include "kvm_ppc.h"
 #include "mmu-hash64.h"
+#include "qom/cpu.h"
 
 #include "hw/boards.h"
 #include "hw/ppc/ppc.h"
@@ -53,6 +54,7 @@
 #include "hw/usb.h"
 #include "qemu/config-file.h"
 #include "qemu/error-report.h"
+#include "trace.h"
 
 #include <libfdt.h>
 
@@ -78,13 +80,28 @@
 #define TIMEBASE_FREQ           512000000ULL
 
 #define MAX_CPUS                256
-#define XICS_IRQS               1024
 
 #define PHANDLE_XICP            0x00001111
 
 #define HTAB_SIZE(spapr)        (1ULL << ((spapr)->htab_shift))
 
+
+typedef struct SPAPRMachine SPAPRMachine;
 #define TYPE_SPAPR_MACHINE      "spapr-machine"
+#define SPAPR_MACHINE(obj) \
+    OBJECT_CHECK(SPAPRMachine, (obj), TYPE_SPAPR_MACHINE)
+
+/**
+ * SPAPRMachine:
+ */
+struct SPAPRMachine {
+    /*< private >*/
+    MachineState parent_obj;
+
+    /*< public >*/
+    char *kvm_type;
+};
+
 
 sPAPREnvironment *spapr;
 
@@ -202,34 +219,79 @@ static XICSState *xics_system_init(int nr_servers, int nr_irqs)
     return icp;
 }
 
+static int spapr_fixup_cpu_smt_dt(void *fdt, int offset, PowerPCCPU *cpu,
+                                  int smt_threads)
+{
+    int i, ret = 0;
+    uint32_t servers_prop[smt_threads];
+    uint32_t gservers_prop[smt_threads * 2];
+    int index = ppc_get_vcpu_dt_id(cpu);
+
+    if (cpu->cpu_version) {
+        ret = fdt_setprop(fdt, offset, "cpu-version",
+                          &cpu->cpu_version, sizeof(cpu->cpu_version));
+        if (ret < 0) {
+            return ret;
+        }
+    }
+
+    /* Build interrupt servers and gservers properties */
+    for (i = 0; i < smt_threads; i++) {
+        servers_prop[i] = cpu_to_be32(index + i);
+        /* Hack, direct the group queues back to cpu 0 */
+        gservers_prop[i*2] = cpu_to_be32(index + i);
+        gservers_prop[i*2 + 1] = 0;
+    }
+    ret = fdt_setprop(fdt, offset, "ibm,ppc-interrupt-server#s",
+                      servers_prop, sizeof(servers_prop));
+    if (ret < 0) {
+        return ret;
+    }
+    ret = fdt_setprop(fdt, offset, "ibm,ppc-interrupt-gserver#s",
+                      gservers_prop, sizeof(gservers_prop));
+
+    return ret;
+}
+
 static int spapr_fixup_cpu_dt(void *fdt, sPAPREnvironment *spapr)
 {
-    int ret = 0, offset;
-    CPUState *cpu;
+    int ret = 0, offset, cpus_offset;
+    CPUState *cs;
     char cpu_model[32];
     int smt = kvmppc_smt_threads();
     uint32_t pft_size_prop[] = {0, cpu_to_be32(spapr->htab_shift)};
 
-    CPU_FOREACH(cpu) {
-        DeviceClass *dc = DEVICE_GET_CLASS(cpu);
-        int index = ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
+    CPU_FOREACH(cs) {
+        PowerPCCPU *cpu = POWERPC_CPU(cs);
+        DeviceClass *dc = DEVICE_GET_CLASS(cs);
+        int index = ppc_get_vcpu_dt_id(cpu);
         uint32_t associativity[] = {cpu_to_be32(0x5),
                                     cpu_to_be32(0x0),
                                     cpu_to_be32(0x0),
                                     cpu_to_be32(0x0),
-                                    cpu_to_be32(cpu->numa_node),
+                                    cpu_to_be32(cs->numa_node),
                                     cpu_to_be32(index)};
 
         if ((index % smt) != 0) {
             continue;
         }
 
-        snprintf(cpu_model, 32, "/cpus/%s@%x", dc->fw_name,
-                 index);
+        snprintf(cpu_model, 32, "%s@%x", dc->fw_name, index);
 
-        offset = fdt_path_offset(fdt, cpu_model);
+        cpus_offset = fdt_path_offset(fdt, "/cpus");
+        if (cpus_offset < 0) {
+            cpus_offset = fdt_add_subnode(fdt, fdt_path_offset(fdt, "/"),
+                                          "cpus");
+            if (cpus_offset < 0) {
+                return cpus_offset;
+            }
+        }
+        offset = fdt_subnode_offset(fdt, cpus_offset, cpu_model);
         if (offset < 0) {
-            return offset;
+            offset = fdt_add_subnode(fdt, cpus_offset, cpu_model);
+            if (offset < 0) {
+                return offset;
+            }
         }
 
         if (nb_numa_nodes > 1) {
@@ -245,6 +307,12 @@ static int spapr_fixup_cpu_dt(void *fdt, sPAPREnvironment *spapr)
         if (ret < 0) {
             return ret;
         }
+
+        ret = spapr_fixup_cpu_smt_dt(fdt, offset, cpu,
+                                     ppc_get_compat_smt_threads(cpu));
+        if (ret < 0) {
+            return ret;
+        }
     }
     return ret;
 }
@@ -293,6 +361,10 @@ static size_t create_page_sizes_prop(CPUPPCState *env, uint32_t *prop,
         }                                                          \
     } while (0)
 
+static void add_str(GString *s, const gchar *s1)
+{
+    g_string_append_len(s, s1, strlen(s1) + 1);
+}
 
 static void *spapr_create_fdt_skel(hwaddr initrd_base,
                                    hwaddr initrd_size,
@@ -306,13 +378,26 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base,
     CPUState *cs;
     uint32_t start_prop = cpu_to_be32(initrd_base);
     uint32_t end_prop = cpu_to_be32(initrd_base + initrd_size);
-    char hypertas_prop[] = "hcall-pft\0hcall-term\0hcall-dabr\0hcall-interrupt"
-        "\0hcall-tce\0hcall-vio\0hcall-splpar\0hcall-bulk\0hcall-set-mode";
-    char qemu_hypertas_prop[] = "hcall-memop1";
+    GString *hypertas = g_string_sized_new(256);
+    GString *qemu_hypertas = g_string_sized_new(256);
     uint32_t refpoints[] = {cpu_to_be32(0x4), cpu_to_be32(0x4)};
     uint32_t interrupt_server_ranges_prop[] = {0, cpu_to_be32(smp_cpus)};
-    int i, smt = kvmppc_smt_threads();
+    int smt = kvmppc_smt_threads();
     unsigned char vec5[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x80};
+    QemuOpts *opts = qemu_opts_find(qemu_find_opts("smp-opts"), NULL);
+    unsigned sockets = opts ? qemu_opt_get_number(opts, "sockets", 0) : 0;
+    uint32_t cpus_per_socket = sockets ? (smp_cpus / sockets) : 1;
+
+    add_str(hypertas, "hcall-pft");
+    add_str(hypertas, "hcall-term");
+    add_str(hypertas, "hcall-dabr");
+    add_str(hypertas, "hcall-interrupt");
+    add_str(hypertas, "hcall-tce");
+    add_str(hypertas, "hcall-vio");
+    add_str(hypertas, "hcall-splpar");
+    add_str(hypertas, "hcall-bulk");
+    add_str(hypertas, "hcall-set-mode");
+    add_str(qemu_hypertas, "hcall-memop1");
 
     fdt = g_malloc0(FDT_MAX_SIZE);
     _FDT((fdt_create(fdt, FDT_MAX_SIZE)));
@@ -375,8 +460,6 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base,
         DeviceClass *dc = DEVICE_GET_CLASS(cs);
         PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cs);
         int index = ppc_get_vcpu_dt_id(cpu);
-        uint32_t servers_prop[smp_threads];
-        uint32_t gservers_prop[smp_threads * 2];
         char *nodename;
         uint32_t segs[] = {cpu_to_be32(28), cpu_to_be32(40),
                            0xffffffff, 0xffffffff};
@@ -425,18 +508,6 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base,
         _FDT((fdt_property_string(fdt, "status", "okay")));
         _FDT((fdt_property(fdt, "64-bit", NULL, 0)));
 
-        /* Build interrupt servers and gservers properties */
-        for (i = 0; i < smp_threads; i++) {
-            servers_prop[i] = cpu_to_be32(index + i);
-            /* Hack, direct the group queues back to cpu 0 */
-            gservers_prop[i*2] = cpu_to_be32(index + i);
-            gservers_prop[i*2 + 1] = 0;
-        }
-        _FDT((fdt_property(fdt, "ibm,ppc-interrupt-server#s",
-                           servers_prop, sizeof(servers_prop))));
-        _FDT((fdt_property(fdt, "ibm,ppc-interrupt-gserver#s",
-                           gservers_prop, sizeof(gservers_prop))));
-
         if (env->spr_cb[SPR_PURR].oea_read) {
             _FDT((fdt_property(fdt, "ibm,purr", NULL, 0)));
         }
@@ -470,6 +541,9 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base,
                                page_sizes_prop, page_sizes_prop_size)));
         }
 
+        _FDT((fdt_property_cell(fdt, "ibm,chip-id",
+                                cs->cpu_index / cpus_per_socket)));
+
         _FDT((fdt_end_node(fdt)));
     }
 
@@ -478,10 +552,15 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base,
     /* RTAS */
     _FDT((fdt_begin_node(fdt, "rtas")));
 
-    _FDT((fdt_property(fdt, "ibm,hypertas-functions", hypertas_prop,
-                       sizeof(hypertas_prop))));
-    _FDT((fdt_property(fdt, "qemu,hypertas-functions", qemu_hypertas_prop,
-                       sizeof(qemu_hypertas_prop))));
+    if (!kvm_enabled() || kvmppc_spapr_use_multitce()) {
+        add_str(hypertas, "hcall-multi-tce");
+    }
+    _FDT((fdt_property(fdt, "ibm,hypertas-functions", hypertas->str,
+                       hypertas->len)));
+    g_string_free(hypertas, TRUE);
+    _FDT((fdt_property(fdt, "qemu,hypertas-functions", qemu_hypertas->str,
+                       qemu_hypertas->len)));
+    g_string_free(qemu_hypertas, TRUE);
 
     _FDT((fdt_property(fdt, "ibm,associativity-reference-points",
         refpoints, sizeof(refpoints))));
@@ -521,12 +600,68 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base,
     /* event-sources */
     spapr_events_fdt_skel(fdt, epow_irq);
 
+    /* /hypervisor node */
+    if (kvm_enabled()) {
+        uint8_t hypercall[16];
+
+        /* indicate KVM hypercall interface */
+        _FDT((fdt_begin_node(fdt, "hypervisor")));
+        _FDT((fdt_property_string(fdt, "compatible", "linux,kvm")));
+        if (kvmppc_has_cap_fixup_hcalls()) {
+            /*
+             * Older KVM versions with older guest kernels were broken with the
+             * magic page, don't allow the guest to map it.
+             */
+            kvmppc_get_hypercall(first_cpu->env_ptr, hypercall,
+                                 sizeof(hypercall));
+            _FDT((fdt_property(fdt, "hcall-instructions", hypercall,
+                              sizeof(hypercall))));
+        }
+        _FDT((fdt_end_node(fdt)));
+    }
+
     _FDT((fdt_end_node(fdt))); /* close root node */
     _FDT((fdt_finish(fdt)));
 
     return fdt;
 }
 
+int spapr_h_cas_compose_response(target_ulong addr, target_ulong size)
+{
+    void *fdt, *fdt_skel;
+    sPAPRDeviceTreeUpdateHeader hdr = { .version_id = 1 };
+
+    size -= sizeof(hdr);
+
+    /* Create sceleton */
+    fdt_skel = g_malloc0(size);
+    _FDT((fdt_create(fdt_skel, size)));
+    _FDT((fdt_begin_node(fdt_skel, "")));
+    _FDT((fdt_end_node(fdt_skel)));
+    _FDT((fdt_finish(fdt_skel)));
+    fdt = g_malloc0(size);
+    _FDT((fdt_open_into(fdt_skel, fdt, size)));
+    g_free(fdt_skel);
+
+    /* Fix skeleton up */
+    _FDT((spapr_fixup_cpu_dt(fdt, spapr)));
+
+    /* Pack resulting tree */
+    _FDT((fdt_pack(fdt)));
+
+    if (fdt_totalsize(fdt) + sizeof(hdr) > size) {
+        trace_spapr_cas_failed(size);
+        return -1;
+    }
+
+    cpu_physical_memory_write(addr, &hdr, sizeof(hdr));
+    cpu_physical_memory_write(addr + sizeof(hdr), fdt, fdt_totalsize(fdt));
+    trace_spapr_cas_continue(fdt_totalsize(fdt) + sizeof(hdr));
+    g_free(fdt);
+
+    return 0;
+}
+
 static int spapr_populate_memory(sPAPREnvironment *spapr, void *fdt)
 {
     uint32_t associativity[] = {cpu_to_be32(0x4), cpu_to_be32(0x0),
@@ -817,14 +952,14 @@ static int spapr_vga_init(PCIBus *pci_bus)
 
 static const VMStateDescription vmstate_spapr = {
     .name = "spapr",
-    .version_id = 1,
+    .version_id = 2,
     .minimum_version_id = 1,
     .fields = (VMStateField[]) {
         VMSTATE_UINT32(next_irq, sPAPREnvironment),
 
         /* RTC offset */
         VMSTATE_UINT64(rtc_offset, sPAPREnvironment),
-
+        VMSTATE_PPC_TIMEBASE_V(tb, sPAPREnvironment, 2),
         VMSTATE_END_OF_LIST()
     },
 };
@@ -1251,6 +1386,12 @@ static void ppc_spapr_init(MachineState *machine)
             kvmppc_set_papr(cpu);
         }
 
+        if (cpu->max_compat) {
+            if (ppc_set_compat(cpu, cpu->max_compat) < 0) {
+                exit(1);
+            }
+        }
+
         xics_cpu_setup(spapr->icp, cpu);
 
         qemu_register_reset(spapr_cpu_reset, cpu);
@@ -1475,6 +1616,27 @@ static char *spapr_get_fw_dev_path(FWPathProvider *p, BusState *bus,
     return NULL;
 }
 
+static char *spapr_get_kvm_type(Object *obj, Error **errp)
+{
+    SPAPRMachine *sm = SPAPR_MACHINE(obj);
+
+    return g_strdup(sm->kvm_type);
+}
+
+static void spapr_set_kvm_type(Object *obj, const char *value, Error **errp)
+{
+    SPAPRMachine *sm = SPAPR_MACHINE(obj);
+
+    g_free(sm->kvm_type);
+    sm->kvm_type = g_strdup(value);
+}
+
+static void spapr_machine_initfn(Object *obj)
+{
+    object_property_add_str(obj, "kvm-type",
+                            spapr_get_kvm_type, spapr_set_kvm_type, NULL);
+}
+
 static void spapr_machine_class_init(ObjectClass *oc, void *data)
 {
     MachineClass *mc = MACHINE_CLASS(oc);
@@ -1497,6 +1659,8 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
 static const TypeInfo spapr_machine_info = {
     .name          = TYPE_SPAPR_MACHINE,
     .parent        = TYPE_MACHINE,
+    .instance_size = sizeof(SPAPRMachine),
+    .instance_init = spapr_machine_initfn,
     .class_init    = spapr_machine_class_init,
     .interfaces = (InterfaceInfo[]) {
         { TYPE_FW_PATH_PROVIDER },
diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
index 0bae0535e8..795207799d 100644
--- a/hw/ppc/spapr_hcall.c
+++ b/hw/ppc/spapr_hcall.c
@@ -3,6 +3,9 @@
 #include "helper_regs.h"
 #include "hw/ppc/spapr.h"
 #include "mmu-hash64.h"
+#include "cpu-models.h"
+#include "trace.h"
+#include "kvm_ppc.h"
 
 struct SPRSyncState {
     CPUState *cs;
@@ -709,47 +712,218 @@ static target_ulong h_logical_dcbf(PowerPCCPU *cpu, sPAPREnvironment *spapr,
     return H_SUCCESS;
 }
 
+static target_ulong h_set_mode_resouce_le(PowerPCCPU *cpu,
+                                          target_ulong mflags,
+                                          target_ulong value1,
+                                          target_ulong value2)
+{
+    CPUState *cs;
+
+    if (value1) {
+        return H_P3;
+    }
+    if (value2) {
+        return H_P4;
+    }
+
+    switch (mflags) {
+    case H_SET_MODE_ENDIAN_BIG:
+        CPU_FOREACH(cs) {
+            set_spr(cs, SPR_LPCR, 0, LPCR_ILE);
+        }
+        return H_SUCCESS;
+
+    case H_SET_MODE_ENDIAN_LITTLE:
+        CPU_FOREACH(cs) {
+            set_spr(cs, SPR_LPCR, LPCR_ILE, LPCR_ILE);
+        }
+        return H_SUCCESS;
+    }
+
+    return H_UNSUPPORTED_FLAG;
+}
+
+static target_ulong h_set_mode_resouce_addr_trans_mode(PowerPCCPU *cpu,
+                                                       target_ulong mflags,
+                                                       target_ulong value1,
+                                                       target_ulong value2)
+{
+    CPUState *cs;
+    PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
+    target_ulong prefix;
+
+    if (!(pcc->insns_flags2 & PPC2_ISA207S)) {
+        return H_P2;
+    }
+    if (value1) {
+        return H_P3;
+    }
+    if (value2) {
+        return H_P4;
+    }
+
+    switch (mflags) {
+    case H_SET_MODE_ADDR_TRANS_NONE:
+        prefix = 0;
+        break;
+    case H_SET_MODE_ADDR_TRANS_0001_8000:
+        prefix = 0x18000;
+        break;
+    case H_SET_MODE_ADDR_TRANS_C000_0000_0000_4000:
+        prefix = 0xC000000000004000;
+        break;
+    default:
+        return H_UNSUPPORTED_FLAG;
+    }
+
+    CPU_FOREACH(cs) {
+        CPUPPCState *env = &POWERPC_CPU(cpu)->env;
+
+        set_spr(cs, SPR_LPCR, mflags << LPCR_AIL_SHIFT, LPCR_AIL);
+        env->excp_prefix = prefix;
+    }
+
+    return H_SUCCESS;
+}
+
 static target_ulong h_set_mode(PowerPCCPU *cpu, sPAPREnvironment *spapr,
                                target_ulong opcode, target_ulong *args)
 {
-    CPUState *cs;
-    target_ulong mflags = args[0];
     target_ulong resource = args[1];
-    target_ulong value1 = args[2];
-    target_ulong value2 = args[3];
     target_ulong ret = H_P2;
 
-    if (resource == H_SET_MODE_RESOURCE_LE) {
-        if (value1) {
-            ret = H_P3;
-            goto out;
-        }
-        if (value2) {
-            ret = H_P4;
-            goto out;
-        }
-        switch (mflags) {
-        case H_SET_MODE_ENDIAN_BIG:
-            CPU_FOREACH(cs) {
-                set_spr(cs, SPR_LPCR, 0, LPCR_ILE);
-            }
-            ret = H_SUCCESS;
-            break;
+    switch (resource) {
+    case H_SET_MODE_RESOURCE_LE:
+        ret = h_set_mode_resouce_le(cpu, args[0], args[2], args[3]);
+        break;
+    case H_SET_MODE_RESOURCE_ADDR_TRANS_MODE:
+        ret = h_set_mode_resouce_addr_trans_mode(cpu, args[0],
+                                                 args[2], args[3]);
+        break;
+    }
+
+    return ret;
+}
+
+typedef struct {
+    PowerPCCPU *cpu;
+    uint32_t cpu_version;
+    int ret;
+} SetCompatState;
 
-        case H_SET_MODE_ENDIAN_LITTLE:
-            CPU_FOREACH(cs) {
-                set_spr(cs, SPR_LPCR, LPCR_ILE, LPCR_ILE);
+static void do_set_compat(void *arg)
+{
+    SetCompatState *s = arg;
+
+    cpu_synchronize_state(CPU(s->cpu));
+    s->ret = ppc_set_compat(s->cpu, s->cpu_version);
+}
+
+#define get_compat_level(cpuver) ( \
+    ((cpuver) == CPU_POWERPC_LOGICAL_2_05) ? 2050 : \
+    ((cpuver) == CPU_POWERPC_LOGICAL_2_06) ? 2060 : \
+    ((cpuver) == CPU_POWERPC_LOGICAL_2_06_PLUS) ? 2061 : \
+    ((cpuver) == CPU_POWERPC_LOGICAL_2_07) ? 2070 : 0)
+
+static target_ulong h_client_architecture_support(PowerPCCPU *cpu_,
+                                                  sPAPREnvironment *spapr,
+                                                  target_ulong opcode,
+                                                  target_ulong *args)
+{
+    target_ulong list = args[0];
+    PowerPCCPUClass *pcc_ = POWERPC_CPU_GET_CLASS(cpu_);
+    CPUState *cs;
+    bool cpu_match = false;
+    unsigned old_cpu_version = cpu_->cpu_version;
+    unsigned compat_lvl = 0, cpu_version = 0;
+    unsigned max_lvl = get_compat_level(cpu_->max_compat);
+    int counter;
+
+    /* Parse PVR list */
+    for (counter = 0; counter < 512; ++counter) {
+        uint32_t pvr, pvr_mask;
+
+        pvr_mask = rtas_ld(list, 0);
+        list += 4;
+        pvr = rtas_ld(list, 0);
+        list += 4;
+
+        trace_spapr_cas_pvr_try(pvr);
+        if (!max_lvl &&
+            ((cpu_->env.spr[SPR_PVR] & pvr_mask) == (pvr & pvr_mask))) {
+            cpu_match = true;
+            cpu_version = 0;
+        } else if (pvr == cpu_->cpu_version) {
+            cpu_match = true;
+            cpu_version = cpu_->cpu_version;
+        } else if (!cpu_match) {
+            /* If it is a logical PVR, try to determine the highest level */
+            unsigned lvl = get_compat_level(pvr);
+            if (lvl) {
+                bool is205 = (pcc_->pcr_mask & PCR_COMPAT_2_05) &&
+                     (lvl == get_compat_level(CPU_POWERPC_LOGICAL_2_05));
+                bool is206 = (pcc_->pcr_mask & PCR_COMPAT_2_06) &&
+                    ((lvl == get_compat_level(CPU_POWERPC_LOGICAL_2_06)) ||
+                    (lvl == get_compat_level(CPU_POWERPC_LOGICAL_2_06_PLUS)));
+
+                if (is205 || is206) {
+                    if (!max_lvl) {
+                        /* User did not set the level, choose the highest */
+                        if (compat_lvl <= lvl) {
+                            compat_lvl = lvl;
+                            cpu_version = pvr;
+                        }
+                    } else if (max_lvl >= lvl) {
+                        /* User chose the level, don't set higher than this */
+                        compat_lvl = lvl;
+                        cpu_version = pvr;
+                    }
+                }
             }
-            ret = H_SUCCESS;
+        }
+        /* Terminator record */
+        if (~pvr_mask & pvr) {
             break;
+        }
+    }
 
-        default:
-            ret = H_UNSUPPORTED_FLAG;
+    /* For the future use: here @list points to the first capability */
+
+    /* Parsing finished */
+    trace_spapr_cas_pvr(cpu_->cpu_version, cpu_match,
+                        cpu_version, pcc_->pcr_mask);
+
+    /* Update CPUs */
+    if (old_cpu_version != cpu_version) {
+        CPU_FOREACH(cs) {
+            SetCompatState s = {
+                .cpu = POWERPC_CPU(cs),
+                .cpu_version = cpu_version,
+                .ret = 0
+            };
+
+            run_on_cpu(cs, do_set_compat, &s);
+
+            if (s.ret < 0) {
+                fprintf(stderr, "Unable to set compatibility mode\n");
+                return H_HARDWARE;
+            }
         }
     }
 
-out:
-    return ret;
+    if (!cpu_version) {
+        return H_SUCCESS;
+    }
+
+    if (!list) {
+        return H_SUCCESS;
+    }
+
+    if (spapr_h_cas_compose_response(args[1], args[2])) {
+        qemu_system_reset_request();
+    }
+
+    return H_SUCCESS;
 }
 
 static spapr_hcall_fn papr_hypercall_table[(MAX_HCALL_OPCODE / 4) + 1];
@@ -831,6 +1005,9 @@ static void hypercall_register_types(void)
     spapr_register_hypercall(KVMPPC_H_RTAS, h_rtas);
 
     spapr_register_hypercall(H_SET_MODE, h_set_mode);
+
+    /* ibm,client-architecture-support support */
+    spapr_register_hypercall(KVMPPC_H_CAS, h_client_architecture_support);
 }
 
 type_init(hypercall_register_types)
diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
index 2887ad4827..9e49ec4a5c 100644
--- a/hw/ppc/spapr_iommu.c
+++ b/hw/ppc/spapr_iommu.c
@@ -35,6 +35,9 @@ enum sPAPRTCEAccess {
     SPAPR_TCE_RW = 3,
 };
 
+#define IOMMU_PAGE_SIZE(shift)      (1ULL << (shift))
+#define IOMMU_PAGE_MASK(shift)      (~(IOMMU_PAGE_SIZE(shift) - 1))
+
 static QLIST_HEAD(spapr_tce_tables, sPAPRTCETable) spapr_tce_tables;
 
 static sPAPRTCETable *spapr_tce_find_by_liobn(uint32_t liobn)
@@ -70,12 +73,14 @@ static IOMMUTLBEntry spapr_tce_translate_iommu(MemoryRegion *iommu, hwaddr addr)
 
     if (tcet->bypass) {
         ret.perm = IOMMU_RW;
-    } else if (addr < tcet->window_size) {
+    } else if ((addr >> tcet->page_shift) < tcet->nb_table) {
         /* Check if we are in bound */
-        tce = tcet->table[addr >> SPAPR_TCE_PAGE_SHIFT];
-        ret.iova = addr & ~SPAPR_TCE_PAGE_MASK;
-        ret.translated_addr = tce & ~SPAPR_TCE_PAGE_MASK;
-        ret.addr_mask = SPAPR_TCE_PAGE_MASK;
+        hwaddr page_mask = IOMMU_PAGE_MASK(tcet->page_shift);
+
+        tce = tcet->table[addr >> tcet->page_shift];
+        ret.iova = addr & page_mask;
+        ret.translated_addr = tce & page_mask;
+        ret.addr_mask = ~page_mask;
         ret.perm = tce;
     }
     trace_spapr_iommu_xlate(tcet->liobn, addr, ret.iova, ret.perm,
@@ -84,24 +89,14 @@ static IOMMUTLBEntry spapr_tce_translate_iommu(MemoryRegion *iommu, hwaddr addr)
     return ret;
 }
 
-static int spapr_tce_table_pre_load(void *opaque)
-{
-    sPAPRTCETable *tcet = SPAPR_TCE_TABLE(opaque);
-
-    tcet->nb_table = tcet->window_size >> SPAPR_TCE_PAGE_SHIFT;
-
-    return 0;
-}
-
 static const VMStateDescription vmstate_spapr_tce_table = {
     .name = "spapr_iommu",
-    .version_id = 1,
-    .minimum_version_id = 1,
-    .pre_load = spapr_tce_table_pre_load,
-    .fields = (VMStateField[]) {
+    .version_id = 2,
+    .minimum_version_id = 2,
+    .fields      = (VMStateField []) {
         /* Sanity check */
         VMSTATE_UINT32_EQUAL(liobn, sPAPRTCETable),
-        VMSTATE_UINT32_EQUAL(window_size, sPAPRTCETable),
+        VMSTATE_UINT32_EQUAL(nb_table, sPAPRTCETable),
 
         /* IOMMU state */
         VMSTATE_BOOL(bypass, sPAPRTCETable),
@@ -121,28 +116,33 @@ static int spapr_tce_table_realize(DeviceState *dev)
 
     if (kvm_enabled()) {
         tcet->table = kvmppc_create_spapr_tce(tcet->liobn,
-                                              tcet->window_size,
+                                              tcet->nb_table <<
+                                              tcet->page_shift,
                                               &tcet->fd);
     }
 
     if (!tcet->table) {
-        size_t table_size = (tcet->window_size >> SPAPR_TCE_PAGE_SHIFT)
-            * sizeof(uint64_t);
+        size_t table_size = tcet->nb_table * sizeof(uint64_t);
         tcet->table = g_malloc0(table_size);
     }
-    tcet->nb_table = tcet->window_size >> SPAPR_TCE_PAGE_SHIFT;
 
     trace_spapr_iommu_new_table(tcet->liobn, tcet, tcet->table, tcet->fd);
 
     memory_region_init_iommu(&tcet->iommu, OBJECT(dev), &spapr_iommu_ops,
-                             "iommu-spapr", UINT64_MAX);
+                             "iommu-spapr", ram_size);
 
     QLIST_INSERT_HEAD(&spapr_tce_tables, tcet, list);
 
+    vmstate_register(DEVICE(tcet), tcet->liobn, &vmstate_spapr_tce_table,
+                     tcet);
+
     return 0;
 }
 
-sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn, size_t window_size)
+sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn,
+                                   uint64_t bus_offset,
+                                   uint32_t page_shift,
+                                   uint32_t nb_table)
 {
     sPAPRTCETable *tcet;
 
@@ -152,17 +152,19 @@ sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn, size_t wi
         return NULL;
     }
 
-    if (!window_size) {
+    if (!nb_table) {
         return NULL;
     }
 
     tcet = SPAPR_TCE_TABLE(object_new(TYPE_SPAPR_TCE_TABLE));
     tcet->liobn = liobn;
-    tcet->window_size = window_size;
+    tcet->bus_offset = bus_offset;
+    tcet->page_shift = page_shift;
+    tcet->nb_table = nb_table;
 
     object_property_add_child(OBJECT(owner), "tce-table", OBJECT(tcet), NULL);
 
-    qdev_init_nofail(DEVICE(tcet));
+    object_property_set_bool(OBJECT(tcet), true, "realized", NULL);
 
     return tcet;
 }
@@ -175,7 +177,7 @@ static void spapr_tce_table_finalize(Object *obj)
 
     if (!kvm_enabled() ||
         (kvmppc_remove_spapr_tce(tcet->table, tcet->fd,
-                                 tcet->window_size) != 0)) {
+                                 tcet->nb_table) != 0)) {
         g_free(tcet->table);
     }
 }
@@ -193,8 +195,7 @@ void spapr_tce_set_bypass(sPAPRTCETable *tcet, bool bypass)
 static void spapr_tce_reset(DeviceState *dev)
 {
     sPAPRTCETable *tcet = SPAPR_TCE_TABLE(dev);
-    size_t table_size = (tcet->window_size >> SPAPR_TCE_PAGE_SHIFT)
-        * sizeof(uint64_t);
+    size_t table_size = tcet->nb_table * sizeof(uint64_t);
 
     tcet->bypass = false;
     memset(tcet->table, 0, table_size);
@@ -204,25 +205,110 @@ static target_ulong put_tce_emu(sPAPRTCETable *tcet, target_ulong ioba,
                                 target_ulong tce)
 {
     IOMMUTLBEntry entry;
+    hwaddr page_mask = IOMMU_PAGE_MASK(tcet->page_shift);
+    unsigned long index = (ioba - tcet->bus_offset) >> tcet->page_shift;
 
-    if (ioba >= tcet->window_size) {
+    if (index >= tcet->nb_table) {
         hcall_dprintf("spapr_vio_put_tce on out-of-bounds IOBA 0x"
                       TARGET_FMT_lx "\n", ioba);
         return H_PARAMETER;
     }
 
-    tcet->table[ioba >> SPAPR_TCE_PAGE_SHIFT] = tce;
+    tcet->table[index] = tce;
 
     entry.target_as = &address_space_memory,
-    entry.iova = ioba & ~SPAPR_TCE_PAGE_MASK;
-    entry.translated_addr = tce & ~SPAPR_TCE_PAGE_MASK;
-    entry.addr_mask = SPAPR_TCE_PAGE_MASK;
+    entry.iova = ioba & page_mask;
+    entry.translated_addr = tce & page_mask;
+    entry.addr_mask = ~page_mask;
     entry.perm = tce;
     memory_region_notify_iommu(&tcet->iommu, entry);
 
     return H_SUCCESS;
 }
 
+static target_ulong h_put_tce_indirect(PowerPCCPU *cpu,
+                                       sPAPREnvironment *spapr,
+                                       target_ulong opcode, target_ulong *args)
+{
+    int i;
+    target_ulong liobn = args[0];
+    target_ulong ioba = args[1];
+    target_ulong ioba1 = ioba;
+    target_ulong tce_list = args[2];
+    target_ulong npages = args[3];
+    target_ulong ret = H_PARAMETER;
+    sPAPRTCETable *tcet = spapr_tce_find_by_liobn(liobn);
+    CPUState *cs = CPU(cpu);
+    hwaddr page_mask, page_size;
+
+    if (!tcet) {
+        return H_PARAMETER;
+    }
+
+    if ((npages > 512) || (tce_list & SPAPR_TCE_PAGE_MASK)) {
+        return H_PARAMETER;
+    }
+
+    page_mask = IOMMU_PAGE_MASK(tcet->page_shift);
+    page_size = IOMMU_PAGE_SIZE(tcet->page_shift);
+    ioba &= page_mask;
+
+    for (i = 0; i < npages; ++i, ioba += page_size) {
+        target_ulong off = (tce_list & ~SPAPR_TCE_RW) +
+                                i * sizeof(target_ulong);
+        target_ulong tce = ldq_phys(cs->as, off);
+
+        ret = put_tce_emu(tcet, ioba, tce);
+        if (ret) {
+            break;
+        }
+    }
+
+    /* Trace last successful or the first problematic entry */
+    i = i ? (i - 1) : 0;
+    trace_spapr_iommu_indirect(liobn, ioba1, tce_list, i,
+                               ldq_phys(cs->as,
+                               tce_list + i * sizeof(target_ulong)),
+                               ret);
+
+    return ret;
+}
+
+static target_ulong h_stuff_tce(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+                              target_ulong opcode, target_ulong *args)
+{
+    int i;
+    target_ulong liobn = args[0];
+    target_ulong ioba = args[1];
+    target_ulong tce_value = args[2];
+    target_ulong npages = args[3];
+    target_ulong ret = H_PARAMETER;
+    sPAPRTCETable *tcet = spapr_tce_find_by_liobn(liobn);
+    hwaddr page_mask, page_size;
+
+    if (!tcet) {
+        return H_PARAMETER;
+    }
+
+    if (npages > tcet->nb_table) {
+        return H_PARAMETER;
+    }
+
+    page_mask = IOMMU_PAGE_MASK(tcet->page_shift);
+    page_size = IOMMU_PAGE_SIZE(tcet->page_shift);
+    ioba &= page_mask;
+
+    for (i = 0; i < npages; ++i, ioba += page_size) {
+        ret = put_tce_emu(tcet, ioba, tce_value);
+        if (ret) {
+            break;
+        }
+    }
+    trace_spapr_iommu_stuff(liobn, ioba, tce_value, npages, ret);
+
+    return ret;
+}
+
 static target_ulong h_put_tce(PowerPCCPU *cpu, sPAPREnvironment *spapr,
                               target_ulong opcode, target_ulong *args)
 {
@@ -232,9 +318,11 @@ static target_ulong h_put_tce(PowerPCCPU *cpu, sPAPREnvironment *spapr,
     target_ulong ret = H_PARAMETER;
     sPAPRTCETable *tcet = spapr_tce_find_by_liobn(liobn);
 
-    ioba &= ~(SPAPR_TCE_PAGE_SIZE - 1);
-
     if (tcet) {
+        hwaddr page_mask = IOMMU_PAGE_MASK(tcet->page_shift);
+
+        ioba &= page_mask;
+
         ret = put_tce_emu(tcet, ioba, tce);
     }
     trace_spapr_iommu_put(liobn, ioba, tce, ret);
@@ -245,13 +333,15 @@ static target_ulong h_put_tce(PowerPCCPU *cpu, sPAPREnvironment *spapr,
 static target_ulong get_tce_emu(sPAPRTCETable *tcet, target_ulong ioba,
                                 target_ulong *tce)
 {
-    if (ioba >= tcet->window_size) {
+    unsigned long index = (ioba - tcet->bus_offset) >> tcet->page_shift;
+
+    if (index >= tcet->nb_table) {
         hcall_dprintf("spapr_iommu_get_tce on out-of-bounds IOBA 0x"
                       TARGET_FMT_lx "\n", ioba);
         return H_PARAMETER;
     }
 
-    *tce = tcet->table[ioba >> SPAPR_TCE_PAGE_SHIFT];
+    *tce = tcet->table[index];
 
     return H_SUCCESS;
 }
@@ -265,9 +355,11 @@ static target_ulong h_get_tce(PowerPCCPU *cpu, sPAPREnvironment *spapr,
     target_ulong ret = H_PARAMETER;
     sPAPRTCETable *tcet = spapr_tce_find_by_liobn(liobn);
 
-    ioba &= ~(SPAPR_TCE_PAGE_SIZE - 1);
-
     if (tcet) {
+        hwaddr page_mask = IOMMU_PAGE_MASK(tcet->page_shift);
+
+        ioba &= page_mask;
+
         ret = get_tce_emu(tcet, ioba, &tce);
         if (!ret) {
             args[0] = tce;
@@ -316,13 +408,12 @@ int spapr_tcet_dma_dt(void *fdt, int node_off, const char *propname,
     }
 
     return spapr_dma_dt(fdt, node_off, propname,
-                        tcet->liobn, 0, tcet->window_size);
+                        tcet->liobn, 0, tcet->nb_table << tcet->page_shift);
 }
 
 static void spapr_tce_table_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
-    dc->vmsd = &vmstate_spapr_tce_table;
     dc->init = spapr_tce_table_realize;
     dc->reset = spapr_tce_reset;
 
@@ -331,6 +422,8 @@ static void spapr_tce_table_class_init(ObjectClass *klass, void *data)
     /* hcall-tce */
     spapr_register_hypercall(H_PUT_TCE, h_put_tce);
     spapr_register_hypercall(H_GET_TCE, h_get_tce);
+    spapr_register_hypercall(H_PUT_TCE_INDIRECT, h_put_tce_indirect);
+    spapr_register_hypercall(H_STUFF_TCE, h_stuff_tce);
 }
 
 static TypeInfo spapr_tce_table_info = {
diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index a4a51d4990..988f8cb858 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -280,7 +280,7 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPREnvironment *spapr,
     unsigned int req_num = rtas_ld(args, 4); /* 0 == remove all */
     unsigned int seq_num = rtas_ld(args, 5);
     unsigned int ret_intr_type;
-    int ndev, irq;
+    int ndev, irq, max_irqs = 0;
     sPAPRPHBState *phb = NULL;
     PCIDevice *pdev = NULL;
 
@@ -333,6 +333,23 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPREnvironment *spapr,
     }
     trace_spapr_pci_msi("Configuring MSI", ndev, config_addr);
 
+    /* Check if the device supports as many IRQs as requested */
+    if (ret_intr_type == RTAS_TYPE_MSI) {
+        max_irqs = msi_nr_vectors_allocated(pdev);
+    } else if (ret_intr_type == RTAS_TYPE_MSIX) {
+        max_irqs = pdev->msix_entries_nr;
+    }
+    if (!max_irqs) {
+        error_report("Requested interrupt type %d is not enabled for device#%d",
+                     ret_intr_type, ndev);
+        rtas_st(rets, 0, -1); /* Hardware error */
+        return;
+    }
+    /* Correct the number if the guest asked for too many */
+    if (req_num > max_irqs) {
+        req_num = max_irqs;
+    }
+
     /* Check if there is an old config and MSI number has not changed */
     if (phb->msi_table[ndev].nvec && (req_num != phb->msi_table[ndev].nvec)) {
         /* Unexpected behaviour */
@@ -511,6 +528,7 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp)
     SysBusDevice *s = SYS_BUS_DEVICE(dev);
     sPAPRPHBState *sphb = SPAPR_PCI_HOST_BRIDGE(s);
     PCIHostState *phb = PCI_HOST_BRIDGE(s);
+    sPAPRPHBClass *info = SPAPR_PCI_HOST_BRIDGE_GET_CLASS(s);
     char *namebuf;
     int i;
     PCIBus *bus;
@@ -575,23 +593,14 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp)
     memory_region_add_subregion(get_system_memory(), sphb->mem_win_addr,
                                 &sphb->memwindow);
 
-    /* On ppc, we only have MMIO no specific IO space from the CPU
-     * perspective.  In theory we ought to be able to embed the PCI IO
-     * memory region direction in the system memory space.  However,
-     * if any of the IO BAR subregions use the old_portio mechanism,
-     * that won't be processed properly unless accessed from the
-     * system io address space.  This hack to bounce things via
-     * system_io works around the problem until all the users of
-     * old_portion are updated */
+    /* Initialize IO regions */
     sprintf(namebuf, "%s.io", sphb->dtbusname);
     memory_region_init(&sphb->iospace, OBJECT(sphb),
                        namebuf, SPAPR_PCI_IO_WIN_SIZE);
-    /* FIXME: fix to support multiple PHBs */
-    memory_region_add_subregion(get_system_io(), 0, &sphb->iospace);
 
     sprintf(namebuf, "%s.io-alias", sphb->dtbusname);
     memory_region_init_alias(&sphb->iowindow, OBJECT(sphb), namebuf,
-                             get_system_io(), 0, SPAPR_PCI_IO_WIN_SIZE);
+                             &sphb->iospace, 0, SPAPR_PCI_IO_WIN_SIZE);
     memory_region_add_subregion(get_system_memory(), sphb->io_win_addr,
                                 &sphb->iowindow);
 
@@ -601,16 +610,18 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp)
                            PCI_DEVFN(0, 0), PCI_NUM_PINS, TYPE_PCI_BUS);
     phb->bus = bus;
 
-    sphb->dma_window_start = 0;
-    sphb->dma_window_size = 0x40000000;
-    sphb->tcet = spapr_tce_new_table(dev, sphb->dma_liobn,
-                                     sphb->dma_window_size);
-    if (!sphb->tcet) {
-        error_setg(errp, "Unable to create TCE table for %s",
-                   sphb->dtbusname);
-        return;
-    }
-    address_space_init(&sphb->iommu_as, spapr_tce_get_iommu(sphb->tcet),
+    /*
+     * Initialize PHB address space.
+     * By default there will be at least one subregion for default
+     * 32bit DMA window.
+     * Later the guest might want to create another DMA window
+     * which will become another memory subregion.
+     */
+    sprintf(namebuf, "%s.iommu-root", sphb->dtbusname);
+
+    memory_region_init(&sphb->iommu_root, OBJECT(sphb),
+                       namebuf, UINT64_MAX);
+    address_space_init(&sphb->iommu_as, &sphb->iommu_root,
                        sphb->dtbusname);
 
     pci_setup_iommu(bus, spapr_pci_dma_iommu, sphb);
@@ -631,15 +642,49 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp)
 
         sphb->lsi_table[i].irq = irq;
     }
+
+    if (!info->finish_realize) {
+        error_setg(errp, "finish_realize not defined");
+        return;
+    }
+
+    info->finish_realize(sphb, errp);
 }
 
-static void spapr_phb_reset(DeviceState *qdev)
+static void spapr_phb_finish_realize(sPAPRPHBState *sphb, Error **errp)
 {
-    SysBusDevice *s = SYS_BUS_DEVICE(qdev);
-    sPAPRPHBState *sphb = SPAPR_PCI_HOST_BRIDGE(s);
+    sPAPRTCETable *tcet;
 
+    tcet = spapr_tce_new_table(DEVICE(sphb), sphb->dma_liobn,
+                               0,
+                               SPAPR_TCE_PAGE_SHIFT,
+                               0x40000000 >> SPAPR_TCE_PAGE_SHIFT);
+    if (!tcet) {
+        error_setg(errp, "Unable to create TCE table for %s",
+                   sphb->dtbusname);
+        return ;
+    }
+
+    /* Register default 32bit DMA window */
+    memory_region_add_subregion(&sphb->iommu_root, 0,
+                                spapr_tce_get_iommu(tcet));
+}
+
+static int spapr_phb_children_reset(Object *child, void *opaque)
+{
+    DeviceState *dev = (DeviceState *) object_dynamic_cast(child, TYPE_DEVICE);
+
+    if (dev) {
+        device_reset(dev);
+    }
+
+    return 0;
+}
+
+static void spapr_phb_reset(DeviceState *qdev)
+{
     /* Reset the IOMMU state */
-    device_reset(DEVICE(sphb->tcet));
+    object_child_foreach(OBJECT(qdev), spapr_phb_children_reset, NULL);
 }
 
 static Property spapr_phb_properties[] = {
@@ -711,6 +756,7 @@ static void spapr_phb_class_init(ObjectClass *klass, void *data)
 {
     PCIHostBridgeClass *hc = PCI_HOST_BRIDGE_CLASS(klass);
     DeviceClass *dc = DEVICE_CLASS(klass);
+    sPAPRPHBClass *spc = SPAPR_PCI_HOST_BRIDGE_CLASS(klass);
 
     hc->root_bus_path = spapr_phb_root_bus_path;
     dc->realize = spapr_phb_realize;
@@ -719,6 +765,7 @@ static void spapr_phb_class_init(ObjectClass *klass, void *data)
     dc->vmsd = &vmstate_spapr_pci;
     set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
     dc->cannot_instantiate_with_device_add_yet = false;
+    spc->finish_realize = spapr_phb_finish_realize;
 }
 
 static const TypeInfo spapr_phb_info = {
@@ -726,6 +773,7 @@ static const TypeInfo spapr_phb_info = {
     .parent        = TYPE_PCI_HOST_BRIDGE,
     .instance_size = sizeof(sPAPRPHBState),
     .class_init    = spapr_phb_class_init,
+    .class_size    = sizeof(sPAPRPHBClass),
 };
 
 PCIHostState *spapr_create_phb(sPAPREnvironment *spapr, int index)
@@ -750,6 +798,29 @@ PCIHostState *spapr_create_phb(sPAPREnvironment *spapr, int index)
 #define b_fff(x)        b_x((x), 8, 3)  /* function number */
 #define b_rrrrrrrr(x)   b_x((x), 0, 8)  /* register number */
 
+typedef struct sPAPRTCEDT {
+    void *fdt;
+    int node_off;
+} sPAPRTCEDT;
+
+static int spapr_phb_children_dt(Object *child, void *opaque)
+{
+    sPAPRTCEDT *p = opaque;
+    sPAPRTCETable *tcet;
+
+    tcet = (sPAPRTCETable *) object_dynamic_cast(child, TYPE_SPAPR_TCE_TABLE);
+    if (!tcet) {
+        return 0;
+    }
+
+    spapr_dma_dt(p->fdt, p->node_off, "ibm,dma-window",
+                 tcet->liobn, tcet->bus_offset,
+                 tcet->nb_table << tcet->page_shift);
+    /* Stop after the first window */
+
+    return 1;
+}
+
 int spapr_populate_pci_dt(sPAPRPHBState *phb,
                           uint32_t xics_phandle,
                           void *fdt)
@@ -805,6 +876,7 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb,
     _FDT(fdt_setprop(fdt, bus_off, "ranges", &ranges, sizeof(ranges)));
     _FDT(fdt_setprop(fdt, bus_off, "reg", &bus_reg, sizeof(bus_reg)));
     _FDT(fdt_setprop_cell(fdt, bus_off, "ibm,pci-config-space-type", 0x1));
+    _FDT(fdt_setprop_cell(fdt, bus_off, "ibm,pe-total-#msi", XICS_IRQS));
 
     /* Build the interrupt-map, this must matches what is done
      * in pci_spapr_map_irq
@@ -829,9 +901,8 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb,
     _FDT(fdt_setprop(fdt, bus_off, "interrupt-map", &interrupt_map,
                      sizeof(interrupt_map)));
 
-    spapr_dma_dt(fdt, bus_off, "ibm,dma-window",
-                 phb->dma_liobn, phb->dma_window_start,
-                 phb->dma_window_size);
+    object_child_foreach(OBJECT(phb), spapr_phb_children_dt,
+                         &((sPAPRTCEDT){ .fdt = fdt, .node_off = bus_off }));
 
     return 0;
 }
diff --git a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c
index bce8d7f4e0..04e16ae04d 100644
--- a/hw/ppc/spapr_vio.c
+++ b/hw/ppc/spapr_vio.c
@@ -456,7 +456,11 @@ static int spapr_vio_busdev_init(DeviceState *qdev)
 
     if (pc->rtce_window_size) {
         uint32_t liobn = SPAPR_VIO_BASE_LIOBN | dev->reg;
-        dev->tcet = spapr_tce_new_table(qdev, liobn, pc->rtce_window_size);
+        dev->tcet = spapr_tce_new_table(qdev, liobn,
+                                        0,
+                                        SPAPR_TCE_PAGE_SHIFT,
+                                        pc->rtce_window_size >>
+                                        SPAPR_TCE_PAGE_SHIFT);
         address_space_init(&dev->as, spapr_tce_get_iommu(dev->tcet), qdev->id);
     }