summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--hw/core/qdev-properties-system.c3
-rw-r--r--hw/intc/xics.c182
-rw-r--r--hw/intc/xics_kvm.c34
-rw-r--r--hw/misc/vfio.c78
-rw-r--r--hw/net/eepro100.c4
-rw-r--r--hw/net/virtio-net.c2
-rw-r--r--hw/nvram/spapr_nvram.c4
-rw-r--r--hw/pci-host/uninorth.c2
-rw-r--r--hw/ppc/Makefile.objs3
-rw-r--r--hw/ppc/e500.c13
-rw-r--r--hw/ppc/mac_newworld.c18
-rw-r--r--hw/ppc/spapr.c104
-rw-r--r--hw/ppc/spapr_events.c5
-rw-r--r--hw/ppc/spapr_iommu.c7
-rw-r--r--hw/ppc/spapr_pci.c217
-rw-r--r--hw/ppc/spapr_pci_vfio.c102
-rw-r--r--hw/ppc/spapr_rtas.c97
-rw-r--r--hw/ppc/spapr_vio.c9
-rw-r--r--include/hw/misc/vfio.h9
-rw-r--r--include/hw/pci-host/spapr.h32
-rw-r--r--include/hw/ppc/spapr.h82
-rw-r--r--include/hw/ppc/xics.h9
-rw-r--r--include/migration/vmstate.h11
-rw-r--r--include/net/net.h2
-rw-r--r--linux-user/elfload.c44
-rw-r--r--net/Makefile.objs1
-rw-r--r--net/clients.h2
-rw-r--r--net/l2tpv3.c757
-rw-r--r--net/net.c10
-rw-r--r--pc-bios/s390-ccw.imgbin9432 -> 17624 bytes
-rw-r--r--pc-bios/s390-ccw/bootmap.c445
-rw-r--r--pc-bios/s390-ccw/bootmap.h344
-rw-r--r--pc-bios/s390-ccw/main.c13
-rw-r--r--pc-bios/s390-ccw/s390-ccw.h38
-rw-r--r--pc-bios/s390-ccw/sclp-ascii.c4
-rw-r--r--pc-bios/s390-ccw/virtio.c122
-rw-r--r--pc-bios/s390-ccw/virtio.h50
-rw-r--r--qapi-schema.json60
-rw-r--r--qemu-bridge-helper.c9
-rw-r--r--qemu-options.hx82
-rw-r--r--target-ppc/cpu-models.c3
-rw-r--r--target-ppc/cpu-models.h7
-rw-r--r--target-ppc/cpu-qom.h2
-rw-r--r--target-ppc/cpu.h2
-rw-r--r--target-ppc/gdbstub.c137
-rw-r--r--target-ppc/kvm.c7
-rw-r--r--target-ppc/kvm_ppc.h6
-rw-r--r--target-ppc/translate.c13
-rw-r--r--target-ppc/translate_init.c28
-rw-r--r--tcg/ppc/tcg-target.c6
-rw-r--r--trace-events11
-rw-r--r--vmstate.c13
52 files changed, 2738 insertions, 507 deletions
diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c
index de433b2e38..52c2f8afa5 100644
--- a/hw/core/qdev-properties-system.c
+++ b/hw/core/qdev-properties-system.c
@@ -180,7 +180,6 @@ PropertyInfo qdev_prop_chr = {
 static int parse_netdev(DeviceState *dev, const char *str, void **ptr)
 {
     NICPeers *peers_ptr = (NICPeers *)ptr;
-    NICConf *conf = container_of(peers_ptr, NICConf, peers);
     NetClientState **ncs = peers_ptr->ncs;
     NetClientState *peers[MAX_QUEUE_NUM];
     int queues, i = 0;
@@ -219,7 +218,7 @@ static int parse_netdev(DeviceState *dev, const char *str, void **ptr)
         ncs[i]->queue_index = i;
     }
 
-    conf->queues = queues;
+    peers_ptr->queues = queues;
 
     return 0;
 
diff --git a/hw/intc/xics.c b/hw/intc/xics.c
index 76dd6f5708..0fd2a84c7b 100644
--- a/hw/intc/xics.c
+++ b/hw/intc/xics.c
@@ -437,7 +437,7 @@ static void ics_set_irq(void *opaque, int srcno, int val)
 {
     ICSState *ics = (ICSState *)opaque;
 
-    if (ics->islsi[srcno]) {
+    if (ics->irqs[srcno].flags & XICS_FLAGS_IRQ_LSI) {
         set_irq_lsi(ics, srcno, val);
     } else {
         set_irq_msi(ics, srcno, val);
@@ -474,7 +474,7 @@ static void ics_write_xive(ICSState *ics, int nr, int server,
 
     trace_xics_ics_write_xive(nr, srcno, server, priority);
 
-    if (ics->islsi[srcno]) {
+    if (ics->irqs[srcno].flags & XICS_FLAGS_IRQ_LSI) {
         write_xive_lsi(ics, srcno);
     } else {
         write_xive_msi(ics, srcno);
@@ -496,7 +496,7 @@ static void ics_resend(ICSState *ics)
 
     for (i = 0; i < ics->nr_irqs; i++) {
         /* FIXME: filter by server#? */
-        if (ics->islsi[i]) {
+        if (ics->irqs[i].flags & XICS_FLAGS_IRQ_LSI) {
             resend_lsi(ics, i);
         } else {
             resend_msi(ics, i);
@@ -511,7 +511,7 @@ static void ics_eoi(ICSState *ics, int nr)
 
     trace_xics_ics_eoi(nr);
 
-    if (ics->islsi[srcno]) {
+    if (ics->irqs[srcno].flags & XICS_FLAGS_IRQ_LSI) {
         irq->status &= ~XICS_STATUS_SENT;
     }
 }
@@ -520,11 +520,18 @@ static void ics_reset(DeviceState *dev)
 {
     ICSState *ics = ICS(dev);
     int i;
+    uint8_t flags[ics->nr_irqs];
+
+    for (i = 0; i < ics->nr_irqs; i++) {
+        flags[i] = ics->irqs[i].flags;
+    }
 
     memset(ics->irqs, 0, sizeof(ICSIRQState) * ics->nr_irqs);
+
     for (i = 0; i < ics->nr_irqs; i++) {
         ics->irqs[i].priority = 0xff;
         ics->irqs[i].saved_priority = 0xff;
+        ics->irqs[i].flags = flags[i];
     }
 }
 
@@ -563,13 +570,14 @@ static int ics_dispatch_post_load(void *opaque, int version_id)
 
 static const VMStateDescription vmstate_ics_irq = {
     .name = "ics/irq",
-    .version_id = 1,
+    .version_id = 2,
     .minimum_version_id = 1,
     .fields = (VMStateField[]) {
         VMSTATE_UINT32(server, ICSIRQState),
         VMSTATE_UINT8(priority, ICSIRQState),
         VMSTATE_UINT8(saved_priority, ICSIRQState),
         VMSTATE_UINT8(status, ICSIRQState),
+        VMSTATE_UINT8(flags, ICSIRQState),
         VMSTATE_END_OF_LIST()
     },
 };
@@ -606,7 +614,6 @@ static void ics_realize(DeviceState *dev, Error **errp)
         return;
     }
     ics->irqs = g_malloc0(ics->nr_irqs * sizeof(ICSIRQState));
-    ics->islsi = g_malloc0(ics->nr_irqs * sizeof(bool));
     ics->qirqs = qemu_allocate_irqs(ics_set_irq, ics, ics->nr_irqs);
 }
 
@@ -633,21 +640,166 @@ static const TypeInfo ics_info = {
 /*
  * Exported functions
  */
+static int xics_find_source(XICSState *icp, int irq)
+{
+    int sources = 1;
+    int src;
+
+    /* FIXME: implement multiple sources */
+    for (src = 0; src < sources; ++src) {
+        ICSState *ics = &icp->ics[src];
+        if (ics_valid_irq(ics, irq)) {
+            return src;
+        }
+    }
+
+    return -1;
+}
 
 qemu_irq xics_get_qirq(XICSState *icp, int irq)
 {
-    if (!ics_valid_irq(icp->ics, irq)) {
-        return NULL;
+    int src = xics_find_source(icp, irq);
+
+    if (src >= 0) {
+        ICSState *ics = &icp->ics[src];
+        return ics->qirqs[irq - ics->offset];
     }
 
-    return icp->ics->qirqs[irq - icp->ics->offset];
+    return NULL;
+}
+
+static void ics_set_irq_type(ICSState *ics, int srcno, bool lsi)
+{
+    assert(!(ics->irqs[srcno].flags & XICS_FLAGS_IRQ_MASK));
+
+    ics->irqs[srcno].flags |=
+        lsi ? XICS_FLAGS_IRQ_LSI : XICS_FLAGS_IRQ_MSI;
 }
 
 void xics_set_irq_type(XICSState *icp, int irq, bool lsi)
 {
-    assert(ics_valid_irq(icp->ics, irq));
+    int src = xics_find_source(icp, irq);
+    ICSState *ics;
 
-    icp->ics->islsi[irq - icp->ics->offset] = lsi;
+    assert(src >= 0);
+
+    ics = &icp->ics[src];
+    ics_set_irq_type(ics, irq - ics->offset, lsi);
+}
+
+#define ICS_IRQ_FREE(ics, srcno)   \
+    (!((ics)->irqs[(srcno)].flags & (XICS_FLAGS_IRQ_MASK)))
+
+static int ics_find_free_block(ICSState *ics, int num, int alignnum)
+{
+    int first, i;
+
+    for (first = 0; first < ics->nr_irqs; first += alignnum) {
+        if (num > (ics->nr_irqs - first)) {
+            return -1;
+        }
+        for (i = first; i < first + num; ++i) {
+            if (!ICS_IRQ_FREE(ics, i)) {
+                break;
+            }
+        }
+        if (i == (first + num)) {
+            return first;
+        }
+    }
+
+    return -1;
+}
+
+int xics_alloc(XICSState *icp, int src, int irq_hint, bool lsi)
+{
+    ICSState *ics = &icp->ics[src];
+    int irq;
+
+    if (irq_hint) {
+        assert(src == xics_find_source(icp, irq_hint));
+        if (!ICS_IRQ_FREE(ics, irq_hint - ics->offset)) {
+            trace_xics_alloc_failed_hint(src, irq_hint);
+            return -1;
+        }
+        irq = irq_hint;
+    } else {
+        irq = ics_find_free_block(ics, 1, 1);
+        if (irq < 0) {
+            trace_xics_alloc_failed_no_left(src);
+            return -1;
+        }
+        irq += ics->offset;
+    }
+
+    ics_set_irq_type(ics, irq - ics->offset, lsi);
+    trace_xics_alloc(src, irq);
+
+    return irq;
+}
+
+/*
+ * Allocate block of consequtive IRQs, returns a number of the first.
+ * If align==true, aligns the first IRQ number to num.
+ */
+int xics_alloc_block(XICSState *icp, int src, int num, bool lsi, bool align)
+{
+    int i, first = -1;
+    ICSState *ics = &icp->ics[src];
+
+    assert(src == 0);
+    /*
+     * MSIMesage::data is used for storing VIRQ so
+     * it has to be aligned to num to support multiple
+     * MSI vectors. MSI-X is not affected by this.
+     * The hint is used for the first IRQ, the rest should
+     * be allocated continuously.
+     */
+    if (align) {
+        assert((num == 1) || (num == 2) || (num == 4) ||
+               (num == 8) || (num == 16) || (num == 32));
+        first = ics_find_free_block(ics, num, num);
+    } else {
+        first = ics_find_free_block(ics, num, 1);
+    }
+
+    if (first >= 0) {
+        for (i = first; i < first + num; ++i) {
+            ics_set_irq_type(ics, i, lsi);
+        }
+    }
+    first += ics->offset;
+
+    trace_xics_alloc_block(src, first, num, lsi, align);
+
+    return first;
+}
+
+static void ics_free(ICSState *ics, int srcno, int num)
+{
+    int i;
+
+    for (i = srcno; i < srcno + num; ++i) {
+        if (ICS_IRQ_FREE(ics, i)) {
+            trace_xics_ics_free_warn(ics - ics->icp->ics, i + ics->offset);
+        }
+        memset(&ics->irqs[i], 0, sizeof(ICSIRQState));
+    }
+}
+
+void xics_free(XICSState *icp, int irq, int num)
+{
+    int src = xics_find_source(icp, irq);
+
+    if (src >= 0) {
+        ICSState *ics = &icp->ics[src];
+
+        /* FIXME: implement multiple sources */
+        assert(src == 0);
+
+        trace_xics_ics_free(ics - icp->ics, irq, num);
+        ics_free(ics, irq - ics->offset, num);
+    }
 }
 
 /*
@@ -866,10 +1018,10 @@ static void xics_realize(DeviceState *dev, Error **errp)
     }
 
     /* Registration of global state belongs into realize */
-    spapr_rtas_register("ibm,set-xive", rtas_set_xive);
-    spapr_rtas_register("ibm,get-xive", rtas_get_xive);
-    spapr_rtas_register("ibm,int-off", rtas_int_off);
-    spapr_rtas_register("ibm,int-on", rtas_int_on);
+    spapr_rtas_register(RTAS_IBM_SET_XIVE, "ibm,set-xive", rtas_set_xive);
+    spapr_rtas_register(RTAS_IBM_GET_XIVE, "ibm,get-xive", rtas_get_xive);
+    spapr_rtas_register(RTAS_IBM_INT_OFF, "ibm,int-off", rtas_int_off);
+    spapr_rtas_register(RTAS_IBM_INT_ON, "ibm,int-on", rtas_int_on);
 
     spapr_register_hypercall(H_CPPR, h_cppr);
     spapr_register_hypercall(H_IPI, h_ipi);
diff --git a/hw/intc/xics_kvm.c b/hw/intc/xics_kvm.c
index 09476ae34d..20b19e9d4f 100644
--- a/hw/intc/xics_kvm.c
+++ b/hw/intc/xics_kvm.c
@@ -38,10 +38,6 @@
 typedef struct KVMXICSState {
     XICSState parent_obj;
 
-    uint32_t set_xive_token;
-    uint32_t get_xive_token;
-    uint32_t int_off_token;
-    uint32_t int_on_token;
     int kernel_xics_fd;
 } KVMXICSState;
 
@@ -224,7 +220,7 @@ static int ics_set_kvm_state(ICSState *ics, int version_id)
             state |= KVM_XICS_MASKED;
         }
 
-        if (ics->islsi[i]) {
+        if (ics->irqs[i].flags & XICS_FLAGS_IRQ_LSI) {
             state |= KVM_XICS_LEVEL_SENSITIVE;
             if (irq->status & XICS_STATUS_ASSERTED) {
                 state |= KVM_XICS_PENDING;
@@ -253,7 +249,7 @@ static void ics_kvm_set_irq(void *opaque, int srcno, int val)
     int rc;
 
     args.irq = srcno + ics->offset;
-    if (!ics->islsi[srcno]) {
+    if (ics->irqs[srcno].flags & XICS_FLAGS_IRQ_MSI) {
         if (!val) {
             return;
         }
@@ -271,11 +267,18 @@ static void ics_kvm_reset(DeviceState *dev)
 {
     ICSState *ics = ICS(dev);
     int i;
+    uint8_t flags[ics->nr_irqs];
+
+    for (i = 0; i < ics->nr_irqs; i++) {
+        flags[i] = ics->irqs[i].flags;
+    }
 
     memset(ics->irqs, 0, sizeof(ICSIRQState) * ics->nr_irqs);
+
     for (i = 0; i < ics->nr_irqs; i++) {
         ics->irqs[i].priority = 0xff;
         ics->irqs[i].saved_priority = 0xff;
+        ics->irqs[i].flags = flags[i];
     }
 
     ics_set_kvm_state(ics, 1);
@@ -290,7 +293,6 @@ static void ics_kvm_realize(DeviceState *dev, Error **errp)
         return;
     }
     ics->irqs = g_malloc0(ics->nr_irqs * sizeof(ICSIRQState));
-    ics->islsi = g_malloc0(ics->nr_irqs * sizeof(bool));
     ics->qirqs = qemu_allocate_irqs(ics_kvm_set_irq, ics, ics->nr_irqs);
 }
 
@@ -392,32 +394,30 @@ static void xics_kvm_realize(DeviceState *dev, Error **errp)
         goto fail;
     }
 
-    icpkvm->set_xive_token = spapr_rtas_register("ibm,set-xive", rtas_dummy);
-    icpkvm->get_xive_token = spapr_rtas_register("ibm,get-xive", rtas_dummy);
-    icpkvm->int_off_token = spapr_rtas_register("ibm,int-off", rtas_dummy);
-    icpkvm->int_on_token = spapr_rtas_register("ibm,int-on", rtas_dummy);
+    spapr_rtas_register(RTAS_IBM_SET_XIVE, "ibm,set-xive", rtas_dummy);
+    spapr_rtas_register(RTAS_IBM_GET_XIVE, "ibm,get-xive", rtas_dummy);
+    spapr_rtas_register(RTAS_IBM_INT_OFF, "ibm,int-off", rtas_dummy);
+    spapr_rtas_register(RTAS_IBM_INT_ON, "ibm,int-on", rtas_dummy);
 
-    rc = kvmppc_define_rtas_kernel_token(icpkvm->set_xive_token,
-                                         "ibm,set-xive");
+    rc = kvmppc_define_rtas_kernel_token(RTAS_IBM_SET_XIVE, "ibm,set-xive");
     if (rc < 0) {
         error_setg(errp, "kvmppc_define_rtas_kernel_token: ibm,set-xive");
         goto fail;
     }
 
-    rc = kvmppc_define_rtas_kernel_token(icpkvm->get_xive_token,
-                                         "ibm,get-xive");
+    rc = kvmppc_define_rtas_kernel_token(RTAS_IBM_GET_XIVE, "ibm,get-xive");
     if (rc < 0) {
         error_setg(errp, "kvmppc_define_rtas_kernel_token: ibm,get-xive");
         goto fail;
     }
 
-    rc = kvmppc_define_rtas_kernel_token(icpkvm->int_on_token, "ibm,int-on");
+    rc = kvmppc_define_rtas_kernel_token(RTAS_IBM_INT_ON, "ibm,int-on");
     if (rc < 0) {
         error_setg(errp, "kvmppc_define_rtas_kernel_token: ibm,int-on");
         goto fail;
     }
 
-    rc = kvmppc_define_rtas_kernel_token(icpkvm->int_off_token, "ibm,int-off");
+    rc = kvmppc_define_rtas_kernel_token(RTAS_IBM_INT_OFF, "ibm,int-off");
     if (rc < 0) {
         error_setg(errp, "kvmppc_define_rtas_kernel_token: ibm,int-off");
         goto fail;
diff --git a/hw/misc/vfio.c b/hw/misc/vfio.c
index 7437c2e3c3..7b279c4f05 100644
--- a/hw/misc/vfio.c
+++ b/hw/misc/vfio.c
@@ -39,6 +39,7 @@
 #include "qemu/range.h"
 #include "sysemu/kvm.h"
 #include "sysemu/sysemu.h"
+#include "hw/misc/vfio.h"
 
 /* #define DEBUG_VFIO */
 #ifdef DEBUG_VFIO
@@ -3649,6 +3650,39 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as)
 
         container->iommu_data.type1.initialized = true;
 
+    } else if (ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_SPAPR_TCE_IOMMU)) {
+        ret = ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &fd);
+        if (ret) {
+            error_report("vfio: failed to set group container: %m");
+            ret = -errno;
+            goto free_container_exit;
+        }
+
+        ret = ioctl(fd, VFIO_SET_IOMMU, VFIO_SPAPR_TCE_IOMMU);
+        if (ret) {
+            error_report("vfio: failed to set iommu for container: %m");
+            ret = -errno;
+            goto free_container_exit;
+        }
+
+        /*
+         * The host kernel code implementing VFIO_IOMMU_DISABLE is called
+         * when container fd is closed so we do not call it explicitly
+         * in this file.
+         */
+        ret = ioctl(fd, VFIO_IOMMU_ENABLE);
+        if (ret) {
+            error_report("vfio: failed to enable container: %m");
+            ret = -errno;
+            goto free_container_exit;
+        }
+
+        container->iommu_data.type1.listener = vfio_memory_listener;
+        container->iommu_data.release = vfio_listener_release;
+
+        memory_listener_register(&container->iommu_data.type1.listener,
+                                 container->space->as);
+
     } else {
         error_report("vfio: No available IOMMU models");
         ret = -EINVAL;
@@ -4318,3 +4352,47 @@ static void register_vfio_pci_dev_type(void)
 }
 
 type_init(register_vfio_pci_dev_type)
+
+static int vfio_container_do_ioctl(AddressSpace *as, int32_t groupid,
+                                   int req, void *param)
+{
+    VFIOGroup *group;
+    VFIOContainer *container;
+    int ret = -1;
+
+    group = vfio_get_group(groupid, as);
+    if (!group) {
+        error_report("vfio: group %d not registered", groupid);
+        return ret;
+    }
+
+    container = group->container;
+    if (group->container) {
+        ret = ioctl(container->fd, req, param);
+        if (ret < 0) {
+            error_report("vfio: failed to ioctl container: ret=%d, %s",
+                         ret, strerror(errno));
+        }
+    }
+
+    vfio_put_group(group);
+
+    return ret;
+}
+
+int vfio_container_ioctl(AddressSpace *as, int32_t groupid,
+                         int req, void *param)
+{
+    /* We allow only certain ioctls to the container */
+    switch (req) {
+    case VFIO_CHECK_EXTENSION:
+    case VFIO_IOMMU_SPAPR_TCE_GET_INFO:
+        break;
+    default:
+        /* Return an error on unknown requests */
+        error_report("vfio: unsupported ioctl %X", req);
+        return -1;
+    }
+
+    return vfio_container_do_ioctl(as, groupid, req, param);
+}
diff --git a/hw/net/eepro100.c b/hw/net/eepro100.c
index aaa3ff2360..3263e3fe90 100644
--- a/hw/net/eepro100.c
+++ b/hw/net/eepro100.c
@@ -1217,7 +1217,6 @@ static void eepro100_write_mdi(EEPRO100State *s)
                 break;
             case 1:            /* Status Register */
                 missing("not writable");
-                data = s->mdimem[reg];
                 break;
             case 2:            /* PHY Identification Register (Word 1) */
             case 3:            /* PHY Identification Register (Word 2) */
@@ -1230,7 +1229,8 @@ static void eepro100_write_mdi(EEPRO100State *s)
             default:
                 missing("not implemented");
             }
-            s->mdimem[reg] = data;
+            s->mdimem[reg] &= eepro100_mdi_mask[reg];
+            s->mdimem[reg] |= data & ~eepro100_mdi_mask[reg];
         } else if (opcode == 2) {
             /* MDI read */
             switch (reg) {
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 00b5e07ddd..e51d753cee 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -1542,7 +1542,7 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp)
 
     virtio_init(vdev, "virtio-net", VIRTIO_ID_NET, n->config_size);
 
-    n->max_queues = MAX(n->nic_conf.queues, 1);
+    n->max_queues = MAX(n->nic_conf.peers.queues, 1);
     n->vqs = g_malloc0(sizeof(VirtIONetQueue) * n->max_queues);
     n->vqs[0].rx_vq = virtio_add_queue(vdev, 256, virtio_net_handle_rx);
     n->curr_queues = 1;
diff --git a/hw/nvram/spapr_nvram.c b/hw/nvram/spapr_nvram.c
index af49c4667d..6a72ef4814 100644
--- a/hw/nvram/spapr_nvram.c
+++ b/hw/nvram/spapr_nvram.c
@@ -153,8 +153,8 @@ static int spapr_nvram_init(VIOsPAPRDevice *dev)
         return -1;
     }
 
-    spapr_rtas_register("nvram-fetch", rtas_nvram_fetch);
-    spapr_rtas_register("nvram-store", rtas_nvram_store);
+    spapr_rtas_register(RTAS_NVRAM_FETCH, "nvram-fetch", rtas_nvram_fetch);
+    spapr_rtas_register(RTAS_NVRAM_STORE, "nvram-store", rtas_nvram_store);
 
     return 0;
 }
diff --git a/hw/pci-host/uninorth.c b/hw/pci-host/uninorth.c
index e72fe2a70b..21f805f314 100644
--- a/hw/pci-host/uninorth.c
+++ b/hw/pci-host/uninorth.c
@@ -230,7 +230,7 @@ PCIBus *pci_pmac_init(qemu_irq *pic,
     d = UNI_NORTH_PCI_HOST_BRIDGE(dev);
     memory_region_init(&d->pci_mmio, OBJECT(d), "pci-mmio", 0x100000000ULL);
     memory_region_init_alias(&d->pci_hole, OBJECT(d), "pci-hole", &d->pci_mmio,
-                             0x80000000ULL, 0x70000000ULL);
+                             0x80000000ULL, 0x10000000ULL);
     memory_region_add_subregion(address_space_mem, 0x80000000ULL,
                                 &d->pci_hole);
 
diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs
index ea747f0a20..edd44d03e7 100644
--- a/hw/ppc/Makefile.objs
+++ b/hw/ppc/Makefile.objs
@@ -4,6 +4,9 @@ obj-y += ppc.o ppc_booke.o
 obj-$(CONFIG_PSERIES) += spapr.o spapr_vio.o spapr_events.o
 obj-$(CONFIG_PSERIES) += spapr_hcall.o spapr_iommu.o spapr_rtas.o
 obj-$(CONFIG_PSERIES) += spapr_pci.o
+ifeq ($(CONFIG_PCI)$(CONFIG_PSERIES)$(CONFIG_LINUX), yyy)
+obj-y += spapr_pci_vfio.o
+endif
 # PowerPC 4xx boards
 obj-y += ppc405_boards.o ppc4xx_devs.o ppc405_uc.o ppc440_bamboo.o
 obj-y += ppc4xx_pci.o
diff --git a/hw/ppc/e500.c b/hw/ppc/e500.c
index a973c18d88..bb2e75fe1b 100644
--- a/hw/ppc/e500.c
+++ b/hw/ppc/e500.c
@@ -316,10 +316,15 @@ static int ppce500_load_device_tree(MachineState *machine,
      * device it finds in the dt as serial output device. And we generate
      * devices in reverse order to the dt.
      */
-    dt_serial_create(fdt, MPC8544_SERIAL1_REGS_OFFSET,
-                     soc, mpic, "serial1", 1, false);
-    dt_serial_create(fdt, MPC8544_SERIAL0_REGS_OFFSET,
-                     soc, mpic, "serial0", 0, true);
+    if (serial_hds[1]) {
+        dt_serial_create(fdt, MPC8544_SERIAL1_REGS_OFFSET,
+                         soc, mpic, "serial1", 1, false);
+    }
+
+    if (serial_hds[0]) {
+        dt_serial_create(fdt, MPC8544_SERIAL0_REGS_OFFSET,
+                         soc, mpic, "serial0", 0, true);
+    }
 
     snprintf(gutil, sizeof(gutil), "%s/global-utilities@%llx", soc,
              MPC8544_UTIL_OFFSET);
diff --git a/hw/ppc/mac_newworld.c b/hw/ppc/mac_newworld.c
index e493dc1b4b..89d3cadf19 100644
--- a/hw/ppc/mac_newworld.c
+++ b/hw/ppc/mac_newworld.c
@@ -373,18 +373,11 @@ static void ppc_core99_init(MachineState *machine)
         machine_arch = ARCH_MAC99;
     }
     /* init basic PC hardware */
-    pci_vga_init(pci_bus);
-
     escc_mem = escc_init(0, pic[0x25], pic[0x24],
                          serial_hds[0], serial_hds[1], ESCC_CLOCK, 4);
     memory_region_init_alias(escc_bar, NULL, "escc-bar",
                              escc_mem, 0, memory_region_size(escc_mem));
 
-    for(i = 0; i < nb_nics; i++)
-        pci_nic_init_nofail(&nd_table[i], pci_bus, "ne2k_pci", NULL);
-
-    ide_drive_get(hd, MAX_IDE_BUS);
-
     macio = pci_create(pci_bus, -1, TYPE_NEWWORLD_MACIO);
     dev = DEVICE(macio);
     qdev_connect_gpio_out(dev, 0, pic[0x19]); /* CUDA */
@@ -395,6 +388,8 @@ static void ppc_core99_init(MachineState *machine)
     macio_init(macio, pic_mem, escc_bar);
 
     /* We only emulate 2 out of 3 IDE controllers for now */
+    ide_drive_get(hd, MAX_IDE_BUS);
+
     macio_ide = MACIO_IDE(object_resolve_path_component(OBJECT(macio),
                                                         "ide[0]"));
     macio_ide_init_drives(macio_ide, hd);
@@ -420,8 +415,15 @@ static void ppc_core99_init(MachineState *machine)
         }
     }
 
-    if (graphic_depth != 15 && graphic_depth != 32 && graphic_depth != 8)
+    pci_vga_init(pci_bus);
+
+    if (graphic_depth != 15 && graphic_depth != 32 && graphic_depth != 8) {
         graphic_depth = 15;
+    }
+
+    for (i = 0; i < nb_nics; i++) {
+        pci_nic_init_nofail(&nd_table[i], pci_bus, "ne2k_pci", NULL);
+    }
 
     /* The NewWorld NVRAM is not located in the MacIO device */
     dev = qdev_create(NULL, TYPE_MACIO_NVRAM);
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 82f183f173..a8ba916970 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -85,16 +85,16 @@
 
 #define HTAB_SIZE(spapr)        (1ULL << ((spapr)->htab_shift))
 
+typedef struct sPAPRMachineState sPAPRMachineState;
 
-typedef struct SPAPRMachine SPAPRMachine;
 #define TYPE_SPAPR_MACHINE      "spapr-machine"
 #define SPAPR_MACHINE(obj) \
-    OBJECT_CHECK(SPAPRMachine, (obj), TYPE_SPAPR_MACHINE)
+    OBJECT_CHECK(sPAPRMachineState, (obj), TYPE_SPAPR_MACHINE)
 
 /**
- * SPAPRMachine:
+ * sPAPRMachineState:
  */
-struct SPAPRMachine {
+struct sPAPRMachineState {
     /*< private >*/
     MachineState parent_obj;
 
@@ -102,76 +102,8 @@ struct SPAPRMachine {
     char *kvm_type;
 };
 
-
 sPAPREnvironment *spapr;
 
-int spapr_allocate_irq(int hint, bool lsi)
-{
-    int irq;
-
-    if (hint) {
-        irq = hint;
-        if (hint >= spapr->next_irq) {
-            spapr->next_irq = hint + 1;
-        }
-        /* FIXME: we should probably check for collisions somehow */
-    } else {
-        irq = spapr->next_irq++;
-    }
-
-    /* Configure irq type */
-    if (!xics_get_qirq(spapr->icp, irq)) {
-        return 0;
-    }
-
-    xics_set_irq_type(spapr->icp, irq, lsi);
-
-    return irq;
-}
-
-/*
- * Allocate block of consequtive IRQs, returns a number of the first.
- * If msi==true, aligns the first IRQ number to num.
- */
-int spapr_allocate_irq_block(int num, bool lsi, bool msi)
-{
-    int first = -1;
-    int i, hint = 0;
-
-    /*
-     * MSIMesage::data is used for storing VIRQ so
-     * it has to be aligned to num to support multiple
-     * MSI vectors. MSI-X is not affected by this.
-     * The hint is used for the first IRQ, the rest should
-     * be allocated continuously.
-     */
-    if (msi) {
-        assert((num == 1) || (num == 2) || (num == 4) ||
-               (num == 8) || (num == 16) || (num == 32));
-        hint = (spapr->next_irq + num - 1) & ~(num - 1);
-    }
-
-    for (i = 0; i < num; ++i) {
-        int irq;
-
-        irq = spapr_allocate_irq(hint, lsi);
-        if (!irq) {
-            return -1;
-        }
-
-        if (0 == i) {
-            first = irq;
-            hint = 0;
-        }
-
-        /* If the above doesn't create a consecutive block then that's
-         * an internal bug */
-        assert(irq == (first + i));
-    }
-
-    return first;
-}
-
 static XICSState *try_create_xics(const char *type, int nr_servers,
                                   int nr_irqs)
 {
@@ -442,6 +374,9 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base,
     if (boot_device) {
         _FDT((fdt_property_string(fdt, "qemu,boot-device", boot_device)));
     }
+    if (boot_menu) {
+        _FDT((fdt_property_cell(fdt, "qemu,boot-menu", boot_menu)));
+    }
     _FDT((fdt_property_cell(fdt, "qemu,graphic-width", graphic_width)));
     _FDT((fdt_property_cell(fdt, "qemu,graphic-height", graphic_height)));
     _FDT((fdt_property_cell(fdt, "qemu,graphic-depth", graphic_depth)));
@@ -956,7 +891,7 @@ static const VMStateDescription vmstate_spapr = {
     .version_id = 2,
     .minimum_version_id = 1,
     .fields = (VMStateField[]) {
-        VMSTATE_UINT32(next_irq, sPAPREnvironment),
+        VMSTATE_UNUSED(4), /* used to be @next_irq */
 
         /* RTC offset */
         VMSTATE_UINT64(rtc_offset, sPAPREnvironment),
@@ -1360,7 +1295,6 @@ static void ppc_spapr_init(MachineState *machine)
     /* Set up Interrupt Controller before we create the VCPUs */
     spapr->icp = xics_system_init(smp_cpus * kvmppc_smt_threads() / smp_threads,
                                   XICS_IRQS);
-    spapr->next_irq = XICS_IRQ_BASE;
 
     /* init CPUs */
     if (cpu_model == NULL) {
@@ -1619,14 +1553,14 @@ static char *spapr_get_fw_dev_path(FWPathProvider *p, BusState *bus,
 
 static char *spapr_get_kvm_type(Object *obj, Error **errp)
 {
-    SPAPRMachine *sm = SPAPR_MACHINE(obj);
+    sPAPRMachineState *sm = SPAPR_MACHINE(obj);
 
     return g_strdup(sm->kvm_type);
 }
 
 static void spapr_set_kvm_type(Object *obj, const char *value, Error **errp)
 {
-    SPAPRMachine *sm = SPAPR_MACHINE(obj);
+    sPAPRMachineState *sm = SPAPR_MACHINE(obj);
 
     g_free(sm->kvm_type);
     sm->kvm_type = g_strdup(value);
@@ -1660,7 +1594,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
 static const TypeInfo spapr_machine_info = {
     .name          = TYPE_SPAPR_MACHINE,
     .parent        = TYPE_MACHINE,
-    .instance_size = sizeof(SPAPRMachine),
+    .instance_size = sizeof(sPAPRMachineState),
     .instance_init = spapr_machine_initfn,
     .class_init    = spapr_machine_class_init,
     .interfaces = (InterfaceInfo[]) {
@@ -1669,9 +1603,25 @@ static const TypeInfo spapr_machine_info = {
     },
 };
 
+static void spapr_machine_2_1_class_init(ObjectClass *oc, void *data)
+{
+    MachineClass *mc = MACHINE_CLASS(oc);
+
+    mc->name = "pseries-2.1";
+    mc->desc = "pSeries Logical Partition (PAPR compliant) v2.1";
+    mc->is_default = 0;
+}
+
+static const TypeInfo spapr_machine_2_1_info = {
+    .name          = TYPE_SPAPR_MACHINE "2.1",
+    .parent        = TYPE_SPAPR_MACHINE,
+    .class_init    = spapr_machine_2_1_class_init,
+};
+
 static void spapr_machine_register_types(void)
 {
     type_register_static(&spapr_machine_info);
+    type_register_static(&spapr_machine_2_1_info);
 }
 
 type_init(spapr_machine_register_types)
diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
index 16fa49e886..1b6157dec4 100644
--- a/hw/ppc/spapr_events.c
+++ b/hw/ppc/spapr_events.c
@@ -314,8 +314,9 @@ static void check_exception(PowerPCCPU *cpu, sPAPREnvironment *spapr,
 
 void spapr_events_init(sPAPREnvironment *spapr)
 {
-    spapr->epow_irq = spapr_allocate_msi(0);
+    spapr->epow_irq = xics_alloc(spapr->icp, 0, 0, false);
     spapr->epow_notifier.notify = spapr_powerdown_req;
     qemu_register_powerdown_notifier(&spapr->epow_notifier);
-    spapr_rtas_register("check-exception", check_exception);
+    spapr_rtas_register(RTAS_CHECK_EXCEPTION, "check-exception",
+                        check_exception);
 }
diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
index 9e49ec4a5c..698ae60953 100644
--- a/hw/ppc/spapr_iommu.c
+++ b/hw/ppc/spapr_iommu.c
@@ -118,7 +118,8 @@ static int spapr_tce_table_realize(DeviceState *dev)
         tcet->table = kvmppc_create_spapr_tce(tcet->liobn,
                                               tcet->nb_table <<
                                               tcet->page_shift,
-                                              &tcet->fd);
+                                              &tcet->fd,
+                                              tcet->vfio_accel);
     }
 
     if (!tcet->table) {
@@ -142,7 +143,8 @@ static int spapr_tce_table_realize(DeviceState *dev)
 sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn,
                                    uint64_t bus_offset,
                                    uint32_t page_shift,
-                                   uint32_t nb_table)
+                                   uint32_t nb_table,
+                                   bool vfio_accel)
 {
     sPAPRTCETable *tcet;
 
@@ -161,6 +163,7 @@ sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn,
     tcet->bus_offset = bus_offset;
     tcet->page_shift = page_shift;
     tcet->nb_table = nb_table;
+    tcet->vfio_accel = vfio_accel;
 
     object_property_add_child(OBJECT(owner), "tce-table", OBJECT(tcet), NULL);
 
diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index 988f8cb858..9ed39a93b7 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -220,36 +220,12 @@ static void rtas_write_pci_config(PowerPCCPU *cpu, sPAPREnvironment *spapr,
 }
 
 /*
- * Find an entry with config_addr or returns the empty one if not found AND
- * alloc_new is set.
- * At the moment the msi_table entries are never released so there is
- * no point to look till the end of the list if we need to find the free entry.
- */
-static int spapr_msicfg_find(sPAPRPHBState *phb, uint32_t config_addr,
-                             bool alloc_new)
-{
-    int i;
-
-    for (i = 0; i < SPAPR_MSIX_MAX_DEVS; ++i) {
-        if (!phb->msi_table[i].nvec) {
-            break;
-        }
-        if (phb->msi_table[i].config_addr == config_addr) {
-            return i;
-        }
-    }
-    if ((i < SPAPR_MSIX_MAX_DEVS) && alloc_new) {
-        trace_spapr_pci_msi("Allocating new MSI config", i, config_addr);
-        return i;
-    }
-
-    return -1;
-}
-
-/*
  * Set MSI/MSIX message data.
  * This is required for msi_notify()/msix_notify() which
  * will write at the addresses via spapr_msi_write().
+ *
+ * If hwaddr == 0, all entries will have .data == first_irq i.e.
+ * table will be reset.
  */
 static void spapr_msi_setmsg(PCIDevice *pdev, hwaddr addr, bool msix,
                              unsigned first_irq, unsigned req_num)
@@ -263,9 +239,12 @@ static void spapr_msi_setmsg(PCIDevice *pdev, hwaddr addr, bool msix,
         return;
     }
 
-    for (i = 0; i < req_num; ++i, ++msg.data) {
+    for (i = 0; i < req_num; ++i) {
         msix_set_message(pdev, i, msg);
         trace_spapr_pci_msi_setup(pdev->name, i, msg.address);
+        if (addr) {
+            ++msg.data;
+        }
     }
 }
 
@@ -280,9 +259,12 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPREnvironment *spapr,
     unsigned int req_num = rtas_ld(args, 4); /* 0 == remove all */
     unsigned int seq_num = rtas_ld(args, 5);
     unsigned int ret_intr_type;
-    int ndev, irq, max_irqs = 0;
+    unsigned int irq, max_irqs = 0, num = 0;
     sPAPRPHBState *phb = NULL;
     PCIDevice *pdev = NULL;
+    bool msix = false;
+    spapr_pci_msi *msi;
+    int *config_addr_key;
 
     switch (func) {
     case RTAS_CHANGE_MSI_FN:
@@ -310,13 +292,18 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPREnvironment *spapr,
 
     /* Releasing MSIs */
     if (!req_num) {
-        ndev = spapr_msicfg_find(phb, config_addr, false);
-        if (ndev < 0) {
-            trace_spapr_pci_msi("MSI has not been enabled", -1, config_addr);
+        msi = (spapr_pci_msi *) g_hash_table_lookup(phb->msi, &config_addr);
+        if (!msi) {
+            trace_spapr_pci_msi("Releasing wrong config", config_addr);
             rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
             return;
         }
-        trace_spapr_pci_msi("Released MSIs", ndev, config_addr);
+
+        xics_free(spapr->icp, msi->first_irq, msi->num);
+        spapr_msi_setmsg(pdev, 0, msix, 0, num);
+        g_hash_table_remove(phb->msi, &config_addr);
+
+        trace_spapr_pci_msi("Released MSIs", config_addr);
         rtas_st(rets, 0, RTAS_OUT_SUCCESS);
         rtas_st(rets, 1, 0);
         return;
@@ -324,15 +311,6 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPREnvironment *spapr,
 
     /* Enabling MSI */
 
-    /* Find a device number in the map to add or reuse the existing one */
-    ndev = spapr_msicfg_find(phb, config_addr, true);
-    if (ndev >= SPAPR_MSIX_MAX_DEVS || ndev < 0) {
-        error_report("No free entry for a new MSI device");
-        rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
-        return;
-    }
-    trace_spapr_pci_msi("Configuring MSI", ndev, config_addr);
-
     /* Check if the device supports as many IRQs as requested */
     if (ret_intr_type == RTAS_TYPE_MSI) {
         max_irqs = msi_nr_vectors_allocated(pdev);
@@ -340,48 +318,47 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPREnvironment *spapr,
         max_irqs = pdev->msix_entries_nr;
     }
     if (!max_irqs) {
-        error_report("Requested interrupt type %d is not enabled for device#%d",
-                     ret_intr_type, ndev);
+        error_report("Requested interrupt type %d is not enabled for device %x",
+                     ret_intr_type, config_addr);
         rtas_st(rets, 0, -1); /* Hardware error */
         return;
     }
     /* Correct the number if the guest asked for too many */
     if (req_num > max_irqs) {
+        trace_spapr_pci_msi_retry(config_addr, req_num, max_irqs);
         req_num = max_irqs;
+        irq = 0; /* to avoid misleading trace */
+        goto out;
     }
 
-    /* Check if there is an old config and MSI number has not changed */
-    if (phb->msi_table[ndev].nvec && (req_num != phb->msi_table[ndev].nvec)) {
-        /* Unexpected behaviour */
-        error_report("Cannot reuse MSI config for device#%d", ndev);
+    /* Allocate MSIs */
+    irq = xics_alloc_block(spapr->icp, 0, req_num, false,
+                           ret_intr_type == RTAS_TYPE_MSI);
+    if (!irq) {
+        error_report("Cannot allocate MSIs for device %x", config_addr);
         rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
         return;
     }
 
-    /* There is no cached config, allocate MSIs */
-    if (!phb->msi_table[ndev].nvec) {
-        irq = spapr_allocate_irq_block(req_num, false,
-                                       ret_intr_type == RTAS_TYPE_MSI);
-        if (irq < 0) {
-            error_report("Cannot allocate MSIs for device#%d", ndev);
-            rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
-            return;
-        }
-        phb->msi_table[ndev].irq = irq;
-        phb->msi_table[ndev].nvec = req_num;
-        phb->msi_table[ndev].config_addr = config_addr;
-    }
-
     /* Setup MSI/MSIX vectors in the device (via cfgspace or MSIX BAR) */
     spapr_msi_setmsg(pdev, spapr->msi_win_addr, ret_intr_type == RTAS_TYPE_MSIX,
-                     phb->msi_table[ndev].irq, req_num);
+                     irq, req_num);
 
+    /* Add MSI device to cache */
+    msi = g_new(spapr_pci_msi, 1);
+    msi->first_irq = irq;
+    msi->num = req_num;
+    config_addr_key = g_new(int, 1);
+    *config_addr_key = config_addr;
+    g_hash_table_insert(phb->msi, config_addr_key, msi);
+
+out:
     rtas_st(rets, 0, RTAS_OUT_SUCCESS);
     rtas_st(rets, 1, req_num);
     rtas_st(rets, 2, ++seq_num);
     rtas_st(rets, 3, ret_intr_type);
 
-    trace_spapr_pci_rtas_ibm_change_msi(func, req_num);
+    trace_spapr_pci_rtas_ibm_change_msi(config_addr, func, req_num, irq);
 }
 
 static void rtas_ibm_query_interrupt_source_number(PowerPCCPU *cpu,
@@ -395,25 +372,28 @@ static void rtas_ibm_query_interrupt_source_number(PowerPCCPU *cpu,
     uint32_t config_addr = rtas_ld(args, 0);
     uint64_t buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2);
     unsigned int intr_src_num = -1, ioa_intr_num = rtas_ld(args, 3);
-    int ndev;
     sPAPRPHBState *phb = NULL;
+    PCIDevice *pdev = NULL;
+    spapr_pci_msi *msi;
 
-    /* Fins sPAPRPHBState */
+    /* Find sPAPRPHBState */
     phb = find_phb(spapr, buid);
-    if (!phb) {
+    if (phb) {
+        pdev = find_dev(spapr, buid, config_addr);
+    }
+    if (!phb || !pdev) {
         rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
         return;
     }
 
     /* Find device descriptor and start IRQ */
-    ndev = spapr_msicfg_find(phb, config_addr, false);
-    if (ndev < 0) {
-        trace_spapr_pci_msi("MSI has not been enabled", -1, config_addr);
+    msi = (spapr_pci_msi *) g_hash_table_lookup(phb->msi, &config_addr);
+    if (!msi || !msi->first_irq || !msi->num || (ioa_intr_num >= msi->num)) {
+        trace_spapr_pci_msi("Failed to return vector", config_addr);
         rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
         return;
     }
-
-    intr_src_num = phb->msi_table[ndev].irq + ioa_intr_num;
+    intr_src_num = msi->first_irq + ioa_intr_num;
     trace_spapr_pci_rtas_ibm_query_interrupt_source_number(ioa_intr_num,
                                                            intr_src_num);
 
@@ -634,7 +614,7 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp)
     for (i = 0; i < PCI_NUM_PINS; i++) {
         uint32_t irq;
 
-        irq = spapr_allocate_lsi(0);
+        irq = xics_alloc_block(spapr->icp, 0, 1, true, false);
         if (!irq) {
             error_setg(errp, "spapr_allocate_lsi failed");
             return;
@@ -649,6 +629,8 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp)
     }
 
     info->finish_realize(sphb, errp);
+
+    sphb->msi = g_hash_table_new_full(g_int_hash, g_int_equal, g_free, g_free);
 }
 
 static void spapr_phb_finish_realize(sPAPRPHBState *sphb, Error **errp)
@@ -658,7 +640,7 @@ static void spapr_phb_finish_realize(sPAPRPHBState *sphb, Error **errp)
     tcet = spapr_tce_new_table(DEVICE(sphb), sphb->dma_liobn,
                                0,
                                SPAPR_TCE_PAGE_SHIFT,
-                               0x40000000 >> SPAPR_TCE_PAGE_SHIFT);
+                               0x40000000 >> SPAPR_TCE_PAGE_SHIFT, false);
     if (!tcet) {
         error_setg(errp, "Unable to create TCE table for %s",
                    sphb->dtbusname);
@@ -712,22 +694,69 @@ static const VMStateDescription vmstate_spapr_pci_lsi = {
 };
 
 static const VMStateDescription vmstate_spapr_pci_msi = {
-    .name = "spapr_pci/lsi",
+    .name = "spapr_pci/msi",
     .version_id = 1,
     .minimum_version_id = 1,
-    .fields = (VMStateField[]) {
-        VMSTATE_UINT32(config_addr, struct spapr_pci_msi),
-        VMSTATE_UINT32(irq, struct spapr_pci_msi),
-        VMSTATE_UINT32(nvec, struct spapr_pci_msi),
-
+    .fields = (VMStateField []) {
+        VMSTATE_UINT32(key, spapr_pci_msi_mig),
+        VMSTATE_UINT32(value.first_irq, spapr_pci_msi_mig),
+        VMSTATE_UINT32(value.num, spapr_pci_msi_mig),
         VMSTATE_END_OF_LIST()
     },
 };
 
+static void spapr_pci_pre_save(void *opaque)
+{
+    sPAPRPHBState *sphb = opaque;
+    GHashTableIter iter;
+    gpointer key, value;
+    int i;
+
+    if (sphb->msi_devs) {
+        g_free(sphb->msi_devs);
+        sphb->msi_devs = NULL;
+    }
+    sphb->msi_devs_num = g_hash_table_size(sphb->msi);
+    if (!sphb->msi_devs_num) {
+        return;
+    }
+    sphb->msi_devs = g_malloc(sphb->msi_devs_num * sizeof(spapr_pci_msi_mig));
+
+    g_hash_table_iter_init(&iter, sphb->msi);
+    for (i = 0; g_hash_table_iter_next(&iter, &key, &value); ++i) {
+        sphb->msi_devs[i].key = *(uint32_t *) key;
+        sphb->msi_devs[i].value = *(spapr_pci_msi *) value;
+    }
+}
+
+static int spapr_pci_post_load(void *opaque, int version_id)
+{
+    sPAPRPHBState *sphb = opaque;
+    gpointer key, value;
+    int i;
+
+    for (i = 0; i < sphb->msi_devs_num; ++i) {
+        key = g_memdup(&sphb->msi_devs[i].key,
+                       sizeof(sphb->msi_devs[i].key));
+        value = g_memdup(&sphb->msi_devs[i].value,
+                         sizeof(sphb->msi_devs[i].value));
+        g_hash_table_insert(sphb->msi, key, value);
+    }
+    if (sphb->msi_devs) {
+        g_free(sphb->msi_devs);
+        sphb->msi_devs = NULL;
+    }
+    sphb->msi_devs_num = 0;
+
+    return 0;
+}
+
 static const VMStateDescription vmstate_spapr_pci = {
     .name = "spapr_pci",
-    .version_id = 1,
-    .minimum_version_id = 1,
+    .version_id = 2,
+    .minimum_version_id = 2,
+    .pre_save = spapr_pci_pre_save,
+    .post_load = spapr_pci_post_load,
     .fields = (VMStateField[]) {
         VMSTATE_UINT64_EQUAL(buid, sPAPRPHBState),
         VMSTATE_UINT32_EQUAL(dma_liobn, sPAPRPHBState),
@@ -737,9 +766,9 @@ static const VMStateDescription vmstate_spapr_pci = {
         VMSTATE_UINT64_EQUAL(io_win_size, sPAPRPHBState),
         VMSTATE_STRUCT_ARRAY(lsi_table, sPAPRPHBState, PCI_NUM_PINS, 0,
                              vmstate_spapr_pci_lsi, struct spapr_pci_lsi),
-        VMSTATE_STRUCT_ARRAY(msi_table, sPAPRPHBState, SPAPR_MSIX_MAX_DEVS, 0,
-                             vmstate_spapr_pci_msi, struct spapr_pci_msi),
-
+        VMSTATE_INT32(msi_devs_num, sPAPRPHBState),
+        VMSTATE_STRUCT_VARRAY_ALLOC(msi_devs, sPAPRPHBState, msi_devs_num, 0,
+                                    vmstate_spapr_pci_msi, spapr_pci_msi_mig),
         VMSTATE_END_OF_LIST()
     },
 };
@@ -909,14 +938,20 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb,
 
 void spapr_pci_rtas_init(void)
 {
-    spapr_rtas_register("read-pci-config", rtas_read_pci_config);
-    spapr_rtas_register("write-pci-config", rtas_write_pci_config);
-    spapr_rtas_register("ibm,read-pci-config", rtas_ibm_read_pci_config);
-    spapr_rtas_register("ibm,write-pci-config", rtas_ibm_write_pci_config);
+    spapr_rtas_register(RTAS_READ_PCI_CONFIG, "read-pci-config",
+                        rtas_read_pci_config);
+    spapr_rtas_register(RTAS_WRITE_PCI_CONFIG, "write-pci-config",
+                        rtas_write_pci_config);
+    spapr_rtas_register(RTAS_IBM_READ_PCI_CONFIG, "ibm,read-pci-config",
+                        rtas_ibm_read_pci_config);
+    spapr_rtas_register(RTAS_IBM_WRITE_PCI_CONFIG, "ibm,write-pci-config",
+                        rtas_ibm_write_pci_config);
     if (msi_supported) {
-        spapr_rtas_register("ibm,query-interrupt-source-number",
+        spapr_rtas_register(RTAS_IBM_QUERY_INTERRUPT_SOURCE_NUMBER,
+                            "ibm,query-interrupt-source-number",
                             rtas_ibm_query_interrupt_source_number);
-        spapr_rtas_register("ibm,change-msi", rtas_ibm_change_msi);
+        spapr_rtas_register(RTAS_IBM_CHANGE_MSI, "ibm,change-msi",
+                            rtas_ibm_change_msi);
     }
 }
 
diff --git a/hw/ppc/spapr_pci_vfio.c b/hw/ppc/spapr_pci_vfio.c
new file mode 100644
index 0000000000..d3bddf2887
--- /dev/null
+++ b/hw/ppc/spapr_pci_vfio.c
@@ -0,0 +1,102 @@
+/*
+ * QEMU sPAPR PCI host for VFIO
+ *
+ * Copyright (c) 2011-2014 Alexey Kardashevskiy, IBM Corporation.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License,
+ *  or (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "hw/ppc/spapr.h"
+#include "hw/pci-host/spapr.h"
+#include "linux/vfio.h"
+#include "hw/misc/vfio.h"
+
+static Property spapr_phb_vfio_properties[] = {
+    DEFINE_PROP_INT32("iommu", sPAPRPHBVFIOState, iommugroupid, -1),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static void spapr_phb_vfio_finish_realize(sPAPRPHBState *sphb, Error **errp)
+{
+    sPAPRPHBVFIOState *svphb = SPAPR_PCI_VFIO_HOST_BRIDGE(sphb);
+    struct vfio_iommu_spapr_tce_info info = { .argsz = sizeof(info) };
+    int ret;
+    sPAPRTCETable *tcet;
+    uint32_t liobn = svphb->phb.dma_liobn;
+
+    if (svphb->iommugroupid == -1) {
+        error_setg(errp, "Wrong IOMMU group ID %d", svphb->iommugroupid);
+        return;
+    }
+
+    ret = vfio_container_ioctl(&svphb->phb.iommu_as, svphb->iommugroupid,
+                               VFIO_CHECK_EXTENSION,
+                               (void *) VFIO_SPAPR_TCE_IOMMU);
+    if (ret != 1) {
+        error_setg_errno(errp, -ret,
+                         "spapr-vfio: SPAPR extension is not supported");
+        return;
+    }
+
+    ret = vfio_container_ioctl(&svphb->phb.iommu_as, svphb->iommugroupid,
+                               VFIO_IOMMU_SPAPR_TCE_GET_INFO, &info);
+    if (ret) {
+        error_setg_errno(errp, -ret,
+                         "spapr-vfio: get info from container failed");
+        return;
+    }
+
+    tcet = spapr_tce_new_table(DEVICE(sphb), liobn, info.dma32_window_start,
+                               SPAPR_TCE_PAGE_SHIFT,
+                               info.dma32_window_size >> SPAPR_TCE_PAGE_SHIFT,
+                               true);
+    if (!tcet) {
+        error_setg(errp, "spapr-vfio: failed to create VFIO TCE table");
+        return;
+    }
+
+    /* Register default 32bit DMA window */
+    memory_region_add_subregion(&sphb->iommu_root, tcet->bus_offset,
+                                spapr_tce_get_iommu(tcet));
+}
+
+static void spapr_phb_vfio_reset(DeviceState *qdev)
+{
+    /* Do nothing */
+}
+
+static void spapr_phb_vfio_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    sPAPRPHBClass *spc = SPAPR_PCI_HOST_BRIDGE_CLASS(klass);
+
+    dc->props = spapr_phb_vfio_properties;
+    dc->reset = spapr_phb_vfio_reset;
+    spc->finish_realize = spapr_phb_vfio_finish_realize;
+}
+
+static const TypeInfo spapr_phb_vfio_info = {
+    .name          = TYPE_SPAPR_PCI_VFIO_HOST_BRIDGE,
+    .parent        = TYPE_SPAPR_PCI_HOST_BRIDGE,
+    .instance_size = sizeof(sPAPRPHBVFIOState),
+    .class_init    = spapr_phb_vfio_class_init,
+    .class_size    = sizeof(sPAPRPHBClass),
+};
+
+static void spapr_pci_vfio_register_types(void)
+{
+    type_register_static(&spapr_phb_vfio_info);
+}
+
+type_init(spapr_pci_vfio_register_types)
diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
index 8d08539baa..9ba1ba69f9 100644
--- a/hw/ppc/spapr_rtas.c
+++ b/hw/ppc/spapr_rtas.c
@@ -36,9 +36,6 @@
 
 #include <libfdt.h>
 
-#define TOKEN_BASE      0x2000
-#define TOKEN_MAX       0x100
-
 static void rtas_display_character(PowerPCCPU *cpu, sPAPREnvironment *spapr,
                                    uint32_t token, uint32_t nargs,
                                    target_ulong args,
@@ -225,8 +222,6 @@ static void rtas_stop_self(PowerPCCPU *cpu, sPAPREnvironment *spapr,
     env->msr = 0;
 }
 
-#define DIAGNOSTICS_RUN_MODE        42
-
 static void rtas_ibm_get_system_parameter(PowerPCCPU *cpu,
                                           sPAPREnvironment *spapr,
                                           uint32_t token, uint32_t nargs,
@@ -236,16 +231,28 @@ static void rtas_ibm_get_system_parameter(PowerPCCPU *cpu,
     target_ulong parameter = rtas_ld(args, 0);
     target_ulong buffer = rtas_ld(args, 1);
     target_ulong length = rtas_ld(args, 2);
-    target_ulong ret = RTAS_OUT_NOT_SUPPORTED;
+    target_ulong ret = RTAS_OUT_SUCCESS;
 
     switch (parameter) {
-    case DIAGNOSTICS_RUN_MODE:
-        if (length == 1) {
-            rtas_st(buffer, 0, 0);
-            ret = RTAS_OUT_SUCCESS;
-        }
+    case RTAS_SYSPARM_SPLPAR_CHARACTERISTICS: {
+        char *param_val = g_strdup_printf("MaxEntCap=%d,MaxPlatProcs=%d",
+                                          max_cpus, smp_cpus);
+        rtas_st_buffer(buffer, length, (uint8_t *)param_val, strlen(param_val));
+        g_free(param_val);
         break;
     }
+    case RTAS_SYSPARM_DIAGNOSTICS_RUN_MODE: {
+        uint8_t param_val = DIAGNOSTICS_RUN_MODE_DISABLED;
+
+        rtas_st_buffer(buffer, length, &param_val, sizeof(param_val));
+        break;
+    }
+    case RTAS_SYSPARM_UUID:
+        rtas_st_buffer(buffer, length, qemu_uuid, (qemu_uuid_set ? 16 : 0));
+        break;
+    default:
+        ret = RTAS_OUT_NOT_SUPPORTED;
+    }
 
     rtas_st(rets, 0, ret);
 }
@@ -260,7 +267,9 @@ static void rtas_ibm_set_system_parameter(PowerPCCPU *cpu,
     target_ulong ret = RTAS_OUT_NOT_SUPPORTED;
 
     switch (parameter) {
-    case DIAGNOSTICS_RUN_MODE:
+    case RTAS_SYSPARM_SPLPAR_CHARACTERISTICS:
+    case RTAS_SYSPARM_DIAGNOSTICS_RUN_MODE:
+    case RTAS_SYSPARM_UUID:
         ret = RTAS_OUT_NOT_AUTHORIZED;
         break;
     }
@@ -271,17 +280,14 @@ static void rtas_ibm_set_system_parameter(PowerPCCPU *cpu,
 static struct rtas_call {
     const char *name;
     spapr_rtas_fn fn;
-} rtas_table[TOKEN_MAX];
-
-static struct rtas_call *rtas_next = rtas_table;
+} rtas_table[RTAS_TOKEN_MAX - RTAS_TOKEN_BASE];
 
 target_ulong spapr_rtas_call(PowerPCCPU *cpu, sPAPREnvironment *spapr,
                              uint32_t token, uint32_t nargs, target_ulong args,
                              uint32_t nret, target_ulong rets)
 {
-    if ((token >= TOKEN_BASE)
-        && ((token - TOKEN_BASE) < TOKEN_MAX)) {
-        struct rtas_call *call = rtas_table + (token - TOKEN_BASE);
+    if ((token >= RTAS_TOKEN_BASE) && (token < RTAS_TOKEN_MAX)) {
+        struct rtas_call *call = rtas_table + (token - RTAS_TOKEN_BASE);
 
         if (call->fn) {
             call->fn(cpu, spapr, token, nargs, args, nret, rets);
@@ -303,23 +309,22 @@ target_ulong spapr_rtas_call(PowerPCCPU *cpu, sPAPREnvironment *spapr,
     return H_PARAMETER;
 }
 
-int spapr_rtas_register(const char *name, spapr_rtas_fn fn)
+void spapr_rtas_register(int token, const char *name, spapr_rtas_fn fn)
 {
-    int i;
-
-    for (i = 0; i < (rtas_next - rtas_table); i++) {
-        if (strcmp(name, rtas_table[i].name) == 0) {
-            fprintf(stderr, "RTAS call \"%s\" registered twice\n", name);
-            exit(1);
-        }
+    if (!((token >= RTAS_TOKEN_BASE) && (token < RTAS_TOKEN_MAX))) {
+        fprintf(stderr, "RTAS invalid token 0x%x\n", token);
+        exit(1);
     }
 
-    assert(rtas_next < (rtas_table + TOKEN_MAX));
-
-    rtas_next->name = name;
-    rtas_next->fn = fn;
+    token -= RTAS_TOKEN_BASE;
+    if (rtas_table[token].name) {
+        fprintf(stderr, "RTAS call \"%s\" is registered already as 0x%x\n",
+                rtas_table[token].name, token);
+        exit(1);
+    }
 
-    return (rtas_next++ - rtas_table) + TOKEN_BASE;
+    rtas_table[token].name = name;
+    rtas_table[token].fn = fn;
 }
 
 int spapr_rtas_device_tree_setup(void *fdt, hwaddr rtas_addr,
@@ -359,7 +364,7 @@ int spapr_rtas_device_tree_setup(void *fdt, hwaddr rtas_addr,
         return ret;
     }
 
-    for (i = 0; i < TOKEN_MAX; i++) {
+    for (i = 0; i < RTAS_TOKEN_MAX - RTAS_TOKEN_BASE; i++) {
         struct rtas_call *call = &rtas_table[i];
 
         if (!call->name) {
@@ -367,7 +372,7 @@ int spapr_rtas_device_tree_setup(void *fdt, hwaddr rtas_addr,
         }
 
         ret = qemu_fdt_setprop_cell(fdt, "/rtas", call->name,
-                                    i + TOKEN_BASE);
+                                    i + RTAS_TOKEN_BASE);
         if (ret < 0) {
             fprintf(stderr, "Couldn't add rtas token for %s: %s\n",
                     call->name, fdt_strerror(ret));
@@ -380,18 +385,24 @@ int spapr_rtas_device_tree_setup(void *fdt, hwaddr rtas_addr,
 
 static void core_rtas_register_types(void)
 {
-    spapr_rtas_register("display-character", rtas_display_character);
-    spapr_rtas_register("get-time-of-day", rtas_get_time_of_day);
-    spapr_rtas_register("set-time-of-day", rtas_set_time_of_day);
-    spapr_rtas_register("power-off", rtas_power_off);
-    spapr_rtas_register("system-reboot", rtas_system_reboot);
-    spapr_rtas_register("query-cpu-stopped-state",
+    spapr_rtas_register(RTAS_DISPLAY_CHARACTER, "display-character",
+                        rtas_display_character);
+    spapr_rtas_register(RTAS_GET_TIME_OF_DAY, "get-time-of-day",
+                        rtas_get_time_of_day);
+    spapr_rtas_register(RTAS_SET_TIME_OF_DAY, "set-time-of-day",
+                        rtas_set_time_of_day);
+    spapr_rtas_register(RTAS_POWER_OFF, "power-off", rtas_power_off);
+    spapr_rtas_register(RTAS_SYSTEM_REBOOT, "system-reboot",
+                        rtas_system_reboot);
+    spapr_rtas_register(RTAS_QUERY_CPU_STOPPED_STATE, "query-cpu-stopped-state",
                         rtas_query_cpu_stopped_state);
-    spapr_rtas_register("start-cpu", rtas_start_cpu);
-    spapr_rtas_register("stop-self", rtas_stop_self);
-    spapr_rtas_register("ibm,get-system-parameter",
+    spapr_rtas_register(RTAS_START_CPU, "start-cpu", rtas_start_cpu);
+    spapr_rtas_register(RTAS_STOP_SELF, "stop-self", rtas_stop_self);
+    spapr_rtas_register(RTAS_IBM_GET_SYSTEM_PARAMETER,
+                        "ibm,get-system-parameter",
                         rtas_ibm_get_system_parameter);
-    spapr_rtas_register("ibm,set-system-parameter",
+    spapr_rtas_register(RTAS_IBM_SET_SYSTEM_PARAMETER,
+                        "ibm,set-system-parameter",
                         rtas_ibm_set_system_parameter);
 }
 
diff --git a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c
index 04e16ae04d..dc9e46a7b1 100644
--- a/hw/ppc/spapr_vio.c
+++ b/hw/ppc/spapr_vio.c
@@ -449,7 +449,7 @@ static int spapr_vio_busdev_init(DeviceState *qdev)
         dev->qdev.id = id;
     }
 
-    dev->irq = spapr_allocate_msi(dev->irq);
+    dev->irq = xics_alloc(spapr->icp, 0, dev->irq, false);
     if (!dev->irq) {
         return -1;
     }
@@ -460,7 +460,7 @@ static int spapr_vio_busdev_init(DeviceState *qdev)
                                         0,
                                         SPAPR_TCE_PAGE_SHIFT,
                                         pc->rtce_window_size >>
-                                        SPAPR_TCE_PAGE_SHIFT);
+                                        SPAPR_TCE_PAGE_SHIFT, false);
         address_space_init(&dev->as, spapr_tce_get_iommu(dev->tcet), qdev->id);
     }
 
@@ -517,8 +517,9 @@ VIOsPAPRBus *spapr_vio_bus_init(void)
     spapr_register_hypercall(H_ENABLE_CRQ, h_enable_crq);
 
     /* RTAS calls */
-    spapr_rtas_register("ibm,set-tce-bypass", rtas_set_tce_bypass);
-    spapr_rtas_register("quiesce", rtas_quiesce);
+    spapr_rtas_register(RTAS_IBM_SET_TCE_BYPASS, "ibm,set-tce-bypass",
+                        rtas_set_tce_bypass);
+    spapr_rtas_register(RTAS_QUIESCE, "quiesce", rtas_quiesce);
 
     return bus;
 }
diff --git a/include/hw/misc/vfio.h b/include/hw/misc/vfio.h
new file mode 100644
index 0000000000..0b26cd8e11
--- /dev/null
+++ b/include/hw/misc/vfio.h
@@ -0,0 +1,9 @@
+#ifndef VFIO_API_H
+#define VFIO_API_H
+
+#include "qemu/typedefs.h"
+
+extern int vfio_container_ioctl(AddressSpace *as, int32_t groupid,
+                                int req, void *param);
+
+#endif
diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h
index 0934518bbd..32f0aa7d10 100644
--- a/include/hw/pci-host/spapr.h
+++ b/include/hw/pci-host/spapr.h
@@ -27,13 +27,15 @@
 #include "hw/pci/pci_host.h"
 #include "hw/ppc/xics.h"
 
-#define SPAPR_MSIX_MAX_DEVS 32
-
 #define TYPE_SPAPR_PCI_HOST_BRIDGE "spapr-pci-host-bridge"
+#define TYPE_SPAPR_PCI_VFIO_HOST_BRIDGE "spapr-pci-vfio-host-bridge"
 
 #define SPAPR_PCI_HOST_BRIDGE(obj) \
     OBJECT_CHECK(sPAPRPHBState, (obj), TYPE_SPAPR_PCI_HOST_BRIDGE)
 
+#define SPAPR_PCI_VFIO_HOST_BRIDGE(obj) \
+    OBJECT_CHECK(sPAPRPHBVFIOState, (obj), TYPE_SPAPR_PCI_VFIO_HOST_BRIDGE)
+
 #define SPAPR_PCI_HOST_BRIDGE_CLASS(klass) \
      OBJECT_CLASS_CHECK(sPAPRPHBClass, (klass), TYPE_SPAPR_PCI_HOST_BRIDGE)
 #define SPAPR_PCI_HOST_BRIDGE_GET_CLASS(obj) \
@@ -41,6 +43,7 @@
 
 typedef struct sPAPRPHBClass sPAPRPHBClass;
 typedef struct sPAPRPHBState sPAPRPHBState;
+typedef struct sPAPRPHBVFIOState sPAPRPHBVFIOState;
 
 struct sPAPRPHBClass {
     PCIHostBridgeClass parent_class;
@@ -48,6 +51,16 @@ struct sPAPRPHBClass {
     void (*finish_realize)(sPAPRPHBState *sphb, Error **errp);
 };
 
+typedef struct spapr_pci_msi {
+    uint32_t first_irq;
+    uint32_t num;
+} spapr_pci_msi;
+
+typedef struct spapr_pci_msi_mig {
+    uint32_t key;
+    spapr_pci_msi value;
+} spapr_pci_msi_mig;
+
 struct sPAPRPHBState {
     PCIHostState parent_obj;
 
@@ -67,15 +80,20 @@ struct sPAPRPHBState {
         uint32_t irq;
     } lsi_table[PCI_NUM_PINS];
 
-    struct spapr_pci_msi {
-        uint32_t config_addr;
-        uint32_t irq;
-        uint32_t nvec;
-    } msi_table[SPAPR_MSIX_MAX_DEVS];
+    GHashTable *msi;
+    /* Temporary cache for migration purposes */
+    int32_t msi_devs_num;
+    spapr_pci_msi_mig *msi_devs;
 
     QLIST_ENTRY(sPAPRPHBState) list;
 };
 
+struct sPAPRPHBVFIOState {
+    sPAPRPHBState phb;
+
+    int32_t iommugroupid;
+};
+
 #define SPAPR_PCI_BASE_BUID          0x800000020000000ULL
 
 #define SPAPR_PCI_WINDOW_BASE        0x10000000000ULL
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 08c301f38d..bbba51a703 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -27,7 +27,6 @@ typedef struct sPAPREnvironment {
     long rtas_size;
     void *fdt_skel;
     target_ulong entry_point;
-    uint32_t next_irq;
     uint64_t rtc_offset;
     struct PPCTimebase tb;
     bool has_graphics;
@@ -339,16 +338,6 @@ target_ulong spapr_hypercall(PowerPCCPU *cpu, target_ulong opcode,
 int spapr_allocate_irq(int hint, bool lsi);
 int spapr_allocate_irq_block(int num, bool lsi, bool msi);
 
-static inline int spapr_allocate_msi(int hint)
-{
-    return spapr_allocate_irq(hint, false);
-}
-
-static inline int spapr_allocate_lsi(int hint)
-{
-    return spapr_allocate_irq(hint, true);
-}
-
 /* RTAS return codes */
 #define RTAS_OUT_SUCCESS            0
 #define RTAS_OUT_NO_ERRORS_FOUND    1
@@ -358,6 +347,58 @@ static inline int spapr_allocate_lsi(int hint)
 #define RTAS_OUT_NOT_SUPPORTED      -3
 #define RTAS_OUT_NOT_AUTHORIZED     -9002
 
+/* RTAS tokens */
+#define RTAS_TOKEN_BASE      0x2000
+
+#define RTAS_DISPLAY_CHARACTER                  (RTAS_TOKEN_BASE + 0x00)
+#define RTAS_GET_TIME_OF_DAY                    (RTAS_TOKEN_BASE + 0x01)
+#define RTAS_SET_TIME_OF_DAY                    (RTAS_TOKEN_BASE + 0x02)
+#define RTAS_POWER_OFF                          (RTAS_TOKEN_BASE + 0x03)
+#define RTAS_SYSTEM_REBOOT                      (RTAS_TOKEN_BASE + 0x04)
+#define RTAS_QUERY_CPU_STOPPED_STATE            (RTAS_TOKEN_BASE + 0x05)
+#define RTAS_START_CPU                          (RTAS_TOKEN_BASE + 0x06)
+#define RTAS_STOP_SELF                          (RTAS_TOKEN_BASE + 0x07)
+#define RTAS_IBM_GET_SYSTEM_PARAMETER           (RTAS_TOKEN_BASE + 0x08)
+#define RTAS_IBM_SET_SYSTEM_PARAMETER           (RTAS_TOKEN_BASE + 0x09)
+#define RTAS_IBM_SET_XIVE                       (RTAS_TOKEN_BASE + 0x0A)
+#define RTAS_IBM_GET_XIVE                       (RTAS_TOKEN_BASE + 0x0B)
+#define RTAS_IBM_INT_OFF                        (RTAS_TOKEN_BASE + 0x0C)
+#define RTAS_IBM_INT_ON                         (RTAS_TOKEN_BASE + 0x0D)
+#define RTAS_CHECK_EXCEPTION                    (RTAS_TOKEN_BASE + 0x0E)
+#define RTAS_EVENT_SCAN                         (RTAS_TOKEN_BASE + 0x0F)
+#define RTAS_IBM_SET_TCE_BYPASS                 (RTAS_TOKEN_BASE + 0x10)
+#define RTAS_QUIESCE                            (RTAS_TOKEN_BASE + 0x11)
+#define RTAS_NVRAM_FETCH                        (RTAS_TOKEN_BASE + 0x12)
+#define RTAS_NVRAM_STORE                        (RTAS_TOKEN_BASE + 0x13)
+#define RTAS_READ_PCI_CONFIG                    (RTAS_TOKEN_BASE + 0x14)
+#define RTAS_WRITE_PCI_CONFIG                   (RTAS_TOKEN_BASE + 0x15)
+#define RTAS_IBM_READ_PCI_CONFIG                (RTAS_TOKEN_BASE + 0x16)
+#define RTAS_IBM_WRITE_PCI_CONFIG               (RTAS_TOKEN_BASE + 0x17)
+#define RTAS_IBM_QUERY_INTERRUPT_SOURCE_NUMBER  (RTAS_TOKEN_BASE + 0x18)
+#define RTAS_IBM_CHANGE_MSI                     (RTAS_TOKEN_BASE + 0x19)
+#define RTAS_SET_INDICATOR                      (RTAS_TOKEN_BASE + 0x1A)
+#define RTAS_SET_POWER_LEVEL                    (RTAS_TOKEN_BASE + 0x1B)
+#define RTAS_GET_POWER_LEVEL                    (RTAS_TOKEN_BASE + 0x1C)
+#define RTAS_GET_SENSOR_STATE                   (RTAS_TOKEN_BASE + 0x1D)
+#define RTAS_IBM_CONFIGURE_CONNECTOR            (RTAS_TOKEN_BASE + 0x1E)
+#define RTAS_IBM_OS_TERM                        (RTAS_TOKEN_BASE + 0x1F)
+#define RTAS_IBM_EXTENDED_OS_TERM               (RTAS_TOKEN_BASE + 0x20)
+
+#define RTAS_TOKEN_MAX                          (RTAS_TOKEN_BASE + 0x21)
+
+/* RTAS ibm,get-system-parameter token values */
+#define RTAS_SYSPARM_SPLPAR_CHARACTERISTICS      20
+#define RTAS_SYSPARM_DIAGNOSTICS_RUN_MODE        42
+#define RTAS_SYSPARM_UUID                        48
+
+/* Possible values for the platform-processor-diagnostics-run-mode parameter
+ * of the RTAS ibm,get-system-parameter call.
+ */
+#define DIAGNOSTICS_RUN_MODE_DISABLED  0
+#define DIAGNOSTICS_RUN_MODE_STAGGERED 1
+#define DIAGNOSTICS_RUN_MODE_IMMEDIATE 2
+#define DIAGNOSTICS_RUN_MODE_PERIODIC  3
+
 static inline uint64_t ppc64_phys_to_real(uint64_t addr)
 {
     return addr & ~0xF000000000000000ULL;
@@ -373,11 +414,24 @@ static inline void rtas_st(target_ulong phys, int n, uint32_t val)
     stl_be_phys(&address_space_memory, ppc64_phys_to_real(phys + 4*n), val);
 }
 
+
+static inline void rtas_st_buffer(target_ulong phys, target_ulong phys_len,
+                                  uint8_t *buffer, uint16_t buffer_len)
+{
+    if (phys_len < 2) {
+        return;
+    }
+    stw_be_phys(&address_space_memory,
+                ppc64_phys_to_real(phys), buffer_len);
+    cpu_physical_memory_write(ppc64_phys_to_real(phys + 2),
+                              buffer, MIN(buffer_len, phys_len - 2));
+}
+
 typedef void (*spapr_rtas_fn)(PowerPCCPU *cpu, sPAPREnvironment *spapr,
                               uint32_t token,
                               uint32_t nargs, target_ulong args,
                               uint32_t nret, target_ulong rets);
-int spapr_rtas_register(const char *name, spapr_rtas_fn fn);
+void spapr_rtas_register(int token, const char *name, spapr_rtas_fn fn);
 target_ulong spapr_rtas_call(PowerPCCPU *cpu, sPAPREnvironment *spapr,
                              uint32_t token, uint32_t nargs, target_ulong args,
                              uint32_t nret, target_ulong rets);
@@ -407,6 +461,7 @@ struct sPAPRTCETable {
     uint32_t page_shift;
     uint64_t *table;
     bool bypass;
+    bool vfio_accel;
     int fd;
     MemoryRegion iommu;
     QLIST_ENTRY(sPAPRTCETable) list;
@@ -418,7 +473,8 @@ int spapr_h_cas_compose_response(target_ulong addr, target_ulong size);
 sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn,
                                    uint64_t bus_offset,
                                    uint32_t page_shift,
-                                   uint32_t nb_table);
+                                   uint32_t nb_table,
+                                   bool vfio_accel);
 MemoryRegion *spapr_tce_get_iommu(sPAPRTCETable *tcet);
 void spapr_tce_set_bypass(sPAPRTCETable *tcet, bool bypass);
 int spapr_dma_dt(void *fdt, int node_off, const char *propname,
diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h
index 85e4c8a3dd..a214dd7f28 100644
--- a/include/hw/ppc/xics.h
+++ b/include/hw/ppc/xics.h
@@ -136,7 +136,6 @@ struct ICSState {
     uint32_t nr_irqs;
     uint32_t offset;
     qemu_irq *qirqs;
-    bool *islsi;
     ICSIRQState *irqs;
     XICSState *icp;
 };
@@ -150,12 +149,20 @@ struct ICSIRQState {
 #define XICS_STATUS_REJECTED           0x4
 #define XICS_STATUS_MASKED_PENDING     0x8
     uint8_t status;
+/* (flags & XICS_FLAGS_IRQ_MASK) == 0 means the interrupt is not allocated */
+#define XICS_FLAGS_IRQ_LSI             0x1
+#define XICS_FLAGS_IRQ_MSI             0x2
+#define XICS_FLAGS_IRQ_MASK            0x3
+    uint8_t flags;
 };
 
 #define XICS_IRQS               1024
 
 qemu_irq xics_get_qirq(XICSState *icp, int irq);
 void xics_set_irq_type(XICSState *icp, int irq, bool lsi);
+int xics_alloc(XICSState *icp, int src, int irq_hint, bool lsi);
+int xics_alloc_block(XICSState *icp, int src, int num, bool lsi, bool align);
+void xics_free(XICSState *icp, int irq, int num);
 
 void xics_cpu_setup(XICSState *icp, PowerPCCPU *cpu);
 
diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
index 71a8a95641..9a001bd28f 100644
--- a/include/migration/vmstate.h
+++ b/include/migration/vmstate.h
@@ -101,6 +101,7 @@ enum VMStateFlags {
     VMS_VARRAY_UINT8     = 0x400,  /* Array with size in uint8_t field*/
     VMS_VARRAY_UINT32    = 0x800,  /* Array with size in uint32_t field*/
     VMS_MUST_EXIST       = 0x1000, /* Field must exist in input */
+    VMS_ALLOC            = 0x2000, /* Alloc a buffer on the destination */
 };
 
 typedef struct {
@@ -429,6 +430,16 @@ extern const VMStateInfo vmstate_info_bitmap;
     .offset     = offsetof(_state, _field),                          \
 }
 
+#define VMSTATE_STRUCT_VARRAY_ALLOC(_field, _state, _field_num, _version, _vmsd, _type) {\
+    .name       = (stringify(_field)),                               \
+    .version_id = (_version),                                        \
+    .vmsd       = &(_vmsd),                                          \
+    .num_offset = vmstate_offset_value(_state, _field_num, int32_t), \
+    .size       = sizeof(_type),                                     \
+    .flags      = VMS_STRUCT|VMS_VARRAY_INT32|VMS_ALLOC|VMS_POINTER, \
+    .offset     = vmstate_offset_pointer(_state, _field, _type),     \
+}
+
 #define VMSTATE_STATIC_BUFFER(_field, _state, _version, _test, _start, _size) { \
     .name         = (stringify(_field)),                             \
     .version_id   = (_version),                                      \
diff --git a/include/net/net.h b/include/net/net.h
index 8b189da5ee..ed594f9bdb 100644
--- a/include/net/net.h
+++ b/include/net/net.h
@@ -24,13 +24,13 @@ struct MACAddr {
 
 typedef struct NICPeers {
     NetClientState *ncs[MAX_QUEUE_NUM];
+    int32_t queues;
 } NICPeers;
 
 typedef struct NICConf {
     MACAddr macaddr;
     NICPeers peers;
     int32_t bootindex;
-    int32_t queues;
 } NICConf;
 
 #define DEFINE_NIC_PROPERTIES(_state, _conf)                            \
diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index 1248eda272..60777fecf6 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -736,6 +736,14 @@ enum {
 
     QEMU_PPC_FEATURE_TRUE_LE = 0x00000002,
     QEMU_PPC_FEATURE_PPC_LE = 0x00000001,
+
+    /* Feature definitions in AT_HWCAP2.  */
+    QEMU_PPC_FEATURE2_ARCH_2_07 = 0x80000000, /* ISA 2.07 */
+    QEMU_PPC_FEATURE2_HAS_HTM = 0x40000000, /* Hardware Transactional Memory */
+    QEMU_PPC_FEATURE2_HAS_DSCR = 0x20000000, /* Data Stream Control Register */
+    QEMU_PPC_FEATURE2_HAS_EBB = 0x10000000, /* Event Base Branching */
+    QEMU_PPC_FEATURE2_HAS_ISEL = 0x08000000, /* Integer Select */
+    QEMU_PPC_FEATURE2_HAS_TAR = 0x04000000, /* Target Address Register */
 };
 
 #define ELF_HWCAP get_elf_hwcap()
@@ -749,6 +757,8 @@ static uint32_t get_elf_hwcap(void)
        Altivec/FP/SPE support.  Anything else is just a bonus.  */
 #define GET_FEATURE(flag, feature)                                      \
     do { if (cpu->env.insns_flags & flag) { features |= feature; } } while (0)
+#define GET_FEATURE2(flag, feature)                                      \
+    do { if (cpu->env.insns_flags2 & flag) { features |= feature; } } while (0)
     GET_FEATURE(PPC_64B, QEMU_PPC_FEATURE_64);
     GET_FEATURE(PPC_FLOAT, QEMU_PPC_FEATURE_HAS_FPU);
     GET_FEATURE(PPC_ALTIVEC, QEMU_PPC_FEATURE_HAS_ALTIVEC);
@@ -757,7 +767,36 @@ static uint32_t get_elf_hwcap(void)
     GET_FEATURE(PPC_SPE_DOUBLE, QEMU_PPC_FEATURE_HAS_EFP_DOUBLE);
     GET_FEATURE(PPC_BOOKE, QEMU_PPC_FEATURE_BOOKE);
     GET_FEATURE(PPC_405_MAC, QEMU_PPC_FEATURE_HAS_4xxMAC);
+    GET_FEATURE2(PPC2_DFP, QEMU_PPC_FEATURE_HAS_DFP);
+    GET_FEATURE2(PPC2_VSX, QEMU_PPC_FEATURE_HAS_VSX);
+    GET_FEATURE2((PPC2_PERM_ISA206 | PPC2_DIVE_ISA206 | PPC2_ATOMIC_ISA206 |
+                  PPC2_FP_CVT_ISA206 | PPC2_FP_TST_ISA206),
+                  QEMU_PPC_FEATURE_ARCH_2_06);
+#undef GET_FEATURE
+#undef GET_FEATURE2
+
+    return features;
+}
+
+#define ELF_HWCAP2 get_elf_hwcap2()
+
+static uint32_t get_elf_hwcap2(void)
+{
+    PowerPCCPU *cpu = POWERPC_CPU(thread_cpu);
+    uint32_t features = 0;
+
+#define GET_FEATURE(flag, feature)                                      \
+    do { if (cpu->env.insns_flags & flag) { features |= feature; } } while (0)
+#define GET_FEATURE2(flag, feature)                                      \
+    do { if (cpu->env.insns_flags2 & flag) { features |= feature; } } while (0)
+
+    GET_FEATURE(PPC_ISEL, QEMU_PPC_FEATURE2_HAS_ISEL);
+    GET_FEATURE2(PPC2_BCTAR_ISA207, QEMU_PPC_FEATURE2_HAS_TAR);
+    GET_FEATURE2((PPC2_BCTAR_ISA207 | PPC2_LSQ_ISA207 | PPC2_ALTIVEC_207 |
+                  PPC2_ISA207S), QEMU_PPC_FEATURE2_ARCH_2_07);
+
 #undef GET_FEATURE
+#undef GET_FEATURE2
 
     return features;
 }
@@ -774,8 +813,9 @@ static uint32_t get_elf_hwcap(void)
 #define DLINFO_ARCH_ITEMS       5
 #define ARCH_DLINFO                                     \
     do {                                                \
-        NEW_AUX_ENT(AT_DCACHEBSIZE, 0x20);              \
-        NEW_AUX_ENT(AT_ICACHEBSIZE, 0x20);              \
+        PowerPCCPU *cpu = POWERPC_CPU(thread_cpu);              \
+        NEW_AUX_ENT(AT_DCACHEBSIZE, cpu->env.dcache_line_size); \
+        NEW_AUX_ENT(AT_ICACHEBSIZE, cpu->env.icache_line_size); \
         NEW_AUX_ENT(AT_UCACHEBSIZE, 0);                 \
         /*                                              \
          * Now handle glibc compatibility.              \
diff --git a/net/Makefile.objs b/net/Makefile.objs
index 301f6b6b51..a06ba59dad 100644
--- a/net/Makefile.objs
+++ b/net/Makefile.objs
@@ -2,6 +2,7 @@ common-obj-y = net.o queue.o checksum.o util.o hub.o
 common-obj-y += socket.o
 common-obj-y += dump.o
 common-obj-y += eth.o
+common-obj-$(CONFIG_LINUX) += l2tpv3.o
 common-obj-$(CONFIG_POSIX) += tap.o vhost-user.o
 common-obj-$(CONFIG_LINUX) += tap-linux.o
 common-obj-$(CONFIG_WIN32) += tap-win32.o
diff --git a/net/clients.h b/net/clients.h
index 7f3d4ae9f3..2e8fedad8d 100644
--- a/net/clients.h
+++ b/net/clients.h
@@ -47,6 +47,8 @@ int net_init_tap(const NetClientOptions *opts, const char *name,
 int net_init_bridge(const NetClientOptions *opts, const char *name,
                     NetClientState *peer);
 
+int net_init_l2tpv3(const NetClientOptions *opts, const char *name,
+                    NetClientState *peer);
 #ifdef CONFIG_VDE
 int net_init_vde(const NetClientOptions *opts, const char *name,
                  NetClientState *peer);
diff --git a/net/l2tpv3.c b/net/l2tpv3.c
new file mode 100644
index 0000000000..528d95b641
--- /dev/null
+++ b/net/l2tpv3.c
@@ -0,0 +1,757 @@
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ * Copyright (c) 2012-2014 Cisco Systems
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <linux/ip.h>
+#include <netdb.h>
+#include "config-host.h"
+#include "net/net.h"
+#include "clients.h"
+#include "monitor/monitor.h"
+#include "qemu-common.h"
+#include "qemu/error-report.h"
+#include "qemu/option.h"
+#include "qemu/sockets.h"
+#include "qemu/iov.h"
+#include "qemu/main-loop.h"
+
+
+/* The buffer size needs to be investigated for optimum numbers and
+ * optimum means of paging in on different systems. This size is
+ * chosen to be sufficient to accommodate one packet with some headers
+ */
+
+#define BUFFER_ALIGN sysconf(_SC_PAGESIZE)
+#define BUFFER_SIZE 2048
+#define IOVSIZE 2
+#define MAX_L2TPV3_MSGCNT 64
+#define MAX_L2TPV3_IOVCNT (MAX_L2TPV3_MSGCNT * IOVSIZE)
+
+/* Header set to 0x30000 signifies a data packet */
+
+#define L2TPV3_DATA_PACKET 0x30000
+
+/* IANA-assigned IP protocol ID for L2TPv3 */
+
+#ifndef IPPROTO_L2TP
+#define IPPROTO_L2TP 0x73
+#endif
+
+typedef struct NetL2TPV3State {
+    NetClientState nc;
+    int fd;
+
+    /*
+     * these are used for xmit - that happens packet a time
+     * and for first sign of life packet (easier to parse that once)
+     */
+
+    uint8_t *header_buf;
+    struct iovec *vec;
+
+    /*
+     * these are used for receive - try to "eat" up to 32 packets at a time
+     */
+
+    struct mmsghdr *msgvec;
+
+    /*
+     * peer address
+     */
+
+    struct sockaddr_storage *dgram_dst;
+    uint32_t dst_size;
+
+    /*
+     * L2TPv3 parameters
+     */
+
+    uint64_t rx_cookie;
+    uint64_t tx_cookie;
+    uint32_t rx_session;
+    uint32_t tx_session;
+    uint32_t header_size;
+    uint32_t counter;
+
+    /*
+    * DOS avoidance in error handling
+    */
+
+    bool header_mismatch;
+
+    /*
+     * Ring buffer handling
+     */
+
+    int queue_head;
+    int queue_tail;
+    int queue_depth;
+
+    /*
+     * Precomputed offsets
+     */
+
+    uint32_t offset;
+    uint32_t cookie_offset;
+    uint32_t counter_offset;
+    uint32_t session_offset;
+
+    /* Poll Control */
+
+    bool read_poll;
+    bool write_poll;
+
+    /* Flags */
+
+    bool ipv6;
+    bool udp;
+    bool has_counter;
+    bool pin_counter;
+    bool cookie;
+    bool cookie_is_64;
+
+} NetL2TPV3State;
+
+static int l2tpv3_can_send(void *opaque);
+static void net_l2tpv3_send(void *opaque);
+static void l2tpv3_writable(void *opaque);
+
+static void l2tpv3_update_fd_handler(NetL2TPV3State *s)
+{
+    qemu_set_fd_handler2(s->fd,
+                         s->read_poll ? l2tpv3_can_send : NULL,
+                         s->read_poll ? net_l2tpv3_send     : NULL,
+                         s->write_poll ? l2tpv3_writable : NULL,
+                         s);
+}
+
+static void l2tpv3_read_poll(NetL2TPV3State *s, bool enable)
+{
+    if (s->read_poll != enable) {
+        s->read_poll = enable;
+        l2tpv3_update_fd_handler(s);
+    }
+}
+
+static void l2tpv3_write_poll(NetL2TPV3State *s, bool enable)
+{
+    if (s->write_poll != enable) {
+        s->write_poll = enable;
+        l2tpv3_update_fd_handler(s);
+    }
+}
+
+static void l2tpv3_writable(void *opaque)
+{
+    NetL2TPV3State *s = opaque;
+    l2tpv3_write_poll(s, false);
+    qemu_flush_queued_packets(&s->nc);
+}
+
+static int l2tpv3_can_send(void *opaque)
+{
+    NetL2TPV3State *s = opaque;
+
+    return qemu_can_send_packet(&s->nc);
+}
+
+static void l2tpv3_send_completed(NetClientState *nc, ssize_t len)
+{
+    NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc);
+    l2tpv3_read_poll(s, true);
+}
+
+static void l2tpv3_poll(NetClientState *nc, bool enable)
+{
+    NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc);
+    l2tpv3_write_poll(s, enable);
+    l2tpv3_read_poll(s, enable);
+}
+
+static void l2tpv3_form_header(NetL2TPV3State *s)
+{
+    uint32_t *counter;
+
+    if (s->udp) {
+        stl_be_p((uint32_t *) s->header_buf, L2TPV3_DATA_PACKET);
+    }
+    stl_be_p(
+            (uint32_t *) (s->header_buf + s->session_offset),
+            s->tx_session
+        );
+    if (s->cookie) {
+        if (s->cookie_is_64) {
+            stq_be_p(
+                (uint64_t *)(s->header_buf + s->cookie_offset),
+                s->tx_cookie
+            );
+        } else {
+            stl_be_p(
+                (uint32_t *) (s->header_buf + s->cookie_offset),
+                s->tx_cookie
+            );
+        }
+    }
+    if (s->has_counter) {
+        counter = (uint32_t *)(s->header_buf + s->counter_offset);
+        if (s->pin_counter) {
+            *counter = 0;
+        } else {
+            stl_be_p(counter, ++s->counter);
+        }
+    }
+}
+
+static ssize_t net_l2tpv3_receive_dgram_iov(NetClientState *nc,
+                    const struct iovec *iov,
+                    int iovcnt)
+{
+    NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc);
+
+    struct msghdr message;
+    int ret;
+
+    if (iovcnt > MAX_L2TPV3_IOVCNT - 1) {
+        error_report(
+            "iovec too long %d > %d, change l2tpv3.h",
+            iovcnt, MAX_L2TPV3_IOVCNT
+        );
+        return -1;
+    }
+    l2tpv3_form_header(s);
+    memcpy(s->vec + 1, iov, iovcnt * sizeof(struct iovec));
+    s->vec->iov_base = s->header_buf;
+    s->vec->iov_len = s->offset;
+    message.msg_name = s->dgram_dst;
+    message.msg_namelen = s->dst_size;
+    message.msg_iov = s->vec;
+    message.msg_iovlen = iovcnt + 1;
+    message.msg_control = NULL;
+    message.msg_controllen = 0;
+    message.msg_flags = 0;
+    do {
+        ret = sendmsg(s->fd, &message, 0);
+    } while ((ret == -1) && (errno == EINTR));
+    if (ret > 0) {
+        ret -= s->offset;
+    } else if (ret == 0) {
+        /* belt and braces - should not occur on DGRAM
+        * we should get an error and never a 0 send
+        */
+        ret = iov_size(iov, iovcnt);
+    } else {
+        /* signal upper layer that socket buffer is full */
+        ret = -errno;
+        if (ret == -EAGAIN || ret == -ENOBUFS) {
+            l2tpv3_write_poll(s, true);
+            ret = 0;
+        }
+    }
+    return ret;
+}
+
+static ssize_t net_l2tpv3_receive_dgram(NetClientState *nc,
+                    const uint8_t *buf,
+                    size_t size)
+{
+    NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc);
+
+    struct iovec *vec;
+    struct msghdr message;
+    ssize_t ret = 0;
+
+    l2tpv3_form_header(s);
+    vec = s->vec;
+    vec->iov_base = s->header_buf;
+    vec->iov_len = s->offset;
+    vec++;
+    vec->iov_base = (void *) buf;
+    vec->iov_len = size;
+    message.msg_name = s->dgram_dst;
+    message.msg_namelen = s->dst_size;
+    message.msg_iov = s->vec;
+    message.msg_iovlen = 2;
+    message.msg_control = NULL;
+    message.msg_controllen = 0;
+    message.msg_flags = 0;
+    do {
+        ret = sendmsg(s->fd, &message, 0);
+    } while ((ret == -1) && (errno == EINTR));
+    if (ret > 0) {
+        ret -= s->offset;
+    } else if (ret == 0) {
+        /* belt and braces - should not occur on DGRAM
+        * we should get an error and never a 0 send
+        */
+        ret = size;
+    } else {
+        ret = -errno;
+        if (ret == -EAGAIN || ret == -ENOBUFS) {
+            /* signal upper layer that socket buffer is full */
+            l2tpv3_write_poll(s, true);
+            ret = 0;
+        }
+    }
+    return ret;
+}
+
+static int l2tpv3_verify_header(NetL2TPV3State *s, uint8_t *buf)
+{
+
+    uint32_t *session;
+    uint64_t cookie;
+
+    if ((!s->udp) && (!s->ipv6)) {
+        buf += sizeof(struct iphdr) /* fix for ipv4 raw */;
+    }
+
+    /* we do not do a strict check for "data" packets as per
+    * the RFC spec because the pure IP spec does not have
+    * that anyway.
+    */
+
+    if (s->cookie) {
+        if (s->cookie_is_64) {
+            cookie = ldq_be_p(buf + s->cookie_offset);
+        } else {
+            cookie = ldl_be_p(buf + s->cookie_offset);
+        }
+        if (cookie != s->rx_cookie) {
+            if (!s->header_mismatch) {
+                error_report("unknown cookie id");
+            }
+            return -1;
+        }
+    }
+    session = (uint32_t *) (buf + s->session_offset);
+    if (ldl_be_p(session) != s->rx_session) {
+        if (!s->header_mismatch) {
+            error_report("session mismatch");
+        }
+        return -1;
+    }
+    return 0;
+}
+
+static void net_l2tpv3_process_queue(NetL2TPV3State *s)
+{
+    int size = 0;
+    struct iovec *vec;
+    bool bad_read;
+    int data_size;
+    struct mmsghdr *msgvec;
+
+    /* go into ring mode only if there is a "pending" tail */
+    if (s->queue_depth > 0) {
+        do {
+            msgvec = s->msgvec + s->queue_tail;
+            if (msgvec->msg_len > 0) {
+                data_size = msgvec->msg_len - s->header_size;
+                vec = msgvec->msg_hdr.msg_iov;
+                if ((data_size > 0) &&
+                    (l2tpv3_verify_header(s, vec->iov_base) == 0)) {
+                    vec++;
+                    /* Use the legacy delivery for now, we will
+                     * switch to using our own ring as a queueing mechanism
+                     * at a later date
+                     */
+                    size = qemu_send_packet_async(
+                            &s->nc,
+                            vec->iov_base,
+                            data_size,
+                            l2tpv3_send_completed
+                        );
+                    if (size == 0) {
+                        l2tpv3_read_poll(s, false);
+                    }
+                    bad_read = false;
+                } else {
+                    bad_read = true;
+                    if (!s->header_mismatch) {
+                        /* report error only once */
+                        error_report("l2tpv3 header verification failed");
+                        s->header_mismatch = true;
+                    }
+                }
+            } else {
+                bad_read = true;
+            }
+            s->queue_tail = (s->queue_tail + 1) % MAX_L2TPV3_MSGCNT;
+            s->queue_depth--;
+        } while (
+                (s->queue_depth > 0) &&
+                 qemu_can_send_packet(&s->nc) &&
+                ((size > 0) || bad_read)
+            );
+    }
+}
+
+static void net_l2tpv3_send(void *opaque)
+{
+    NetL2TPV3State *s = opaque;
+    int target_count, count;
+    struct mmsghdr *msgvec;
+
+    /* go into ring mode only if there is a "pending" tail */
+
+    if (s->queue_depth) {
+
+        /* The ring buffer we use has variable intake
+         * count of how much we can read varies - adjust accordingly
+         */
+
+        target_count = MAX_L2TPV3_MSGCNT - s->queue_depth;
+
+        /* Ensure we do not overrun the ring when we have
+         * a lot of enqueued packets
+         */
+
+        if (s->queue_head + target_count > MAX_L2TPV3_MSGCNT) {
+            target_count = MAX_L2TPV3_MSGCNT - s->queue_head;
+        }
+    } else {
+
+        /* we do not have any pending packets - we can use
+        * the whole message vector linearly instead of using
+        * it as a ring
+        */
+
+        s->queue_head = 0;
+        s->queue_tail = 0;
+        target_count = MAX_L2TPV3_MSGCNT;
+    }
+
+    msgvec = s->msgvec + s->queue_head;
+    if (target_count > 0) {
+        do {
+            count = recvmmsg(
+                s->fd,
+                msgvec,
+                target_count, MSG_DONTWAIT, NULL);
+        } while ((count == -1) && (errno == EINTR));
+        if (count < 0) {
+            /* Recv error - we still need to flush packets here,
+             * (re)set queue head to current position
+             */
+            count = 0;
+        }
+        s->queue_head = (s->queue_head + count) % MAX_L2TPV3_MSGCNT;
+        s->queue_depth += count;
+    }
+    net_l2tpv3_process_queue(s);
+}
+
+static void destroy_vector(struct mmsghdr *msgvec, int count, int iovcount)
+{
+    int i, j;
+    struct iovec *iov;
+    struct mmsghdr *cleanup = msgvec;
+    if (cleanup) {
+        for (i = 0; i < count; i++) {
+            if (cleanup->msg_hdr.msg_iov) {
+                iov = cleanup->msg_hdr.msg_iov;
+                for (j = 0; j < iovcount; j++) {
+                    g_free(iov->iov_base);
+                    iov++;
+                }
+                g_free(cleanup->msg_hdr.msg_iov);
+            }
+            cleanup++;
+        }
+        g_free(msgvec);
+    }
+}
+
+static struct mmsghdr *build_l2tpv3_vector(NetL2TPV3State *s, int count)
+{
+    int i;
+    struct iovec *iov;
+    struct mmsghdr *msgvec, *result;
+
+    msgvec = g_malloc(sizeof(struct mmsghdr) * count);
+    result = msgvec;
+    for (i = 0; i < count ; i++) {
+        msgvec->msg_hdr.msg_name = NULL;
+        msgvec->msg_hdr.msg_namelen = 0;
+        iov =  g_malloc(sizeof(struct iovec) * IOVSIZE);
+        msgvec->msg_hdr.msg_iov = iov;
+        iov->iov_base = g_malloc(s->header_size);
+        iov->iov_len = s->header_size;
+        iov++ ;
+        iov->iov_base = qemu_memalign(BUFFER_ALIGN, BUFFER_SIZE);
+        iov->iov_len = BUFFER_SIZE;
+        msgvec->msg_hdr.msg_iovlen = 2;
+        msgvec->msg_hdr.msg_control = NULL;
+        msgvec->msg_hdr.msg_controllen = 0;
+        msgvec->msg_hdr.msg_flags = 0;
+        msgvec++;
+    }
+    return result;
+}
+
+static void net_l2tpv3_cleanup(NetClientState *nc)
+{
+    NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc);
+    qemu_purge_queued_packets(nc);
+    l2tpv3_read_poll(s, false);
+    l2tpv3_write_poll(s, false);
+    if (s->fd > 0) {
+        close(s->fd);
+    }
+    destroy_vector(s->msgvec, MAX_L2TPV3_MSGCNT, IOVSIZE);
+    g_free(s->vec);
+    g_free(s->header_buf);
+    g_free(s->dgram_dst);
+}
+
+static NetClientInfo net_l2tpv3_info = {
+    .type = NET_CLIENT_OPTIONS_KIND_L2TPV3,
+    .size = sizeof(NetL2TPV3State),
+    .receive = net_l2tpv3_receive_dgram,
+    .receive_iov = net_l2tpv3_receive_dgram_iov,
+    .poll = l2tpv3_poll,
+    .cleanup = net_l2tpv3_cleanup,
+};
+
+int net_init_l2tpv3(const NetClientOptions *opts,
+                    const char *name,
+                    NetClientState *peer)
+{
+
+
+    const NetdevL2TPv3Options *l2tpv3;
+    NetL2TPV3State *s;
+    NetClientState *nc;
+    int fd = -1, gairet;
+    struct addrinfo hints;
+    struct addrinfo *result = NULL;
+    char *srcport, *dstport;
+
+    nc = qemu_new_net_client(&net_l2tpv3_info, peer, "l2tpv3", name);
+
+    s = DO_UPCAST(NetL2TPV3State, nc, nc);
+
+    s->queue_head = 0;
+    s->queue_tail = 0;
+    s->header_mismatch = false;
+
+    assert(opts->kind == NET_CLIENT_OPTIONS_KIND_L2TPV3);
+    l2tpv3 = opts->l2tpv3;
+
+    if (l2tpv3->has_ipv6 && l2tpv3->ipv6) {
+        s->ipv6 = l2tpv3->ipv6;
+    } else {
+        s->ipv6 = false;
+    }
+
+    if ((l2tpv3->has_offset) && (l2tpv3->offset > 256)) {
+        error_report("l2tpv3_open : offset must be less than 256 bytes");
+        goto outerr;
+    }
+
+    if (l2tpv3->has_rxcookie || l2tpv3->has_txcookie) {
+        if (l2tpv3->has_rxcookie && l2tpv3->has_txcookie) {
+            s->cookie = true;
+        } else {
+            goto outerr;
+        }
+    } else {
+        s->cookie = false;
+    }
+
+    if (l2tpv3->has_cookie64 || l2tpv3->cookie64) {
+        s->cookie_is_64  = true;
+    } else {
+        s->cookie_is_64  = false;
+    }
+
+    if (l2tpv3->has_udp && l2tpv3->udp) {
+        s->udp = true;
+        if (!(l2tpv3->has_srcport && l2tpv3->has_dstport)) {
+            error_report("l2tpv3_open : need both src and dst port for udp");
+            goto outerr;
+        } else {
+            srcport = l2tpv3->srcport;
+            dstport = l2tpv3->dstport;
+        }
+    } else {
+        s->udp = false;
+        srcport = NULL;
+        dstport = NULL;
+    }
+
+
+    s->offset = 4;
+    s->session_offset = 0;
+    s->cookie_offset = 4;
+    s->counter_offset = 4;
+
+    s->tx_session = l2tpv3->txsession;
+    if (l2tpv3->has_rxsession) {
+        s->rx_session = l2tpv3->rxsession;
+    } else {
+        s->rx_session = s->tx_session;
+    }
+
+    if (s->cookie) {
+        s->rx_cookie = l2tpv3->rxcookie;
+        s->tx_cookie = l2tpv3->txcookie;
+        if (s->cookie_is_64 == true) {
+            /* 64 bit cookie */
+            s->offset += 8;
+            s->counter_offset += 8;
+        } else {
+            /* 32 bit cookie */
+            s->offset += 4;
+            s->counter_offset += 4;
+        }
+    }
+
+    memset(&hints, 0, sizeof(hints));
+
+    if (s->ipv6) {
+        hints.ai_family = AF_INET6;
+    } else {
+        hints.ai_family = AF_INET;
+    }
+    if (s->udp) {
+        hints.ai_socktype = SOCK_DGRAM;
+        hints.ai_protocol = 0;
+        s->offset += 4;
+        s->counter_offset += 4;
+        s->session_offset += 4;
+        s->cookie_offset += 4;
+    } else {
+        hints.ai_socktype = SOCK_RAW;
+        hints.ai_protocol = IPPROTO_L2TP;
+    }
+
+    gairet = getaddrinfo(l2tpv3->src, srcport, &hints, &result);
+
+    if ((gairet != 0) || (result == NULL)) {
+        error_report(
+            "l2tpv3_open : could not resolve src, errno = %s",
+            gai_strerror(gairet)
+        );
+        goto outerr;
+    }
+    fd = socket(result->ai_family, result->ai_socktype, result->ai_protocol);
+    if (fd == -1) {
+        fd = -errno;
+        error_report("l2tpv3_open : socket creation failed, errno = %d", -fd);
+        freeaddrinfo(result);
+        goto outerr;
+    }
+    if (bind(fd, (struct sockaddr *) result->ai_addr, result->ai_addrlen)) {
+        error_report("l2tpv3_open :  could not bind socket err=%i", errno);
+        goto outerr;
+    }
+    if (result) {
+        freeaddrinfo(result);
+    }
+
+    memset(&hints, 0, sizeof(hints));
+
+    if (s->ipv6) {
+        hints.ai_family = AF_INET6;
+    } else {
+        hints.ai_family = AF_INET;
+    }
+    if (s->udp) {
+        hints.ai_socktype = SOCK_DGRAM;
+        hints.ai_protocol = 0;
+    } else {
+        hints.ai_socktype = SOCK_RAW;
+        hints.ai_protocol = IPPROTO_L2TP;
+    }
+
+    result = NULL;
+    gairet = getaddrinfo(l2tpv3->dst, dstport, &hints, &result);
+    if ((gairet != 0) || (result == NULL)) {
+        error_report(
+            "l2tpv3_open : could not resolve dst, error = %s",
+            gai_strerror(gairet)
+        );
+        goto outerr;
+    }
+
+    s->dgram_dst = g_malloc(sizeof(struct sockaddr_storage));
+    memset(s->dgram_dst, '\0' , sizeof(struct sockaddr_storage));
+    memcpy(s->dgram_dst, result->ai_addr, result->ai_addrlen);
+    s->dst_size = result->ai_addrlen;
+
+    if (result) {
+        freeaddrinfo(result);
+    }
+
+    if (l2tpv3->has_counter && l2tpv3->counter) {
+        s->has_counter = true;
+        s->offset += 4;
+    } else {
+        s->has_counter = false;
+    }
+
+    if (l2tpv3->has_pincounter && l2tpv3->pincounter) {
+        s->has_counter = true;  /* pin counter implies that there is counter */
+        s->pin_counter = true;
+    } else {
+        s->pin_counter = false;
+    }
+
+    if (l2tpv3->has_offset) {
+        /* extra offset */
+        s->offset += l2tpv3->offset;
+    }
+
+    if ((s->ipv6) || (s->udp)) {
+        s->header_size = s->offset;
+    } else {
+        s->header_size = s->offset + sizeof(struct iphdr);
+    }
+
+    s->msgvec = build_l2tpv3_vector(s, MAX_L2TPV3_MSGCNT);
+    s->vec = g_malloc(sizeof(struct iovec) * MAX_L2TPV3_IOVCNT);
+    s->header_buf = g_malloc(s->header_size);
+
+    qemu_set_nonblock(fd);
+
+    s->fd = fd;
+    s->counter = 0;
+
+    l2tpv3_read_poll(s, true);
+
+    snprintf(s->nc.info_str, sizeof(s->nc.info_str),
+             "l2tpv3: connected");
+    return 0;
+outerr:
+    qemu_del_net_client(nc);
+    if (fd > 0) {
+        close(fd);
+    }
+    if (result) {
+        freeaddrinfo(result);
+    }
+    return -1;
+}
+
diff --git a/net/net.c b/net/net.c
index 3dac29b844..0869c60bee 100644
--- a/net/net.c
+++ b/net/net.c
@@ -250,7 +250,7 @@ NICState *qemu_new_nic(NetClientInfo *info,
 {
     NetClientState **peers = conf->peers.ncs;
     NICState *nic;
-    int i, queues = MAX(1, conf->queues);
+    int i, queues = MAX(1, conf->peers.queues);
 
     assert(info->type == NET_CLIENT_OPTIONS_KIND_NIC);
     assert(info->size >= sizeof(NICState));
@@ -363,7 +363,7 @@ void qemu_del_net_client(NetClientState *nc)
 
 void qemu_del_nic(NICState *nic)
 {
-    int i, queues = MAX(nic->conf->queues, 1);
+    int i, queues = MAX(nic->conf->peers.queues, 1);
 
     /* If this is a peer NIC and peer has already been deleted, free it now. */
     if (nic->peer_deleted) {
@@ -806,6 +806,9 @@ static int (* const net_client_init_fun[NET_CLIENT_OPTIONS_KIND_MAX])(
 #ifdef CONFIG_VHOST_NET_USED
         [NET_CLIENT_OPTIONS_KIND_VHOST_USER] = net_init_vhost_user,
 #endif
+#ifdef CONFIG_LINUX
+        [NET_CLIENT_OPTIONS_KIND_L2TPV3]    = net_init_l2tpv3,
+#endif
 };
 
 
@@ -842,6 +845,9 @@ static int net_client_init1(const void *object, int is_netdev, Error **errp)
 #ifdef CONFIG_VHOST_NET_USED
         case NET_CLIENT_OPTIONS_KIND_VHOST_USER:
 #endif
+#ifdef CONFIG_LINUX
+        case NET_CLIENT_OPTIONS_KIND_L2TPV3:
+#endif
             break;
 
         default:
diff --git a/pc-bios/s390-ccw.img b/pc-bios/s390-ccw.img
index 1c7f7640fc..603e19e003 100644
--- a/pc-bios/s390-ccw.img
+++ b/pc-bios/s390-ccw.img
Binary files differdiff --git a/pc-bios/s390-ccw/bootmap.c b/pc-bios/s390-ccw/bootmap.c
index 5ee3fcb38f..fa54abb4d3 100644
--- a/pc-bios/s390-ccw/bootmap.c
+++ b/pc-bios/s390-ccw/bootmap.c
@@ -9,8 +9,12 @@
  */
 
 #include "s390-ccw.h"
+#include "bootmap.h"
+#include "virtio.h"
 
-// #define DEBUG_FALLBACK
+#ifdef DEBUG
+/* #define DEBUG_FALLBACK */
+#endif
 
 #ifdef DEBUG_FALLBACK
 #define dputs(txt) \
@@ -20,43 +24,8 @@
     do { } while (0)
 #endif
 
-struct scsi_blockptr {
-    uint64_t blockno;
-    uint16_t size;
-    uint16_t blockct;
-    uint8_t reserved[4];
-} __attribute__ ((packed));
-
-struct component_entry {
-    struct scsi_blockptr data;
-    uint8_t pad[7];
-    uint8_t component_type;
-    uint64_t load_address;
-} __attribute((packed));
-
-struct component_header {
-    uint8_t magic[4];
-    uint8_t type;
-    uint8_t reserved[27];
-} __attribute((packed));
-
-struct mbr {
-    uint8_t magic[4];
-    uint32_t version_id;
-    uint8_t reserved[8];
-    struct scsi_blockptr blockptr;
-} __attribute__ ((packed));
-
-#define ZIPL_MAGIC			"zIPL"
-
-#define ZIPL_COMP_HEADER_IPL		0x00
-#define ZIPL_COMP_HEADER_DUMP		0x01
-
-#define ZIPL_COMP_ENTRY_LOAD		0x02
-#define ZIPL_COMP_ENTRY_EXEC		0x01
-
 /* Scratch space */
-static uint8_t sec[SECTOR_SIZE] __attribute__((__aligned__(SECTOR_SIZE)));
+static uint8_t sec[MAX_SECTOR_SIZE*4] __attribute__((__aligned__(PAGE_SIZE)));
 
 typedef struct ResetInfo {
     uint32_t ipl_mask;
@@ -104,43 +73,243 @@ static void jump_to_IPL_code(uint64_t address)
     virtio_panic("\n! IPL returns !\n");
 }
 
-/* Check for ZIPL magic. Returns 0 if not matched. */
-static int zipl_magic(uint8_t *ptr)
+/***********************************************************************
+ * IPL an ECKD DASD (CDL or LDL/CMS format)
+ */
+
+static unsigned char _bprs[8*1024]; /* guessed "max" ECKD sector size */
+const int max_bprs_entries = sizeof(_bprs) / sizeof(ExtEckdBlockPtr);
+
+static inline void verify_boot_info(BootInfo *bip)
+{
+    IPL_assert(magic_match(bip->magic, ZIPL_MAGIC), "No zIPL magic");
+    IPL_assert(bip->version == BOOT_INFO_VERSION, "Wrong zIPL version");
+    IPL_assert(bip->bp_type == BOOT_INFO_BP_TYPE_IPL, "DASD is not for IPL");
+    IPL_assert(bip->dev_type == BOOT_INFO_DEV_TYPE_ECKD, "DASD is not ECKD");
+    IPL_assert(bip->flags == BOOT_INFO_FLAGS_ARCH, "Not for this arch");
+    IPL_assert(block_size_ok(bip->bp.ipl.bm_ptr.eckd.bptr.size),
+               "Bad block size in zIPL section of the 1st record.");
+}
+
+static bool eckd_valid_address(BootMapPointer *p)
 {
-    uint32_t *p = (void*)ptr;
-    uint32_t *z = (void*)ZIPL_MAGIC;
+    const uint64_t cylinder = p->eckd.cylinder
+                            + ((p->eckd.head & 0xfff0) << 12);
+    const uint64_t head = p->eckd.head & 0x000f;
+
+    if (head >= virtio_get_heads()
+        ||  p->eckd.sector > virtio_get_sectors()
+        ||  p->eckd.sector <= 0) {
+        return false;
+    }
 
-    if (*p != *z) {
-        debug_print_int("invalid magic", *p);
-        virtio_panic("invalid magic");
+    if (!virtio_guessed_disk_nature() && cylinder >= virtio_get_cylinders()) {
+        return false;
     }
 
-    return 1;
+    return true;
 }
 
-#define FREE_SPACE_FILLER '\xAA'
+static block_number_t eckd_block_num(BootMapPointer *p)
+{
+    const uint64_t sectors = virtio_get_sectors();
+    const uint64_t heads = virtio_get_heads();
+    const uint64_t cylinder = p->eckd.cylinder
+                            + ((p->eckd.head & 0xfff0) << 12);
+    const uint64_t head = p->eckd.head & 0x000f;
+    const block_number_t block = sectors * heads * cylinder
+                               + sectors * head
+                               + p->eckd.sector
+                               - 1; /* block nr starts with zero */
+    return block;
+}
 
-static inline bool unused_space(const void *p, unsigned int size)
+static block_number_t load_eckd_segments(block_number_t blk, uint64_t *address)
 {
-    int i;
-    const unsigned char *m = p;
+    block_number_t block_nr;
+    int j, rc;
+    BootMapPointer *bprs = (void *)_bprs;
+    bool more_data;
+
+    memset(_bprs, FREE_SPACE_FILLER, sizeof(_bprs));
+    read_block(blk, bprs, "BPRS read failed");
+
+    do {
+        more_data = false;
+        for (j = 0;; j++) {
+            block_nr = eckd_block_num((void *)&(bprs[j].xeckd));
+            if (is_null_block_number(block_nr)) { /* end of chunk */
+                break;
+            }
+
+            /* we need the updated blockno for the next indirect entry
+             * in the chain, but don't want to advance address
+             */
+            if (j == (max_bprs_entries - 1)) {
+                break;
+            }
+
+            IPL_assert(block_size_ok(bprs[j].xeckd.bptr.size),
+                       "bad chunk block size");
+            IPL_assert(eckd_valid_address(&bprs[j]), "bad chunk ECKD addr");
+
+            if ((bprs[j].xeckd.bptr.count == 0) && unused_space(&(bprs[j+1]),
+                sizeof(EckdBlockPtr))) {
+                /* This is a "continue" pointer.
+                 * This ptr should be the last one in the current
+                 * script section.
+                 * I.e. the next ptr must point to the unused memory area
+                 */
+                memset(_bprs, FREE_SPACE_FILLER, sizeof(_bprs));
+                read_block(block_nr, bprs, "BPRS continuation read failed");
+                more_data = true;
+                break;
+            }
 
-    for (i = 0; i < size; i++) {
-        if (m[i] != FREE_SPACE_FILLER) {
-            return false;
+            /* Load (count+1) blocks of code at (block_nr)
+             * to memory (address).
+             */
+            rc = virtio_read_many(block_nr, (void *)(*address),
+                                  bprs[j].xeckd.bptr.count+1);
+            IPL_assert(rc == 0, "code chunk read failed");
+
+            *address += (bprs[j].xeckd.bptr.count+1) * virtio_get_block_size();
         }
+    } while (more_data);
+    return block_nr;
+}
+
+static void run_eckd_boot_script(block_number_t mbr_block_nr)
+{
+    int i;
+    block_number_t block_nr;
+    uint64_t address;
+    ScsiMbr *scsi_mbr = (void *)sec;
+    BootMapScript *bms = (void *)sec;
+
+    memset(sec, FREE_SPACE_FILLER, sizeof(sec));
+    read_block(mbr_block_nr, sec, "Cannot read MBR");
+
+    block_nr = eckd_block_num((void *)&(scsi_mbr->blockptr));
+
+    memset(sec, FREE_SPACE_FILLER, sizeof(sec));
+    read_block(block_nr, sec, "Cannot read Boot Map Script");
+
+    for (i = 0; bms->entry[i].type == BOOT_SCRIPT_LOAD; i++) {
+        address = bms->entry[i].address.load_address;
+        block_nr = eckd_block_num(&(bms->entry[i].blkptr));
+
+        do {
+            block_nr = load_eckd_segments(block_nr, &address);
+        } while (block_nr != -1);
+    }
+
+    IPL_assert(bms->entry[i].type == BOOT_SCRIPT_EXEC,
+               "Unknown script entry type");
+    jump_to_IPL_code(bms->entry[i].address.load_address); /* no return */
+}
+
+static void ipl_eckd_cdl(void)
+{
+    XEckdMbr *mbr;
+    Ipl2 *ipl2 = (void *)sec;
+    IplVolumeLabel *vlbl = (void *)sec;
+    block_number_t block_nr;
+
+    /* we have just read the block #0 and recognized it as "IPL1" */
+    sclp_print("CDL\n");
+
+    memset(sec, FREE_SPACE_FILLER, sizeof(sec));
+    read_block(1, ipl2, "Cannot read IPL2 record at block 1");
+    IPL_assert(magic_match(ipl2, IPL2_MAGIC), "No IPL2 record");
+
+    mbr = &ipl2->u.x.mbr;
+    IPL_assert(magic_match(mbr, ZIPL_MAGIC), "No zIPL section in IPL2 record.");
+    IPL_assert(block_size_ok(mbr->blockptr.xeckd.bptr.size),
+               "Bad block size in zIPL section of IPL2 record.");
+    IPL_assert(mbr->dev_type == DEV_TYPE_ECKD,
+               "Non-ECKD device type in zIPL section of IPL2 record.");
+
+    /* save pointer to Boot Script */
+    block_nr = eckd_block_num((void *)&(mbr->blockptr));
+
+    memset(sec, FREE_SPACE_FILLER, sizeof(sec));
+    read_block(2, vlbl, "Cannot read Volume Label at block 2");
+    IPL_assert(magic_match(vlbl->key, VOL1_MAGIC),
+               "Invalid magic of volume label block");
+    IPL_assert(magic_match(vlbl->f.key, VOL1_MAGIC),
+               "Invalid magic of volser block");
+    print_volser(vlbl->f.volser);
+
+    run_eckd_boot_script(block_nr);
+    /* no return */
+}
+
+static void ipl_eckd_ldl(ECKD_IPL_mode_t mode)
+{
+    LDL_VTOC *vlbl = (void *)sec; /* already read, 3rd block */
+    char msg[4] = { '?', '.', '\n', '\0' };
+    block_number_t block_nr;
+    BootInfo *bip;
+
+    sclp_print((mode == ECKD_CMS) ? "CMS" : "LDL");
+    sclp_print(" version ");
+    switch (vlbl->LDL_version) {
+    case LDL1_VERSION:
+        msg[0] = '1';
+        break;
+    case LDL2_VERSION:
+        msg[0] = '2';
+        break;
+    default:
+        msg[0] = vlbl->LDL_version;
+        msg[0] &= 0x0f; /* convert EBCDIC   */
+        msg[0] |= 0x30; /* to ASCII (digit) */
+        msg[1] = '?';
+        break;
+    }
+    sclp_print(msg);
+    print_volser(vlbl->volser);
+
+    /* DO NOT read BootMap pointer (only one, xECKD) at block #2 */
+
+    memset(sec, FREE_SPACE_FILLER, sizeof(sec));
+    read_block(0, sec, "Cannot read block 0");
+    bip = (void *)(sec + 0x70); /* "boot info" is "eckd mbr" for LDL */
+    verify_boot_info(bip);
+
+    block_nr = eckd_block_num((void *)&(bip->bp.ipl.bm_ptr.eckd.bptr));
+    run_eckd_boot_script(block_nr);
+    /* no return */
+}
+
+static void ipl_eckd(ECKD_IPL_mode_t mode)
+{
+    switch (mode) {
+    case ECKD_CDL:
+        ipl_eckd_cdl(); /* no return */
+    case ECKD_CMS:
+    case ECKD_LDL:
+        ipl_eckd_ldl(mode); /* no return */
+    default:
+        virtio_panic("\n! Unknown ECKD IPL mode !\n");
     }
-    return true;
 }
 
-static int zipl_load_segment(struct component_entry *entry)
+/***********************************************************************
+ * IPL a SCSI disk
+ */
+
+static void zipl_load_segment(ComponentEntry *entry)
 {
-    const int max_entries = (SECTOR_SIZE / sizeof(struct scsi_blockptr));
-    struct scsi_blockptr *bprs = (void*)sec;
+    const int max_entries = (MAX_SECTOR_SIZE / sizeof(ScsiBlockPtr));
+    ScsiBlockPtr *bprs = (void *)sec;
     const int bprs_size = sizeof(sec);
-    uint64_t blockno;
-    long address;
+    block_number_t blockno;
+    uint64_t address;
     int i;
+    char err_msg[] = "zIPL failed to read BPRS at 0xZZZZZZZZZZZZZZZZ";
+    char *blk_no = &err_msg[30]; /* where to print blockno in (those ZZs) */
 
     blockno = entry->data.blockno;
     address = entry->load_address;
@@ -150,25 +319,25 @@ static int zipl_load_segment(struct component_entry *entry)
 
     do {
         memset(bprs, FREE_SPACE_FILLER, bprs_size);
-        if (virtio_read(blockno, (uint8_t *)bprs)) {
-            debug_print_int("failed reading bprs at", blockno);
-            goto fail;
-        }
+        fill_hex_val(blk_no, &blockno, sizeof(blockno));
+        read_block(blockno, bprs, err_msg);
 
         for (i = 0;; i++) {
-            u64 *cur_desc = (void*)&bprs[i];
+            uint64_t *cur_desc = (void *)&bprs[i];
 
             blockno = bprs[i].blockno;
-            if (!blockno)
+            if (!blockno) {
                 break;
+            }
 
             /* we need the updated blockno for the next indirect entry in the
                chain, but don't want to advance address */
-            if (i == (max_entries - 1))
+            if (i == (max_entries - 1)) {
                 break;
+            }
 
             if (bprs[i].blockct == 0 && unused_space(&bprs[i + 1],
-                sizeof(struct scsi_blockptr))) {
+                sizeof(ScsiBlockPtr))) {
                 /* This is a "continue" pointer.
                  * This ptr is the last one in the current script section.
                  * I.e. the next ptr must point to the unused memory area.
@@ -178,102 +347,66 @@ static int zipl_load_segment(struct component_entry *entry)
                 break;
             }
             address = virtio_load_direct(cur_desc[0], cur_desc[1], 0,
-                                         (void*)address);
-            if (address == -1)
-                goto fail;
+                                         (void *)address);
+            IPL_assert(address != -1, "zIPL load segment failed");
         }
     } while (blockno);
-
-    return 0;
-
-fail:
-    sclp_print("failed loading segment\n");
-    return -1;
 }
 
 /* Run a zipl program */
-static int zipl_run(struct scsi_blockptr *pte)
+static void zipl_run(ScsiBlockPtr *pte)
 {
-    struct component_header *header;
-    struct component_entry *entry;
-    uint8_t tmp_sec[SECTOR_SIZE];
+    ComponentHeader *header;
+    ComponentEntry *entry;
+    uint8_t tmp_sec[MAX_SECTOR_SIZE];
 
-    virtio_read(pte->blockno, tmp_sec);
-    header = (struct component_header *)tmp_sec;
+    read_block(pte->blockno, tmp_sec, "Cannot read header");
+    header = (ComponentHeader *)tmp_sec;
 
-    if (!zipl_magic(tmp_sec)) {
-        goto fail;
-    }
-
-    if (header->type != ZIPL_COMP_HEADER_IPL) {
-        goto fail;
-    }
+    IPL_assert(magic_match(tmp_sec, ZIPL_MAGIC), "No zIPL magic");
+    IPL_assert(header->type == ZIPL_COMP_HEADER_IPL, "Bad header type");
 
     dputs("start loading images\n");
 
     /* Load image(s) into RAM */
-    entry = (struct component_entry *)(&header[1]);
+    entry = (ComponentEntry *)(&header[1]);
     while (entry->component_type == ZIPL_COMP_ENTRY_LOAD) {
-        if (zipl_load_segment(entry) < 0) {
-            goto fail;
-        }
+        zipl_load_segment(entry);
 
         entry++;
 
-        if ((uint8_t*)(&entry[1]) > (tmp_sec + SECTOR_SIZE)) {
-            goto fail;
-        }
+        IPL_assert((uint8_t *)(&entry[1]) <= (tmp_sec + MAX_SECTOR_SIZE),
+                   "Wrong entry value");
     }
 
-    if (entry->component_type != ZIPL_COMP_ENTRY_EXEC) {
-        goto fail;
-    }
+    IPL_assert(entry->component_type == ZIPL_COMP_ENTRY_EXEC, "No EXEC entry");
 
     /* should not return */
     jump_to_IPL_code(entry->load_address);
-
-    return 0;
-
-fail:
-    sclp_print("failed running zipl\n");
-    return -1;
 }
 
-int zipl_load(void)
+static void ipl_scsi(void)
 {
-    struct mbr *mbr = (void*)sec;
+    ScsiMbr *mbr = (void *)sec;
     uint8_t *ns, *ns_end;
     int program_table_entries = 0;
-    int pte_len = sizeof(struct scsi_blockptr);
-    struct scsi_blockptr *prog_table_entry;
-    const char *error = "";
-
-    /* Grab the MBR */
-    virtio_read(0, (void*)mbr);
-
-    dputs("checking magic\n");
+    const int pte_len = sizeof(ScsiBlockPtr);
+    ScsiBlockPtr *prog_table_entry;
 
-    if (!zipl_magic(mbr->magic)) {
-        error = "zipl_magic 1";
-        goto fail;
-    }
+    /* The 0-th block (MBR) was already read into sec[] */
 
+    sclp_print("Using SCSI scheme.\n");
     debug_print_int("program table", mbr->blockptr.blockno);
 
     /* Parse the program table */
-    if (virtio_read(mbr->blockptr.blockno, sec)) {
-        error = "virtio_read";
-        goto fail;
-    }
+    read_block(mbr->blockptr.blockno, sec,
+               "Error reading Program Table");
 
-    if (!zipl_magic(sec)) {
-        error = "zipl_magic 2";
-        goto fail;
-    }
+    IPL_assert(magic_match(sec, ZIPL_MAGIC), "No zIPL magic");
 
-    ns_end = sec + SECTOR_SIZE;
+    ns_end = sec + virtio_get_block_size();
     for (ns = (sec + pte_len); (ns + pte_len) < ns_end; ns++) {
-        prog_table_entry = (struct scsi_blockptr *)ns;
+        prog_table_entry = (ScsiBlockPtr *)ns;
         if (!prog_table_entry->blockno) {
             break;
         }
@@ -283,19 +416,55 @@ int zipl_load(void)
 
     debug_print_int("program table entries", program_table_entries);
 
-    if (!program_table_entries) {
-        goto fail;
-    }
+    IPL_assert(program_table_entries != 0, "Empty Program Table");
 
     /* Run the default entry */
 
-    prog_table_entry = (struct scsi_blockptr *)(sec + pte_len);
+    prog_table_entry = (ScsiBlockPtr *)(sec + pte_len);
 
-    return zipl_run(prog_table_entry);
+    zipl_run(prog_table_entry); /* no return */
+}
 
-fail:
-    sclp_print("failed loading zipl: ");
-    sclp_print(error);
-    sclp_print("\n");
-    return -1;
+/***********************************************************************
+ * IPL starts here
+ */
+
+void zipl_load(void)
+{
+    ScsiMbr *mbr = (void *)sec;
+    LDL_VTOC *vlbl = (void *)sec;
+
+    /* Grab the MBR */
+    memset(sec, FREE_SPACE_FILLER, sizeof(sec));
+    read_block(0, mbr, "Cannot read block 0");
+
+    dputs("checking magic\n");
+
+    if (magic_match(mbr->magic, ZIPL_MAGIC)) {
+        ipl_scsi(); /* no return */
+    }
+
+    /* We have failed to follow the SCSI scheme, so */
+    sclp_print("Using ECKD scheme.\n");
+    if (virtio_guessed_disk_nature()) {
+        sclp_print("Using guessed DASD geometry.\n");
+        virtio_assume_eckd();
+    }
+
+    if (magic_match(mbr->magic, IPL1_MAGIC)) {
+        ipl_eckd(ECKD_CDL); /* no return */
+    }
+
+    /* LDL/CMS? */
+    memset(sec, FREE_SPACE_FILLER, sizeof(sec));
+    read_block(2, vlbl, "Cannot read block 2");
+
+    if (magic_match(vlbl->magic, CMS1_MAGIC)) {
+        ipl_eckd(ECKD_CMS); /* no return */
+    }
+    if (magic_match(vlbl->magic, LNX1_MAGIC)) {
+        ipl_eckd(ECKD_LDL); /* no return */
+    }
+
+    virtio_panic("\n* invalid MBR magic *\n");
 }
diff --git a/pc-bios/s390-ccw/bootmap.h b/pc-bios/s390-ccw/bootmap.h
new file mode 100644
index 0000000000..30ef22fe61
--- /dev/null
+++ b/pc-bios/s390-ccw/bootmap.h
@@ -0,0 +1,344 @@
+/*
+ * QEMU S390 bootmap interpreter -- declarations
+ *
+ * Copyright 2014 IBM Corp.
+ * Author(s): Eugene (jno) Dvurechenski <jno@linux.vnet.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at
+ * your option) any later version. See the COPYING file in the top-level
+ * directory.
+ */
+#ifndef _PC_BIOS_S390_CCW_BOOTMAP_H
+#define _PC_BIOS_S390_CCW_BOOTMAP_H
+
+#include "s390-ccw.h"
+#include "virtio.h"
+
+typedef uint64_t block_number_t;
+#define NULL_BLOCK_NR 0xffffffffffffffff
+
+#define FREE_SPACE_FILLER '\xAA'
+
+typedef struct ScsiBlockPtr {
+    uint64_t blockno;
+    uint16_t size;
+    uint16_t blockct;
+    uint8_t reserved[4];
+} __attribute__ ((packed)) ScsiBlockPtr;
+
+typedef struct FbaBlockPtr {
+    uint32_t blockno;
+    uint16_t size;
+    uint16_t blockct;
+} __attribute__ ((packed)) FbaBlockPtr;
+
+typedef struct EckdBlockPtr {
+    uint16_t cylinder; /* cylinder/head/sector is an address of the block */
+    uint16_t head;
+    uint8_t sector;
+    uint16_t size;
+    uint8_t count; /* (size_in_blocks-1);
+                    * it's 0 for TablePtr, ScriptPtr, and SectionPtr */
+} __attribute__ ((packed)) EckdBlockPtr;
+
+typedef struct ExtEckdBlockPtr {
+    EckdBlockPtr bptr;
+    uint8_t reserved[8];
+} __attribute__ ((packed)) ExtEckdBlockPtr;
+
+typedef union BootMapPointer {
+    ScsiBlockPtr scsi;
+    FbaBlockPtr fba;
+    EckdBlockPtr eckd;
+    ExtEckdBlockPtr xeckd;
+} __attribute__ ((packed)) BootMapPointer;
+
+typedef struct ComponentEntry {
+    ScsiBlockPtr data;
+    uint8_t pad[7];
+    uint8_t component_type;
+    uint64_t load_address;
+} __attribute((packed)) ComponentEntry;
+
+typedef struct ComponentHeader {
+    uint8_t magic[4];   /* == "zIPL"                    */
+    uint8_t type;       /* == ZIPL_COMP_HEADER_*        */
+    uint8_t reserved[27];
+} __attribute((packed)) ComponentHeader;
+
+typedef struct ScsiMbr {
+    uint8_t magic[4];
+    uint32_t version_id;
+    uint8_t reserved[8];
+    ScsiBlockPtr blockptr;
+} __attribute__ ((packed)) ScsiMbr;
+
+#define ZIPL_MAGIC              "zIPL"
+#define IPL1_MAGIC "\xc9\xd7\xd3\xf1" /* == "IPL1" in EBCDIC */
+#define IPL2_MAGIC "\xc9\xd7\xd3\xf2" /* == "IPL2" in EBCDIC */
+#define VOL1_MAGIC "\xe5\xd6\xd3\xf1" /* == "VOL1" in EBCDIC */
+#define LNX1_MAGIC "\xd3\xd5\xe7\xf1" /* == "LNX1" in EBCDIC */
+#define CMS1_MAGIC "\xc3\xd4\xe2\xf1" /* == "CMS1" in EBCDIC */
+
+#define LDL1_VERSION '\x40' /* == ' ' in EBCDIC */
+#define LDL2_VERSION '\xf2' /* == '2' in EBCDIC */
+
+#define ZIPL_COMP_HEADER_IPL    0x00
+#define ZIPL_COMP_HEADER_DUMP   0x01
+
+#define ZIPL_COMP_ENTRY_LOAD    0x02
+#define ZIPL_COMP_ENTRY_EXEC    0x01
+
+typedef struct XEckdMbr {
+    uint8_t magic[4];   /* == "xIPL"        */
+    uint8_t version;
+    uint8_t bp_type;
+    uint8_t dev_type;   /* == DEV_TYPE_*    */
+#define DEV_TYPE_ECKD 0x00
+#define DEV_TYPE_FBA  0x01
+    uint8_t flags;
+    BootMapPointer blockptr;
+    uint8_t reserved[8];
+} __attribute__ ((packed)) XEckdMbr; /* see also BootInfo */
+
+typedef struct BootMapScriptEntry {
+    BootMapPointer blkptr;
+    uint8_t pad[7];
+    uint8_t type;   /* == BOOT_SCRIPT_* */
+#define BOOT_SCRIPT_EXEC 0x01
+#define BOOT_SCRIPT_LOAD 0x02
+    union {
+        uint64_t load_address;
+        uint64_t load_psw;
+    } address;
+} __attribute__ ((packed)) BootMapScriptEntry;
+
+typedef struct BootMapScriptHeader {
+    uint32_t magic;
+    uint8_t type;
+#define BOOT_SCRIPT_HDR_IPL 0x00
+    uint8_t reserved[27];
+} __attribute__ ((packed)) BootMapScriptHeader;
+
+typedef struct BootMapScript {
+    BootMapScriptHeader header;
+    BootMapScriptEntry  entry[0];
+} __attribute__ ((packed)) BootMapScript;
+
+/*
+ * These aren't real VTOCs, but referred to this way in some docs.
+ * They are "volume labels" actually.
+ *
+ * Some structures looks similar to described above, but left
+ * separate as there is no indication that they are the same.
+ * So, the value definitions are left separate too.
+ */
+typedef struct LDL_VTOC {       /* @ rec.3 cyl.0 trk.0 for ECKD */
+    char magic[4];              /* "LNX1", EBCDIC               */
+    char volser[6];             /* volser, EBCDIC               */
+    uint8_t reserved[69];       /* reserved, 0x40               */
+    uint8_t LDL_version;        /* 0x40 or 0xF2                 */
+    uint64_t formatted_blocks;  /* if LDL_version >= 0xF2       */
+} __attribute__ ((packed)) LDL_VTOC;
+
+typedef struct format_date {
+    uint8_t YY;
+    uint8_t MM;
+    uint8_t DD;
+    uint8_t hh;
+    uint8_t mm;
+    uint8_t ss;
+} __attribute__ ((packed)) format_date_t;
+
+typedef struct CMS_VTOC {       /* @ rec.3 cyl.0 trk.0 for ECKD */
+                                /* @ blk.1 (zero based) for FBA */
+    char magic[4];              /* 'CMS1', EBCDIC               */
+    char volser[6];             /* volser, EBCDIC               */
+    uint16_t version;           /* = 0                          */
+    uint32_t block_size;        /* = 512, 1024, 2048, or 4096   */
+    uint32_t disk_origin;       /* = 4 or 5                     */
+    uint32_t blocks;            /* Number of usable cyls/blocks */
+    uint32_t formatted;         /* Max number of fmtd cyls/blks */
+    uint32_t CMS_blocks;        /* disk size in CMS blocks      */
+    uint32_t CMS_used;          /* Number of CMS blocks in use  */
+    uint32_t FST_size;          /* = 64, bytes                  */
+    uint32_t FST_per_CMS_blk;   /*                              */
+    format_date_t format_date;  /* YYMMDDhhmmss as 6 bytes      */
+    uint8_t reserved1[2];       /* = 0                          */
+    uint32_t offset;            /* disk offset when reserved    */
+    uint32_t next_hole;         /* block nr                     */
+    uint32_t HBLK_hole_offset;  /* >> HBLK data of next hole    */
+    uint32_t alloc_map_usr_off; /* >> user part of Alloc map    */
+    uint8_t reserved2[4];       /* = 0                          */
+    char shared_seg_name[8];    /*                              */
+} __attribute__ ((packed)) CMS_VTOC;
+
+/* from zipl/include/boot.h */
+typedef struct BootInfoBpIpl {
+    union {
+        ExtEckdBlockPtr eckd;
+        ScsiBlockPtr linr;
+    } bm_ptr;
+    uint8_t unused[16];
+} __attribute__ ((packed)) BootInfoBpIpl;
+
+typedef struct EckdDumpParam {
+    uint32_t        start_blk;
+    uint32_t        end_blk;
+    uint16_t        blocksize;
+    uint8_t         num_heads;
+    uint8_t         bpt;
+    char            reserved[4];
+} __attribute((packed, may_alias)) EckdDumpParam;
+
+typedef struct FbaDumpParam {
+    uint64_t        start_blk;
+    uint64_t        blockct;
+} __attribute((packed)) FbaDumpParam;
+
+typedef struct BootInfoBpDump {
+    union {
+        EckdDumpParam eckd;
+        FbaDumpParam fba;
+    } param;
+    uint8_t         unused[16];
+} __attribute__ ((packed)) BootInfoBpDump;
+
+typedef struct BootInfo {          /* @ 0x70, record #0    */
+    unsigned char magic[4]; /* = 'zIPL', ASCII      */
+    uint8_t version;        /* = 1                  */
+#define BOOT_INFO_VERSION               1
+    uint8_t bp_type;        /* = 0                  */
+#define BOOT_INFO_BP_TYPE_IPL           0x00
+#define BOOT_INFO_BP_TYPE_DUMP          0x01
+    uint8_t dev_type;       /* = 0                  */
+#define BOOT_INFO_DEV_TYPE_ECKD         0x00
+#define BOOT_INFO_DEV_TYPE_FBA          0x01
+    uint8_t flags;          /* = 1                  */
+#ifdef __s390x__
+#define BOOT_INFO_FLAGS_ARCH            0x01
+#else
+#define BOOT_INFO_FLAGS_ARCH            0x00
+#endif
+    union {
+        BootInfoBpDump dump;
+        BootInfoBpIpl ipl;
+    } bp;
+} __attribute__ ((packed)) BootInfo; /* see also XEckdMbr   */
+
+typedef struct Ipl1 {
+    unsigned char key[4]; /* == "IPL1" */
+    unsigned char data[24];
+} __attribute__((packed)) Ipl1;
+
+typedef struct Ipl2 {
+    unsigned char key[4]; /* == "IPL2" */
+    union {
+        unsigned char data[144];
+        struct {
+            unsigned char reserved1[92-4];
+            XEckdMbr mbr;
+            unsigned char reserved2[144-(92-4)-sizeof(XEckdMbr)];
+        } x;
+    } u;
+} __attribute__((packed)) Ipl2;
+
+typedef struct IplVolumeLabel {
+    unsigned char key[4]; /* == "VOL1" */
+    union {
+        unsigned char data[80];
+        struct {
+            unsigned char key[4]; /* == "VOL1" */
+            unsigned char volser[6];
+            unsigned char reserved[6];
+        } f;
+    };
+} __attribute__((packed)) IplVolumeLabel;
+
+typedef enum {
+    ECKD_NO_IPL,
+    ECKD_CDL,
+    ECKD_CMS,
+    ECKD_LDL,
+} ECKD_IPL_mode_t;
+
+/* utility code below */
+
+static inline void IPL_assert(bool term, const char *message)
+{
+    if (!term) {
+        sclp_print("\n! ");
+        sclp_print(message);
+        virtio_panic(" !\n"); /* no return */
+    }
+}
+
+static const unsigned char ebc2asc[256] =
+      /* 0123456789abcdef0123456789abcdef */
+        "................................" /* 1F */
+        "................................" /* 3F */
+        " ...........<(+|&.........!$*);." /* 5F first.chr.here.is.real.space */
+        "-/.........,%_>?.........`:#@'=\""/* 7F */
+        ".abcdefghi.......jklmnopqr......" /* 9F */
+        "..stuvwxyz......................" /* BF */
+        ".ABCDEFGHI.......JKLMNOPQR......" /* DF */
+        "..STUVWXYZ......0123456789......";/* FF */
+
+static inline void ebcdic_to_ascii(const char *src,
+                                   char *dst,
+                                   unsigned int size)
+{
+    unsigned int i;
+    for (i = 0; i < size; i++) {
+        unsigned c = src[i];
+        dst[i] = ebc2asc[c];
+    }
+}
+
+static inline void print_volser(const void *volser)
+{
+    char ascii[8];
+
+    ebcdic_to_ascii((char *)volser, ascii, 6);
+    ascii[6] = '\0';
+    sclp_print("VOLSER=[");
+    sclp_print(ascii);
+    sclp_print("]\n");
+}
+
+static inline bool unused_space(const void *p, size_t size)
+{
+    size_t i;
+    const unsigned char *m = p;
+
+    for (i = 0; i < size; i++) {
+        if (m[i] != FREE_SPACE_FILLER) {
+            return false;
+        }
+    }
+    return true;
+}
+
+static inline bool is_null_block_number(block_number_t x)
+{
+    return x == NULL_BLOCK_NR;
+}
+
+static inline void read_block(block_number_t blockno,
+                              void *buffer,
+                              const char *errmsg)
+{
+    IPL_assert(virtio_read(blockno, buffer) == 0, errmsg);
+}
+
+static inline bool block_size_ok(uint32_t block_size)
+{
+    return block_size == virtio_get_block_size();
+}
+
+static inline bool magic_match(const void *data, const void *magic)
+{
+    return *((uint32_t *)data) == *((uint32_t *)magic);
+}
+
+#endif /* _PC_BIOS_S390_CCW_BOOTMAP_H */
diff --git a/pc-bios/s390-ccw/main.c b/pc-bios/s390-ccw/main.c
index 5c33766533..dbfb40e685 100644
--- a/pc-bios/s390-ccw/main.c
+++ b/pc-bios/s390-ccw/main.c
@@ -9,6 +9,7 @@
  */
 
 #include "s390-ccw.h"
+#include "virtio.h"
 
 char stack[PAGE_SIZE * 8] __attribute__((__aligned__(PAGE_SIZE)));
 uint64_t boot_value;
@@ -64,6 +65,10 @@ static void virtio_setup(uint64_t dev_info)
     }
 
     virtio_setup_block(blk_schid);
+
+    if (!virtio_ipl_disk_is_valid()) {
+        virtio_panic("No valid hard disk detected.\n");
+    }
 }
 
 int main(void)
@@ -72,8 +77,8 @@ int main(void)
     debug_print_int("boot reg[7] ", boot_value);
     virtio_setup(boot_value);
 
-    if (zipl_load() < 0)
-        sclp_print("Failed to load OS from hard disk\n");
-    disabled_wait();
-    while (1) { }
+    zipl_load(); /* no return */
+
+    virtio_panic("Failed to load OS from hard disk\n");
+    return 0; /* make compiler happy */
 }
diff --git a/pc-bios/s390-ccw/s390-ccw.h b/pc-bios/s390-ccw/s390-ccw.h
index 5e871ac84c..959aed0d0b 100644
--- a/pc-bios/s390-ccw/s390-ccw.h
+++ b/pc-bios/s390-ccw/s390-ccw.h
@@ -34,10 +34,10 @@ typedef unsigned long long __u64;
 #define PAGE_SIZE 4096
 
 #ifndef EIO
-#define EIO	1
+#define EIO     1
 #endif
 #ifndef EBUSY
-#define EBUSY	2
+#define EBUSY   2
 #endif
 #ifndef NULL
 #define NULL    0
@@ -57,14 +57,14 @@ void sclp_setup(void);
 
 /* virtio.c */
 unsigned long virtio_load_direct(ulong rec_list1, ulong rec_list2,
-				 ulong subchan_id, void *load_addr);
+                                 ulong subchan_id, void *load_addr);
 bool virtio_is_blk(struct subchannel_id schid);
 void virtio_setup_block(struct subchannel_id schid);
 int virtio_read(ulong sector, void *load_addr);
 int enable_mss_facility(void);
 
 /* bootmap.c */
-int zipl_load(void);
+void zipl_load(void);
 
 static inline void *memset(void *s, int c, size_t n)
 {
@@ -86,15 +86,21 @@ static inline void fill_hex(char *out, unsigned char val)
     out[1] = hex[val & 0xf];
 }
 
-static inline void print_int(const char *desc, u64 addr)
+static inline void fill_hex_val(char *out, void *ptr, unsigned size)
 {
-    unsigned char *addr_c = (unsigned char*)&addr;
-    char out[] = ": 0xffffffffffffffff\n";
+    unsigned char *value = ptr;
     unsigned int i;
 
-    for (i = 0; i < sizeof(addr); i++) {
-        fill_hex(&out[4 + (i*2)], addr_c[i]);
+    for (i = 0; i < size; i++) {
+        fill_hex(&out[i*2], value[i]);
     }
+}
+
+static inline void print_int(const char *desc, u64 addr)
+{
+    char out[] = ": 0xffffffffffffffff\n";
+
+    fill_hex_val(&out[4], &addr, sizeof(addr));
 
     sclp_print(desc);
     sclp_print(out);
@@ -118,18 +124,18 @@ static inline void debug_print_addr(const char *desc, void *p)
  *           Hypercall functions               *
  ***********************************************/
 
-#define KVM_S390_VIRTIO_NOTIFY		0
-#define KVM_S390_VIRTIO_RESET		1
-#define KVM_S390_VIRTIO_SET_STATUS	2
+#define KVM_S390_VIRTIO_NOTIFY          0
+#define KVM_S390_VIRTIO_RESET           1
+#define KVM_S390_VIRTIO_SET_STATUS      2
 #define KVM_S390_VIRTIO_CCW_NOTIFY      3
 
 static inline void yield(void)
 {
-	asm volatile ("diag 0,0,0x44"
-                      : :
-		      : "memory", "cc");
+    asm volatile ("diag 0,0,0x44"
+                  : :
+                  : "memory", "cc");
 }
 
-#define SECTOR_SIZE 512
+#define MAX_SECTOR_SIZE 4096
 
 #endif /* S390_CCW_H */
diff --git a/pc-bios/s390-ccw/sclp-ascii.c b/pc-bios/s390-ccw/sclp-ascii.c
index 1c93937104..761fb44ff5 100644
--- a/pc-bios/s390-ccw/sclp-ascii.c
+++ b/pc-bios/s390-ccw/sclp-ascii.c
@@ -33,7 +33,7 @@ static int sclp_service_call(unsigned int command, void *sccb)
 
 static void sclp_set_write_mask(void)
 {
-    WriteEventMask *sccb = (void*)_sccb;
+    WriteEventMask *sccb = (void *)_sccb;
 
     sccb->h.length = sizeof(WriteEventMask);
     sccb->mask_length = sizeof(unsigned int);
@@ -68,7 +68,7 @@ static void _memcpy(char *dest, const char *src, int len)
 void sclp_print(const char *str)
 {
     int len = _strlen(str);
-    WriteEventData *sccb = (void*)_sccb;
+    WriteEventData *sccb = (void *)_sccb;
 
     sccb->h.length = sizeof(WriteEventData) + len;
     sccb->h.function_code = SCLP_FC_NORMAL_WRITE;
diff --git a/pc-bios/s390-ccw/virtio.c b/pc-bios/s390-ccw/virtio.c
index bbb3c4f36d..31b23b086c 100644
--- a/pc-bios/s390-ccw/virtio.c
+++ b/pc-bios/s390-ccw/virtio.c
@@ -18,22 +18,22 @@ static char chsc_page[PAGE_SIZE] __attribute__((__aligned__(PAGE_SIZE)));
 static long kvm_hypercall(unsigned long nr, unsigned long param1,
                           unsigned long param2)
 {
-	register ulong r_nr asm("1") = nr;
-	register ulong r_param1 asm("2") = param1;
-	register ulong r_param2 asm("3") = param2;
-	register long retval asm("2");
+    register ulong r_nr asm("1") = nr;
+    register ulong r_param1 asm("2") = param1;
+    register ulong r_param2 asm("3") = param2;
+    register long retval asm("2");
 
-	asm volatile ("diag 2,4,0x500"
-		      : "=d" (retval)
-		      : "d" (r_nr), "0" (r_param1), "r"(r_param2)
-		      : "memory", "cc");
+    asm volatile ("diag 2,4,0x500"
+                  : "=d" (retval)
+                  : "d" (r_nr), "0" (r_param1), "r"(r_param2)
+                  : "memory", "cc");
 
-	return retval;
+    return retval;
 }
 
 static void virtio_notify(struct subchannel_id schid)
 {
-    kvm_hypercall(KVM_S390_VIRTIO_CCW_NOTIFY, *(u32*)&schid, 0);
+    kvm_hypercall(KVM_S390_VIRTIO_CCW_NOTIFY, *(u32 *)&schid, 0);
 }
 
 /***********************************************
@@ -202,7 +202,7 @@ static int vring_wait_reply(struct vring *vr, int timeout)
  *               Virtio block                  *
  ***********************************************/
 
-static int virtio_read_many(ulong sector, void *load_addr, int sec_num)
+int virtio_read_many(ulong sector, void *load_addr, int sec_num)
 {
     struct virtio_blk_outhdr out_hdr;
     u8 status;
@@ -211,12 +211,12 @@ static int virtio_read_many(ulong sector, void *load_addr, int sec_num)
     /* Tell the host we want to read */
     out_hdr.type = VIRTIO_BLK_T_IN;
     out_hdr.ioprio = 99;
-    out_hdr.sector = sector;
+    out_hdr.sector = virtio_sector_adjust(sector);
 
     vring_send_buf(&block, &out_hdr, sizeof(out_hdr), VRING_DESC_F_NEXT);
 
     /* This is where we want to receive data */
-    vring_send_buf(&block, load_addr, SECTOR_SIZE * sec_num,
+    vring_send_buf(&block, load_addr, virtio_get_block_size() * sec_num,
                    VRING_DESC_F_WRITE | VRING_HIDDEN_IS_CHAIN |
                    VRING_DESC_F_NEXT);
 
@@ -236,7 +236,7 @@ static int virtio_read_many(ulong sector, void *load_addr, int sec_num)
 }
 
 unsigned long virtio_load_direct(ulong rec_list1, ulong rec_list2,
-				 ulong subchan_id, void *load_addr)
+                                 ulong subchan_id, void *load_addr)
 {
     u8 status;
     int sec = rec_list1;
@@ -244,16 +244,16 @@ unsigned long virtio_load_direct(ulong rec_list1, ulong rec_list2,
     int sec_len = rec_list2 >> 48;
     ulong addr = (ulong)load_addr;
 
-    if (sec_len != SECTOR_SIZE) {
+    if (sec_len != virtio_get_block_size()) {
         return -1;
     }
 
     sclp_print(".");
-    status = virtio_read_many(sec, (void*)addr, sec_num);
+    status = virtio_read_many(sec, (void *)addr, sec_num);
     if (status) {
         virtio_panic("I/O Error");
     }
-    addr += sec_num * SECTOR_SIZE;
+    addr += sec_num * virtio_get_block_size();
 
     return addr;
 }
@@ -263,18 +263,98 @@ int virtio_read(ulong sector, void *load_addr)
     return virtio_read_many(sector, load_addr, 1);
 }
 
+static VirtioBlkConfig blk_cfg = {};
+static bool guessed_disk_nature;
+
+bool virtio_guessed_disk_nature(void)
+{
+    return guessed_disk_nature;
+}
+
+void virtio_assume_scsi(void)
+{
+    guessed_disk_nature = true;
+    blk_cfg.blk_size = 512;
+}
+
+void virtio_assume_eckd(void)
+{
+    guessed_disk_nature = true;
+    blk_cfg.blk_size = 4096;
+
+    /* this must be here to calculate code segment position */
+    blk_cfg.geometry.heads = 15;
+    blk_cfg.geometry.sectors = 12;
+}
+
+bool virtio_disk_is_scsi(void)
+{
+    if (guessed_disk_nature) {
+        return (blk_cfg.blk_size  == 512);
+    }
+    return (blk_cfg.geometry.heads == 255)
+        && (blk_cfg.geometry.sectors == 63)
+        && (blk_cfg.blk_size  == 512);
+}
+
+bool virtio_disk_is_eckd(void)
+{
+    if (guessed_disk_nature) {
+        return (blk_cfg.blk_size  == 4096);
+    }
+    return (blk_cfg.geometry.heads == 15)
+        && (blk_cfg.geometry.sectors == 12)
+        && (blk_cfg.blk_size  == 4096);
+}
+
+bool virtio_ipl_disk_is_valid(void)
+{
+    return blk_cfg.blk_size && (virtio_disk_is_scsi() || virtio_disk_is_eckd());
+}
+
+int virtio_get_block_size(void)
+{
+    return blk_cfg.blk_size;
+}
+
+uint16_t virtio_get_cylinders(void)
+{
+    return blk_cfg.geometry.cylinders;
+}
+
+uint8_t virtio_get_heads(void)
+{
+    return blk_cfg.geometry.heads;
+}
+
+uint8_t virtio_get_sectors(void)
+{
+    return blk_cfg.geometry.sectors;
+}
+
 void virtio_setup_block(struct subchannel_id schid)
 {
     struct vq_info_block info;
     struct vq_config_block config = {};
 
+    blk_cfg.blk_size = 0; /* mark "illegal" - setup started... */
+
     virtio_reset(schid);
 
+    /*
+     * Skipping CCW_CMD_READ_FEAT. We're not doing anything fancy, and
+     * we'll just stop dead anyway if anything does not work like we
+     * expect it.
+     */
+
     config.index = 0;
     if (run_ccw(schid, CCW_CMD_READ_VQ_CONF, &config, sizeof(config))) {
+        virtio_panic("Could not get block device VQ configuration\n");
+    }
+    if (run_ccw(schid, CCW_CMD_READ_CONF, &blk_cfg, sizeof(blk_cfg))) {
         virtio_panic("Could not get block device configuration\n");
     }
-    vring_init(&block, config.num, (void*)(100 * 1024 * 1024),
+    vring_init(&block, config.num, (void *)(100 * 1024 * 1024),
                KVM_S390_VIRTIO_RING_ALIGN);
 
     info.queue = (100ULL * 1024ULL* 1024ULL);
@@ -286,6 +366,12 @@ void virtio_setup_block(struct subchannel_id schid)
     if (!run_ccw(schid, CCW_CMD_SET_VQ, &info, sizeof(info))) {
         virtio_set_status(schid, VIRTIO_CONFIG_S_DRIVER_OK);
     }
+
+    if (!virtio_ipl_disk_is_valid()) {
+        /* make sure all getters but blocksize return 0 for invalid IPL disk */
+        memset(&blk_cfg, 0, sizeof(blk_cfg));
+        virtio_assume_scsi();
+    }
 }
 
 bool virtio_is_blk(struct subchannel_id schid)
diff --git a/pc-bios/s390-ccw/virtio.h b/pc-bios/s390-ccw/virtio.h
index 772a63f152..f1fb1b08fa 100644
--- a/pc-bios/s390-ccw/virtio.h
+++ b/pc-bios/s390-ccw/virtio.h
@@ -66,7 +66,7 @@ struct virtio_dev {
     char *config;
 };
 
-#define KVM_S390_VIRTIO_RING_ALIGN	4096
+#define KVM_S390_VIRTIO_RING_ALIGN  4096
 
 #define VRING_USED_F_NO_NOTIFY  1
 
@@ -161,4 +161,52 @@ struct virtio_blk_outhdr {
         u64 sector;
 };
 
+typedef struct VirtioBlkConfig {
+    u64 capacity; /* in 512-byte sectors */
+    u32 size_max; /* max segment size (if VIRTIO_BLK_F_SIZE_MAX) */
+    u32 seg_max;  /* max number of segments (if VIRTIO_BLK_F_SEG_MAX) */
+
+    struct virtio_blk_geometry {
+        u16 cylinders;
+        u8 heads;
+        u8 sectors;
+    } geometry; /* (if VIRTIO_BLK_F_GEOMETRY) */
+
+    u32 blk_size; /* block size of device (if VIRTIO_BLK_F_BLK_SIZE) */
+
+    /* the next 4 entries are guarded by VIRTIO_BLK_F_TOPOLOGY  */
+    u8 physical_block_exp; /* exponent for physical block per logical block */
+    u8 alignment_offset;   /* alignment offset in logical blocks */
+    u16 min_io_size;       /* min I/O size without performance penalty
+                              in logical blocks */
+    u32 opt_io_size;       /* optimal sustained I/O size in logical blocks */
+
+    u8 wce; /* writeback mode (if VIRTIO_BLK_F_CONFIG_WCE) */
+} __attribute__((packed)) VirtioBlkConfig;
+
+bool virtio_guessed_disk_nature(void);
+void virtio_assume_scsi(void);
+void virtio_assume_eckd(void);
+
+extern bool virtio_disk_is_scsi(void);
+extern bool virtio_disk_is_eckd(void);
+extern bool virtio_ipl_disk_is_valid(void);
+extern int virtio_get_block_size(void);
+extern uint16_t virtio_get_cylinders(void);
+extern uint8_t virtio_get_heads(void);
+extern uint8_t virtio_get_sectors(void);
+extern int virtio_read_many(ulong sector, void *load_addr, int sec_num);
+
+#define VIRTIO_SECTOR_SIZE 512
+
+static inline ulong virtio_eckd_sector_adjust(ulong sector)
+{
+     return sector * (virtio_get_block_size() / VIRTIO_SECTOR_SIZE);
+}
+
+static inline ulong virtio_sector_adjust(ulong sector)
+{
+    return virtio_disk_is_eckd() ? virtio_eckd_sector_adjust(sector) : sector;
+}
+
 #endif /* VIRTIO_H */
diff --git a/qapi-schema.json b/qapi-schema.json
index 2284f0f12a..a83befc05b 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -2050,6 +2050,62 @@
     '*udp':       'str' } }
 
 ##
+# @NetdevL2TPv3Options
+#
+# Connect the VLAN to Ethernet over L2TPv3 Static tunnel
+#
+# @src: source address
+#
+# @dst: destination address
+#
+# @srcport: #optional source port - mandatory for udp, optional for ip
+#
+# @dstport: #optional destination port - mandatory for udp, optional for ip
+#
+# @ipv6: #optional - force the use of ipv6
+#
+# @udp: #optional - use the udp version of l2tpv3 encapsulation
+#
+# @cookie64: #optional - use 64 bit coookies
+#
+# @counter: #optional have sequence counter
+#
+# @pincounter: #optional pin sequence counter to zero -
+#              workaround for buggy implementations or
+#              networks with packet reorder
+#
+# @txcookie: #optional 32 or 64 bit transmit cookie
+#
+# @rxcookie: #optional 32 or 64 bit receive cookie
+#
+# @txsession: 32 bit transmit session
+#
+# @rxsession: #optional 32 bit receive session - if not specified
+#             set to the same value as transmit
+#
+# @offset: #optional additional offset - allows the insertion of
+#          additional application-specific data before the packet payload
+#
+# Since 2.1
+##
+{ 'type': 'NetdevL2TPv3Options',
+  'data': {
+    'src':          'str',
+    'dst':          'str',
+    '*srcport':     'str',
+    '*dstport':     'str',
+    '*ipv6':        'bool',
+    '*udp':         'bool',
+    '*cookie64':    'bool',
+    '*counter':     'bool',
+    '*pincounter':  'bool',
+    '*txcookie':    'uint64',
+    '*rxcookie':    'uint64',
+    'txsession':    'uint32',
+    '*rxsession':   'uint32',
+    '*offset':      'uint32' } }
+
+##
 # @NetdevVdeOptions
 #
 # Connect the VLAN to a vde switch running on the host.
@@ -2160,6 +2216,9 @@
 # A discriminated record of network device traits.
 #
 # Since 1.2
+#
+# 'l2tpv3' - since 2.1
+#
 ##
 { 'union': 'NetClientOptions',
   'data': {
@@ -2167,6 +2226,7 @@
     'nic':      'NetLegacyNicOptions',
     'user':     'NetdevUserOptions',
     'tap':      'NetdevTapOptions',
+    'l2tpv3':   'NetdevL2TPv3Options',
     'socket':   'NetdevSocketOptions',
     'vde':      'NetdevVdeOptions',
     'dump':     'NetdevDumpOptions',
diff --git a/qemu-bridge-helper.c b/qemu-bridge-helper.c
index 6a0974eb48..36eb3bcfd6 100644
--- a/qemu-bridge-helper.c
+++ b/qemu-bridge-helper.c
@@ -229,7 +229,7 @@ int main(int argc, char **argv)
     unsigned long ifargs[4];
 #endif
     int ifindex;
-    int fd, ctlfd, unixfd = -1;
+    int fd = -1, ctlfd = -1, unixfd = -1;
     int use_vnet = 0;
     int mtu;
     const char *bridge = NULL;
@@ -436,7 +436,12 @@ int main(int argc, char **argv)
     /* profit! */
 
 cleanup:
-
+    if (fd >= 0) {
+        close(fd);
+    }
+    if (ctlfd >= 0) {
+        close(ctlfd);
+    }
     while ((acl_rule = QSIMPLEQ_FIRST(&acl_list)) != NULL) {
         QSIMPLEQ_REMOVE_HEAD(&acl_list, entry);
         g_free(acl_rule);
diff --git a/qemu-options.hx b/qemu-options.hx
index ff76ad4830..9e5468678b 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -1433,6 +1433,29 @@ DEF("net", HAS_ARG, QEMU_OPTION_net,
     "                (default=" DEFAULT_BRIDGE_INTERFACE ") using the program 'helper'\n"
     "                (default=" DEFAULT_BRIDGE_HELPER ")\n"
 #endif
+#ifdef __linux__
+    "-net l2tpv3[,vlan=n][,name=str],src=srcaddr,dst=dstaddr[,srcport=srcport][,dstport=dstport],txsession=txsession[,rxsession=rxsession][,ipv6=on/off][,udp=on/off][,cookie64=on/off][,counter][,pincounter][,txcookie=txcookie][,rxcookie=rxcookie][,offset=offset]\n"
+    "                connect the VLAN to an Ethernet over L2TPv3 pseudowire\n"
+    "                Linux kernel 3.3+ as well as most routers can talk\n"
+    "                L2TPv3. This transport allows to connect a VM to a VM,\n"
+    "                VM to a router and even VM to Host. It is a nearly-universal\n"
+    "                standard (RFC3391). Note - this implementation uses static\n"
+    "                pre-configured tunnels (same as the Linux kernel).\n"
+    "                use 'src=' to specify source address\n"
+    "                use 'dst=' to specify destination address\n"
+    "                use 'udp=on' to specify udp encapsulation\n"
+    "                use 'dstport=' to specify destination udp port\n"
+    "                use 'dstport=' to specify destination udp port\n"
+    "                use 'ipv6=on' to force v6\n"
+    "                L2TPv3 uses cookies to prevent misconfiguration as\n"
+    "                well as a weak security measure\n"
+    "                use 'rxcookie=0x012345678' to specify a rxcookie\n"
+    "                use 'txcookie=0x012345678' to specify a txcookie\n"
+    "                use 'cookie64=on' to set cookie size to 64 bit, otherwise 32\n"
+    "                use 'counter=off' to force a 'cut-down' L2TPv3 with no counter\n"
+    "                use 'pincounter=on' to work around broken counter handling in peer\n"
+    "                use 'offset=X' to add an extra offset between header and data\n"
+#endif
     "-net socket[,vlan=n][,name=str][,fd=h][,listen=[host]:port][,connect=host:port]\n"
     "                connect the vlan 'n' to another VLAN using a socket connection\n"
     "-net socket[,vlan=n][,name=str][,fd=h][,mcast=maddr:port[,localaddr=addr]]\n"
@@ -1778,6 +1801,65 @@ qemu-system-i386 linux.img \
                  -net socket,mcast=239.192.168.1:1102,localaddr=1.2.3.4
 @end example
 
+@item -netdev l2tpv3,id=@var{id},src=@var{srcaddr},dst=@var{dstaddr}[,srcport=@var{srcport}][,dstport=@var{dstport}],txsession=@var{txsession}[,rxsession=@var{rxsession}][,ipv6][,udp][,cookie64][,counter][,pincounter][,txcookie=@var{txcookie}][,rxcookie=@var{rxcookie}][,offset=@var{offset}]
+@item -net l2tpv3[,vlan=@var{n}][,name=@var{name}],src=@var{srcaddr},dst=@var{dstaddr}[,srcport=@var{srcport}][,dstport=@var{dstport}],txsession=@var{txsession}[,rxsession=@var{rxsession}][,ipv6][,udp][,cookie64][,counter][,pincounter][,txcookie=@var{txcookie}][,rxcookie=@var{rxcookie}][,offset=@var{offset}]
+Connect VLAN @var{n} to L2TPv3 pseudowire. L2TPv3 (RFC3391) is a popular
+protocol to transport Ethernet (and other Layer 2) data frames between
+two systems. It is present in routers, firewalls and the Linux kernel
+(from version 3.3 onwards).
+
+This transport allows a VM to communicate to another VM, router or firewall directly.
+
+@item src=@var{srcaddr}
+    source address (mandatory)
+@item dst=@var{dstaddr}
+    destination address (mandatory)
+@item udp
+    select udp encapsulation (default is ip).
+@item srcport=@var{srcport}
+    source udp port.
+@item dstport=@var{dstport}
+    destination udp port.
+@item ipv6
+    force v6, otherwise defaults to v4.
+@item rxcookie=@var{rxcookie}
+@item txcookie=@var{txcookie}
+    Cookies are a weak form of security in the l2tpv3 specification.
+Their function is mostly to prevent misconfiguration. By default they are 32
+bit.
+@item cookie64
+    Set cookie size to 64 bit instead of the default 32
+@item counter=off
+    Force a 'cut-down' L2TPv3 with no counter as in
+draft-mkonstan-l2tpext-keyed-ipv6-tunnel-00
+@item pincounter=on
+    Work around broken counter handling in peer. This may also help on
+networks which have packet reorder.
+@item offset=@var{offset}
+    Add an extra offset between header and data
+
+For example, to attach a VM running on host 4.3.2.1 via L2TPv3 to the bridge br-lan
+on the remote Linux host 1.2.3.4:
+@example
+# Setup tunnel on linux host using raw ip as encapsulation
+# on 1.2.3.4
+ip l2tp add tunnel remote 4.3.2.1 local 1.2.3.4 tunnel_id 1 peer_tunnel_id 1 \
+    encap udp udp_sport 16384 udp_dport 16384
+ip l2tp add session tunnel_id 1 name vmtunnel0 session_id \
+    0xFFFFFFFF peer_session_id 0xFFFFFFFF
+ifconfig vmtunnel0 mtu 1500
+ifconfig vmtunnel0 up
+brctl addif br-lan vmtunnel0
+
+
+# on 4.3.2.1
+# launch QEMU instance - if your network has reorder or is very lossy add ,pincounter
+
+qemu-system-i386 linux.img -net nic -net l2tpv3,src=4.2.3.1,dst=1.2.3.4,udp,srcport=16384,dstport=16384,rxsession=0xffffffff,txsession=0xffffffff,counter
+
+
+@end example
+
 @item -netdev vde,id=@var{id}[,sock=@var{socketpath}][,port=@var{n}][,group=@var{groupname}][,mode=@var{octalmode}]
 @item -net vde[,vlan=@var{n}][,name=@var{name}][,sock=@var{socketpath}] [,port=@var{n}][,group=@var{groupname}][,mode=@var{octalmode}]
 Connect VLAN @var{n} to PORT @var{n} of a vde switch running on host and
diff --git a/target-ppc/cpu-models.c b/target-ppc/cpu-models.c
index 97a81d8660..9a91af9dbf 100644
--- a/target-ppc/cpu-models.c
+++ b/target-ppc/cpu-models.c
@@ -1138,6 +1138,8 @@
                 "POWER7 v2.3")
     POWERPC_DEF("POWER7+_v2.1",  CPU_POWERPC_POWER7P_v21,            POWER7P,
                 "POWER7+ v2.1")
+    POWERPC_DEF("POWER8E_v1.0",  CPU_POWERPC_POWER8E_v10,            POWER8E,
+                "POWER8E v1.0")
     POWERPC_DEF("POWER8_v1.0",   CPU_POWERPC_POWER8_v10,             POWER8,
                 "POWER8 v1.0")
     POWERPC_DEF("970",           CPU_POWERPC_970,                    970,
@@ -1386,6 +1388,7 @@ PowerPCCPUAlias ppc_cpu_aliases[] = {
     { "POWER5gs", "POWER5+" },
     { "POWER7", "POWER7_v2.3" },
     { "POWER7+", "POWER7+_v2.1" },
+    { "POWER8E", "POWER8E_v1.0" },
     { "POWER8", "POWER8_v1.0" },
     { "970fx", "970fx_v3.1" },
     { "970mp", "970mp_v1.1" },
diff --git a/target-ppc/cpu-models.h b/target-ppc/cpu-models.h
index db75896012..c39d03a504 100644
--- a/target-ppc/cpu-models.h
+++ b/target-ppc/cpu-models.h
@@ -559,9 +559,12 @@ enum {
     CPU_POWERPC_POWER7P_BASE       = 0x004A0000,
     CPU_POWERPC_POWER7P_MASK       = 0xFFFF0000,
     CPU_POWERPC_POWER7P_v21        = 0x004A0201,
-    CPU_POWERPC_POWER8_BASE        = 0x004B0000,
+    CPU_POWERPC_POWER8E_BASE       = 0x004B0000,
+    CPU_POWERPC_POWER8E_MASK       = 0xFFFF0000,
+    CPU_POWERPC_POWER8E_v10        = 0x004B0100,
+    CPU_POWERPC_POWER8_BASE        = 0x004D0000,
     CPU_POWERPC_POWER8_MASK        = 0xFFFF0000,
-    CPU_POWERPC_POWER8_v10         = 0x004B0100,
+    CPU_POWERPC_POWER8_v10         = 0x004D0100,
     CPU_POWERPC_970                = 0x00390202,
     CPU_POWERPC_970FX_v10          = 0x00391100,
     CPU_POWERPC_970FX_v20          = 0x003C0200,
diff --git a/target-ppc/cpu-qom.h b/target-ppc/cpu-qom.h
index 13c7031265..f1f0a52998 100644
--- a/target-ppc/cpu-qom.h
+++ b/target-ppc/cpu-qom.h
@@ -119,7 +119,9 @@ void ppc_cpu_dump_statistics(CPUState *cpu, FILE *f,
                              fprintf_function cpu_fprintf, int flags);
 hwaddr ppc_cpu_get_phys_page_debug(CPUState *cpu, vaddr addr);
 int ppc_cpu_gdb_read_register(CPUState *cpu, uint8_t *buf, int reg);
+int ppc_cpu_gdb_read_register_apple(CPUState *cpu, uint8_t *buf, int reg);
 int ppc_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg);
+int ppc_cpu_gdb_write_register_apple(CPUState *cpu, uint8_t *buf, int reg);
 int ppc64_cpu_write_elf64_qemunote(WriteCoreDumpFunction f,
                                    CPUState *cpu, void *opaque);
 int ppc64_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs,
diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
index 74407ee209..08ae527cb6 100644
--- a/target-ppc/cpu.h
+++ b/target-ppc/cpu.h
@@ -2012,7 +2012,7 @@ enum {
                         PPC2_DIVE_ISA206 | PPC2_ATOMIC_ISA206 | \
                         PPC2_FP_CVT_ISA206 | PPC2_FP_TST_ISA206 | \
                         PPC2_BCTAR_ISA207 | PPC2_LSQ_ISA207 | \
-                        PPC2_ALTIVEC_207 | PPC2_ISA207S)
+                        PPC2_ALTIVEC_207 | PPC2_ISA207S | PPC2_DFP)
 };
 
 /*****************************************************************************/
diff --git a/target-ppc/gdbstub.c b/target-ppc/gdbstub.c
index 381a3c7e31..694d303e19 100644
--- a/target-ppc/gdbstub.c
+++ b/target-ppc/gdbstub.c
@@ -21,6 +21,31 @@
 #include "qemu-common.h"
 #include "exec/gdbstub.h"
 
+static int ppc_gdb_register_len_apple(int n)
+{
+    switch (n) {
+    case 0 ... 31:
+        /* gprs */
+        return 8;
+    case 32 ... 63:
+        /* fprs */
+        return 8;
+    case 64 ... 95:
+        return 16;
+    case 64+32: /* nip */
+    case 65+32: /* msr */
+    case 67+32: /* lr */
+    case 68+32: /* ctr */
+    case 69+32: /* xer */
+    case 70+32: /* fpscr */
+        return 8;
+    case 66+32: /* cr */
+        return 4;
+    default:
+        return 0;
+    }
+}
+
 static int ppc_gdb_register_len(int n)
 {
     switch (n) {
@@ -132,6 +157,65 @@ int ppc_cpu_gdb_read_register(CPUState *cs, uint8_t *mem_buf, int n)
     return r;
 }
 
+int ppc_cpu_gdb_read_register_apple(CPUState *cs, uint8_t *mem_buf, int n)
+{
+    PowerPCCPU *cpu = POWERPC_CPU(cs);
+    CPUPPCState *env = &cpu->env;
+    int r = ppc_gdb_register_len_apple(n);
+
+    if (!r) {
+        return r;
+    }
+
+    if (n < 32) {
+        /* gprs */
+        gdb_get_reg64(mem_buf, env->gpr[n]);
+    } else if (n < 64) {
+        /* fprs */
+        stfq_p(mem_buf, env->fpr[n-32]);
+    } else if (n < 96) {
+        /* Altivec */
+        stq_p(mem_buf, n - 64);
+        stq_p(mem_buf + 8, 0);
+    } else {
+        switch (n) {
+        case 64 + 32:
+            gdb_get_reg64(mem_buf, env->nip);
+            break;
+        case 65 + 32:
+            gdb_get_reg64(mem_buf, env->msr);
+            break;
+        case 66 + 32:
+            {
+                uint32_t cr = 0;
+                int i;
+                for (i = 0; i < 8; i++) {
+                    cr |= env->crf[i] << (32 - ((i + 1) * 4));
+                }
+                gdb_get_reg32(mem_buf, cr);
+                break;
+            }
+        case 67 + 32:
+            gdb_get_reg64(mem_buf, env->lr);
+            break;
+        case 68 + 32:
+            gdb_get_reg64(mem_buf, env->ctr);
+            break;
+        case 69 + 32:
+            gdb_get_reg64(mem_buf, env->xer);
+            break;
+        case 70 + 32:
+            gdb_get_reg64(mem_buf, env->fpscr);
+            break;
+        }
+    }
+    if (msr_le) {
+        /* If cpu is in LE mode, convert memory contents to LE. */
+        ppc_gdb_swap_register(mem_buf, n, r);
+    }
+    return r;
+}
+
 int ppc_cpu_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n)
 {
     PowerPCCPU *cpu = POWERPC_CPU(cs);
@@ -185,3 +269,56 @@ int ppc_cpu_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n)
     }
     return r;
 }
+int ppc_cpu_gdb_write_register_apple(CPUState *cs, uint8_t *mem_buf, int n)
+{
+    PowerPCCPU *cpu = POWERPC_CPU(cs);
+    CPUPPCState *env = &cpu->env;
+    int r = ppc_gdb_register_len_apple(n);
+
+    if (!r) {
+        return r;
+    }
+    if (msr_le) {
+        /* If cpu is in LE mode, convert memory contents to LE. */
+        ppc_gdb_swap_register(mem_buf, n, r);
+    }
+    if (n < 32) {
+        /* gprs */
+        env->gpr[n] = ldq_p(mem_buf);
+    } else if (n < 64) {
+        /* fprs */
+        env->fpr[n-32] = ldfq_p(mem_buf);
+    } else {
+        switch (n) {
+        case 64 + 32:
+            env->nip = ldq_p(mem_buf);
+            break;
+        case 65 + 32:
+            ppc_store_msr(env, ldq_p(mem_buf));
+            break;
+        case 66 + 32:
+            {
+                uint32_t cr = ldl_p(mem_buf);
+                int i;
+                for (i = 0; i < 8; i++) {
+                    env->crf[i] = (cr >> (32 - ((i + 1) * 4))) & 0xF;
+                }
+                break;
+            }
+        case 67 + 32:
+            env->lr = ldq_p(mem_buf);
+            break;
+        case 68 + 32:
+            env->ctr = ldq_p(mem_buf);
+            break;
+        case 69 + 32:
+            env->xer = ldq_p(mem_buf);
+            break;
+        case 70 + 32:
+            /* fpscr */
+            store_fpscr(env, ldq_p(mem_buf), 0xffffffff);
+            break;
+        }
+    }
+    return r;
+}
diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
index 561f8ccf2f..2d87108d8b 100644
--- a/target-ppc/kvm.c
+++ b/target-ppc/kvm.c
@@ -63,6 +63,7 @@ static int cap_ppc_smt;
 static int cap_ppc_rma;
 static int cap_spapr_tce;
 static int cap_spapr_multitce;
+static int cap_spapr_vfio;
 static int cap_hior;
 static int cap_one_reg;
 static int cap_epr;
@@ -101,6 +102,7 @@ int kvm_arch_init(KVMState *s)
     cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
     cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
     cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
+    cap_spapr_vfio = false;
     cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
     cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
     cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
@@ -1660,7 +1662,8 @@ bool kvmppc_spapr_use_multitce(void)
     return cap_spapr_multitce;
 }
 
-void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd)
+void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
+                              bool vfio_accel)
 {
     struct kvm_create_spapr_tce args = {
         .liobn = liobn,
@@ -1674,7 +1677,7 @@ void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd)
      * destroying the table, which the upper layers -will- do
      */
     *pfd = -1;
-    if (!cap_spapr_tce) {
+    if (!cap_spapr_tce || (vfio_accel && !cap_spapr_vfio)) {
         return NULL;
     }
 
diff --git a/target-ppc/kvm_ppc.h b/target-ppc/kvm_ppc.h
index 412cc7f3c1..1118122d89 100644
--- a/target-ppc/kvm_ppc.h
+++ b/target-ppc/kvm_ppc.h
@@ -33,7 +33,8 @@ int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu);
 #ifndef CONFIG_USER_ONLY
 off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem);
 bool kvmppc_spapr_use_multitce(void);
-void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd);
+void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
+                              bool vfio_accel);
 int kvmppc_remove_spapr_tce(void *table, int pfd, uint32_t window_size);
 int kvmppc_reset_htab(int shift_hint);
 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift);
@@ -144,7 +145,8 @@ static inline bool kvmppc_spapr_use_multitce(void)
 }
 
 static inline void *kvmppc_create_spapr_tce(uint32_t liobn,
-                                            uint32_t window_size, int *fd)
+                                            uint32_t window_size, int *fd,
+                                            bool vfio_accel)
 {
     return NULL;
 }
diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 48017219a4..b23933f7bd 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -426,7 +426,6 @@ static inline uint32_t SPR(uint32_t opcode)
     return ((sprn >> 5) & 0x1F) | ((sprn & 0x1F) << 5);
 }
 /***                              Get constants                            ***/
-EXTRACT_HELPER(IMM, 12, 8);
 /* 16 bits signed immediate value */
 EXTRACT_SHELPER(SIMM, 0, 16);
 /* 16 bits unsigned immediate value */
@@ -459,8 +458,6 @@ EXTRACT_HELPER(FPFLM, 17, 8);
 EXTRACT_HELPER(FPW, 16, 1);
 
 /***                            Jump target decoding                       ***/
-/* Displacement */
-EXTRACT_SHELPER(d, 0, 16);
 /* Immediate address */
 static inline target_ulong LI(uint32_t opcode)
 {
@@ -2665,11 +2662,6 @@ static inline void gen_qemu_ld8u(DisasContext *ctx, TCGv arg1, TCGv arg2)
     tcg_gen_qemu_ld8u(arg1, arg2, ctx->mem_idx);
 }
 
-static inline void gen_qemu_ld8s(DisasContext *ctx, TCGv arg1, TCGv arg2)
-{
-    tcg_gen_qemu_ld8s(arg1, arg2, ctx->mem_idx);
-}
-
 static inline void gen_qemu_ld16u(DisasContext *ctx, TCGv arg1, TCGv arg2)
 {
     TCGMemOp op = MO_UW | ctx->default_tcg_memop_mask;
@@ -4123,8 +4115,9 @@ static void gen_mcrxr(DisasContext *ctx)
     tcg_gen_trunc_tl_i32(t0, cpu_so);
     tcg_gen_trunc_tl_i32(t1, cpu_ov);
     tcg_gen_trunc_tl_i32(dst, cpu_ca);
-    tcg_gen_shri_i32(t0, t0, 2);
-    tcg_gen_shri_i32(t1, t1, 1);
+    tcg_gen_shli_i32(t0, t0, 3);
+    tcg_gen_shli_i32(t1, t1, 2);
+    tcg_gen_shli_i32(dst, dst, 1);
     tcg_gen_or_i32(dst, dst, t0);
     tcg_gen_or_i32(dst, dst, t1);
     tcg_temp_free_i32(t0);
diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
index 85581c9537..a3bb336e16 100644
--- a/target-ppc/translate_init.c
+++ b/target-ppc/translate_init.c
@@ -34,6 +34,7 @@
 //#define PPC_DUMP_CPU
 //#define PPC_DEBUG_SPR
 //#define PPC_DUMP_SPR_ACCESSES
+/* #define USE_APPLE_GDB */
 
 /* For user-mode emulation, we don't emulate any IRQ controller */
 #if defined(CONFIG_USER_ONLY)
@@ -8188,16 +8189,16 @@ static void init_proc_POWER8(CPUPPCState *env)
     init_proc_book3s_64(env, BOOK3S_CPU_POWER8);
 }
 
-POWERPC_FAMILY(POWER8)(ObjectClass *oc, void *data)
+POWERPC_FAMILY(POWER8E)(ObjectClass *oc, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(oc);
     PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
 
     dc->fw_name = "PowerPC,POWER8";
-    dc->desc = "POWER8";
+    dc->desc = "POWER8E";
     dc->props = powerpc_servercpu_properties;
-    pcc->pvr = CPU_POWERPC_POWER8_BASE;
-    pcc->pvr_mask = CPU_POWERPC_POWER8_MASK;
+    pcc->pvr = CPU_POWERPC_POWER8E_BASE;
+    pcc->pvr_mask = CPU_POWERPC_POWER8E_MASK;
     pcc->pcr_mask = PCR_COMPAT_2_05 | PCR_COMPAT_2_06;
     pcc->init_proc = init_proc_POWER8;
     pcc->check_pow = check_pow_nocheck;
@@ -8251,6 +8252,18 @@ POWERPC_FAMILY(POWER8)(ObjectClass *oc, void *data)
     pcc->l1_icache_size = 0x8000;
     pcc->interrupts_big_endian = ppc_cpu_interrupts_big_endian_lpcr;
 }
+
+POWERPC_FAMILY(POWER8)(ObjectClass *oc, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(oc);
+    PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
+
+    ppc_POWER8E_cpu_family_class_init(oc, data);
+
+    dc->desc = "POWER8";
+    pcc->pvr = CPU_POWERPC_POWER8_BASE;
+    pcc->pvr_mask = CPU_POWERPC_POWER8_MASK;
+}
 #endif /* defined (TARGET_PPC64) */
 
 
@@ -9667,6 +9680,13 @@ static void ppc_cpu_class_init(ObjectClass *oc, void *data)
 #endif
 
     cc->gdb_num_core_regs = 71;
+
+#ifdef USE_APPLE_GDB
+    cc->gdb_read_register = ppc_cpu_gdb_read_register_apple;
+    cc->gdb_write_register = ppc_cpu_gdb_write_register_apple;
+    cc->gdb_num_core_regs = 71 + 32;
+#endif
+
 #if defined(TARGET_PPC64)
     cc->gdb_core_xml_file = "power64-core.xml";
 #else
diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c
index dd84e76127..203027eb3e 100644
--- a/tcg/ppc/tcg-target.c
+++ b/tcg/ppc/tcg-target.c
@@ -1719,6 +1719,9 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64)
 # define LINK_AREA_SIZE                (6 * SZR)
 # define LR_OFFSET                     (1 * SZR)
 # define TCG_TARGET_CALL_STACK_OFFSET  (LINK_AREA_SIZE + 8 * SZR)
+#elif defined(TCG_TARGET_CALL_DARWIN)
+# define LINK_AREA_SIZE                (6 * SZR)
+# define LR_OFFSET                     (2 * SZR)
 #elif TCG_TARGET_REG_BITS == 64
 # if defined(_CALL_ELF) && _CALL_ELF == 2
 #  define LINK_AREA_SIZE               (4 * SZR)
@@ -1728,9 +1731,6 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64)
 # if defined(_CALL_SYSV)
 #  define LINK_AREA_SIZE               (2 * SZR)
 #  define LR_OFFSET                    (1 * SZR)
-# elif defined(TCG_TARGET_CALL_DARWIN)
-#  define LINK_AREA_SIZE               24
-#  define LR_OFFSET                    8
 # endif
 #endif
 #ifndef LR_OFFSET
diff --git a/trace-events b/trace-events
index ba01ad52cf..d071b97dd7 100644
--- a/trace-events
+++ b/trace-events
@@ -1169,12 +1169,13 @@ qxl_render_guest_primary_resized(int32_t width, int32_t height, int32_t stride,
 qxl_render_update_area_done(void *cookie) "%p"
 
 # hw/ppc/spapr_pci.c
-spapr_pci_msi(const char *msg, uint32_t n, uint32_t ca) "%s (device#%d, cfg=%x)"
+spapr_pci_msi(const char *msg, uint32_t ca) "%s (cfg=%x)"
 spapr_pci_msi_setup(const char *name, unsigned vector, uint64_t addr) "dev\"%s\" vector %u, addr=%"PRIx64
-spapr_pci_rtas_ibm_change_msi(unsigned func, unsigned req) "func %u, requested %u"
+spapr_pci_rtas_ibm_change_msi(unsigned cfg, unsigned func, unsigned req, unsigned first) "cfgaddr %x func %u, requested %u, first irq %u"
 spapr_pci_rtas_ibm_query_interrupt_source_number(unsigned ioa, unsigned intr) "queries for #%u, IRQ%u"
 spapr_pci_msi_write(uint64_t addr, uint64_t data, uint32_t dt_irq) "@%"PRIx64"<=%"PRIx64" IRQ %u"
 spapr_pci_lsi_set(const char *busname, int pin, uint32_t irq) "%s PIN%d IRQ %u"
+spapr_pci_msi_retry(unsigned config_addr, unsigned req_num, unsigned max_irqs) "Guest device at %x asked %u, have only %u"
 
 # hw/intc/xics.c
 xics_icp_check_ipi(int server, uint8_t mfrr) "CPU %d can take IPI mfrr=%#x"
@@ -1188,6 +1189,12 @@ xics_set_irq_lsi(int srcno, int nr) "set_irq_lsi: srcno %d [irq %#x]"
 xics_ics_write_xive(int nr, int srcno, int server, uint8_t priority) "ics_write_xive: irq %#x [src %d] server %#x prio %#x"
 xics_ics_reject(int nr, int srcno) "reject irq %#x [src %d]"
 xics_ics_eoi(int nr) "ics_eoi: irq %#x"
+xics_alloc(int src, int irq) "source#%d, irq %d"
+xics_alloc_failed_hint(int src, int irq) "source#%d, irq %d is already in use"
+xics_alloc_failed_no_left(int src) "source#%d, no irq left"
+xics_alloc_block(int src, int first, int num, bool lsi, int align) "source#%d, first irq %d, %d irqs, lsi=%d, alignnum %d"
+xics_ics_free(int src, int irq, int num) "Source#%d, first irq %d, %d irqs"
+xics_ics_free_warn(int src, int irq) "Source#%d, irq %d is already free"
 
 # hw/ppc/spapr.c
 spapr_cas_failed(unsigned long n) "DT diff buffer is too small: %ld bytes"
diff --git a/vmstate.c b/vmstate.c
index c9965205df..ef2f87bdad 100644
--- a/vmstate.c
+++ b/vmstate.c
@@ -43,11 +43,18 @@ static int vmstate_size(void *opaque, VMStateField *field)
     return size;
 }
 
-static void *vmstate_base_addr(void *opaque, VMStateField *field)
+static void *vmstate_base_addr(void *opaque, VMStateField *field, bool alloc)
 {
     void *base_addr = opaque + field->offset;
 
     if (field->flags & VMS_POINTER) {
+        if (alloc && (field->flags & VMS_ALLOC)) {
+            int n_elems = vmstate_n_elems(opaque, field);
+            if (n_elems) {
+                gsize size = n_elems * field->size;
+                *((void **)base_addr + field->start) = g_malloc(size);
+            }
+        }
         base_addr = *(void **)base_addr + field->start;
     }
 
@@ -81,7 +88,7 @@ int vmstate_load_state(QEMUFile *f, const VMStateDescription *vmsd,
              field->field_exists(opaque, version_id)) ||
             (!field->field_exists &&
              field->version_id <= version_id)) {
-            void *base_addr = vmstate_base_addr(opaque, field);
+            void *base_addr = vmstate_base_addr(opaque, field, true);
             int i, n_elems = vmstate_n_elems(opaque, field);
             int size = vmstate_size(opaque, field);
 
@@ -135,7 +142,7 @@ void vmstate_save_state(QEMUFile *f, const VMStateDescription *vmsd,
     while (field->name) {
         if (!field->field_exists ||
             field->field_exists(opaque, vmsd->version_id)) {
-            void *base_addr = vmstate_base_addr(opaque, field);
+            void *base_addr = vmstate_base_addr(opaque, field, false);
             int i, n_elems = vmstate_n_elems(opaque, field);
             int size = vmstate_size(opaque, field);