summary refs log tree commit diff stats
path: root/hw
diff options
context:
space:
mode:
Diffstat (limited to 'hw')
-rw-r--r--hw/ppc/spapr.c16
-rw-r--r--hw/ppc/spapr_hcall.c10
-rw-r--r--hw/ppc/spapr_pci.c156
-rw-r--r--hw/ppc/spapr_pci_vfio.c131
-rw-r--r--hw/ppc/spapr_rng.c4
-rw-r--r--hw/vfio/common.c112
6 files changed, 241 insertions, 188 deletions
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 298171a205..79a70a9c0f 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -1091,7 +1091,7 @@ static void spapr_reallocate_hpt(sPAPRMachineState *spapr, int shift,
         }
 
         spapr->htab_shift = shift;
-        kvmppc_kern_htab = true;
+        spapr->htab = NULL;
     } else {
         /* kernel-side HPT not needed, allocate in userspace instead */
         size_t size = 1ULL << shift;
@@ -1106,7 +1106,6 @@ static void spapr_reallocate_hpt(sPAPRMachineState *spapr, int shift,
 
         memset(spapr->htab, 0, size);
         spapr->htab_shift = shift;
-        kvmppc_kern_htab = false;
 
         for (i = 0; i < size / HASH_PTE_SIZE_64; i++) {
             DIRTY_HPTE(HPTE(spapr->htab, i));
@@ -1196,17 +1195,8 @@ static void spapr_cpu_reset(void *opaque)
 
     env->spr[SPR_HIOR] = 0;
 
-    env->external_htab = (uint8_t *)spapr->htab;
-    env->htab_base = -1;
-    /*
-     * htab_mask is the mask used to normalize hash value to PTEG index.
-     * htab_shift is log2 of hash table size.
-     * We have 8 hpte per group, and each hpte is 16 bytes.
-     * ie have 128 bytes per hpte entry.
-     */
-    env->htab_mask = (1ULL << (spapr->htab_shift - 7)) - 1;
-    env->spr[SPR_SDR1] = (target_ulong)(uintptr_t)spapr->htab |
-        (spapr->htab_shift - 18);
+    ppc_hash64_set_external_hpt(cpu, spapr->htab, spapr->htab_shift,
+                                &error_fatal);
 }
 
 static void spapr_create_nvram(sPAPRMachineState *spapr)
diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
index 1733482de6..b2b1b93cfd 100644
--- a/hw/ppc/spapr_hcall.c
+++ b/hw/ppc/spapr_hcall.c
@@ -122,17 +122,17 @@ static target_ulong h_enter(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                 break;
             }
         }
-        ppc_hash64_stop_access(token);
+        ppc_hash64_stop_access(cpu, token);
         if (index == 8) {
             return H_PTEG_FULL;
         }
     } else {
         token = ppc_hash64_start_access(cpu, pte_index);
         if (ppc_hash64_load_hpte0(cpu, token, 0) & HPTE64_V_VALID) {
-            ppc_hash64_stop_access(token);
+            ppc_hash64_stop_access(cpu, token);
             return H_PTEG_FULL;
         }
-        ppc_hash64_stop_access(token);
+        ppc_hash64_stop_access(cpu, token);
     }
 
     ppc_hash64_store_hpte(cpu, pte_index + index,
@@ -165,7 +165,7 @@ static RemoveResult remove_hpte(PowerPCCPU *cpu, target_ulong ptex,
     token = ppc_hash64_start_access(cpu, ptex);
     v = ppc_hash64_load_hpte0(cpu, token, 0);
     r = ppc_hash64_load_hpte1(cpu, token, 0);
-    ppc_hash64_stop_access(token);
+    ppc_hash64_stop_access(cpu, token);
 
     if ((v & HPTE64_V_VALID) == 0 ||
         ((flags & H_AVPN) && (v & ~0x7fULL) != avpn) ||
@@ -288,7 +288,7 @@ static target_ulong h_protect(PowerPCCPU *cpu, sPAPRMachineState *spapr,
     token = ppc_hash64_start_access(cpu, pte_index);
     v = ppc_hash64_load_hpte0(cpu, token, 0);
     r = ppc_hash64_load_hpte1(cpu, token, 0);
-    ppc_hash64_stop_access(token);
+    ppc_hash64_stop_access(cpu, token);
 
     if ((v & HPTE64_V_VALID) == 0 ||
         ((flags & H_AVPN) && (v & ~0x7fULL) != avpn)) {
diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index 3fc78955ec..79baa7b177 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -42,6 +42,8 @@
 #include "hw/ppc/spapr_drc.h"
 #include "sysemu/device_tree.h"
 
+#include "hw/vfio/vfio.h"
+
 /* Copied from the kernel arch/powerpc/platforms/pseries/msi.c */
 #define RTAS_QUERY_FN           0
 #define RTAS_CHANGE_FN          1
@@ -440,7 +442,6 @@ static void rtas_ibm_set_eeh_option(PowerPCCPU *cpu,
                                     target_ulong rets)
 {
     sPAPRPHBState *sphb;
-    sPAPRPHBClass *spc;
     uint32_t addr, option;
     uint64_t buid;
     int ret;
@@ -458,12 +459,11 @@ static void rtas_ibm_set_eeh_option(PowerPCCPU *cpu,
         goto param_error_exit;
     }
 
-    spc = SPAPR_PCI_HOST_BRIDGE_GET_CLASS(sphb);
-    if (!spc->eeh_set_option) {
+    if (!spapr_phb_eeh_available(sphb)) {
         goto param_error_exit;
     }
 
-    ret = spc->eeh_set_option(sphb, addr, option);
+    ret = spapr_phb_vfio_eeh_set_option(sphb, addr, option);
     rtas_st(rets, 0, ret);
     return;
 
@@ -478,7 +478,6 @@ static void rtas_ibm_get_config_addr_info2(PowerPCCPU *cpu,
                                            target_ulong rets)
 {
     sPAPRPHBState *sphb;
-    sPAPRPHBClass *spc;
     PCIDevice *pdev;
     uint32_t addr, option;
     uint64_t buid;
@@ -493,8 +492,7 @@ static void rtas_ibm_get_config_addr_info2(PowerPCCPU *cpu,
         goto param_error_exit;
     }
 
-    spc = SPAPR_PCI_HOST_BRIDGE_GET_CLASS(sphb);
-    if (!spc->eeh_set_option) {
+    if (!spapr_phb_eeh_available(sphb)) {
         goto param_error_exit;
     }
 
@@ -534,7 +532,6 @@ static void rtas_ibm_read_slot_reset_state2(PowerPCCPU *cpu,
                                             target_ulong rets)
 {
     sPAPRPHBState *sphb;
-    sPAPRPHBClass *spc;
     uint64_t buid;
     int state, ret;
 
@@ -548,12 +545,11 @@ static void rtas_ibm_read_slot_reset_state2(PowerPCCPU *cpu,
         goto param_error_exit;
     }
 
-    spc = SPAPR_PCI_HOST_BRIDGE_GET_CLASS(sphb);
-    if (!spc->eeh_get_state) {
+    if (!spapr_phb_eeh_available(sphb)) {
         goto param_error_exit;
     }
 
-    ret = spc->eeh_get_state(sphb, &state);
+    ret = spapr_phb_vfio_eeh_get_state(sphb, &state);
     rtas_st(rets, 0, ret);
     if (ret != RTAS_OUT_SUCCESS) {
         return;
@@ -578,7 +574,6 @@ static void rtas_ibm_set_slot_reset(PowerPCCPU *cpu,
                                     target_ulong rets)
 {
     sPAPRPHBState *sphb;
-    sPAPRPHBClass *spc;
     uint32_t option;
     uint64_t buid;
     int ret;
@@ -594,12 +589,11 @@ static void rtas_ibm_set_slot_reset(PowerPCCPU *cpu,
         goto param_error_exit;
     }
 
-    spc = SPAPR_PCI_HOST_BRIDGE_GET_CLASS(sphb);
-    if (!spc->eeh_reset) {
+    if (!spapr_phb_eeh_available(sphb)) {
         goto param_error_exit;
     }
 
-    ret = spc->eeh_reset(sphb, option);
+    ret = spapr_phb_vfio_eeh_reset(sphb, option);
     rtas_st(rets, 0, ret);
     return;
 
@@ -614,7 +608,6 @@ static void rtas_ibm_configure_pe(PowerPCCPU *cpu,
                                   target_ulong rets)
 {
     sPAPRPHBState *sphb;
-    sPAPRPHBClass *spc;
     uint64_t buid;
     int ret;
 
@@ -628,12 +621,11 @@ static void rtas_ibm_configure_pe(PowerPCCPU *cpu,
         goto param_error_exit;
     }
 
-    spc = SPAPR_PCI_HOST_BRIDGE_GET_CLASS(sphb);
-    if (!spc->eeh_configure) {
+    if (!spapr_phb_eeh_available(sphb)) {
         goto param_error_exit;
     }
 
-    ret = spc->eeh_configure(sphb);
+    ret = spapr_phb_vfio_eeh_configure(sphb);
     rtas_st(rets, 0, ret);
     return;
 
@@ -649,7 +641,6 @@ static void rtas_ibm_slot_error_detail(PowerPCCPU *cpu,
                                        target_ulong rets)
 {
     sPAPRPHBState *sphb;
-    sPAPRPHBClass *spc;
     int option;
     uint64_t buid;
 
@@ -663,8 +654,7 @@ static void rtas_ibm_slot_error_detail(PowerPCCPU *cpu,
         goto param_error_exit;
     }
 
-    spc = SPAPR_PCI_HOST_BRIDGE_GET_CLASS(sphb);
-    if (!spc->eeh_set_option) {
+    if (!spapr_phb_eeh_available(sphb)) {
         goto param_error_exit;
     }
 
@@ -1142,14 +1132,21 @@ static void spapr_phb_remove_pci_device(sPAPRDRConnector *drc,
     drck->detach(drc, DEVICE(pdev), spapr_phb_remove_pci_device_cb, phb, errp);
 }
 
-static sPAPRDRConnector *spapr_phb_get_pci_drc(sPAPRPHBState *phb,
-                                               PCIDevice *pdev)
+static sPAPRDRConnector *spapr_phb_get_pci_func_drc(sPAPRPHBState *phb,
+                                                    uint32_t busnr,
+                                                    int32_t devfn)
 {
-    uint32_t busnr = pci_bus_num(PCI_BUS(qdev_get_parent_bus(DEVICE(pdev))));
     return spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_PCI,
                                     (phb->index << 16) |
                                     (busnr << 8) |
-                                    pdev->devfn);
+                                    devfn);
+}
+
+static sPAPRDRConnector *spapr_phb_get_pci_drc(sPAPRPHBState *phb,
+                                               PCIDevice *pdev)
+{
+    uint32_t busnr = pci_bus_num(PCI_BUS(qdev_get_parent_bus(DEVICE(pdev))));
+    return spapr_phb_get_pci_func_drc(phb, busnr, pdev->devfn);
 }
 
 static uint32_t spapr_phb_get_pci_drc_index(sPAPRPHBState *phb,
@@ -1173,6 +1170,8 @@ static void spapr_phb_hot_plug_child(HotplugHandler *plug_handler,
     PCIDevice *pdev = PCI_DEVICE(plugged_dev);
     sPAPRDRConnector *drc = spapr_phb_get_pci_drc(phb, pdev);
     Error *local_err = NULL;
+    PCIBus *bus = PCI_BUS(qdev_get_parent_bus(DEVICE(pdev)));
+    uint32_t slotnr = PCI_SLOT(pdev->devfn);
 
     /* if DR is disabled we don't need to do anything in the case of
      * hotplug or coldplug callbacks
@@ -1190,13 +1189,44 @@ static void spapr_phb_hot_plug_child(HotplugHandler *plug_handler,
 
     g_assert(drc);
 
+    /* Following the QEMU convention used for PCIe multifunction
+     * hotplug, we do not allow functions to be hotplugged to a
+     * slot that already has function 0 present
+     */
+    if (plugged_dev->hotplugged && bus->devices[PCI_DEVFN(slotnr, 0)] &&
+        PCI_FUNC(pdev->devfn) != 0) {
+        error_setg(errp, "PCI: slot %d function 0 already ocuppied by %s,"
+                   " additional functions can no longer be exposed to guest.",
+                   slotnr, bus->devices[PCI_DEVFN(slotnr, 0)]->name);
+        return;
+    }
+
     spapr_phb_add_pci_device(drc, phb, pdev, &local_err);
     if (local_err) {
         error_propagate(errp, local_err);
         return;
     }
-    if (plugged_dev->hotplugged) {
-        spapr_hotplug_req_add_by_index(drc);
+
+    /* If this is function 0, signal hotplug for all the device functions.
+     * Otherwise defer sending the hotplug event.
+     */
+    if (plugged_dev->hotplugged && PCI_FUNC(pdev->devfn) == 0) {
+        int i;
+
+        for (i = 0; i < 8; i++) {
+            sPAPRDRConnector *func_drc;
+            sPAPRDRConnectorClass *func_drck;
+            sPAPRDREntitySense state;
+
+            func_drc = spapr_phb_get_pci_func_drc(phb, pci_bus_num(bus),
+                                                  PCI_DEVFN(slotnr, i));
+            func_drck = SPAPR_DR_CONNECTOR_GET_CLASS(func_drc);
+            func_drck->entity_sense(func_drc, &state);
+
+            if (state == SPAPR_DR_ENTITY_SENSE_PRESENT) {
+                spapr_hotplug_req_add_by_index(func_drc);
+            }
+        }
     }
 }
 
@@ -1219,12 +1249,51 @@ static void spapr_phb_hot_unplug_child(HotplugHandler *plug_handler,
 
     drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
     if (!drck->release_pending(drc)) {
+        PCIBus *bus = PCI_BUS(qdev_get_parent_bus(DEVICE(pdev)));
+        uint32_t slotnr = PCI_SLOT(pdev->devfn);
+        sPAPRDRConnector *func_drc;
+        sPAPRDRConnectorClass *func_drck;
+        sPAPRDREntitySense state;
+        int i;
+
+        /* ensure any other present functions are pending unplug */
+        if (PCI_FUNC(pdev->devfn) == 0) {
+            for (i = 1; i < 8; i++) {
+                func_drc = spapr_phb_get_pci_func_drc(phb, pci_bus_num(bus),
+                                                      PCI_DEVFN(slotnr, i));
+                func_drck = SPAPR_DR_CONNECTOR_GET_CLASS(func_drc);
+                func_drck->entity_sense(func_drc, &state);
+                if (state == SPAPR_DR_ENTITY_SENSE_PRESENT
+                    && !func_drck->release_pending(func_drc)) {
+                    error_setg(errp,
+                               "PCI: slot %d, function %d still present. "
+                               "Must unplug all non-0 functions first.",
+                               slotnr, i);
+                    return;
+                }
+            }
+        }
+
         spapr_phb_remove_pci_device(drc, phb, pdev, &local_err);
         if (local_err) {
             error_propagate(errp, local_err);
             return;
         }
-        spapr_hotplug_req_remove_by_index(drc);
+
+        /* if this isn't func 0, defer unplug event. otherwise signal removal
+         * for all present functions
+         */
+        if (PCI_FUNC(pdev->devfn) == 0) {
+            for (i = 7; i >= 0; i--) {
+                func_drc = spapr_phb_get_pci_func_drc(phb, pci_bus_num(bus),
+                                                      PCI_DEVFN(slotnr, i));
+                func_drck = SPAPR_DR_CONNECTOR_GET_CLASS(func_drc);
+                func_drck->entity_sense(func_drc, &state);
+                if (state == SPAPR_DR_ENTITY_SENSE_PRESENT) {
+                    spapr_hotplug_req_remove_by_index(func_drc);
+                }
+            }
+        }
     }
 }
 
@@ -1234,11 +1303,12 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp)
     SysBusDevice *s = SYS_BUS_DEVICE(dev);
     sPAPRPHBState *sphb = SPAPR_PCI_HOST_BRIDGE(s);
     PCIHostState *phb = PCI_HOST_BRIDGE(s);
-    sPAPRPHBClass *info = SPAPR_PCI_HOST_BRIDGE_GET_CLASS(s);
     char *namebuf;
     int i;
     PCIBus *bus;
     uint64_t msi_window_size = 4096;
+    sPAPRTCETable *tcet;
+    uint32_t nb_table;
 
     if (sphb->index != (uint32_t)-1) {
         hwaddr windows_base;
@@ -1390,33 +1460,20 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp)
         }
     }
 
-    if (!info->finish_realize) {
-        error_setg(errp, "finish_realize not defined");
-        return;
-    }
-
-    info->finish_realize(sphb, errp);
-
-    sphb->msi = g_hash_table_new_full(g_int_hash, g_int_equal, g_free, g_free);
-}
-
-static void spapr_phb_finish_realize(sPAPRPHBState *sphb, Error **errp)
-{
-    sPAPRTCETable *tcet;
-    uint32_t nb_table;
-
     nb_table = sphb->dma_win_size >> SPAPR_TCE_PAGE_SHIFT;
     tcet = spapr_tce_new_table(DEVICE(sphb), sphb->dma_liobn,
                                0, SPAPR_TCE_PAGE_SHIFT, nb_table, false);
     if (!tcet) {
         error_setg(errp, "Unable to create TCE table for %s",
                    sphb->dtbusname);
-        return ;
+        return;
     }
 
     /* Register default 32bit DMA window */
     memory_region_add_subregion(&sphb->iommu_root, sphb->dma_win_addr,
                                 spapr_tce_get_iommu(tcet));
+
+    sphb->msi = g_hash_table_new_full(g_int_hash, g_int_equal, g_free, g_free);
 }
 
 static int spapr_phb_children_reset(Object *child, void *opaque)
@@ -1434,6 +1491,10 @@ static void spapr_phb_reset(DeviceState *qdev)
 {
     /* Reset the IOMMU state */
     object_child_foreach(OBJECT(qdev), spapr_phb_children_reset, NULL);
+
+    if (spapr_phb_eeh_available(SPAPR_PCI_HOST_BRIDGE(qdev))) {
+        spapr_phb_vfio_reset(qdev);
+    }
 }
 
 static Property spapr_phb_properties[] = {
@@ -1553,7 +1614,6 @@ static void spapr_phb_class_init(ObjectClass *klass, void *data)
 {
     PCIHostBridgeClass *hc = PCI_HOST_BRIDGE_CLASS(klass);
     DeviceClass *dc = DEVICE_CLASS(klass);
-    sPAPRPHBClass *spc = SPAPR_PCI_HOST_BRIDGE_CLASS(klass);
     HotplugHandlerClass *hp = HOTPLUG_HANDLER_CLASS(klass);
 
     hc->root_bus_path = spapr_phb_root_bus_path;
@@ -1563,7 +1623,6 @@ static void spapr_phb_class_init(ObjectClass *klass, void *data)
     dc->vmsd = &vmstate_spapr_pci;
     set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
     dc->cannot_instantiate_with_device_add_yet = false;
-    spc->finish_realize = spapr_phb_finish_realize;
     hp->plug = spapr_phb_hot_plug_child;
     hp->unplug = spapr_phb_hot_unplug_child;
 }
@@ -1573,7 +1632,6 @@ static const TypeInfo spapr_phb_info = {
     .parent        = TYPE_PCI_HOST_BRIDGE,
     .instance_size = sizeof(sPAPRPHBState),
     .class_init    = spapr_phb_class_init,
-    .class_size    = sizeof(sPAPRPHBClass),
     .interfaces    = (InterfaceInfo[]) {
         { TYPE_HOTPLUG_HANDLER },
         { }
diff --git a/hw/ppc/spapr_pci_vfio.c b/hw/ppc/spapr_pci_vfio.c
index 2f3752ea18..9e15924f50 100644
--- a/hw/ppc/spapr_pci_vfio.c
+++ b/hw/ppc/spapr_pci_vfio.c
@@ -23,68 +23,42 @@
 #include "hw/pci/msix.h"
 #include "linux/vfio.h"
 #include "hw/vfio/vfio.h"
+#include "qemu/error-report.h"
 
-static Property spapr_phb_vfio_properties[] = {
-    DEFINE_PROP_INT32("iommu", sPAPRPHBVFIOState, iommugroupid, -1),
-    DEFINE_PROP_END_OF_LIST(),
-};
+#define TYPE_SPAPR_PCI_VFIO_HOST_BRIDGE "spapr-pci-vfio-host-bridge"
 
-static void spapr_phb_vfio_finish_realize(sPAPRPHBState *sphb, Error **errp)
-{
-    sPAPRPHBVFIOState *svphb = SPAPR_PCI_VFIO_HOST_BRIDGE(sphb);
-    struct vfio_iommu_spapr_tce_info info = { .argsz = sizeof(info) };
-    int ret;
-    sPAPRTCETable *tcet;
-    uint32_t liobn = svphb->phb.dma_liobn;
+#define SPAPR_PCI_VFIO_HOST_BRIDGE(obj) \
+    OBJECT_CHECK(sPAPRPHBVFIOState, (obj), TYPE_SPAPR_PCI_VFIO_HOST_BRIDGE)
 
-    if (svphb->iommugroupid == -1) {
-        error_setg(errp, "Wrong IOMMU group ID %d", svphb->iommugroupid);
-        return;
-    }
+typedef struct sPAPRPHBVFIOState sPAPRPHBVFIOState;
 
-    ret = vfio_container_ioctl(&svphb->phb.iommu_as, svphb->iommugroupid,
-                               VFIO_CHECK_EXTENSION,
-                               (void *) VFIO_SPAPR_TCE_IOMMU);
-    if (ret != 1) {
-        error_setg_errno(errp, -ret,
-                         "spapr-vfio: SPAPR extension is not supported");
-        return;
-    }
+struct sPAPRPHBVFIOState {
+    sPAPRPHBState phb;
 
-    ret = vfio_container_ioctl(&svphb->phb.iommu_as, svphb->iommugroupid,
-                               VFIO_IOMMU_SPAPR_TCE_GET_INFO, &info);
-    if (ret) {
-        error_setg_errno(errp, -ret,
-                         "spapr-vfio: get info from container failed");
-        return;
-    }
+    int32_t iommugroupid;
+};
 
-    tcet = spapr_tce_new_table(DEVICE(sphb), liobn, info.dma32_window_start,
-                               SPAPR_TCE_PAGE_SHIFT,
-                               info.dma32_window_size >> SPAPR_TCE_PAGE_SHIFT,
-                               true);
-    if (!tcet) {
-        error_setg(errp, "spapr-vfio: failed to create VFIO TCE table");
-        return;
-    }
+static Property spapr_phb_vfio_properties[] = {
+    DEFINE_PROP_INT32("iommu", sPAPRPHBVFIOState, iommugroupid, -1),
+    DEFINE_PROP_END_OF_LIST(),
+};
 
-    /* Register default 32bit DMA window */
-    memory_region_add_subregion(&sphb->iommu_root, tcet->bus_offset,
-                                spapr_tce_get_iommu(tcet));
+static void spapr_phb_vfio_instance_init(Object *obj)
+{
+    error_report("spapr-pci-vfio-host-bridge is deprecated");
 }
 
-static void spapr_phb_vfio_eeh_reenable(sPAPRPHBVFIOState *svphb)
+bool spapr_phb_eeh_available(sPAPRPHBState *sphb)
 {
-    struct vfio_eeh_pe_op op = {
-        .argsz = sizeof(op),
-        .op    = VFIO_EEH_PE_ENABLE
-    };
+    return vfio_eeh_as_ok(&sphb->iommu_as);
+}
 
-    vfio_container_ioctl(&svphb->phb.iommu_as,
-                         svphb->iommugroupid, VFIO_EEH_PE_OP, &op);
+static void spapr_phb_vfio_eeh_reenable(sPAPRPHBState *sphb)
+{
+    vfio_eeh_as_op(&sphb->iommu_as, VFIO_EEH_PE_ENABLE);
 }
 
-static void spapr_phb_vfio_reset(DeviceState *qdev)
+void spapr_phb_vfio_reset(DeviceState *qdev)
 {
     /*
      * The PE might be in frozen state. To reenable the EEH
@@ -92,19 +66,18 @@ static void spapr_phb_vfio_reset(DeviceState *qdev)
      * ensures that the contained PCI devices will work properly
      * after reboot.
      */
-    spapr_phb_vfio_eeh_reenable(SPAPR_PCI_VFIO_HOST_BRIDGE(qdev));
+    spapr_phb_vfio_eeh_reenable(SPAPR_PCI_HOST_BRIDGE(qdev));
 }
 
-static int spapr_phb_vfio_eeh_set_option(sPAPRPHBState *sphb,
-                                         unsigned int addr, int option)
+int spapr_phb_vfio_eeh_set_option(sPAPRPHBState *sphb,
+                                  unsigned int addr, int option)
 {
-    sPAPRPHBVFIOState *svphb = SPAPR_PCI_VFIO_HOST_BRIDGE(sphb);
-    struct vfio_eeh_pe_op op = { .argsz = sizeof(op) };
+    uint32_t op;
     int ret;
 
     switch (option) {
     case RTAS_EEH_DISABLE:
-        op.op = VFIO_EEH_PE_DISABLE;
+        op = VFIO_EEH_PE_DISABLE;
         break;
     case RTAS_EEH_ENABLE: {
         PCIHostState *phb;
@@ -122,21 +95,20 @@ static int spapr_phb_vfio_eeh_set_option(sPAPRPHBState *sphb,
             return RTAS_OUT_PARAM_ERROR;
         }
 
-        op.op = VFIO_EEH_PE_ENABLE;
+        op = VFIO_EEH_PE_ENABLE;
         break;
     }
     case RTAS_EEH_THAW_IO:
-        op.op = VFIO_EEH_PE_UNFREEZE_IO;
+        op = VFIO_EEH_PE_UNFREEZE_IO;
         break;
     case RTAS_EEH_THAW_DMA:
-        op.op = VFIO_EEH_PE_UNFREEZE_DMA;
+        op = VFIO_EEH_PE_UNFREEZE_DMA;
         break;
     default:
         return RTAS_OUT_PARAM_ERROR;
     }
 
-    ret = vfio_container_ioctl(&svphb->phb.iommu_as, svphb->iommugroupid,
-                               VFIO_EEH_PE_OP, &op);
+    ret = vfio_eeh_as_op(&sphb->iommu_as, op);
     if (ret < 0) {
         return RTAS_OUT_HW_ERROR;
     }
@@ -144,15 +116,11 @@ static int spapr_phb_vfio_eeh_set_option(sPAPRPHBState *sphb,
     return RTAS_OUT_SUCCESS;
 }
 
-static int spapr_phb_vfio_eeh_get_state(sPAPRPHBState *sphb, int *state)
+int spapr_phb_vfio_eeh_get_state(sPAPRPHBState *sphb, int *state)
 {
-    sPAPRPHBVFIOState *svphb = SPAPR_PCI_VFIO_HOST_BRIDGE(sphb);
-    struct vfio_eeh_pe_op op = { .argsz = sizeof(op) };
     int ret;
 
-    op.op = VFIO_EEH_PE_GET_STATE;
-    ret = vfio_container_ioctl(&svphb->phb.iommu_as, svphb->iommugroupid,
-                               VFIO_EEH_PE_OP, &op);
+    ret = vfio_eeh_as_op(&sphb->iommu_as, VFIO_EEH_PE_GET_STATE);
     if (ret < 0) {
         return RTAS_OUT_PARAM_ERROR;
     }
@@ -204,30 +172,28 @@ static void spapr_phb_vfio_eeh_pre_reset(sPAPRPHBState *sphb)
        pci_for_each_bus(phb->bus, spapr_phb_vfio_eeh_clear_bus_msix, NULL);
 }
 
-static int spapr_phb_vfio_eeh_reset(sPAPRPHBState *sphb, int option)
+int spapr_phb_vfio_eeh_reset(sPAPRPHBState *sphb, int option)
 {
-    sPAPRPHBVFIOState *svphb = SPAPR_PCI_VFIO_HOST_BRIDGE(sphb);
-    struct vfio_eeh_pe_op op = { .argsz = sizeof(op) };
+    uint32_t op;
     int ret;
 
     switch (option) {
     case RTAS_SLOT_RESET_DEACTIVATE:
-        op.op = VFIO_EEH_PE_RESET_DEACTIVATE;
+        op = VFIO_EEH_PE_RESET_DEACTIVATE;
         break;
     case RTAS_SLOT_RESET_HOT:
         spapr_phb_vfio_eeh_pre_reset(sphb);
-        op.op = VFIO_EEH_PE_RESET_HOT;
+        op = VFIO_EEH_PE_RESET_HOT;
         break;
     case RTAS_SLOT_RESET_FUNDAMENTAL:
         spapr_phb_vfio_eeh_pre_reset(sphb);
-        op.op = VFIO_EEH_PE_RESET_FUNDAMENTAL;
+        op = VFIO_EEH_PE_RESET_FUNDAMENTAL;
         break;
     default:
         return RTAS_OUT_PARAM_ERROR;
     }
 
-    ret = vfio_container_ioctl(&svphb->phb.iommu_as, svphb->iommugroupid,
-                               VFIO_EEH_PE_OP, &op);
+    ret = vfio_eeh_as_op(&sphb->iommu_as, op);
     if (ret < 0) {
         return RTAS_OUT_HW_ERROR;
     }
@@ -235,15 +201,11 @@ static int spapr_phb_vfio_eeh_reset(sPAPRPHBState *sphb, int option)
     return RTAS_OUT_SUCCESS;
 }
 
-static int spapr_phb_vfio_eeh_configure(sPAPRPHBState *sphb)
+int spapr_phb_vfio_eeh_configure(sPAPRPHBState *sphb)
 {
-    sPAPRPHBVFIOState *svphb = SPAPR_PCI_VFIO_HOST_BRIDGE(sphb);
-    struct vfio_eeh_pe_op op = { .argsz = sizeof(op) };
     int ret;
 
-    op.op = VFIO_EEH_PE_CONFIGURE;
-    ret = vfio_container_ioctl(&svphb->phb.iommu_as, svphb->iommugroupid,
-                               VFIO_EEH_PE_OP, &op);
+    ret = vfio_eeh_as_op(&sphb->iommu_as, VFIO_EEH_PE_CONFIGURE);
     if (ret < 0) {
         return RTAS_OUT_PARAM_ERROR;
     }
@@ -254,23 +216,16 @@ static int spapr_phb_vfio_eeh_configure(sPAPRPHBState *sphb)
 static void spapr_phb_vfio_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
-    sPAPRPHBClass *spc = SPAPR_PCI_HOST_BRIDGE_CLASS(klass);
 
     dc->props = spapr_phb_vfio_properties;
-    dc->reset = spapr_phb_vfio_reset;
-    spc->finish_realize = spapr_phb_vfio_finish_realize;
-    spc->eeh_set_option = spapr_phb_vfio_eeh_set_option;
-    spc->eeh_get_state = spapr_phb_vfio_eeh_get_state;
-    spc->eeh_reset = spapr_phb_vfio_eeh_reset;
-    spc->eeh_configure = spapr_phb_vfio_eeh_configure;
 }
 
 static const TypeInfo spapr_phb_vfio_info = {
     .name          = TYPE_SPAPR_PCI_VFIO_HOST_BRIDGE,
     .parent        = TYPE_SPAPR_PCI_HOST_BRIDGE,
     .instance_size = sizeof(sPAPRPHBVFIOState),
+    .instance_init = spapr_phb_vfio_instance_init,
     .class_init    = spapr_phb_vfio_class_init,
-    .class_size    = sizeof(sPAPRPHBClass),
 };
 
 static void spapr_pci_vfio_register_types(void)
diff --git a/hw/ppc/spapr_rng.c b/hw/ppc/spapr_rng.c
index a39d472b66..02d6be49f5 100644
--- a/hw/ppc/spapr_rng.c
+++ b/hw/ppc/spapr_rng.c
@@ -77,13 +77,13 @@ static target_ulong h_random(PowerPCCPU *cpu, sPAPRMachineState *spapr,
     hrdata.val.v64 = 0;
     hrdata.received = 0;
 
-    qemu_mutex_unlock_iothread();
     while (hrdata.received < 8) {
         rng_backend_request_entropy(rngstate->backend, 8 - hrdata.received,
                                     random_recv, &hrdata);
+        qemu_mutex_unlock_iothread();
         qemu_sem_wait(&hrdata.sem);
+        qemu_mutex_lock_iothread();
     }
-    qemu_mutex_lock_iothread();
 
     qemu_sem_destroy(&hrdata.sem);
     args[0] = hrdata.val.v64;
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 96ccb797fe..fb588d8d8e 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -1093,47 +1093,97 @@ int vfio_get_region_info(VFIODevice *vbasedev, int index,
     return 0;
 }
 
-static int vfio_container_do_ioctl(AddressSpace *as, int32_t groupid,
-                                   int req, void *param)
+/*
+ * Interfaces for IBM EEH (Enhanced Error Handling)
+ */
+static bool vfio_eeh_container_ok(VFIOContainer *container)
 {
-    VFIOGroup *group;
-    VFIOContainer *container;
-    int ret = -1;
+    /*
+     * As of 2016-03-04 (linux-4.5) the host kernel EEH/VFIO
+     * implementation is broken if there are multiple groups in a
+     * container.  The hardware works in units of Partitionable
+     * Endpoints (== IOMMU groups) and the EEH operations naively
+     * iterate across all groups in the container, without any logic
+     * to make sure the groups have their state synchronized.  For
+     * certain operations (ENABLE) that might be ok, until an error
+     * occurs, but for others (GET_STATE) it's clearly broken.
+     */
 
-    group = vfio_get_group(groupid, as);
-    if (!group) {
-        error_report("vfio: group %d not registered", groupid);
-        return ret;
+    /*
+     * XXX Once fixed kernels exist, test for them here
+     */
+
+    if (QLIST_EMPTY(&container->group_list)) {
+        return false;
     }
 
-    container = group->container;
-    if (group->container) {
-        ret = ioctl(container->fd, req, param);
-        if (ret < 0) {
-            error_report("vfio: failed to ioctl %d to container: ret=%d, %s",
-                         _IOC_NR(req) - VFIO_BASE, ret, strerror(errno));
-        }
+    if (QLIST_NEXT(QLIST_FIRST(&container->group_list), container_next)) {
+        return false;
     }
 
-    vfio_put_group(group);
+    return true;
+}
 
-    return ret;
+static int vfio_eeh_container_op(VFIOContainer *container, uint32_t op)
+{
+    struct vfio_eeh_pe_op pe_op = {
+        .argsz = sizeof(pe_op),
+        .op = op,
+    };
+    int ret;
+
+    if (!vfio_eeh_container_ok(container)) {
+        error_report("vfio/eeh: EEH_PE_OP 0x%x: "
+                     "kernel requires a container with exactly one group", op);
+        return -EPERM;
+    }
+
+    ret = ioctl(container->fd, VFIO_EEH_PE_OP, &pe_op);
+    if (ret < 0) {
+        error_report("vfio/eeh: EEH_PE_OP 0x%x failed: %m", op);
+        return -errno;
+    }
+
+    return 0;
 }
 
-int vfio_container_ioctl(AddressSpace *as, int32_t groupid,
-                         int req, void *param)
+static VFIOContainer *vfio_eeh_as_container(AddressSpace *as)
 {
-    /* We allow only certain ioctls to the container */
-    switch (req) {
-    case VFIO_CHECK_EXTENSION:
-    case VFIO_IOMMU_SPAPR_TCE_GET_INFO:
-    case VFIO_EEH_PE_OP:
-        break;
-    default:
-        /* Return an error on unknown requests */
-        error_report("vfio: unsupported ioctl %X", req);
-        return -1;
+    VFIOAddressSpace *space = vfio_get_address_space(as);
+    VFIOContainer *container = NULL;
+
+    if (QLIST_EMPTY(&space->containers)) {
+        /* No containers to act on */
+        goto out;
+    }
+
+    container = QLIST_FIRST(&space->containers);
+
+    if (QLIST_NEXT(container, next)) {
+        /* We don't yet have logic to synchronize EEH state across
+         * multiple containers */
+        container = NULL;
+        goto out;
     }
 
-    return vfio_container_do_ioctl(as, groupid, req, param);
+out:
+    vfio_put_address_space(space);
+    return container;
+}
+
+bool vfio_eeh_as_ok(AddressSpace *as)
+{
+    VFIOContainer *container = vfio_eeh_as_container(as);
+
+    return (container != NULL) && vfio_eeh_container_ok(container);
+}
+
+int vfio_eeh_as_op(AddressSpace *as, uint32_t op)
+{
+    VFIOContainer *container = vfio_eeh_as_container(as);
+
+    if (!container) {
+        return -ENODEV;
+    }
+    return vfio_eeh_container_op(container, op);
 }