summary refs log tree commit diff stats
path: root/hw/pci
diff options
context:
space:
mode:
Diffstat (limited to 'hw/pci')
-rw-r--r-- hw/pci/pci.c 89
-rw-r--r-- hw/pci/pci_host.c 4
-rw-r--r-- hw/pci/pcie_sriov.c 409
-rw-r--r-- hw/pci/trace-events 2
4 files changed, 139 insertions, 365 deletions
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 8ad5d7e2d8..fab86d0567 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -85,7 +85,6 @@ static Property pci_props[] = {
                     QEMU_PCIE_ERR_UNC_MASK_BITNR, true),
     DEFINE_PROP_BIT("x-pcie-ari-nextfn-1", PCIDevice, cap_present,
                     QEMU_PCIE_ARI_NEXTFN_1_BITNR, false),
-    DEFINE_PROP_STRING("sriov-pf", PCIDevice, sriov_pf),
     DEFINE_PROP_END_OF_LIST()
 };
 
@@ -734,17 +733,10 @@ static bool migrate_is_not_pcie(void *opaque, int version_id)
     return !pci_is_express((PCIDevice *)opaque);
 }
 
-static int pci_post_load(void *opaque, int version_id)
-{
-    pcie_sriov_pf_post_load(opaque);
-    return 0;
-}
-
 const VMStateDescription vmstate_pci_device = {
     .name = "PCIDevice",
     .version_id = 2,
     .minimum_version_id = 1,
-    .post_load = pci_post_load,
     .fields = (const VMStateField[]) {
         VMSTATE_INT32_POSITIVE_LE(version_id, PCIDevice),
         VMSTATE_BUFFER_UNSAFE_INFO_TEST(config, PCIDevice,
@@ -960,8 +952,13 @@ static void pci_init_multifunction(PCIBus *bus, PCIDevice *dev, Error **errp)
         dev->config[PCI_HEADER_TYPE] |= PCI_HEADER_TYPE_MULTI_FUNCTION;
     }
 
-    /* SR/IOV is not handled here. */
-    if (pci_is_vf(dev)) {
+    /*
+     * With SR/IOV and ARI, a device at function 0 need not be a multifunction
+     * device, as it may just be a VF that ended up with function 0 in
+     * the legacy PCI interpretation. Avoid failing in such cases:
+     */
+    if (pci_is_vf(dev) &&
+        dev->exp.sriov_vf.pf->cap_present & QEMU_PCI_CAP_MULTIFUNCTION) {
         return;
     }
 
@@ -994,8 +991,7 @@ static void pci_init_multifunction(PCIBus *bus, PCIDevice *dev, Error **errp)
     }
     /* function 0 indicates single function, so function > 0 must be NULL */
     for (func = 1; func < PCI_FUNC_MAX; ++func) {
-        PCIDevice *device = bus->devices[PCI_DEVFN(slot, func)];
-        if (device && !pci_is_vf(device)) {
+        if (bus->devices[PCI_DEVFN(slot, func)]) {
             error_setg(errp, "PCI: %x.0 indicates single function, "
                        "but %x.%x is already populated.",
                        slot, slot, func);
@@ -1280,7 +1276,6 @@ static void pci_qdev_unrealize(DeviceState *dev)
 
     pci_unregister_io_regions(pci_dev);
     pci_del_option_rom(pci_dev);
-    pcie_sriov_unregister_device(pci_dev);
 
     if (pc->exit) {
         pc->exit(pci_dev);
@@ -1312,6 +1307,7 @@ void pci_register_bar(PCIDevice *pci_dev, int region_num,
     pcibus_t size = memory_region_size(memory);
     uint8_t hdr_type;
 
+    assert(!pci_is_vf(pci_dev)); /* VFs must use pcie_sriov_vf_register_bar */
     assert(region_num >= 0);
     assert(region_num < PCI_NUM_REGIONS);
     assert(is_power_of_2(size));
@@ -1322,6 +1318,7 @@ void pci_register_bar(PCIDevice *pci_dev, int region_num,
     assert(hdr_type != PCI_HEADER_TYPE_BRIDGE || region_num < 2);
 
     r = &pci_dev->io_regions[region_num];
+    r->addr = PCI_BAR_UNMAPPED;
     r->size = size;
     r->type = type;
     r->memory = memory;
@@ -1329,35 +1326,22 @@ void pci_register_bar(PCIDevice *pci_dev, int region_num,
                         ? pci_get_bus(pci_dev)->address_space_io
                         : pci_get_bus(pci_dev)->address_space_mem;
 
-    if (pci_is_vf(pci_dev)) {
-        PCIDevice *pf = pci_dev->exp.sriov_vf.pf;
-        assert(!pf || type == pf->exp.sriov_pf.vf_bar_type[region_num]);
-
-        r->addr = pci_bar_address(pci_dev, region_num, r->type, r->size);
-        if (r->addr != PCI_BAR_UNMAPPED) {
-            memory_region_add_subregion_overlap(r->address_space,
-                                                r->addr, r->memory, 1);
-        }
-    } else {
-        r->addr = PCI_BAR_UNMAPPED;
-
-        wmask = ~(size - 1);
-        if (region_num == PCI_ROM_SLOT) {
-            /* ROM enable bit is writable */
-            wmask |= PCI_ROM_ADDRESS_ENABLE;
-        }
+    wmask = ~(size - 1);
+    if (region_num == PCI_ROM_SLOT) {
+        /* ROM enable bit is writable */
+        wmask |= PCI_ROM_ADDRESS_ENABLE;
+    }
 
-        addr = pci_bar(pci_dev, region_num);
-        pci_set_long(pci_dev->config + addr, type);
+    addr = pci_bar(pci_dev, region_num);
+    pci_set_long(pci_dev->config + addr, type);
 
-        if (!(r->type & PCI_BASE_ADDRESS_SPACE_IO) &&
-            r->type & PCI_BASE_ADDRESS_MEM_TYPE_64) {
-            pci_set_quad(pci_dev->wmask + addr, wmask);
-            pci_set_quad(pci_dev->cmask + addr, ~0ULL);
-        } else {
-            pci_set_long(pci_dev->wmask + addr, wmask & 0xffffffff);
-            pci_set_long(pci_dev->cmask + addr, 0xffffffff);
-        }
+    if (!(r->type & PCI_BASE_ADDRESS_SPACE_IO) &&
+        r->type & PCI_BASE_ADDRESS_MEM_TYPE_64) {
+        pci_set_quad(pci_dev->wmask + addr, wmask);
+        pci_set_quad(pci_dev->cmask + addr, ~0ULL);
+    } else {
+        pci_set_long(pci_dev->wmask + addr, wmask & 0xffffffff);
+        pci_set_long(pci_dev->cmask + addr, 0xffffffff);
     }
 }
 
@@ -1446,11 +1430,7 @@ static pcibus_t pci_config_get_bar_addr(PCIDevice *d, int reg,
             pci_get_word(pf->config + sriov_cap + PCI_SRIOV_VF_OFFSET);
         uint16_t vf_stride =
             pci_get_word(pf->config + sriov_cap + PCI_SRIOV_VF_STRIDE);
-        uint32_t vf_num = d->devfn - (pf->devfn + vf_offset);
-
-        if (vf_num) {
-            vf_num /= vf_stride;
-        }
+        uint32_t vf_num = (d->devfn - (pf->devfn + vf_offset)) / (vf_stride ?: 1);
 
         if (type & PCI_BASE_ADDRESS_MEM_TYPE_64) {
             new_addr = pci_get_quad(pf->config + bar);
@@ -1545,7 +1525,7 @@ static void pci_update_mappings(PCIDevice *d)
             continue;
 
         new_addr = pci_bar_address(d, i, r->type, r->size);
-        if (!d->enabled) {
+        if (!d->has_power) {
             new_addr = PCI_BAR_UNMAPPED;
         }
 
@@ -1633,7 +1613,7 @@ void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val_in, int
         pci_update_irq_disabled(d, was_irq_disabled);
         memory_region_set_enabled(&d->bus_master_enable_region,
                                   (pci_get_word(d->config + PCI_COMMAND)
-                                   & PCI_COMMAND_MASTER) && d->enabled);
+                                   & PCI_COMMAND_MASTER) && d->has_power);
     }
 
     msi_write_config(d, addr, val_in, l);
@@ -2118,11 +2098,6 @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp)
         }
     }
 
-    if (!pcie_sriov_register_device(pci_dev, errp)) {
-        pci_qdev_unrealize(DEVICE(pci_dev));
-        return;
-    }
-
     /*
      * A PCIe Downstream Port that do not have ARI Forwarding enabled must
      * associate only Device 0 with the device attached to the bus
@@ -2909,18 +2884,18 @@ MSIMessage pci_get_msi_message(PCIDevice *dev, int vector)
     return msg;
 }
 
-void pci_set_enabled(PCIDevice *d, bool state)
+void pci_set_power(PCIDevice *d, bool state)
 {
-    if (d->enabled == state) {
+    if (d->has_power == state) {
         return;
     }
 
-    d->enabled = state;
+    d->has_power = state;
     pci_update_mappings(d);
     memory_region_set_enabled(&d->bus_master_enable_region,
                               (pci_get_word(d->config + PCI_COMMAND)
-                               & PCI_COMMAND_MASTER) && d->enabled);
-    if (d->qdev.realized) {
+                               & PCI_COMMAND_MASTER) && d->has_power);
+    if (!d->has_power) {
         pci_device_reset(d);
     }
 }
diff --git a/hw/pci/pci_host.c b/hw/pci/pci_host.c
index 0d82727cc9..dfe6fe6184 100644
--- a/hw/pci/pci_host.c
+++ b/hw/pci/pci_host.c
@@ -86,7 +86,7 @@ void pci_host_config_write_common(PCIDevice *pci_dev, uint32_t addr,
      * allowing direct removal of unexposed functions.
      */
     if ((pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) ||
-        !pci_dev->enabled || is_pci_dev_ejected(pci_dev)) {
+        !pci_dev->has_power || is_pci_dev_ejected(pci_dev)) {
         return;
     }
 
@@ -111,7 +111,7 @@ uint32_t pci_host_config_read_common(PCIDevice *pci_dev, uint32_t addr,
      * allowing direct removal of unexposed functions.
      */
     if ((pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) ||
-        !pci_dev->enabled || is_pci_dev_ejected(pci_dev)) {
+        !pci_dev->has_power || is_pci_dev_ejected(pci_dev)) {
         return ~0x0;
     }
 
diff --git a/hw/pci/pcie_sriov.c b/hw/pci/pcie_sriov.c
index 0fc9f810b9..e9b23221d7 100644
--- a/hw/pci/pcie_sriov.c
+++ b/hw/pci/pcie_sriov.c
@@ -20,90 +20,23 @@
 #include "qapi/error.h"
 #include "trace.h"
 
-static GHashTable *pfs;
+static PCIDevice *register_vf(PCIDevice *pf, int devfn,
+                              const char *name, uint16_t vf_num);
+static void unregister_vfs(PCIDevice *dev);
 
-static void unparent_vfs(PCIDevice *dev, uint16_t total_vfs)
-{
-    for (uint16_t i = 0; i < total_vfs; i++) {
-        PCIDevice *vf = dev->exp.sriov_pf.vf[i];
-        object_unparent(OBJECT(vf));
-        object_unref(OBJECT(vf));
-    }
-    g_free(dev->exp.sriov_pf.vf);
-    dev->exp.sriov_pf.vf = NULL;
-}
-
-static void clear_ctrl_vfe(PCIDevice *dev)
-{
-    uint8_t *ctrl = dev->config + dev->exp.sriov_cap + PCI_SRIOV_CTRL;
-    pci_set_word(ctrl, pci_get_word(ctrl) & ~PCI_SRIOV_CTRL_VFE);
-}
-
-static void register_vfs(PCIDevice *dev)
-{
-    uint16_t num_vfs;
-    uint16_t i;
-    uint16_t sriov_cap = dev->exp.sriov_cap;
-
-    assert(sriov_cap > 0);
-    num_vfs = pci_get_word(dev->config + sriov_cap + PCI_SRIOV_NUM_VF);
-    if (num_vfs > pci_get_word(dev->config + sriov_cap + PCI_SRIOV_TOTAL_VF)) {
-        clear_ctrl_vfe(dev);
-        return;
-    }
-
-    trace_sriov_register_vfs(dev->name, PCI_SLOT(dev->devfn),
-                             PCI_FUNC(dev->devfn), num_vfs);
-    for (i = 0; i < num_vfs; i++) {
-        pci_set_enabled(dev->exp.sriov_pf.vf[i], true);
-    }
-}
-
-static void unregister_vfs(PCIDevice *dev)
-{
-    uint16_t i;
-    uint8_t *cfg = dev->config + dev->exp.sriov_cap;
-
-    trace_sriov_unregister_vfs(dev->name, PCI_SLOT(dev->devfn),
-                               PCI_FUNC(dev->devfn));
-    for (i = 0; i < pci_get_word(cfg + PCI_SRIOV_TOTAL_VF); i++) {
-        pci_set_enabled(dev->exp.sriov_pf.vf[i], false);
-    }
-}
-
-static bool pcie_sriov_pf_init_common(PCIDevice *dev, uint16_t offset,
-                                      uint16_t vf_dev_id, uint16_t init_vfs,
-                                      uint16_t total_vfs, uint16_t vf_offset,
-                                      uint16_t vf_stride, Error **errp)
+void pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset,
+                        const char *vfname, uint16_t vf_dev_id,
+                        uint16_t init_vfs, uint16_t total_vfs,
+                        uint16_t vf_offset, uint16_t vf_stride)
 {
     uint8_t *cfg = dev->config + offset;
     uint8_t *wmask;
 
-    if (!pci_is_express(dev)) {
-        error_setg(errp, "PCI Express is required for SR-IOV PF");
-        return false;
-    }
-
-    if (pci_is_vf(dev)) {
-        error_setg(errp, "a device cannot be both an SR-IOV PF and a VF");
-        return false;
-    }
-
-    if (total_vfs) {
-        uint16_t ari_cap = pcie_find_capability(dev, PCI_EXT_CAP_ID_ARI);
-        uint16_t first_vf_devfn = dev->devfn + vf_offset;
-        uint16_t last_vf_devfn = first_vf_devfn + vf_stride * (total_vfs - 1);
-
-        if ((!ari_cap && PCI_SLOT(dev->devfn) != PCI_SLOT(last_vf_devfn)) ||
-            last_vf_devfn >= PCI_DEVFN_MAX) {
-            error_setg(errp, "VF function number overflows");
-            return false;
-        }
-    }
-
     pcie_add_capability(dev, PCI_EXT_CAP_ID_SRIOV, 1,
                         offset, PCI_EXT_CAP_SRIOV_SIZEOF);
     dev->exp.sriov_cap = offset;
+    dev->exp.sriov_pf.num_vfs = 0;
+    dev->exp.sriov_pf.vfname = g_strdup(vfname);
     dev->exp.sriov_pf.vf = NULL;
 
     pci_set_word(cfg + PCI_SRIOV_VF_OFFSET, vf_offset);
@@ -136,76 +69,13 @@ static bool pcie_sriov_pf_init_common(PCIDevice *dev, uint16_t offset,
     pci_set_word(wmask + PCI_SRIOV_SYS_PGSIZE, 0x553);
 
     qdev_prop_set_bit(&dev->qdev, "multifunction", true);
-
-    return true;
-}
-
-bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset,
-                        const char *vfname, uint16_t vf_dev_id,
-                        uint16_t init_vfs, uint16_t total_vfs,
-                        uint16_t vf_offset, uint16_t vf_stride,
-                        Error **errp)
-{
-    BusState *bus = qdev_get_parent_bus(&dev->qdev);
-    int32_t devfn = dev->devfn + vf_offset;
-
-    if (pfs && g_hash_table_contains(pfs, dev->qdev.id)) {
-        error_setg(errp, "attaching user-created SR-IOV VF unsupported");
-        return false;
-    }
-
-    if (!pcie_sriov_pf_init_common(dev, offset, vf_dev_id, init_vfs,
-                                   total_vfs, vf_offset, vf_stride, errp)) {
-        return false;
-    }
-
-    dev->exp.sriov_pf.vf = g_new(PCIDevice *, total_vfs);
-
-    for (uint16_t i = 0; i < total_vfs; i++) {
-        PCIDevice *vf = pci_new(devfn, vfname);
-        vf->exp.sriov_vf.pf = dev;
-        vf->exp.sriov_vf.vf_number = i;
-
-        if (!qdev_realize(&vf->qdev, bus, errp)) {
-            object_unparent(OBJECT(vf));
-            object_unref(vf);
-            unparent_vfs(dev, i);
-            return false;
-        }
-
-        /* set vid/did according to sr/iov spec - they are not used */
-        pci_config_set_vendor_id(vf->config, 0xffff);
-        pci_config_set_device_id(vf->config, 0xffff);
-
-        dev->exp.sriov_pf.vf[i] = vf;
-        devfn += vf_stride;
-    }
-
-    return true;
 }
 
 void pcie_sriov_pf_exit(PCIDevice *dev)
 {
-    uint8_t *cfg = dev->config + dev->exp.sriov_cap;
-
-    if (dev->exp.sriov_pf.vf_user_created) {
-        uint16_t ven_id = pci_get_word(dev->config + PCI_VENDOR_ID);
-        uint16_t total_vfs = pci_get_word(dev->config + PCI_SRIOV_TOTAL_VF);
-        uint16_t vf_dev_id = pci_get_word(dev->config + PCI_SRIOV_VF_DID);
-
-        unregister_vfs(dev);
-
-        for (uint16_t i = 0; i < total_vfs; i++) {
-            PCIDevice *vf = dev->exp.sriov_pf.vf[i];
-
-            vf->exp.sriov_vf.pf = NULL;
-
-            pci_config_set_vendor_id(vf->config, ven_id);
-            pci_config_set_device_id(vf->config, vf_dev_id);
-        }
-    } else {
-        unparent_vfs(dev, pci_get_word(cfg + PCI_SRIOV_TOTAL_VF));
-    }
+    unregister_vfs(dev);
+    g_free((char *)dev->exp.sriov_pf.vfname);
+    dev->exp.sriov_pf.vfname = NULL;
 }
 
 void pcie_sriov_pf_init_vf_bar(PCIDevice *dev, int region_num,
@@ -238,173 +108,113 @@ void pcie_sriov_pf_init_vf_bar(PCIDevice *dev, int region_num,
 void pcie_sriov_vf_register_bar(PCIDevice *dev, int region_num,
                                 MemoryRegion *memory)
 {
+    PCIIORegion *r;
+    PCIBus *bus = pci_get_bus(dev);
     uint8_t type;
+    pcibus_t size = memory_region_size(memory);
 
-    assert(dev->exp.sriov_vf.pf);
+    assert(pci_is_vf(dev)); /* PFs must use pci_register_bar */
+    assert(region_num >= 0);
+    assert(region_num < PCI_NUM_REGIONS);
     type = dev->exp.sriov_vf.pf->exp.sriov_pf.vf_bar_type[region_num];
 
-    return pci_register_bar(dev, region_num, type, memory);
-}
-
-static gint compare_vf_devfns(gconstpointer a, gconstpointer b)
-{
-    return (*(PCIDevice **)a)->devfn - (*(PCIDevice **)b)->devfn;
-}
-
-int16_t pcie_sriov_pf_init_from_user_created_vfs(PCIDevice *dev,
-                                                 uint16_t offset,
-                                                 Error **errp)
-{
-    GPtrArray *pf;
-    PCIDevice **vfs;
-    BusState *bus = qdev_get_parent_bus(DEVICE(dev));
-    uint16_t ven_id = pci_get_word(dev->config + PCI_VENDOR_ID);
-    uint16_t vf_dev_id;
-    uint16_t vf_offset;
-    uint16_t vf_stride;
-    uint16_t i;
-
-    if (!pfs || !dev->qdev.id) {
-        return 0;
-    }
-
-    pf = g_hash_table_lookup(pfs, dev->qdev.id);
-    if (!pf) {
-        return 0;
-    }
-
-    if (pf->len > UINT16_MAX) {
-        error_setg(errp, "too many VFs");
-        return -1;
-    }
-
-    g_ptr_array_sort(pf, compare_vf_devfns);
-    vfs = (void *)pf->pdata;
-
-    if (vfs[0]->devfn <= dev->devfn) {
-        error_setg(errp, "a VF function number is less than the PF function number");
-        return -1;
-    }
-
-    vf_dev_id = pci_get_word(vfs[0]->config + PCI_DEVICE_ID);
-    vf_offset = vfs[0]->devfn - dev->devfn;
-    vf_stride = pf->len < 2 ? 0 : vfs[1]->devfn - vfs[0]->devfn;
-
-    for (i = 0; i < pf->len; i++) {
-        if (bus != qdev_get_parent_bus(&vfs[i]->qdev)) {
-            error_setg(errp, "SR-IOV VF parent bus mismatches with PF");
-            return -1;
-        }
-
-        if (ven_id != pci_get_word(vfs[i]->config + PCI_VENDOR_ID)) {
-            error_setg(errp, "SR-IOV VF vendor ID mismatches with PF");
-            return -1;
-        }
-
-        if (vf_dev_id != pci_get_word(vfs[i]->config + PCI_DEVICE_ID)) {
-            error_setg(errp, "inconsistent SR-IOV VF device IDs");
-            return -1;
-        }
-
-        for (size_t j = 0; j < PCI_NUM_REGIONS; j++) {
-            if (vfs[i]->io_regions[j].size != vfs[0]->io_regions[j].size ||
-                vfs[i]->io_regions[j].type != vfs[0]->io_regions[j].type) {
-                error_setg(errp, "inconsistent SR-IOV BARs");
-                return -1;
-            }
-        }
-
-        if (vfs[i]->devfn - vfs[0]->devfn != vf_stride * i) {
-            error_setg(errp, "inconsistent SR-IOV stride");
-            return -1;
-        }
-    }
-
-    if (!pcie_sriov_pf_init_common(dev, offset, vf_dev_id, pf->len,
-                                   pf->len, vf_offset, vf_stride, errp)) {
-        return -1;
+    if (!is_power_of_2(size)) {
+        error_report("%s: PCI region size must be a power"
+                     " of two - type=0x%x, size=0x%"FMT_PCIBUS,
+                     __func__, type, size);
+        exit(1);
     }
 
-    for (i = 0; i < pf->len; i++) {
-        vfs[i]->exp.sriov_vf.pf = dev;
-        vfs[i]->exp.sriov_vf.vf_number = i;
-
-        /* set vid/did according to sr/iov spec - they are not used */
-        pci_config_set_vendor_id(vfs[i]->config, 0xffff);
-        pci_config_set_device_id(vfs[i]->config, 0xffff);
-    }
-
-    dev->exp.sriov_pf.vf = vfs;
-    dev->exp.sriov_pf.vf_user_created = true;
-
-    for (i = 0; i < PCI_NUM_REGIONS; i++) {
-        PCIIORegion *region = &vfs[0]->io_regions[i];
-
-        if (region->size) {
-            pcie_sriov_pf_init_vf_bar(dev, i, region->type, region->size);
-        }
+    r = &dev->io_regions[region_num];
+    r->memory = memory;
+    r->address_space =
+        type & PCI_BASE_ADDRESS_SPACE_IO
+        ? bus->address_space_io
+        : bus->address_space_mem;
+    r->size = size;
+    r->type = type;
+
+    r->addr = pci_bar_address(dev, region_num, r->type, r->size);
+    if (r->addr != PCI_BAR_UNMAPPED) {
+        memory_region_add_subregion_overlap(r->address_space,
+                                            r->addr, r->memory, 1);
     }
-
-    return PCI_EXT_CAP_SRIOV_SIZEOF;
 }
 
-bool pcie_sriov_register_device(PCIDevice *dev, Error **errp)
+static PCIDevice *register_vf(PCIDevice *pf, int devfn, const char *name,
+                              uint16_t vf_num)
 {
-    if (!dev->exp.sriov_pf.vf && dev->qdev.id &&
-        pfs && g_hash_table_contains(pfs, dev->qdev.id)) {
-        error_setg(errp, "attaching user-created SR-IOV VF unsupported");
-        return false;
+    PCIDevice *dev = pci_new(devfn, name);
+    Error *local_err = NULL;
+
+    dev->exp.sriov_vf.pf = pf;
+    dev->exp.sriov_vf.vf_number = vf_num;
+    if (!qdev_realize(&dev->qdev, &pci_get_bus(pf)->qbus, &local_err)) {
+        error_report_err(local_err);
+        object_unparent(OBJECT(dev)); /* detach, as in unregister_vfs() */
+        object_unref(OBJECT(dev));    /* and release it: do not leak */
+        return NULL;
     }
 
-    if (dev->sriov_pf) {
-        PCIDevice *pci_pf;
-        GPtrArray *pf;
+    /* set vid/did according to sr/iov spec - they are not used */
+    pci_config_set_vendor_id(dev->config, 0xffff);
+    pci_config_set_device_id(dev->config, 0xffff);
 
-        if (!PCI_DEVICE_GET_CLASS(dev)->sriov_vf_user_creatable) {
-            error_setg(errp, "user cannot create SR-IOV VF with this device type");
-            return false;
-        }
+    return dev;
+}
 
-        if (!pci_is_express(dev)) {
-            error_setg(errp, "PCI Express is required for SR-IOV VF");
-            return false;
-        }
+static void register_vfs(PCIDevice *dev)
+{
+    uint16_t num_vfs;
+    uint16_t i;
+    uint16_t sriov_cap = dev->exp.sriov_cap;
+    uint16_t vf_offset =
+        pci_get_word(dev->config + sriov_cap + PCI_SRIOV_VF_OFFSET);
+    uint16_t vf_stride =
+        pci_get_word(dev->config + sriov_cap + PCI_SRIOV_VF_STRIDE);
+    int32_t devfn = dev->devfn + vf_offset;
 
-        if (!pci_qdev_find_device(dev->sriov_pf, &pci_pf)) {
-            error_setg(errp, "PCI device specified as SR-IOV PF already exists");
-            return false;
-        }
+    assert(sriov_cap > 0);
+    num_vfs = pci_get_word(dev->config + sriov_cap + PCI_SRIOV_NUM_VF);
+    if (num_vfs > pci_get_word(dev->config + sriov_cap + PCI_SRIOV_TOTAL_VF)) {
+        return;
+    }
 
-        if (!pfs) {
-            pfs = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, NULL);
-        }
+    dev->exp.sriov_pf.vf = g_new(PCIDevice *, num_vfs);
 
-        pf = g_hash_table_lookup(pfs, dev->sriov_pf);
-        if (!pf) {
-            pf = g_ptr_array_new();
-            g_hash_table_insert(pfs, g_strdup(dev->sriov_pf), pf);
+    trace_sriov_register_vfs(dev->name, PCI_SLOT(dev->devfn),
+                             PCI_FUNC(dev->devfn), num_vfs);
+    for (i = 0; i < num_vfs; i++) {
+        dev->exp.sriov_pf.vf[i] = register_vf(dev, devfn,
+                                              dev->exp.sriov_pf.vfname, i);
+        if (!dev->exp.sriov_pf.vf[i]) {
+            num_vfs = i;
+            break;
         }
-
-        g_ptr_array_add(pf, dev);
+        devfn += vf_stride;
     }
-
-    return true;
+    dev->exp.sriov_pf.num_vfs = num_vfs;
 }
 
-void pcie_sriov_unregister_device(PCIDevice *dev)
+static void unregister_vfs(PCIDevice *dev)
 {
-    if (dev->sriov_pf && pfs) {
-        GPtrArray *pf = g_hash_table_lookup(pfs, dev->sriov_pf);
-
-        if (pf) {
-            g_ptr_array_remove_fast(pf, dev);
+    uint16_t num_vfs = dev->exp.sriov_pf.num_vfs;
+    uint16_t i;
 
-            if (!pf->len) {
-                g_hash_table_remove(pfs, dev->sriov_pf);
-                g_ptr_array_free(pf, FALSE);
-            }
+    trace_sriov_unregister_vfs(dev->name, PCI_SLOT(dev->devfn),
+                               PCI_FUNC(dev->devfn), num_vfs);
+    for (i = 0; i < num_vfs; i++) {
+        Error *err = NULL;
+        PCIDevice *vf = dev->exp.sriov_pf.vf[i];
+        if (!object_property_set_bool(OBJECT(vf), "realized", false, &err)) {
+            error_reportf_err(err, "Failed to unplug: ");
         }
+        object_unparent(OBJECT(vf));
+        object_unref(OBJECT(vf));
     }
+    g_free(dev->exp.sriov_pf.vf);
+    dev->exp.sriov_pf.vf = NULL;
+    dev->exp.sriov_pf.num_vfs = 0;
 }
 
 void pcie_sriov_config_write(PCIDevice *dev, uint32_t address,
@@ -425,21 +235,15 @@ void pcie_sriov_config_write(PCIDevice *dev, uint32_t address,
                              PCI_FUNC(dev->devfn), off, val, len);
 
     if (range_covers_byte(off, len, PCI_SRIOV_CTRL)) {
-        if (val & PCI_SRIOV_CTRL_VFE) {
-            register_vfs(dev);
+        if (dev->exp.sriov_pf.num_vfs) {
+            if (!(val & PCI_SRIOV_CTRL_VFE)) {
+                unregister_vfs(dev);
+            }
         } else {
-            unregister_vfs(dev);
+            if (val & PCI_SRIOV_CTRL_VFE) {
+                register_vfs(dev);
+            }
         }
-    } else if (range_covers_byte(off, len, PCI_SRIOV_NUM_VF)) {
-        clear_ctrl_vfe(dev);
-        unregister_vfs(dev);
-    }
-}
-
-void pcie_sriov_pf_post_load(PCIDevice *dev)
-{
-    if (dev->exp.sriov_cap) {
-        register_vfs(dev);
     }
 }
 
@@ -490,7 +294,7 @@ void pcie_sriov_pf_add_sup_pgsize(PCIDevice *dev, uint16_t opt_sup_pgsize)
 
 uint16_t pcie_sriov_vf_number(PCIDevice *dev)
 {
-    assert(dev->exp.sriov_vf.pf);
+    assert(pci_is_vf(dev));
     return dev->exp.sriov_vf.vf_number;
 }
 
@@ -502,7 +306,7 @@ PCIDevice *pcie_sriov_get_pf(PCIDevice *dev)
 PCIDevice *pcie_sriov_get_vf_at_index(PCIDevice *dev, int n)
 {
     assert(!pci_is_vf(dev));
-    if (n < pcie_sriov_num_vfs(dev)) {
+    if (n >= 0 && n < dev->exp.sriov_pf.num_vfs) {
         return dev->exp.sriov_pf.vf[n];
     }
     return NULL;
 }
@@ -510,10 +314,5 @@ PCIDevice *pcie_sriov_get_vf_at_index(PCIDevice *dev, int n)
 
 uint16_t pcie_sriov_num_vfs(PCIDevice *dev)
 {
-    uint16_t sriov_cap = dev->exp.sriov_cap;
-    uint8_t *cfg = dev->config + sriov_cap;
-
-    return sriov_cap &&
-           (pci_get_word(cfg + PCI_SRIOV_CTRL) & PCI_SRIOV_CTRL_VFE) ?
-           pci_get_word(cfg + PCI_SRIOV_NUM_VF) : 0;
+    return dev->exp.sriov_pf.num_vfs;
 }
diff --git a/hw/pci/trace-events b/hw/pci/trace-events
index e98f575a9d..19643aa8c6 100644
--- a/hw/pci/trace-events
+++ b/hw/pci/trace-events
@@ -14,7 +14,7 @@ msix_write_config(char *name, bool enabled, bool masked) "dev %s enabled %d mask
 
 # hw/pci/pcie_sriov.c
 sriov_register_vfs(const char *name, int slot, int function, int num_vfs) "%s %02x:%x: creating %d vf devs"
-sriov_unregister_vfs(const char *name, int slot, int function) "%s %02x:%x: Unregistering vf devs"
+sriov_unregister_vfs(const char *name, int slot, int function, int num_vfs) "%s %02x:%x: Unregistering %d vf devs"
 sriov_config_write(const char *name, int slot, int fun, uint32_t offset, uint32_t val, uint32_t len) "%s %02x:%x: sriov offset 0x%x val 0x%x len %d"
 
 # pcie.c