summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--hw/vfio/pci-quirks.c114
-rw-r--r--hw/vfio/pci.c17
-rw-r--r--hw/vfio/pci.h4
-rw-r--r--include/qom/object.h11
-rw-r--r--include/sysemu/iothread.h9
-rw-r--r--iothread.c46
-rw-r--r--qom/object.c11
-rw-r--r--util/aio-posix.c9
8 files changed, 208 insertions, 13 deletions
diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c
index 349085ea12..14291c2a16 100644
--- a/hw/vfio/pci-quirks.c
+++ b/hw/vfio/pci-quirks.c
@@ -14,6 +14,7 @@
 #include "qemu/error-report.h"
 #include "qemu/range.h"
 #include "qapi/error.h"
+#include "qapi/visitor.h"
 #include "hw/nvram/fw_cfg.h"
 #include "pci.h"
 #include "trace.h"
@@ -1850,3 +1851,116 @@ void vfio_setup_resetfn_quirk(VFIOPCIDevice *vdev)
         break;
     }
 }
+
+/*
+ * The NVIDIA GPUDirect P2P Vendor capability allows the user to specify
+ * devices as a member of a clique.  Devices within the same clique ID
+ * are capable of direct P2P.  It's the user's responsibility that this
+ * is correct.  The spec says that this may reside at any unused config
+ * offset, but reserves and recommends hypervisors place this at C8h.
+ * The spec also states that the hypervisor should place this capability
+ * at the end of the capability list, thus next is defined as 0h.
+ *
+ * +----------------+----------------+----------------+----------------+
+ * | sig 7:0 ('P')  |  vndr len (8h) |    next (0h)   |   cap id (9h)  |
+ * +----------------+----------------+----------------+----------------+
+ * | rsvd 15:7(0h),id 6:3,ver 2:0(0h)|          sig 23:8 ('P2')        |
+ * +---------------------------------+---------------------------------+
+ *
+ * https://lists.gnu.org/archive/html/qemu-devel/2017-08/pdfUda5iEpgOS.pdf
+ */
+static void get_nv_gpudirect_clique_id(Object *obj, Visitor *v,
+                                       const char *name, void *opaque,
+                                       Error **errp)
+{
+    DeviceState *dev = DEVICE(obj);
+    Property *prop = opaque;
+    uint8_t *ptr = qdev_get_prop_ptr(dev, prop);
+
+    visit_type_uint8(v, name, ptr, errp);
+}
+
+static void set_nv_gpudirect_clique_id(Object *obj, Visitor *v,
+                                       const char *name, void *opaque,
+                                       Error **errp)
+{
+    DeviceState *dev = DEVICE(obj);
+    Property *prop = opaque;
+    uint8_t value, *ptr = qdev_get_prop_ptr(dev, prop);
+    Error *local_err = NULL;
+
+    if (dev->realized) {
+        qdev_prop_set_after_realize(dev, name, errp);
+        return;
+    }
+
+    visit_type_uint8(v, name, &value, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
+
+    if (value & ~0xF) {
+        error_setg(errp, "Property %s: valid range 0-15", name);
+        return;
+    }
+
+    *ptr = value;
+}
+
+const PropertyInfo qdev_prop_nv_gpudirect_clique = {
+    .name = "uint4",
+    .description = "NVIDIA GPUDirect Clique ID (0 - 15)",
+    .get = get_nv_gpudirect_clique_id,
+    .set = set_nv_gpudirect_clique_id,
+};
+
+static int vfio_add_nv_gpudirect_cap(VFIOPCIDevice *vdev, Error **errp)
+{
+    PCIDevice *pdev = &vdev->pdev;
+    int ret, pos = 0xC8;
+
+    if (vdev->nv_gpudirect_clique == 0xFF) {
+        return 0;
+    }
+
+    if (!vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID)) {
+        error_setg(errp, "NVIDIA GPUDirect Clique ID: invalid device vendor");
+        return -EINVAL;
+    }
+
+    if (pci_get_byte(pdev->config + PCI_CLASS_DEVICE + 1) !=
+        PCI_BASE_CLASS_DISPLAY) {
+        error_setg(errp, "NVIDIA GPUDirect Clique ID: unsupported PCI class");
+        return -EINVAL;
+    }
+
+    ret = pci_add_capability(pdev, PCI_CAP_ID_VNDR, pos, 8, errp);
+    if (ret < 0) {
+        error_prepend(errp, "Failed to add NVIDIA GPUDirect cap: ");
+        return ret;
+    }
+
+    memset(vdev->emulated_config_bits + pos, 0xFF, 8);
+    pos += PCI_CAP_FLAGS;
+    pci_set_byte(pdev->config + pos++, 8);
+    pci_set_byte(pdev->config + pos++, 'P');
+    pci_set_byte(pdev->config + pos++, '2');
+    pci_set_byte(pdev->config + pos++, 'P');
+    pci_set_byte(pdev->config + pos++, vdev->nv_gpudirect_clique << 3);
+    pci_set_byte(pdev->config + pos, 0);
+
+    return 0;
+}
+
+int vfio_add_virt_caps(VFIOPCIDevice *vdev, Error **errp)
+{
+    int ret;
+
+    ret = vfio_add_nv_gpudirect_cap(vdev, errp);
+    if (ret) {
+        return ret;
+    }
+
+    return 0;
+}
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 31e1edf447..9e86db7c3b 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -1826,15 +1826,23 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos, Error **errp)
     if (next) {
         ret = vfio_add_std_cap(vdev, next, errp);
         if (ret) {
-            goto out;
+            return ret;
         }
     } else {
         /* Begin the rebuild, use QEMU emulated list bits */
         pdev->config[PCI_CAPABILITY_LIST] = 0;
         vdev->emulated_config_bits[PCI_CAPABILITY_LIST] = 0xff;
         vdev->emulated_config_bits[PCI_STATUS] |= PCI_STATUS_CAP_LIST;
+
+        ret = vfio_add_virt_caps(vdev, errp);
+        if (ret) {
+            return ret;
+        }
     }
 
+    /* Scale down size, esp in case virt caps were added above */
+    size = MIN(size, vfio_std_cap_max_size(pdev, pos));
+
     /* Use emulated next pointer to allow dropping caps */
     pci_set_byte(vdev->emulated_config_bits + pos + PCI_CAP_LIST_NEXT, 0xff);
 
@@ -1862,7 +1870,7 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos, Error **errp)
         ret = pci_add_capability(pdev, cap_id, pos, size, errp);
         break;
     }
-out:
+
     if (ret < 0) {
         error_prepend(errp,
                       "failed to add PCI capability 0x%x[0x%x]@0x%x: ",
@@ -2962,6 +2970,8 @@ static void vfio_instance_init(Object *obj)
     vdev->host.bus = ~0U;
     vdev->host.slot = ~0U;
     vdev->host.function = ~0U;
+
+    vdev->nv_gpudirect_clique = 0xFF;
 }
 
 static Property vfio_pci_dev_properties[] = {
@@ -2986,6 +2996,9 @@ static Property vfio_pci_dev_properties[] = {
     DEFINE_PROP_UINT32("x-pci-sub-device-id", VFIOPCIDevice,
                        sub_device_id, PCI_ANY_ID),
     DEFINE_PROP_UINT32("x-igd-gms", VFIOPCIDevice, igd_gms, 0),
+    DEFINE_PROP_UNSIGNED_NODEFAULT("x-nv-gpudirect-clique", VFIOPCIDevice,
+                                   nv_gpudirect_clique,
+                                   qdev_prop_nv_gpudirect_clique, uint8_t),
     /*
      * TODO - support passed fds... is this necessary?
      * DEFINE_PROP_STRING("vfiofd", VFIOPCIDevice, vfiofd_name),
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index a8366bb2a7..502a5755b9 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -135,6 +135,7 @@ typedef struct VFIOPCIDevice {
     int32_t bootindex;
     uint32_t igd_gms;
     uint8_t pm_cap;
+    uint8_t nv_gpudirect_clique;
     bool pci_aer;
     bool req_enabled;
     bool has_flr;
@@ -160,6 +161,9 @@ void vfio_bar_quirk_setup(VFIOPCIDevice *vdev, int nr);
 void vfio_bar_quirk_exit(VFIOPCIDevice *vdev, int nr);
 void vfio_bar_quirk_finalize(VFIOPCIDevice *vdev, int nr);
 void vfio_setup_resetfn_quirk(VFIOPCIDevice *vdev);
+int vfio_add_virt_caps(VFIOPCIDevice *vdev, Error **errp);
+
+extern const PropertyInfo qdev_prop_nv_gpudirect_clique;
 
 int vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp);
 
diff --git a/include/qom/object.h b/include/qom/object.h
index f3e5cff37a..e0d9824415 100644
--- a/include/qom/object.h
+++ b/include/qom/object.h
@@ -1214,6 +1214,17 @@ Object *object_get_root(void);
 Object *object_get_objects_root(void);
 
 /**
+ * object_get_internal_root:
+ *
+ * Get the container object that holds internally used object
+ * instances.  Any object which is put into this container must not be
+ * user visible, and it will not be exposed in the QOM tree.
+ *
+ * Returns: the internal object container
+ */
+Object *object_get_internal_root(void);
+
+/**
  * object_get_canonical_path_component:
  *
  * Returns: The final component in the object's canonical path.  The canonical
diff --git a/include/sysemu/iothread.h b/include/sysemu/iothread.h
index d2985b30ba..110329b2b4 100644
--- a/include/sysemu/iothread.h
+++ b/include/sysemu/iothread.h
@@ -46,4 +46,13 @@ AioContext *iothread_get_aio_context(IOThread *iothread);
 void iothread_stop_all(void);
 GMainContext *iothread_get_g_main_context(IOThread *iothread);
 
+/*
+ * Helpers used to allocate iothreads for internal use.  These
+ * iothreads will not be seen by monitor clients when query using
+ * "query-iothreads".
+ */
+IOThread *iothread_create(const char *id, Error **errp);
+void iothread_stop(IOThread *iothread);
+void iothread_destroy(IOThread *iothread);
+
 #endif /* IOTHREAD_H */
diff --git a/iothread.c b/iothread.c
index 59d0850988..27a4288578 100644
--- a/iothread.c
+++ b/iothread.c
@@ -71,8 +71,6 @@ static void *iothread_run(void *opaque)
             g_main_loop_unref(loop);
 
             g_main_context_pop_thread_default(iothread->worker_context);
-            g_main_context_unref(iothread->worker_context);
-            iothread->worker_context = NULL;
         }
     }
 
@@ -80,13 +78,10 @@ static void *iothread_run(void *opaque)
     return NULL;
 }
 
-static int iothread_stop(Object *object, void *opaque)
+void iothread_stop(IOThread *iothread)
 {
-    IOThread *iothread;
-
-    iothread = (IOThread *)object_dynamic_cast(object, TYPE_IOTHREAD);
-    if (!iothread || !iothread->ctx || iothread->stopping) {
-        return 0;
+    if (!iothread->ctx || iothread->stopping) {
+        return;
     }
     iothread->stopping = true;
     aio_notify(iothread->ctx);
@@ -94,6 +89,17 @@ static int iothread_stop(Object *object, void *opaque)
         g_main_loop_quit(iothread->main_loop);
     }
     qemu_thread_join(&iothread->thread);
+}
+
+static int iothread_stop_iter(Object *object, void *opaque)
+{
+    IOThread *iothread;
+
+    iothread = (IOThread *)object_dynamic_cast(object, TYPE_IOTHREAD);
+    if (!iothread) {
+        return 0;
+    }
+    iothread_stop(iothread);
     return 0;
 }
 
@@ -108,7 +114,11 @@ static void iothread_instance_finalize(Object *obj)
 {
     IOThread *iothread = IOTHREAD(obj);
 
-    iothread_stop(obj, NULL);
+    iothread_stop(iothread);
+    if (iothread->worker_context) {
+        g_main_context_unref(iothread->worker_context);
+        iothread->worker_context = NULL;
+    }
     qemu_cond_destroy(&iothread->init_done_cond);
     qemu_mutex_destroy(&iothread->init_done_lock);
     if (!iothread->ctx) {
@@ -328,7 +338,7 @@ void iothread_stop_all(void)
         aio_context_release(ctx);
     }
 
-    object_child_foreach(container, iothread_stop, NULL);
+    object_child_foreach(container, iothread_stop_iter, NULL);
 }
 
 static gpointer iothread_g_main_context_init(gpointer opaque)
@@ -354,3 +364,19 @@ GMainContext *iothread_get_g_main_context(IOThread *iothread)
 
     return iothread->worker_context;
 }
+
+IOThread *iothread_create(const char *id, Error **errp)
+{
+    Object *obj;
+
+    obj = object_new_with_props(TYPE_IOTHREAD,
+                                object_get_internal_root(),
+                                id, errp, NULL);
+
+    return IOTHREAD(obj);
+}
+
+void iothread_destroy(IOThread *iothread)
+{
+    object_unparent(OBJECT(iothread));
+}
diff --git a/qom/object.c b/qom/object.c
index 3e18537e9b..6a7bd9257b 100644
--- a/qom/object.c
+++ b/qom/object.c
@@ -1370,6 +1370,17 @@ Object *object_get_objects_root(void)
     return container_get(object_get_root(), "/objects");
 }
 
+Object *object_get_internal_root(void)
+{
+    static Object *internal_root;
+
+    if (!internal_root) {
+        internal_root = object_new("container");
+    }
+
+    return internal_root;
+}
+
 static void object_get_child_property(Object *obj, Visitor *v,
                                       const char *name, void *opaque,
                                       Error **errp)
diff --git a/util/aio-posix.c b/util/aio-posix.c
index 2d51239ec6..5946ac09f0 100644
--- a/util/aio-posix.c
+++ b/util/aio-posix.c
@@ -223,7 +223,14 @@ void aio_set_fd_handler(AioContext *ctx,
             return;
         }
 
-        g_source_remove_poll(&ctx->source, &node->pfd);
+        /* If the GSource is in the process of being destroyed then
+         * g_source_remove_poll() causes an assertion failure.  Skip
+         * removal in that case, because glib cleans up its state during
+         * destruction anyway.
+         */
+        if (!g_source_is_destroyed(&ctx->source)) {
+            g_source_remove_poll(&ctx->source, &node->pfd);
+        }
 
         /* If the lock is held, just mark the node as deleted */
         if (qemu_lockcnt_count(&ctx->list_lock)) {