diff options
| -rw-r--r-- | hw/vfio/pci-quirks.c | 114 | ||||
| -rw-r--r-- | hw/vfio/pci.c | 17 | ||||
| -rw-r--r-- | hw/vfio/pci.h | 4 | ||||
| -rw-r--r-- | include/qom/object.h | 11 | ||||
| -rw-r--r-- | include/sysemu/iothread.h | 9 | ||||
| -rw-r--r-- | iothread.c | 46 | ||||
| -rw-r--r-- | qom/object.c | 11 | ||||
| -rw-r--r-- | util/aio-posix.c | 9 |
8 files changed, 208 insertions, 13 deletions
diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c index 349085ea12..14291c2a16 100644 --- a/hw/vfio/pci-quirks.c +++ b/hw/vfio/pci-quirks.c @@ -14,6 +14,7 @@ #include "qemu/error-report.h" #include "qemu/range.h" #include "qapi/error.h" +#include "qapi/visitor.h" #include "hw/nvram/fw_cfg.h" #include "pci.h" #include "trace.h" @@ -1850,3 +1851,116 @@ void vfio_setup_resetfn_quirk(VFIOPCIDevice *vdev) break; } } + +/* + * The NVIDIA GPUDirect P2P Vendor capability allows the user to specify + * devices as a member of a clique. Devices within the same clique ID + * are capable of direct P2P. It's the user's responsibility that this + * is correct. The spec says that this may reside at any unused config + * offset, but reserves and recommends hypervisors place this at C8h. + * The spec also states that the hypervisor should place this capability + * at the end of the capability list, thus next is defined as 0h. + * + * +----------------+----------------+----------------+----------------+ + * | sig 7:0 ('P') | vndr len (8h) | next (0h) | cap id (9h) | + * +----------------+----------------+----------------+----------------+ + * | rsvd 15:7(0h),id 6:3,ver 2:0(0h)| sig 23:8 ('P2') | + * +---------------------------------+---------------------------------+ + * + * https://lists.gnu.org/archive/html/qemu-devel/2017-08/pdfUda5iEpgOS.pdf + */ +static void get_nv_gpudirect_clique_id(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) +{ + DeviceState *dev = DEVICE(obj); + Property *prop = opaque; + uint8_t *ptr = qdev_get_prop_ptr(dev, prop); + + visit_type_uint8(v, name, ptr, errp); +} + +static void set_nv_gpudirect_clique_id(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) +{ + DeviceState *dev = DEVICE(obj); + Property *prop = opaque; + uint8_t value, *ptr = qdev_get_prop_ptr(dev, prop); + Error *local_err = NULL; + + if (dev->realized) { + qdev_prop_set_after_realize(dev, name, errp); + return; + } + + visit_type_uint8(v, name, &value, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + if (value & ~0xF) { + error_setg(errp, "Property %s: valid range 0-15", name); + return; + } + + *ptr = value; +} + +const PropertyInfo qdev_prop_nv_gpudirect_clique = { + .name = "uint4", + .description = "NVIDIA GPUDirect Clique ID (0 - 15)", + .get = get_nv_gpudirect_clique_id, + .set = set_nv_gpudirect_clique_id, +}; + +static int vfio_add_nv_gpudirect_cap(VFIOPCIDevice *vdev, Error **errp) +{ + PCIDevice *pdev = &vdev->pdev; + int ret, pos = 0xC8; + + if (vdev->nv_gpudirect_clique == 0xFF) { + return 0; + } + + if (!vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID)) { + error_setg(errp, "NVIDIA GPUDirect Clique ID: invalid device vendor"); + return -EINVAL; + } + + if (pci_get_byte(pdev->config + PCI_CLASS_DEVICE + 1) != + PCI_BASE_CLASS_DISPLAY) { + error_setg(errp, "NVIDIA GPUDirect Clique ID: unsupported PCI class"); + return -EINVAL; + } + + ret = pci_add_capability(pdev, PCI_CAP_ID_VNDR, pos, 8, errp); + if (ret < 0) { + error_prepend(errp, "Failed to add NVIDIA GPUDirect cap: "); + return ret; + } + + memset(vdev->emulated_config_bits + pos, 0xFF, 8); + pos += PCI_CAP_FLAGS; + pci_set_byte(pdev->config + pos++, 8); + pci_set_byte(pdev->config + pos++, 'P'); + pci_set_byte(pdev->config + pos++, '2'); + pci_set_byte(pdev->config + pos++, 'P'); + pci_set_byte(pdev->config + pos++, vdev->nv_gpudirect_clique << 3); + pci_set_byte(pdev->config + pos, 0); + + return 0; +} + +int vfio_add_virt_caps(VFIOPCIDevice *vdev, Error **errp) +{ + int ret; + + ret = vfio_add_nv_gpudirect_cap(vdev, errp); + if (ret) { + return ret; + } + + return 0; +} diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 31e1edf447..9e86db7c3b 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -1826,15 +1826,23 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos, Error **errp) if (next) { ret = vfio_add_std_cap(vdev, next, errp); if (ret) { - goto out; + return ret; } } else { /* Begin the rebuild, use QEMU emulated list bits */ pdev->config[PCI_CAPABILITY_LIST] = 0; vdev->emulated_config_bits[PCI_CAPABILITY_LIST] = 0xff; vdev->emulated_config_bits[PCI_STATUS] |= PCI_STATUS_CAP_LIST; + + ret = vfio_add_virt_caps(vdev, errp); + if (ret) { + return ret; + } } + /* Scale down size, esp in case virt caps were added above */ + size = MIN(size, vfio_std_cap_max_size(pdev, pos)); + /* Use emulated next pointer to allow dropping caps */ pci_set_byte(vdev->emulated_config_bits + pos + PCI_CAP_LIST_NEXT, 0xff); @@ -1862,7 +1870,7 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos, Error **errp) ret = pci_add_capability(pdev, cap_id, pos, size, errp); break; } -out: + if (ret < 0) { error_prepend(errp, "failed to add PCI capability 0x%x[0x%x]@0x%x: ", @@ -2962,6 +2970,8 @@ static void vfio_instance_init(Object *obj) vdev->host.bus = ~0U; vdev->host.slot = ~0U; vdev->host.function = ~0U; + + vdev->nv_gpudirect_clique = 0xFF; } static Property vfio_pci_dev_properties[] = { @@ -2986,6 +2996,9 @@ static Property vfio_pci_dev_properties[] = { DEFINE_PROP_UINT32("x-pci-sub-device-id", VFIOPCIDevice, sub_device_id, PCI_ANY_ID), DEFINE_PROP_UINT32("x-igd-gms", VFIOPCIDevice, igd_gms, 0), + DEFINE_PROP_UNSIGNED_NODEFAULT("x-nv-gpudirect-clique", VFIOPCIDevice, + nv_gpudirect_clique, + qdev_prop_nv_gpudirect_clique, uint8_t), /* * TODO - support passed fds... is this necessary? * DEFINE_PROP_STRING("vfiofd", VFIOPCIDevice, vfiofd_name), diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h index a8366bb2a7..502a5755b9 100644 --- a/hw/vfio/pci.h +++ b/hw/vfio/pci.h @@ -135,6 +135,7 @@ typedef struct VFIOPCIDevice { int32_t bootindex; uint32_t igd_gms; uint8_t pm_cap; + uint8_t nv_gpudirect_clique; bool pci_aer; bool req_enabled; bool has_flr; @@ -160,6 +161,9 @@ void vfio_bar_quirk_setup(VFIOPCIDevice *vdev, int nr); void vfio_bar_quirk_exit(VFIOPCIDevice *vdev, int nr); void vfio_bar_quirk_finalize(VFIOPCIDevice *vdev, int nr); void vfio_setup_resetfn_quirk(VFIOPCIDevice *vdev); +int vfio_add_virt_caps(VFIOPCIDevice *vdev, Error **errp); + +extern const PropertyInfo qdev_prop_nv_gpudirect_clique; int vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp); diff --git a/include/qom/object.h b/include/qom/object.h index f3e5cff37a..e0d9824415 100644 --- a/include/qom/object.h +++ b/include/qom/object.h @@ -1214,6 +1214,17 @@ Object *object_get_root(void); Object *object_get_objects_root(void); /** + * object_get_internal_root: + * + * Get the container object that holds internally used object + * instances. Any object which is put into this container must not be + * user visible, and it will not be exposed in the QOM tree. + * + * Returns: the internal object container + */ +Object *object_get_internal_root(void); + +/** * object_get_canonical_path_component: * * Returns: The final component in the object's canonical path. The canonical diff --git a/include/sysemu/iothread.h b/include/sysemu/iothread.h index d2985b30ba..110329b2b4 100644 --- a/include/sysemu/iothread.h +++ b/include/sysemu/iothread.h @@ -46,4 +46,13 @@ AioContext *iothread_get_aio_context(IOThread *iothread); void iothread_stop_all(void); GMainContext *iothread_get_g_main_context(IOThread *iothread); +/* + * Helpers used to allocate iothreads for internal use. These + * iothreads will not be seen by monitor clients when query using + * "query-iothreads". + */ +IOThread *iothread_create(const char *id, Error **errp); +void iothread_stop(IOThread *iothread); +void iothread_destroy(IOThread *iothread); + #endif /* IOTHREAD_H */ diff --git a/iothread.c b/iothread.c index 59d0850988..27a4288578 100644 --- a/iothread.c +++ b/iothread.c @@ -71,8 +71,6 @@ static void *iothread_run(void *opaque) g_main_loop_unref(loop); g_main_context_pop_thread_default(iothread->worker_context); - g_main_context_unref(iothread->worker_context); - iothread->worker_context = NULL; } } @@ -80,13 +78,10 @@ static void *iothread_run(void *opaque) return NULL; } -static int iothread_stop(Object *object, void *opaque) +void iothread_stop(IOThread *iothread) { - IOThread *iothread; - - iothread = (IOThread *)object_dynamic_cast(object, TYPE_IOTHREAD); - if (!iothread || !iothread->ctx || iothread->stopping) { - return 0; + if (!iothread->ctx || iothread->stopping) { + return; } iothread->stopping = true; aio_notify(iothread->ctx); @@ -94,6 +89,17 @@ static int iothread_stop(Object *object, void *opaque) g_main_loop_quit(iothread->main_loop); } qemu_thread_join(&iothread->thread); +} + +static int iothread_stop_iter(Object *object, void *opaque) +{ + IOThread *iothread; + + iothread = (IOThread *)object_dynamic_cast(object, TYPE_IOTHREAD); + if (!iothread) { + return 0; + } + iothread_stop(iothread); return 0; } @@ -108,7 +114,11 @@ static void iothread_instance_finalize(Object *obj) { IOThread *iothread = IOTHREAD(obj); - iothread_stop(obj, NULL); + iothread_stop(iothread); + if (iothread->worker_context) { + g_main_context_unref(iothread->worker_context); + iothread->worker_context = NULL; + } qemu_cond_destroy(&iothread->init_done_cond); qemu_mutex_destroy(&iothread->init_done_lock); if (!iothread->ctx) { @@ -328,7 +338,7 @@ void iothread_stop_all(void) aio_context_release(ctx); } - object_child_foreach(container, iothread_stop, NULL); + object_child_foreach(container, iothread_stop_iter, NULL); } static gpointer iothread_g_main_context_init(gpointer opaque) @@ -354,3 +364,19 @@ GMainContext *iothread_get_g_main_context(IOThread *iothread) return iothread->worker_context; } + +IOThread *iothread_create(const char *id, Error **errp) +{ + Object *obj; + + obj = object_new_with_props(TYPE_IOTHREAD, + object_get_internal_root(), + id, errp, NULL); + + return IOTHREAD(obj); +} + +void iothread_destroy(IOThread *iothread) +{ + object_unparent(OBJECT(iothread)); +} diff --git a/qom/object.c b/qom/object.c index 3e18537e9b..6a7bd9257b 100644 --- a/qom/object.c +++ b/qom/object.c @@ -1370,6 +1370,17 @@ Object *object_get_objects_root(void) return container_get(object_get_root(), "/objects"); } +Object *object_get_internal_root(void) +{ + static Object *internal_root; + + if (!internal_root) { + internal_root = object_new("container"); + } + + return internal_root; +} + static void object_get_child_property(Object *obj, Visitor *v, const char *name, void *opaque, Error **errp) diff --git a/util/aio-posix.c b/util/aio-posix.c index 2d51239ec6..5946ac09f0 100644 --- a/util/aio-posix.c +++ b/util/aio-posix.c @@ -223,7 +223,14 @@ void aio_set_fd_handler(AioContext *ctx, return; } - g_source_remove_poll(&ctx->source, &node->pfd); + /* If the GSource is in the process of being destroyed then + * g_source_remove_poll() causes an assertion failure. Skip + * removal in that case, because glib cleans up its state during + * destruction anyway. + */ + if (!g_source_is_destroyed(&ctx->source)) { + g_source_remove_poll(&ctx->source, &node->pfd); + } /* If the lock is held, just mark the node as deleted */ if (qemu_lockcnt_count(&ctx->list_lock)) { |