From 9fca2b7d702f7ba216c571504922b2f8994537cc Mon Sep 17 00:00:00 2001 From: John Levon Date: Wed, 25 Jun 2025 20:29:53 +0100 Subject: vfio-user: add vfio-user class and container MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce basic plumbing for vfio-user with CONFIG_VFIO_USER. We introduce VFIOUserContainer in hw/vfio-user/container.c, which is a container type for the "IOMMU" type "vfio-iommu-user", and share some common container code from hw/vfio/container.c. Add hw/vfio-user/pci.c for instantiating VFIOUserPCIDevice objects, sharing some common code from hw/vfio/pci.c. Originally-by: John Johnson Signed-off-by: Elena Ufimtseva Signed-off-by: Jagannathan Raman Signed-off-by: John Levon Reviewed-by: Cédric Le Goater Link: https://lore.kernel.org/qemu-devel/20250625193012.2316242-2-john.levon@nutanix.com Signed-off-by: Cédric Le Goater --- hw/vfio-user/container.c | 208 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 208 insertions(+) create mode 100644 hw/vfio-user/container.c (limited to 'hw/vfio-user/container.c') diff --git a/hw/vfio-user/container.c b/hw/vfio-user/container.c new file mode 100644 index 0000000000..2367332177 --- /dev/null +++ b/hw/vfio-user/container.c @@ -0,0 +1,208 @@ +/* + * Container for vfio-user IOMMU type: rather than communicating with the kernel + * vfio driver, we communicate over a socket to a server using the vfio-user + * protocol. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include +#include +#include "qemu/osdep.h" + +#include "hw/vfio-user/container.h" +#include "hw/vfio/vfio-cpr.h" +#include "hw/vfio/vfio-device.h" +#include "hw/vfio/vfio-listener.h" +#include "qapi/error.h" + +static int vfio_user_dma_unmap(const VFIOContainerBase *bcontainer, + hwaddr iova, ram_addr_t size, + IOMMUTLBEntry *iotlb, bool unmap_all) +{ + return -ENOTSUP; +} + +static int vfio_user_dma_map(const VFIOContainerBase *bcontainer, hwaddr iova, + ram_addr_t size, void *vaddr, bool readonly, + MemoryRegion *mrp) +{ + return -ENOTSUP; +} + +static int +vfio_user_set_dirty_page_tracking(const VFIOContainerBase *bcontainer, + bool start, Error **errp) +{ + error_setg_errno(errp, ENOTSUP, "Not supported"); + return -ENOTSUP; +} + +static int vfio_user_query_dirty_bitmap(const VFIOContainerBase *bcontainer, + VFIOBitmap *vbmap, hwaddr iova, + hwaddr size, Error **errp) +{ + error_setg_errno(errp, ENOTSUP, "Not supported"); + return -ENOTSUP; +} + +static bool vfio_user_setup(VFIOContainerBase *bcontainer, Error **errp) +{ + error_setg_errno(errp, ENOTSUP, "Not supported"); + return -ENOTSUP; +} + +static VFIOUserContainer *vfio_user_create_container(Error **errp) +{ + VFIOUserContainer *container; + + container = VFIO_IOMMU_USER(object_new(TYPE_VFIO_IOMMU_USER)); + return container; +} + +/* + * Try to mirror vfio_container_connect() as much as possible. + */ +static VFIOUserContainer * +vfio_user_container_connect(AddressSpace *as, Error **errp) +{ + VFIOContainerBase *bcontainer; + VFIOUserContainer *container; + VFIOAddressSpace *space; + VFIOIOMMUClass *vioc; + + space = vfio_address_space_get(as); + + container = vfio_user_create_container(errp); + if (!container) { + goto put_space_exit; + } + + bcontainer = &container->bcontainer; + + if (!vfio_cpr_register_container(bcontainer, errp)) { + goto free_container_exit; + } + + vioc = VFIO_IOMMU_GET_CLASS(bcontainer); + assert(vioc->setup); + + if (!vioc->setup(bcontainer, errp)) { + goto unregister_container_exit; + } + + vfio_address_space_insert(space, bcontainer); + + if (!vfio_listener_register(bcontainer, errp)) { + goto listener_release_exit; + } + + bcontainer->initialized = true; + + return container; + +listener_release_exit: + vfio_listener_unregister(bcontainer); + if (vioc->release) { + vioc->release(bcontainer); + } + +unregister_container_exit: + vfio_cpr_unregister_container(bcontainer); + +free_container_exit: + object_unref(container); + +put_space_exit: + vfio_address_space_put(space); + + return NULL; +} + +static void vfio_user_container_disconnect(VFIOUserContainer *container) +{ + VFIOContainerBase *bcontainer = &container->bcontainer; + VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer); + + vfio_listener_unregister(bcontainer); + if (vioc->release) { + vioc->release(bcontainer); + } + + VFIOAddressSpace *space = bcontainer->space; + + vfio_cpr_unregister_container(bcontainer); + object_unref(container); + + vfio_address_space_put(space); +} + +static bool vfio_user_device_get(VFIOUserContainer *container, + VFIODevice *vbasedev, Error **errp) +{ + struct vfio_device_info info = { 0 }; + + vbasedev->fd = -1; + + vfio_device_prepare(vbasedev, &container->bcontainer, &info); + + return true; +} + +/* + * vfio_user_device_attach: attach a device to a new container. + */ +static bool vfio_user_device_attach(const char *name, VFIODevice *vbasedev, + AddressSpace *as, Error **errp) +{ + VFIOUserContainer *container; + + container = vfio_user_container_connect(as, errp); + if (container == NULL) { + error_prepend(errp, "failed to connect proxy"); + return false; + } + + return vfio_user_device_get(container, vbasedev, errp); +} + +static void vfio_user_device_detach(VFIODevice *vbasedev) +{ + VFIOUserContainer *container = container_of(vbasedev->bcontainer, + VFIOUserContainer, bcontainer); + + vfio_device_unprepare(vbasedev); + + vfio_user_container_disconnect(container); +} + +static int vfio_user_pci_hot_reset(VFIODevice *vbasedev, bool single) +{ + /* ->needs_reset is always false for vfio-user. */ + return 0; +} + +static void vfio_iommu_user_class_init(ObjectClass *klass, const void *data) +{ + VFIOIOMMUClass *vioc = VFIO_IOMMU_CLASS(klass); + + vioc->setup = vfio_user_setup; + vioc->dma_map = vfio_user_dma_map; + vioc->dma_unmap = vfio_user_dma_unmap; + vioc->attach_device = vfio_user_device_attach; + vioc->detach_device = vfio_user_device_detach; + vioc->set_dirty_page_tracking = vfio_user_set_dirty_page_tracking; + vioc->query_dirty_bitmap = vfio_user_query_dirty_bitmap; + vioc->pci_hot_reset = vfio_user_pci_hot_reset; +}; + +static const TypeInfo types[] = { + { + .name = TYPE_VFIO_IOMMU_USER, + .parent = TYPE_VFIO_IOMMU, + .instance_size = sizeof(VFIOUserContainer), + .class_init = vfio_iommu_user_class_init, + }, +}; + +DEFINE_TYPES(types) -- cgit 1.4.1 From 3bdb738b734c77f93f93f8119c8f6ba8a9c5947c Mon Sep 17 00:00:00 2001 From: John Levon Date: Wed, 25 Jun 2025 20:29:57 +0100 Subject: vfio-user: implement VFIO_USER_DEVICE_GET_INFO MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support for getting basic device information. Originally-by: John Johnson Signed-off-by: Elena Ufimtseva Signed-off-by: Jagannathan Raman Signed-off-by: John Levon Reviewed-by: Cédric Le Goater Link: https://lore.kernel.org/qemu-devel/20250625193012.2316242-6-john.levon@nutanix.com Signed-off-by: Cédric Le Goater --- hw/vfio-user/container.c | 8 ++++++- hw/vfio-user/device.c | 55 +++++++++++++++++++++++++++++++++++++++++++++++ hw/vfio-user/device.h | 20 +++++++++++++++++ hw/vfio-user/meson.build | 1 + hw/vfio-user/protocol.h | 12 +++++++++++ hw/vfio-user/proxy.c | 10 ++++----- hw/vfio-user/proxy.h | 7 ++++++ hw/vfio-user/trace-events | 1 + 8 files changed, 107 insertions(+), 7 deletions(-) create mode 100644 hw/vfio-user/device.c create mode 100644 hw/vfio-user/device.h (limited to 'hw/vfio-user/container.c') diff --git a/hw/vfio-user/container.c b/hw/vfio-user/container.c index 2367332177..f5bfd54316 100644 --- a/hw/vfio-user/container.c +++ b/hw/vfio-user/container.c @@ -11,6 +11,7 @@ #include "qemu/osdep.h" #include "hw/vfio-user/container.h" +#include "hw/vfio-user/device.h" #include "hw/vfio/vfio-cpr.h" #include "hw/vfio/vfio-device.h" #include "hw/vfio/vfio-listener.h" @@ -140,7 +141,12 @@ static void vfio_user_container_disconnect(VFIOUserContainer *container) static bool vfio_user_device_get(VFIOUserContainer *container, VFIODevice *vbasedev, Error **errp) { - struct vfio_device_info info = { 0 }; + struct vfio_device_info info = { .argsz = sizeof(info) }; + + + if (!vfio_user_get_device_info(vbasedev->proxy, &info, errp)) { + return false; + } vbasedev->fd = -1; diff --git a/hw/vfio-user/device.c b/hw/vfio-user/device.c new file mode 100644 index 0000000000..4212fefd44 --- /dev/null +++ b/hw/vfio-user/device.c @@ -0,0 +1,55 @@ +/* + * vfio protocol over a UNIX socket device handling. + * + * Copyright © 2018, 2021 Oracle and/or its affiliates. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qemu/error-report.h" + +#include "hw/vfio-user/device.h" +#include "hw/vfio-user/trace.h" + +/* + * These are to defend against a malign server trying + * to force us to run out of memory. + */ +#define VFIO_USER_MAX_REGIONS 100 +#define VFIO_USER_MAX_IRQS 50 + +bool vfio_user_get_device_info(VFIOUserProxy *proxy, + struct vfio_device_info *info, Error **errp) +{ + VFIOUserDeviceInfo msg; + uint32_t argsz = sizeof(msg) - sizeof(msg.hdr); + + memset(&msg, 0, sizeof(msg)); + vfio_user_request_msg(&msg.hdr, VFIO_USER_DEVICE_GET_INFO, sizeof(msg), 0); + msg.argsz = argsz; + + if (!vfio_user_send_wait(proxy, &msg.hdr, NULL, 0, errp)) { + return false; + } + + if (msg.hdr.flags & VFIO_USER_ERROR) { + error_setg_errno(errp, -msg.hdr.error_reply, + "VFIO_USER_DEVICE_GET_INFO failed"); + return false; + } + + trace_vfio_user_get_info(msg.num_regions, msg.num_irqs); + + memcpy(info, &msg.argsz, argsz); + + /* defend against a malicious server */ + if (info->num_regions > VFIO_USER_MAX_REGIONS || + info->num_irqs > VFIO_USER_MAX_IRQS) { + error_setg_errno(errp, EINVAL, "invalid reply"); + return false; + } + + return true; +} diff --git a/hw/vfio-user/device.h b/hw/vfio-user/device.h new file mode 100644 index 0000000000..ef3f71ee69 --- /dev/null +++ b/hw/vfio-user/device.h @@ -0,0 +1,20 @@ +#ifndef VFIO_USER_DEVICE_H +#define VFIO_USER_DEVICE_H + +/* + * vfio protocol over a UNIX socket device handling. + * + * Copyright © 2018, 2021 Oracle and/or its affiliates. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "linux/vfio.h" + +#include "hw/vfio-user/proxy.h" + +bool vfio_user_get_device_info(VFIOUserProxy *proxy, + struct vfio_device_info *info, Error **errp); + +#endif /* VFIO_USER_DEVICE_H */ diff --git a/hw/vfio-user/meson.build b/hw/vfio-user/meson.build index 9e85a8ea51..2ed0ae5b1d 100644 --- a/hw/vfio-user/meson.build +++ b/hw/vfio-user/meson.build @@ -3,6 +3,7 @@ vfio_user_ss = ss.source_set() vfio_user_ss.add(files( 'container.c', + 'device.c', 'pci.c', 'proxy.c', )) diff --git a/hw/vfio-user/protocol.h b/hw/vfio-user/protocol.h index 2d52d0fb10..e0bba68739 100644 --- a/hw/vfio-user/protocol.h +++ b/hw/vfio-user/protocol.h @@ -112,4 +112,16 @@ typedef struct { */ #define VFIO_USER_DEF_MAX_BITMAP (256 * 1024 * 1024) +/* + * VFIO_USER_DEVICE_GET_INFO + * imported from struct vfio_device_info + */ +typedef struct { + VFIOUserHdr hdr; + uint32_t argsz; + uint32_t flags; + uint32_t num_regions; + uint32_t num_irqs; +} VFIOUserDeviceInfo; + #endif /* VFIO_USER_PROTOCOL_H */ diff --git a/hw/vfio-user/proxy.c b/hw/vfio-user/proxy.c index 874142e9e5..aed7b22e2a 100644 --- a/hw/vfio-user/proxy.c +++ b/hw/vfio-user/proxy.c @@ -35,8 +35,6 @@ static void vfio_user_send(void *opaque); static void vfio_user_cb(void *opaque); static void vfio_user_request(void *opaque); -static void vfio_user_request_msg(VFIOUserHdr *hdr, uint16_t cmd, - uint32_t size, uint32_t flags); static inline void vfio_user_set_error(VFIOUserHdr *hdr, uint32_t err) { @@ -626,8 +624,8 @@ static bool vfio_user_send_queued(VFIOUserProxy *proxy, VFIOUserMsg *msg, * * In either case, the caller must free @hdr and @fds if needed. */ -static bool vfio_user_send_wait(VFIOUserProxy *proxy, VFIOUserHdr *hdr, - VFIOUserFDs *fds, int rsize, Error **errp) +bool vfio_user_send_wait(VFIOUserProxy *proxy, VFIOUserHdr *hdr, + VFIOUserFDs *fds, int rsize, Error **errp) { VFIOUserMsg *msg; bool ok = false; @@ -802,8 +800,8 @@ void vfio_user_disconnect(VFIOUserProxy *proxy) g_free(proxy); } -static void vfio_user_request_msg(VFIOUserHdr *hdr, uint16_t cmd, - uint32_t size, uint32_t flags) +void vfio_user_request_msg(VFIOUserHdr *hdr, uint16_t cmd, + uint32_t size, uint32_t flags) { static uint16_t next_id; diff --git a/hw/vfio-user/proxy.h b/hw/vfio-user/proxy.h index 5bc890a0f5..837b02a8c4 100644 --- a/hw/vfio-user/proxy.h +++ b/hw/vfio-user/proxy.h @@ -12,7 +12,9 @@ #include "io/channel.h" #include "io/channel-socket.h" +#include "qemu/queue.h" #include "qemu/sockets.h" +#include "qemu/thread.h" #include "hw/vfio-user/protocol.h" typedef struct { @@ -96,4 +98,9 @@ void vfio_user_set_handler(VFIODevice *vbasedev, void *reqarg); bool vfio_user_validate_version(VFIOUserProxy *proxy, Error **errp); +void vfio_user_request_msg(VFIOUserHdr *hdr, uint16_t cmd, + uint32_t size, uint32_t flags); +bool vfio_user_send_wait(VFIOUserProxy *proxy, VFIOUserHdr *hdr, + VFIOUserFDs *fds, int rsize, Error **errp); + #endif /* VFIO_USER_PROXY_H */ diff --git a/hw/vfio-user/trace-events b/hw/vfio-user/trace-events index a965c7b1f2..b7312d6d59 100644 --- a/hw/vfio-user/trace-events +++ b/hw/vfio-user/trace-events @@ -8,3 +8,4 @@ vfio_user_recv_read(uint16_t id, int read) " id 0x%x read 0x%x" vfio_user_recv_request(uint16_t cmd) " command 0x%x" vfio_user_send_write(uint16_t id, int wrote) " id 0x%x wrote 0x%x" vfio_user_version(uint16_t major, uint16_t minor, const char *caps) " major %d minor %d caps: %s" +vfio_user_get_info(uint32_t nregions, uint32_t nirqs) " #regions %d #irqs %d" -- cgit 1.4.1 From 52ce9c35f8e364e5823fc13f23929eb597bb69ac Mon Sep 17 00:00:00 2001 From: John Levon Date: Wed, 25 Jun 2025 20:30:03 +0100 Subject: vfio-user: set up container access to the proxy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The user container will shortly need access to the underlying vfio-user proxy; set this up. Originally-by: John Johnson Signed-off-by: Elena Ufimtseva Signed-off-by: Jagannathan Raman Signed-off-by: John Levon Reviewed-by: Cédric Le Goater Link: https://lore.kernel.org/qemu-devel/20250625193012.2316242-12-john.levon@nutanix.com Signed-off-by: Cédric Le Goater --- hw/vfio-user/container.c | 43 ++++++++++++++++++++++++++++++++++--------- hw/vfio-user/container.h | 2 ++ 2 files changed, 36 insertions(+), 9 deletions(-) (limited to 'hw/vfio-user/container.c') diff --git a/hw/vfio-user/container.c b/hw/vfio-user/container.c index f5bfd54316..b4a5a840b0 100644 --- a/hw/vfio-user/container.c +++ b/hw/vfio-user/container.c @@ -49,15 +49,28 @@ static int vfio_user_query_dirty_bitmap(const VFIOContainerBase *bcontainer, static bool vfio_user_setup(VFIOContainerBase *bcontainer, Error **errp) { - error_setg_errno(errp, ENOTSUP, "Not supported"); - return -ENOTSUP; + VFIOUserContainer *container = container_of(bcontainer, VFIOUserContainer, + bcontainer); + + assert(container->proxy->dma_pgsizes != 0); + bcontainer->pgsizes = container->proxy->dma_pgsizes; + bcontainer->dma_max_mappings = container->proxy->max_dma; + + /* No live migration support yet. */ + bcontainer->dirty_pages_supported = false; + bcontainer->max_dirty_bitmap_size = container->proxy->max_bitmap; + bcontainer->dirty_pgsizes = container->proxy->migr_pgsize; + + return true; } -static VFIOUserContainer *vfio_user_create_container(Error **errp) +static VFIOUserContainer *vfio_user_create_container(VFIODevice *vbasedev, + Error **errp) { VFIOUserContainer *container; container = VFIO_IOMMU_USER(object_new(TYPE_VFIO_IOMMU_USER)); + container->proxy = vbasedev->proxy; return container; } @@ -65,16 +78,18 @@ static VFIOUserContainer *vfio_user_create_container(Error **errp) * Try to mirror vfio_container_connect() as much as possible. */ static VFIOUserContainer * -vfio_user_container_connect(AddressSpace *as, Error **errp) +vfio_user_container_connect(AddressSpace *as, VFIODevice *vbasedev, + Error **errp) { VFIOContainerBase *bcontainer; VFIOUserContainer *container; VFIOAddressSpace *space; VFIOIOMMUClass *vioc; + int ret; space = vfio_address_space_get(as); - container = vfio_user_create_container(errp); + container = vfio_user_create_container(vbasedev, errp); if (!container) { goto put_space_exit; } @@ -85,11 +100,17 @@ vfio_user_container_connect(AddressSpace *as, Error **errp) goto free_container_exit; } + ret = ram_block_uncoordinated_discard_disable(true); + if (ret) { + error_setg_errno(errp, -ret, "Cannot set discarding of RAM broken"); + goto unregister_container_exit; + } + vioc = VFIO_IOMMU_GET_CLASS(bcontainer); assert(vioc->setup); if (!vioc->setup(bcontainer, errp)) { - goto unregister_container_exit; + goto enable_discards_exit; } vfio_address_space_insert(space, bcontainer); @@ -108,6 +129,9 @@ listener_release_exit: vioc->release(bcontainer); } +enable_discards_exit: + ram_block_uncoordinated_discard_disable(false); + unregister_container_exit: vfio_cpr_unregister_container(bcontainer); @@ -124,14 +148,15 @@ static void vfio_user_container_disconnect(VFIOUserContainer *container) { VFIOContainerBase *bcontainer = &container->bcontainer; VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer); + VFIOAddressSpace *space = bcontainer->space; + + ram_block_uncoordinated_discard_disable(false); vfio_listener_unregister(bcontainer); if (vioc->release) { vioc->release(bcontainer); } - VFIOAddressSpace *space = bcontainer->space; - vfio_cpr_unregister_container(bcontainer); object_unref(container); @@ -163,7 +188,7 @@ static bool vfio_user_device_attach(const char *name, VFIODevice *vbasedev, { VFIOUserContainer *container; - container = vfio_user_container_connect(as, errp); + container = vfio_user_container_connect(as, vbasedev, errp); if (container == NULL) { error_prepend(errp, "failed to connect proxy"); return false; diff --git a/hw/vfio-user/container.h b/hw/vfio-user/container.h index e4a46d2c1b..2bb1fa1343 100644 --- a/hw/vfio-user/container.h +++ b/hw/vfio-user/container.h @@ -10,10 +10,12 @@ #include "qemu/osdep.h" #include "hw/vfio/vfio-container-base.h" +#include "hw/vfio-user/proxy.h" /* MMU container sub-class for vfio-user. */ typedef struct VFIOUserContainer { VFIOContainerBase bcontainer; + VFIOUserProxy *proxy; } VFIOUserContainer; OBJECT_DECLARE_SIMPLE_TYPE(VFIOUserContainer, VFIO_IOMMU_USER); -- cgit 1.4.1 From 18e899e63dd9d05e567c081023ad7fc5b2c5dc9a Mon Sep 17 00:00:00 2001 From: John Levon Date: Wed, 25 Jun 2025 20:30:05 +0100 Subject: vfio-user: implement VFIO_USER_DMA_MAP/UNMAP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the vfio-user container gets mapping updates, share them with the vfio-user by sending a message; this can include the region fd, allowing the server to directly mmap() the region as needed. For performance, we only wait for the message responses when we're doing with a series of updates via the listener_commit() callback. Originally-by: John Johnson Signed-off-by: Jagannathan Raman Signed-off-by: Elena Ufimtseva Signed-off-by: John Levon Reviewed-by: Cédric Le Goater Link: https://lore.kernel.org/qemu-devel/20250625193012.2316242-14-john.levon@nutanix.com Signed-off-by: Cédric Le Goater --- hw/vfio-user/container.c | 135 +++++++++++++++++++++++++++++++++++++++++++++- hw/vfio-user/protocol.h | 32 +++++++++++ hw/vfio-user/proxy.c | 84 ++++++++++++++++++++++++++++- hw/vfio-user/proxy.h | 6 +++ hw/vfio-user/trace-events | 4 ++ 5 files changed, 257 insertions(+), 4 deletions(-) (limited to 'hw/vfio-user/container.c') diff --git a/hw/vfio-user/container.c b/hw/vfio-user/container.c index b4a5a840b0..3133fef177 100644 --- a/hw/vfio-user/container.c +++ b/hw/vfio-user/container.c @@ -12,23 +12,152 @@ #include "hw/vfio-user/container.h" #include "hw/vfio-user/device.h" +#include "hw/vfio-user/trace.h" #include "hw/vfio/vfio-cpr.h" #include "hw/vfio/vfio-device.h" #include "hw/vfio/vfio-listener.h" #include "qapi/error.h" +/* + * When DMA space is the physical address space, the region add/del listeners + * will fire during memory update transactions. These depend on BQL being held, + * so do any resulting map/demap ops async while keeping BQL. + */ +static void vfio_user_listener_begin(VFIOContainerBase *bcontainer) +{ + VFIOUserContainer *container = container_of(bcontainer, VFIOUserContainer, + bcontainer); + + container->proxy->async_ops = true; +} + +static void vfio_user_listener_commit(VFIOContainerBase *bcontainer) +{ + VFIOUserContainer *container = container_of(bcontainer, VFIOUserContainer, + bcontainer); + + /* wait here for any async requests sent during the transaction */ + container->proxy->async_ops = false; + vfio_user_wait_reqs(container->proxy); +} + static int vfio_user_dma_unmap(const VFIOContainerBase *bcontainer, hwaddr iova, ram_addr_t size, IOMMUTLBEntry *iotlb, bool unmap_all) { - return -ENOTSUP; + VFIOUserContainer *container = container_of(bcontainer, VFIOUserContainer, + bcontainer); + Error *local_err = NULL; + int ret = 0; + + VFIOUserDMAUnmap *msgp = g_malloc(sizeof(*msgp)); + + vfio_user_request_msg(&msgp->hdr, VFIO_USER_DMA_UNMAP, sizeof(*msgp), 0); + msgp->argsz = sizeof(struct vfio_iommu_type1_dma_unmap); + msgp->flags = unmap_all ? VFIO_DMA_UNMAP_FLAG_ALL : 0; + msgp->iova = iova; + msgp->size = size; + trace_vfio_user_dma_unmap(msgp->iova, msgp->size, msgp->flags, + container->proxy->async_ops); + + if (container->proxy->async_ops) { + if (!vfio_user_send_nowait(container->proxy, &msgp->hdr, NULL, + 0, &local_err)) { + error_report_err(local_err); + ret = -EFAULT; + } else { + ret = 0; + } + } else { + if (!vfio_user_send_wait(container->proxy, &msgp->hdr, NULL, + 0, &local_err)) { + error_report_err(local_err); + ret = -EFAULT; + } + + if (msgp->hdr.flags & VFIO_USER_ERROR) { + ret = -msgp->hdr.error_reply; + } + + g_free(msgp); + } + + return ret; } static int vfio_user_dma_map(const VFIOContainerBase *bcontainer, hwaddr iova, ram_addr_t size, void *vaddr, bool readonly, MemoryRegion *mrp) { - return -ENOTSUP; + VFIOUserContainer *container = container_of(bcontainer, VFIOUserContainer, + bcontainer); + int fd = memory_region_get_fd(mrp); + Error *local_err = NULL; + int ret; + + VFIOUserFDs *fds = NULL; + VFIOUserDMAMap *msgp = g_malloc0(sizeof(*msgp)); + + vfio_user_request_msg(&msgp->hdr, VFIO_USER_DMA_MAP, sizeof(*msgp), 0); + msgp->argsz = sizeof(struct vfio_iommu_type1_dma_map); + msgp->flags = VFIO_DMA_MAP_FLAG_READ; + msgp->offset = 0; + msgp->iova = iova; + msgp->size = size; + + /* + * vaddr enters as a QEMU process address; make it either a file offset + * for mapped areas or leave as 0. + */ + if (fd != -1) { + msgp->offset = qemu_ram_block_host_offset(mrp->ram_block, vaddr); + } + + if (!readonly) { + msgp->flags |= VFIO_DMA_MAP_FLAG_WRITE; + } + + trace_vfio_user_dma_map(msgp->iova, msgp->size, msgp->offset, msgp->flags, + container->proxy->async_ops); + + /* + * The async_ops case sends without blocking. They're later waited for in + * vfio_send_wait_reqs. + */ + if (container->proxy->async_ops) { + /* can't use auto variable since we don't block */ + if (fd != -1) { + fds = vfio_user_getfds(1); + fds->send_fds = 1; + fds->fds[0] = fd; + } + + if (!vfio_user_send_nowait(container->proxy, &msgp->hdr, fds, + 0, &local_err)) { + error_report_err(local_err); + ret = -EFAULT; + } else { + ret = 0; + } + } else { + VFIOUserFDs local_fds = { 1, 0, &fd }; + + fds = fd != -1 ? &local_fds : NULL; + + if (!vfio_user_send_wait(container->proxy, &msgp->hdr, fds, + 0, &local_err)) { + error_report_err(local_err); + ret = -EFAULT; + } + + if (msgp->hdr.flags & VFIO_USER_ERROR) { + ret = -msgp->hdr.error_reply; + } + + g_free(msgp); + } + + return ret; } static int @@ -218,6 +347,8 @@ static void vfio_iommu_user_class_init(ObjectClass *klass, const void *data) VFIOIOMMUClass *vioc = VFIO_IOMMU_CLASS(klass); vioc->setup = vfio_user_setup; + vioc->listener_begin = vfio_user_listener_begin, + vioc->listener_commit = vfio_user_listener_commit, vioc->dma_map = vfio_user_dma_map; vioc->dma_unmap = vfio_user_dma_unmap; vioc->attach_device = vfio_user_device_attach; diff --git a/hw/vfio-user/protocol.h b/hw/vfio-user/protocol.h index 48144b2c33..524f3d633a 100644 --- a/hw/vfio-user/protocol.h +++ b/hw/vfio-user/protocol.h @@ -112,6 +112,31 @@ typedef struct { */ #define VFIO_USER_DEF_MAX_BITMAP (256 * 1024 * 1024) +/* + * VFIO_USER_DMA_MAP + * imported from struct vfio_iommu_type1_dma_map + */ +typedef struct { + VFIOUserHdr hdr; + uint32_t argsz; + uint32_t flags; + uint64_t offset; /* FD offset */ + uint64_t iova; + uint64_t size; +} VFIOUserDMAMap; + +/* + * VFIO_USER_DMA_UNMAP + * imported from struct vfio_iommu_type1_dma_unmap + */ +typedef struct { + VFIOUserHdr hdr; + uint32_t argsz; + uint32_t flags; + uint64_t iova; + uint64_t size; +} VFIOUserDMAUnmap; + /* * VFIO_USER_DEVICE_GET_INFO * imported from struct vfio_device_info @@ -175,4 +200,11 @@ typedef struct { char data[]; } VFIOUserRegionRW; +/*imported from struct vfio_bitmap */ +typedef struct { + uint64_t pgsize; + uint64_t size; + char data[]; +} VFIOUserBitmap; + #endif /* VFIO_USER_PROTOCOL_H */ diff --git a/hw/vfio-user/proxy.c b/hw/vfio-user/proxy.c index aed7b22e2a..c8ae8a59b4 100644 --- a/hw/vfio-user/proxy.c +++ b/hw/vfio-user/proxy.c @@ -27,7 +27,6 @@ static IOThread *vfio_user_iothread; static void vfio_user_shutdown(VFIOUserProxy *proxy); static VFIOUserMsg *vfio_user_getmsg(VFIOUserProxy *proxy, VFIOUserHdr *hdr, VFIOUserFDs *fds); -static VFIOUserFDs *vfio_user_getfds(int numfds); static void vfio_user_recycle(VFIOUserProxy *proxy, VFIOUserMsg *msg); static void vfio_user_recv(void *opaque); @@ -132,7 +131,7 @@ static void vfio_user_recycle(VFIOUserProxy *proxy, VFIOUserMsg *msg) QTAILQ_INSERT_HEAD(&proxy->free, msg, next); } -static VFIOUserFDs *vfio_user_getfds(int numfds) +VFIOUserFDs *vfio_user_getfds(int numfds) { VFIOUserFDs *fds = g_malloc0(sizeof(*fds) + (numfds * sizeof(int))); @@ -618,6 +617,43 @@ static bool vfio_user_send_queued(VFIOUserProxy *proxy, VFIOUserMsg *msg, return true; } +/* + * nowait send - vfio_wait_reqs() can wait for it later + * + * Returns false if we did not successfully receive a reply message, in which + * case @errp will be populated. + * + * In either case, ownership of @hdr and @fds is taken, and the caller must + * *not* free them itself. + */ +bool vfio_user_send_nowait(VFIOUserProxy *proxy, VFIOUserHdr *hdr, + VFIOUserFDs *fds, int rsize, Error **errp) +{ + VFIOUserMsg *msg; + + QEMU_LOCK_GUARD(&proxy->lock); + + msg = vfio_user_getmsg(proxy, hdr, fds); + msg->id = hdr->id; + msg->rsize = rsize ? rsize : hdr->size; + msg->type = VFIO_MSG_NOWAIT; + + if (hdr->flags & VFIO_USER_NO_REPLY) { + error_setg_errno(errp, EINVAL, "%s on NO_REPLY message", __func__); + vfio_user_recycle(proxy, msg); + return false; + } + + if (!vfio_user_send_queued(proxy, msg, errp)) { + vfio_user_recycle(proxy, msg); + return false; + } + + proxy->last_nowait = msg; + + return true; +} + /* * Returns false if we did not successfully receive a reply message, in which * case @errp will be populated. @@ -666,6 +702,50 @@ bool vfio_user_send_wait(VFIOUserProxy *proxy, VFIOUserHdr *hdr, return ok; } +void vfio_user_wait_reqs(VFIOUserProxy *proxy) +{ + VFIOUserMsg *msg; + + /* + * Any DMA map/unmap requests sent in the middle + * of a memory region transaction were sent nowait. + * Wait for them here. + */ + qemu_mutex_lock(&proxy->lock); + if (proxy->last_nowait != NULL) { + /* + * Change type to WAIT to wait for reply + */ + msg = proxy->last_nowait; + msg->type = VFIO_MSG_WAIT; + proxy->last_nowait = NULL; + while (!msg->complete) { + if (!qemu_cond_timedwait(&msg->cv, &proxy->lock, wait_time)) { + VFIOUserMsgQ *list; + + list = msg->pending ? &proxy->pending : &proxy->outgoing; + QTAILQ_REMOVE(list, msg, next); + error_printf("vfio_wait_reqs - timed out\n"); + break; + } + } + + if (msg->hdr->flags & VFIO_USER_ERROR) { + error_printf("vfio_user_wait_reqs - error reply on async "); + error_printf("request: command %x error %s\n", msg->hdr->command, + strerror(msg->hdr->error_reply)); + } + + /* + * Change type back to NOWAIT to free + */ + msg->type = VFIO_MSG_NOWAIT; + vfio_user_recycle(proxy, msg); + } + + qemu_mutex_unlock(&proxy->lock); +} + static QLIST_HEAD(, VFIOUserProxy) vfio_user_sockets = QLIST_HEAD_INITIALIZER(vfio_user_sockets); diff --git a/hw/vfio-user/proxy.h b/hw/vfio-user/proxy.h index ba1c33aba8..e2fc83ca3b 100644 --- a/hw/vfio-user/proxy.h +++ b/hw/vfio-user/proxy.h @@ -70,6 +70,7 @@ typedef struct VFIOUserProxy { QemuCond close_cv; AioContext *ctx; QEMUBH *req_bh; + bool async_ops; /* * above only changed when BQL is held @@ -99,9 +100,14 @@ void vfio_user_set_handler(VFIODevice *vbasedev, void *reqarg); bool vfio_user_validate_version(VFIOUserProxy *proxy, Error **errp); +VFIOUserFDs *vfio_user_getfds(int numfds); + void vfio_user_request_msg(VFIOUserHdr *hdr, uint16_t cmd, uint32_t size, uint32_t flags); +void vfio_user_wait_reqs(VFIOUserProxy *proxy); bool vfio_user_send_wait(VFIOUserProxy *proxy, VFIOUserHdr *hdr, VFIOUserFDs *fds, int rsize, Error **errp); +bool vfio_user_send_nowait(VFIOUserProxy *proxy, VFIOUserHdr *hdr, + VFIOUserFDs *fds, int rsize, Error **errp); #endif /* VFIO_USER_PROXY_H */ diff --git a/hw/vfio-user/trace-events b/hw/vfio-user/trace-events index aa8f3c3d4d..44dde020b3 100644 --- a/hw/vfio-user/trace-events +++ b/hw/vfio-user/trace-events @@ -13,3 +13,7 @@ vfio_user_get_region_info(uint32_t index, uint32_t flags, uint64_t size) " index vfio_user_region_rw(uint32_t region, uint64_t off, uint32_t count) " region %d offset 0x%"PRIx64" count %d" vfio_user_get_irq_info(uint32_t index, uint32_t flags, uint32_t count) " index %d flags 0x%x count %d" vfio_user_set_irqs(uint32_t index, uint32_t start, uint32_t count, uint32_t flags) " index %d start %d count %d flags 0x%x" + +# container.c +vfio_user_dma_map(uint64_t iova, uint64_t size, uint64_t off, uint32_t flags, bool async_ops) " iova 0x%"PRIx64" size 0x%"PRIx64" off 0x%"PRIx64" flags 0x%x async_ops %d" +vfio_user_dma_unmap(uint64_t iova, uint64_t size, uint32_t flags, bool async_ops) " iova 0x%"PRIx64" size 0x%"PRIx64" flags 0x%x async_ops %d" -- cgit 1.4.1