From 751522275a800c0d1c215b386a17f537bc76cd37 Mon Sep 17 00:00:00 2001 From: Yuval Shaia Date: Sun, 5 Aug 2018 18:35:06 +0300 Subject: hw/rdma: Make distinction between device init and start modes There are certain operations that are well considered as part of device configuration while others are needed only when "start" command is triggered by the guest driver. An example of device initialization step is msix_init and example of "device start" stage is the creation of a CQ completion handler thread. Driver expects such distinction - implement it. Signed-off-by: Yuval Shaia Reviewed-by: Marcel Apfelbaum Message-Id: <20180805153518.2983-2-yuval.shaia@oracle.com> Signed-off-by: Marcel Apfelbaum --- hw/rdma/rdma_backend.c | 96 ++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 77 insertions(+), 19 deletions(-) (limited to 'hw/rdma/rdma_backend.c') diff --git a/hw/rdma/rdma_backend.c b/hw/rdma/rdma_backend.c index e9ced6f9ef..647e16399f 100644 --- a/hw/rdma/rdma_backend.c +++ b/hw/rdma/rdma_backend.c @@ -35,6 +35,7 @@ #define VENDOR_ERR_MR_SMALL 0x208 #define THR_NAME_LEN 16 +#define THR_POLL_TO 5000 typedef struct BackendCtx { uint64_t req_id; @@ -91,35 +92,82 @@ static void *comp_handler_thread(void *arg) int rc; struct ibv_cq *ev_cq; void *ev_ctx; + int flags; + GPollFD pfds[1]; + + /* Change to non-blocking mode */ + flags = fcntl(backend_dev->channel->fd, F_GETFL); + rc = fcntl(backend_dev->channel->fd, F_SETFL, flags | O_NONBLOCK); + if (rc < 0) { + pr_dbg("Fail to change to non-blocking mode\n"); + return NULL; + } pr_dbg("Starting\n"); + pfds[0].fd = backend_dev->channel->fd; + pfds[0].events = G_IO_IN | G_IO_HUP | G_IO_ERR; + + backend_dev->comp_thread.is_running = true; + while (backend_dev->comp_thread.run) { - pr_dbg("Waiting for completion on channel %p\n", backend_dev->channel); - rc = ibv_get_cq_event(backend_dev->channel, &ev_cq, &ev_ctx); - pr_dbg("ibv_get_cq_event=%d\n", rc); - if (unlikely(rc)) { - pr_dbg("---> ibv_get_cq_event (%d)\n", rc); - continue; - } + do { + rc = qemu_poll_ns(pfds, 1, THR_POLL_TO * (int64_t)SCALE_MS); + } while (!rc && backend_dev->comp_thread.run); + + if (backend_dev->comp_thread.run) { + pr_dbg("Waiting for completion on channel %p\n", backend_dev->channel); + rc = ibv_get_cq_event(backend_dev->channel, &ev_cq, &ev_ctx); + pr_dbg("ibv_get_cq_event=%d\n", rc); + if (unlikely(rc)) { + pr_dbg("---> ibv_get_cq_event (%d)\n", rc); + continue; + } - rc = ibv_req_notify_cq(ev_cq, 0); - if (unlikely(rc)) { - pr_dbg("Error %d from ibv_req_notify_cq\n", rc); - } + rc = ibv_req_notify_cq(ev_cq, 0); + if (unlikely(rc)) { + pr_dbg("Error %d from ibv_req_notify_cq\n", rc); + } - poll_cq(backend_dev->rdma_dev_res, ev_cq); + poll_cq(backend_dev->rdma_dev_res, ev_cq); - ibv_ack_cq_events(ev_cq, 1); + ibv_ack_cq_events(ev_cq, 1); + } } pr_dbg("Going down\n"); /* TODO: Post cqe for all remaining buffs that were posted */ + backend_dev->comp_thread.is_running = false; + + qemu_thread_exit(0); + return NULL; } +static void stop_comp_thread(RdmaBackendDev *backend_dev) +{ + backend_dev->comp_thread.run = false; + while (backend_dev->comp_thread.is_running) { + pr_dbg("Waiting for thread to complete\n"); + sleep(THR_POLL_TO / SCALE_US / 2); + } +} + +static void start_comp_thread(RdmaBackendDev *backend_dev) +{ + char thread_name[THR_NAME_LEN] = {0}; + + stop_comp_thread(backend_dev); + + snprintf(thread_name, sizeof(thread_name), "rdma_comp_%s", + ibv_get_device_name(backend_dev->ib_dev)); + backend_dev->comp_thread.run = true; + qemu_thread_create(&backend_dev->comp_thread.thread, thread_name, + comp_handler_thread, backend_dev, QEMU_THREAD_DETACHED); +} + void rdma_backend_register_comp_handler(void (*handler)(int status, unsigned int vendor_err, void *ctx)) { @@ -706,7 +754,6 @@ int rdma_backend_init(RdmaBackendDev *backend_dev, int i; int ret = 0; int num_ibv_devices; - char thread_name[THR_NAME_LEN] = {0}; struct ibv_device **dev_list; struct ibv_port_attr port_attr; @@ -800,11 +847,8 @@ int rdma_backend_init(RdmaBackendDev *backend_dev, pr_dbg("interface_id=0x%" PRIx64 "\n", be64_to_cpu(backend_dev->gid.global.interface_id)); - snprintf(thread_name, sizeof(thread_name), "rdma_comp_%s", - ibv_get_device_name(backend_dev->ib_dev)); - backend_dev->comp_thread.run = true; - qemu_thread_create(&backend_dev->comp_thread.thread, thread_name, - comp_handler_thread, backend_dev, QEMU_THREAD_DETACHED); + backend_dev->comp_thread.run = false; + backend_dev->comp_thread.is_running = false; ah_cache_init(); @@ -823,8 +867,22 @@ out: return ret; } + +void rdma_backend_start(RdmaBackendDev *backend_dev) +{ + pr_dbg("Starting rdma_backend\n"); + start_comp_thread(backend_dev); +} + +void rdma_backend_stop(RdmaBackendDev *backend_dev) +{ + pr_dbg("Stopping rdma_backend\n"); + stop_comp_thread(backend_dev); +} + void rdma_backend_fini(RdmaBackendDev *backend_dev) { + rdma_backend_stop(backend_dev); g_hash_table_destroy(ah_hash); ibv_destroy_comp_channel(backend_dev->channel); ibv_close_device(backend_dev->context); -- cgit 1.4.1 From 7f99daadbddfee16f3ee77c413bd4a34366a3dad Mon Sep 17 00:00:00 2001 From: Yuval Shaia Date: Sun, 5 Aug 2018 18:35:11 +0300 Subject: hw/rdma: Delete useless structure RdmaRmUserMR The structure RdmaRmUserMR has no benefits, remove it an move all its fields to struct RdmaRmMR. Reviewed-by: Marcel Apfelbaum Signed-off-by: Yuval Shaia Message-Id: <20180805153518.2983-7-yuval.shaia@oracle.com> Signed-off-by: Marcel Apfelbaum --- hw/rdma/rdma_backend.c | 3 +-- hw/rdma/rdma_rm.c | 16 ++++++++-------- hw/rdma/rdma_rm_defs.h | 10 +++------- 3 files changed, 12 insertions(+), 17 deletions(-) (limited to 'hw/rdma/rdma_backend.c') diff --git a/hw/rdma/rdma_backend.c b/hw/rdma/rdma_backend.c index 647e16399f..52981d652d 100644 --- a/hw/rdma/rdma_backend.c +++ b/hw/rdma/rdma_backend.c @@ -271,8 +271,7 @@ static int build_host_sge_array(RdmaDeviceResources *rdma_dev_res, return VENDOR_ERR_INVLKEY | ssge[ssge_idx].lkey; } - dsge->addr = (uintptr_t)mr->user_mr.host_virt + ssge[ssge_idx].addr - - mr->user_mr.guest_start; + dsge->addr = (uintptr_t)mr->virt + ssge[ssge_idx].addr - mr->start; dsge->length = ssge[ssge_idx].length; dsge->lkey = rdma_backend_mr_lkey(&mr->backend_mr); diff --git a/hw/rdma/rdma_rm.c b/hw/rdma/rdma_rm.c index 415da15efe..7403d24674 100644 --- a/hw/rdma/rdma_rm.c +++ b/hw/rdma/rdma_rm.c @@ -165,15 +165,15 @@ int rdma_rm_alloc_mr(RdmaDeviceResources *dev_res, uint32_t pd_handle, length = TARGET_PAGE_SIZE; addr = g_malloc(length); } else { - mr->user_mr.host_virt = host_virt; - pr_dbg("host_virt=0x%p\n", mr->user_mr.host_virt); - mr->user_mr.length = guest_length; + mr->virt = host_virt; + pr_dbg("host_virt=0x%p\n", mr->virt); + mr->length = guest_length; pr_dbg("length=%zu\n", guest_length); - mr->user_mr.guest_start = guest_start; - pr_dbg("guest_start=0x%" PRIx64 "\n", mr->user_mr.guest_start); + mr->start = guest_start; + pr_dbg("guest_start=0x%" PRIx64 "\n", mr->start); - length = mr->user_mr.length; - addr = mr->user_mr.host_virt; + length = mr->length; + addr = mr->virt; } ret = rdma_backend_create_mr(&mr->backend_mr, &pd->backend_pd, addr, length, @@ -214,7 +214,7 @@ void rdma_rm_dealloc_mr(RdmaDeviceResources *dev_res, uint32_t mr_handle) if (mr) { rdma_backend_destroy_mr(&mr->backend_mr); - munmap(mr->user_mr.host_virt, mr->user_mr.length); + munmap(mr->virt, mr->length); res_tbl_dealloc(&dev_res->mr_tbl, mr_handle); } } diff --git a/hw/rdma/rdma_rm_defs.h b/hw/rdma/rdma_rm_defs.h index 226011176d..7228151239 100644 --- a/hw/rdma/rdma_rm_defs.h +++ b/hw/rdma/rdma_rm_defs.h @@ -55,16 +55,12 @@ typedef struct RdmaRmCQ { bool notify; } RdmaRmCQ; -typedef struct RdmaRmUserMR { - void *host_virt; - uint64_t guest_start; - size_t length; -} RdmaRmUserMR; - /* MR (DMA region) */ typedef struct RdmaRmMR { RdmaBackendMR backend_mr; - RdmaRmUserMR user_mr; + void *virt; + uint64_t start; + size_t length; uint32_t pd_handle; uint32_t lkey; uint32_t rkey; -- cgit 1.4.1 From 292dce627b51329212130470f485b7dc2b72b48c Mon Sep 17 00:00:00 2001 From: Yuval Shaia Date: Sun, 5 Aug 2018 18:35:15 +0300 Subject: hw/rdma: Cosmetic change - move to generic function To ease maintenance of struct comp_thread move all related code to dedicated function. Signed-off-by: Yuval Shaia Reviewed-by: Marcel Apfelbaum Message-Id: <20180805153518.2983-11-yuval.shaia@oracle.com> Signed-off-by: Marcel Apfelbaum --- hw/rdma/rdma_backend.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'hw/rdma/rdma_backend.c') diff --git a/hw/rdma/rdma_backend.c b/hw/rdma/rdma_backend.c index 52981d652d..d29acc505b 100644 --- a/hw/rdma/rdma_backend.c +++ b/hw/rdma/rdma_backend.c @@ -146,10 +146,10 @@ static void *comp_handler_thread(void *arg) return NULL; } -static void stop_comp_thread(RdmaBackendDev *backend_dev) +static void stop_backend_thread(RdmaBackendThread *thread) { - backend_dev->comp_thread.run = false; - while (backend_dev->comp_thread.is_running) { + thread->run = false; + while (thread->is_running) { pr_dbg("Waiting for thread to complete\n"); sleep(THR_POLL_TO / SCALE_US / 2); } @@ -159,7 +159,7 @@ static void start_comp_thread(RdmaBackendDev *backend_dev) { char thread_name[THR_NAME_LEN] = {0}; - stop_comp_thread(backend_dev); + stop_backend_thread(&backend_dev->comp_thread); snprintf(thread_name, sizeof(thread_name), "rdma_comp_%s", ibv_get_device_name(backend_dev->ib_dev)); @@ -876,7 +876,7 @@ void rdma_backend_start(RdmaBackendDev *backend_dev) void rdma_backend_stop(RdmaBackendDev *backend_dev) { pr_dbg("Stopping rdma_backend\n"); - stop_comp_thread(backend_dev); + stop_backend_thread(&backend_dev->comp_thread); } void rdma_backend_fini(RdmaBackendDev *backend_dev) -- cgit 1.4.1 From 430e440c59e8a6bcf8806576f37470641298af31 Mon Sep 17 00:00:00 2001 From: Yuval Shaia Date: Sun, 5 Aug 2018 18:35:18 +0300 Subject: hw/rdma: Add reference to pci_dev in backend_dev The field backend_dev->dev is not initialized, fix it. Signed-off-by: Yuval Shaia Message-Id: <20180805153518.2983-14-yuval.shaia@oracle.com> Reviewed-by: Marcel Apfelbaum Signed-off-by: Marcel Apfelbaum --- hw/rdma/rdma_backend.c | 6 +++++- hw/rdma/rdma_backend.h | 2 +- hw/rdma/vmw/pvrdma_main.c | 2 +- 3 files changed, 7 insertions(+), 3 deletions(-) (limited to 'hw/rdma/rdma_backend.c') diff --git a/hw/rdma/rdma_backend.c b/hw/rdma/rdma_backend.c index d29acc505b..d7a4bbd91f 100644 --- a/hw/rdma/rdma_backend.c +++ b/hw/rdma/rdma_backend.c @@ -744,7 +744,7 @@ static int init_device_caps(RdmaBackendDev *backend_dev, return 0; } -int rdma_backend_init(RdmaBackendDev *backend_dev, +int rdma_backend_init(RdmaBackendDev *backend_dev, PCIDevice *pdev, RdmaDeviceResources *rdma_dev_res, const char *backend_device_name, uint8_t port_num, uint8_t backend_gid_idx, struct ibv_device_attr *dev_attr, @@ -756,6 +756,10 @@ int rdma_backend_init(RdmaBackendDev *backend_dev, struct ibv_device **dev_list; struct ibv_port_attr port_attr; + memset(backend_dev, 0, sizeof(*backend_dev)); + + backend_dev->dev = pdev; + backend_dev->backend_gid_idx = backend_gid_idx; backend_dev->port_num = port_num; backend_dev->rdma_dev_res = rdma_dev_res; diff --git a/hw/rdma/rdma_backend.h b/hw/rdma/rdma_backend.h index 3049a73962..86e8fe8ab6 100644 --- a/hw/rdma/rdma_backend.h +++ b/hw/rdma/rdma_backend.h @@ -46,7 +46,7 @@ static inline uint32_t rdma_backend_mr_rkey(const RdmaBackendMR *mr) return mr->ibmr ? mr->ibmr->rkey : 0; } -int rdma_backend_init(RdmaBackendDev *backend_dev, +int rdma_backend_init(RdmaBackendDev *backend_dev, PCIDevice *pdev, RdmaDeviceResources *rdma_dev_res, const char *backend_device_name, uint8_t port_num, uint8_t backend_gid_idx, struct ibv_device_attr *dev_attr, diff --git a/hw/rdma/vmw/pvrdma_main.c b/hw/rdma/vmw/pvrdma_main.c index 3d448bffc4..ca5fa8d981 100644 --- a/hw/rdma/vmw/pvrdma_main.c +++ b/hw/rdma/vmw/pvrdma_main.c @@ -611,7 +611,7 @@ static void pvrdma_realize(PCIDevice *pdev, Error **errp) goto out; } - rc = rdma_backend_init(&dev->backend_dev, &dev->rdma_dev_res, + rc = rdma_backend_init(&dev->backend_dev, pdev, &dev->rdma_dev_res, dev->backend_device_name, dev->backend_port_num, dev->backend_gid_idx, &dev->dev_attr, errp); if (rc) { -- cgit 1.4.1