Diffstat (limited to 'hw/rdma/vmw')
-rw-r--r--  hw/rdma/vmw/pvrdma.h            10
-rw-r--r--  hw/rdma/vmw/pvrdma_cmd.c       273
-rw-r--r--  hw/rdma/vmw/pvrdma_dev_ring.c   29
-rw-r--r--  hw/rdma/vmw/pvrdma_dev_ring.h    1
-rw-r--r--  hw/rdma/vmw/pvrdma_main.c       70
-rw-r--r--  hw/rdma/vmw/pvrdma_qp_ops.c     62
6 files changed, 255 insertions, 190 deletions
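Most of the churn in pvrdma_cmd.c below comes from dropping the per-handler response-header bookkeeping: each entry in cmd_handlers[] now carries its ack code, and execute_command() fills in response, ack and err after the handler returns. As a rough orientation, here is a minimal standalone sketch of that dispatch pattern; the demo_* names and the two sample handlers are invented for illustration and are not the real pvrdma types.

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical request/response types standing in for the pvrdma unions. */
typedef struct { uint32_t cmd; uint64_t response; } demo_req;
typedef struct { uint64_t response; uint32_t ack; uint32_t err; } demo_rsp;

typedef int (*demo_exec)(const demo_req *req, demo_rsp *rsp);

static int demo_query_port(const demo_req *req, demo_rsp *rsp)
{
    (void)req; (void)rsp;
    return 0;                       /* success: handlers return 0 or -errno */
}

static int demo_create_cq(const demo_req *req, demo_rsp *rsp)
{
    (void)req; (void)rsp;
    return -EINVAL;                 /* simulate a failing handler */
}

/* Table indexed by command value, pairing each command with its ack code,
 * in the spirit of cmd_handlers[] in the diff below. */
static const struct { uint32_t cmd; uint32_t ack; demo_exec exec; } handlers[] = {
    {0, 100, demo_query_port},
    {1, 101, demo_create_cq},
};

static int demo_execute(const demo_req *req, demo_rsp *rsp)
{
    int err = handlers[req->cmd].exec(req, rsp);

    /* Header fields are filled in one place, not in every handler. */
    rsp->response = req->response;
    rsp->ack = handlers[req->cmd].ack;
    rsp->err = err < 0 ? -err : 0;  /* negative errno becomes a positive code */
    return err;
}

int main(void)
{
    demo_req req = { .cmd = 1, .response = 0xabcd };
    demo_rsp rsp = {0};

    demo_execute(&req, &rsp);
    printf("ack=%u err=%u\n", (unsigned)rsp.ack, (unsigned)rsp.err);
    return 0;
}

Keeping the header logic in one place is what lets the individual handlers shrink to plain return rc; paths, including the early -EINVAL returns added for out-of-range port, pkey and page-count arguments.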
diff --git a/hw/rdma/vmw/pvrdma.h b/hw/rdma/vmw/pvrdma.h
index e2d9f93cdf..ffae36986e 100644
--- a/hw/rdma/vmw/pvrdma.h
+++ b/hw/rdma/vmw/pvrdma.h
@@ -17,8 +17,11 @@
 #define PVRDMA_PVRDMA_H
 
 #include "qemu/units.h"
+#include "qemu/notify.h"
 #include "hw/pci/pci.h"
 #include "hw/pci/msix.h"
+#include "chardev/char-fe.h"
+#include "hw/net/vmxnet3_defs.h"
 
 #include "../rdma_backend_defs.h"
 #include "../rdma_rm_defs.h"
@@ -51,7 +54,7 @@
 #define PVRDMA_FW_VERSION    14
 
 /* Some defaults */
-#define PVRDMA_PKEY          0x7FFF
+#define PVRDMA_PKEY          0xFFFF
 
 typedef struct DSRInfo {
     dma_addr_t dma;
@@ -78,11 +81,14 @@ typedef struct PVRDMADev {
     int interrupt_mask;
     struct ibv_device_attr dev_attr;
     uint64_t node_guid;
+    char *backend_eth_device_name;
     char *backend_device_name;
-    uint8_t backend_gid_idx;
     uint8_t backend_port_num;
     RdmaBackendDev backend_dev;
     RdmaDeviceResources rdma_dev_res;
+    CharBackend mad_chr;
+    VMXNET3State *func0;
+    Notifier shutdown_notifier;
 } PVRDMADev;
 
 #define PVRDMA_DEV(dev) OBJECT_CHECK(PVRDMADev, (dev), PVRDMA_HW_NAME)
diff --git a/hw/rdma/vmw/pvrdma_cmd.c b/hw/rdma/vmw/pvrdma_cmd.c
index 4faeb21631..89920887bf 100644
--- a/hw/rdma/vmw/pvrdma_cmd.c
+++ b/hw/rdma/vmw/pvrdma_cmd.c
@@ -128,6 +128,9 @@ static int query_port(PVRDMADev *dev, union pvrdma_cmd_req *req,
     struct pvrdma_port_attr attrs = {0};
 
     pr_dbg("port=%d\n", cmd->port_num);
+    if (cmd->port_num > MAX_PORTS) {
+        return -EINVAL;
+    }
 
     if (rdma_backend_query_port(&dev->backend_dev,
                                 (struct ibv_port_attr *)&attrs)) {
@@ -135,11 +138,9 @@ static int query_port(PVRDMADev *dev, union pvrdma_cmd_req *req,
     }
 
     memset(resp, 0, sizeof(*resp));
-    resp->hdr.response = cmd->hdr.response;
-    resp->hdr.ack = PVRDMA_CMD_QUERY_PORT_RESP;
-    resp->hdr.err = 0;
 
-    resp->attrs.state = attrs.state;
+    resp->attrs.state = dev->func0->device_active ? attrs.state :
+                                                    PVRDMA_PORT_DOWN;
     resp->attrs.max_mtu = attrs.max_mtu;
     resp->attrs.active_mtu = attrs.active_mtu;
     resp->attrs.phys_state = attrs.phys_state;
@@ -159,12 +160,16 @@ static int query_pkey(PVRDMADev *dev, union pvrdma_cmd_req *req,
     struct pvrdma_cmd_query_pkey_resp *resp = &rsp->query_pkey_resp;
 
     pr_dbg("port=%d\n", cmd->port_num);
+    if (cmd->port_num > MAX_PORTS) {
+        return -EINVAL;
+    }
+
     pr_dbg("index=%d\n", cmd->index);
+    if (cmd->index > MAX_PKEYS) {
+        return -EINVAL;
+    }
 
     memset(resp, 0, sizeof(*resp));
-    resp->hdr.response = cmd->hdr.response;
-    resp->hdr.ack = PVRDMA_CMD_QUERY_PKEY_RESP;
-    resp->hdr.err = 0;
 
     resp->pkey = PVRDMA_PKEY;
     pr_dbg("pkey=0x%x\n", resp->pkey);
@@ -177,17 +182,15 @@ static int create_pd(PVRDMADev *dev, union pvrdma_cmd_req *req,
 {
     struct pvrdma_cmd_create_pd *cmd = &req->create_pd;
     struct pvrdma_cmd_create_pd_resp *resp = &rsp->create_pd_resp;
+    int rc;
 
     pr_dbg("context=0x%x\n", cmd->ctx_handle ? cmd->ctx_handle : 0);
 
     memset(resp, 0, sizeof(*resp));
-    resp->hdr.response = cmd->hdr.response;
-    resp->hdr.ack = PVRDMA_CMD_CREATE_PD_RESP;
-    resp->hdr.err = rdma_rm_alloc_pd(&dev->rdma_dev_res, &dev->backend_dev,
-                                     &resp->pd_handle, cmd->ctx_handle);
+    rc = rdma_rm_alloc_pd(&dev->rdma_dev_res, &dev->backend_dev,
+                          &resp->pd_handle, cmd->ctx_handle);
 
-    pr_dbg("ret=%d\n", resp->hdr.err);
-    return resp->hdr.err;
+    return rc;
 }
 
 static int destroy_pd(PVRDMADev *dev, union pvrdma_cmd_req *req,
@@ -209,10 +212,9 @@ static int create_mr(PVRDMADev *dev, union pvrdma_cmd_req *req,
     struct pvrdma_cmd_create_mr_resp *resp = &rsp->create_mr_resp;
     PCIDevice *pci_dev = PCI_DEVICE(dev);
     void *host_virt = NULL;
+    int rc = 0;
 
     memset(resp, 0, sizeof(*resp));
-    resp->hdr.response = cmd->hdr.response;
-    resp->hdr.ack = PVRDMA_CMD_CREATE_MR_RESP;
 
     pr_dbg("pd_handle=%d\n", cmd->pd_handle);
     pr_dbg("access_flags=0x%x\n", cmd->access_flags);
@@ -223,22 +225,18 @@ static int create_mr(PVRDMADev *dev, union pvrdma_cmd_req *req,
                                             cmd->length);
         if (!host_virt) {
             pr_dbg("Failed to map to pdir\n");
-            resp->hdr.err = -EINVAL;
-            goto out;
+            return -EINVAL;
         }
     }
 
-    resp->hdr.err = rdma_rm_alloc_mr(&dev->rdma_dev_res, cmd->pd_handle,
-                                     cmd->start, cmd->length, host_virt,
-                                     cmd->access_flags, &resp->mr_handle,
-                                     &resp->lkey, &resp->rkey);
-    if (host_virt && !resp->hdr.err) {
+    rc = rdma_rm_alloc_mr(&dev->rdma_dev_res, cmd->pd_handle, cmd->start,
+                          cmd->length, host_virt, cmd->access_flags,
+                          &resp->mr_handle, &resp->lkey, &resp->rkey);
+    if (rc && host_virt) {
         munmap(host_virt, cmd->length);
     }
 
-out:
-    pr_dbg("ret=%d\n", resp->hdr.err);
-    return resp->hdr.err;
+    return rc;
 }
 
 static int destroy_mr(PVRDMADev *dev, union pvrdma_cmd_req *req,
@@ -261,6 +259,11 @@ static int create_cq_ring(PCIDevice *pci_dev , PvrdmaRing **ring,
     int rc = -EINVAL;
     char ring_name[MAX_RING_NAME_SZ];
 
+    if (!nchunks || nchunks > PVRDMA_MAX_FAST_REG_PAGES) {
+        pr_dbg("invalid nchunks: %d\n", nchunks);
+        return rc;
+    }
+
     pr_dbg("pdir_dma=0x%llx\n", (long long unsigned int)pdir_dma);
     dir = rdma_pci_dma_map(pci_dev, pdir_dma, TARGET_PAGE_SIZE);
     if (!dir) {
@@ -310,34 +313,43 @@ out:
     return rc;
 }
 
+static void destroy_cq_ring(PvrdmaRing *ring)
+{
+    pvrdma_ring_free(ring);
+    /* ring_state was in slot 1, not 0 so need to jump back */
+    rdma_pci_dma_unmap(ring->dev, --ring->ring_state, TARGET_PAGE_SIZE);
+    g_free(ring);
+}
+
 static int create_cq(PVRDMADev *dev, union pvrdma_cmd_req *req,
                      union pvrdma_cmd_resp *rsp)
 {
     struct pvrdma_cmd_create_cq *cmd = &req->create_cq;
     struct pvrdma_cmd_create_cq_resp *resp = &rsp->create_cq_resp;
     PvrdmaRing *ring = NULL;
+    int rc;
 
     memset(resp, 0, sizeof(*resp));
-    resp->hdr.response = cmd->hdr.response;
-    resp->hdr.ack = PVRDMA_CMD_CREATE_CQ_RESP;
 
     resp->cqe = cmd->cqe;
 
-    resp->hdr.err = create_cq_ring(PCI_DEVICE(dev), &ring, cmd->pdir_dma,
-                                   cmd->nchunks, cmd->cqe);
-    if (resp->hdr.err) {
-        goto out;
+    rc = create_cq_ring(PCI_DEVICE(dev), &ring, cmd->pdir_dma, cmd->nchunks,
+                        cmd->cqe);
+    if (rc) {
+        return rc;
     }
 
     pr_dbg("ring=%p\n", ring);
 
-    resp->hdr.err = rdma_rm_alloc_cq(&dev->rdma_dev_res, &dev->backend_dev,
-                                     cmd->cqe, &resp->cq_handle, ring);
+    rc = rdma_rm_alloc_cq(&dev->rdma_dev_res, &dev->backend_dev, cmd->cqe,
+                          &resp->cq_handle, ring);
+    if (rc) {
+        destroy_cq_ring(ring);
+    }
+
     resp->cqe = cmd->cqe;
 
-out:
-    pr_dbg("ret=%d\n", resp->hdr.err);
-    return resp->hdr.err;
+    return rc;
 }
 
 static int destroy_cq(PVRDMADev *dev, union pvrdma_cmd_req *req,
@@ -356,10 +368,7 @@ static int destroy_cq(PVRDMADev *dev, union pvrdma_cmd_req *req,
     }
 
     ring = (PvrdmaRing *)cq->opaque;
-    pvrdma_ring_free(ring);
-    /* ring_state was in slot 1, not 0 so need to jump back */
-    rdma_pci_dma_unmap(PCI_DEVICE(dev), --ring->ring_state, TARGET_PAGE_SIZE);
-    g_free(ring);
+    destroy_cq_ring(ring);
 
     rdma_rm_dealloc_cq(&dev->rdma_dev_res, cmd->cq_handle);
 
@@ -377,6 +386,12 @@ static int create_qp_rings(PCIDevice *pci_dev, uint64_t pdir_dma,
     char ring_name[MAX_RING_NAME_SZ];
     uint32_t wqe_sz;
 
+    if (!spages || spages > PVRDMA_MAX_FAST_REG_PAGES
+        || !rpages || rpages > PVRDMA_MAX_FAST_REG_PAGES) {
+        pr_dbg("invalid pages: %d, %d\n", spages, rpages);
+        return rc;
+    }
+
     pr_dbg("pdir_dma=0x%llx\n", (long long unsigned int)pdir_dma);
     dir = rdma_pci_dma_map(pci_dev, pdir_dma, TARGET_PAGE_SIZE);
     if (!dir) {
@@ -451,36 +466,49 @@ out:
     return rc;
 }
 
+static void destroy_qp_rings(PvrdmaRing *ring)
+{
+    pr_dbg("sring=%p\n", &ring[0]);
+    pvrdma_ring_free(&ring[0]);
+    pr_dbg("rring=%p\n", &ring[1]);
+    pvrdma_ring_free(&ring[1]);
+
+    rdma_pci_dma_unmap(ring->dev, ring->ring_state, TARGET_PAGE_SIZE);
+    g_free(ring);
+}
+
 static int create_qp(PVRDMADev *dev, union pvrdma_cmd_req *req,
                      union pvrdma_cmd_resp *rsp)
 {
     struct pvrdma_cmd_create_qp *cmd = &req->create_qp;
     struct pvrdma_cmd_create_qp_resp *resp = &rsp->create_qp_resp;
     PvrdmaRing *rings = NULL;
+    int rc;
 
     memset(resp, 0, sizeof(*resp));
-    resp->hdr.response = cmd->hdr.response;
-    resp->hdr.ack = PVRDMA_CMD_CREATE_QP_RESP;
 
     pr_dbg("total_chunks=%d\n", cmd->total_chunks);
     pr_dbg("send_chunks=%d\n", cmd->send_chunks);
 
-    resp->hdr.err = create_qp_rings(PCI_DEVICE(dev), cmd->pdir_dma, &rings,
-                                    cmd->max_send_wr, cmd->max_send_sge,
-                                    cmd->send_chunks, cmd->max_recv_wr,
-                                    cmd->max_recv_sge, cmd->total_chunks -
-                                    cmd->send_chunks - 1);
-    if (resp->hdr.err) {
-        goto out;
+    rc = create_qp_rings(PCI_DEVICE(dev), cmd->pdir_dma, &rings,
+                         cmd->max_send_wr, cmd->max_send_sge, cmd->send_chunks,
+                         cmd->max_recv_wr, cmd->max_recv_sge,
+                         cmd->total_chunks - cmd->send_chunks - 1);
+    if (rc) {
+        return rc;
     }
 
     pr_dbg("rings=%p\n", rings);
 
-    resp->hdr.err = rdma_rm_alloc_qp(&dev->rdma_dev_res, cmd->pd_handle,
-                                     cmd->qp_type, cmd->max_send_wr,
-                                     cmd->max_send_sge, cmd->send_cq_handle,
-                                     cmd->max_recv_wr, cmd->max_recv_sge,
-                                     cmd->recv_cq_handle, rings, &resp->qpn);
+    rc = rdma_rm_alloc_qp(&dev->rdma_dev_res, cmd->pd_handle, cmd->qp_type,
+                          cmd->max_send_wr, cmd->max_send_sge,
+                          cmd->send_cq_handle, cmd->max_recv_wr,
+                          cmd->max_recv_sge, cmd->recv_cq_handle, rings,
+                          &resp->qpn);
+    if (rc) {
+        destroy_qp_rings(rings);
+        return rc;
+    }
 
     resp->max_send_wr = cmd->max_send_wr;
     resp->max_recv_wr = cmd->max_recv_wr;
@@ -488,32 +516,31 @@ static int create_qp(PVRDMADev *dev, union pvrdma_cmd_req *req,
     resp->max_recv_sge = cmd->max_recv_sge;
     resp->max_inline_data = cmd->max_inline_data;
 
-out:
-    pr_dbg("ret=%d\n", resp->hdr.err);
-    return resp->hdr.err;
+    return 0;
 }
 
 static int modify_qp(PVRDMADev *dev, union pvrdma_cmd_req *req,
                      union pvrdma_cmd_resp *rsp)
 {
     struct pvrdma_cmd_modify_qp *cmd = &req->modify_qp;
+    int rc;
 
     pr_dbg("qp_handle=%d\n", cmd->qp_handle);
 
     memset(rsp, 0, sizeof(*rsp));
-    rsp->hdr.response = cmd->hdr.response;
-    rsp->hdr.ack = PVRDMA_CMD_MODIFY_QP_RESP;
-
-    rsp->hdr.err = rdma_rm_modify_qp(&dev->rdma_dev_res, &dev->backend_dev,
-                                 cmd->qp_handle, cmd->attr_mask,
-                                 (union ibv_gid *)&cmd->attrs.ah_attr.grh.dgid,
-                                 cmd->attrs.dest_qp_num,
-                                 (enum ibv_qp_state)cmd->attrs.qp_state,
-                                 cmd->attrs.qkey, cmd->attrs.rq_psn,
-                                 cmd->attrs.sq_psn);
-
-    pr_dbg("ret=%d\n", rsp->hdr.err);
-    return rsp->hdr.err;
+
+    /* No need to verify sgid_index since it is u8 */
+
+    rc = rdma_rm_modify_qp(&dev->rdma_dev_res, &dev->backend_dev,
+                           cmd->qp_handle, cmd->attr_mask,
+                           cmd->attrs.ah_attr.grh.sgid_index,
+                           (union ibv_gid *)&cmd->attrs.ah_attr.grh.dgid,
+                           cmd->attrs.dest_qp_num,
+                           (enum ibv_qp_state)cmd->attrs.qp_state,
+                           cmd->attrs.qkey, cmd->attrs.rq_psn,
+                           cmd->attrs.sq_psn);
+
+    return rc;
 }
 
 static int query_qp(PVRDMADev *dev, union pvrdma_cmd_req *req,
@@ -522,21 +549,18 @@ static int query_qp(PVRDMADev *dev, union pvrdma_cmd_req *req,
     struct pvrdma_cmd_query_qp *cmd = &req->query_qp;
     struct pvrdma_cmd_query_qp_resp *resp = &rsp->query_qp_resp;
     struct ibv_qp_init_attr init_attr;
+    int rc;
 
     pr_dbg("qp_handle=%d\n", cmd->qp_handle);
     pr_dbg("attr_mask=0x%x\n", cmd->attr_mask);
 
     memset(rsp, 0, sizeof(*rsp));
-    rsp->hdr.response = cmd->hdr.response;
-    rsp->hdr.ack = PVRDMA_CMD_QUERY_QP_RESP;
 
-    rsp->hdr.err = rdma_rm_query_qp(&dev->rdma_dev_res, &dev->backend_dev,
-                                    cmd->qp_handle,
-                                    (struct ibv_qp_attr *)&resp->attrs,
-                                    cmd->attr_mask, &init_attr);
+    rc = rdma_rm_query_qp(&dev->rdma_dev_res, &dev->backend_dev, cmd->qp_handle,
+                          (struct ibv_qp_attr *)&resp->attrs, cmd->attr_mask,
+                          &init_attr);
 
-    pr_dbg("ret=%d\n", rsp->hdr.err);
-    return rsp->hdr.err;
+    return rc;
 }
 
 static int destroy_qp(PVRDMADev *dev, union pvrdma_cmd_req *req,
@@ -555,13 +579,7 @@ static int destroy_qp(PVRDMADev *dev, union pvrdma_cmd_req *req,
     rdma_rm_dealloc_qp(&dev->rdma_dev_res, cmd->qp_handle);
 
     ring = (PvrdmaRing *)qp->opaque;
-    pr_dbg("sring=%p\n", &ring[0]);
-    pvrdma_ring_free(&ring[0]);
-    pr_dbg("rring=%p\n", &ring[1]);
-    pvrdma_ring_free(&ring[1]);
-
-    rdma_pci_dma_unmap(PCI_DEVICE(dev), ring->ring_state, TARGET_PAGE_SIZE);
-    g_free(ring);
+    destroy_qp_rings(ring);
 
     return 0;
 }
@@ -570,10 +588,8 @@ static int create_bind(PVRDMADev *dev, union pvrdma_cmd_req *req,
                        union pvrdma_cmd_resp *rsp)
 {
     struct pvrdma_cmd_create_bind *cmd = &req->create_bind;
-#ifdef PVRDMA_DEBUG
-    __be64 *subnet = (__be64 *)&cmd->new_gid[0];
-    __be64 *if_id = (__be64 *)&cmd->new_gid[8];
-#endif
+    int rc;
+    union ibv_gid *gid = (union ibv_gid *)&cmd->new_gid;
 
     pr_dbg("index=%d\n", cmd->index);
 
@@ -582,26 +598,20 @@ static int create_bind(PVRDMADev *dev, union pvrdma_cmd_req *req,
     }
 
     pr_dbg("gid[%d]=0x%llx,0x%llx\n", cmd->index,
-           (long long unsigned int)be64_to_cpu(*subnet),
-           (long long unsigned int)be64_to_cpu(*if_id));
-
-    /* Driver forces to one port only */
-    memcpy(dev->rdma_dev_res.ports[0].gid_tbl[cmd->index].raw, &cmd->new_gid,
-           sizeof(cmd->new_gid));
+           (long long unsigned int)be64_to_cpu(gid->global.subnet_prefix),
+           (long long unsigned int)be64_to_cpu(gid->global.interface_id));
 
-    /* TODO: Since drivers stores node_guid at load_dsr phase then this
-     * assignment is not relevant, i need to figure out a way how to
-     * retrieve MAC of our netdev */
-    dev->node_guid = dev->rdma_dev_res.ports[0].gid_tbl[0].global.interface_id;
-    pr_dbg("dev->node_guid=0x%llx\n",
-           (long long unsigned int)be64_to_cpu(dev->node_guid));
+    rc = rdma_rm_add_gid(&dev->rdma_dev_res, &dev->backend_dev,
+                         dev->backend_eth_device_name, gid, cmd->index);
 
-    return 0;
+    return rc;
 }
 
 static int destroy_bind(PVRDMADev *dev, union pvrdma_cmd_req *req,
                         union pvrdma_cmd_resp *rsp)
 {
+    int rc;
+
     struct pvrdma_cmd_destroy_bind *cmd = &req->destroy_bind;
 
     pr_dbg("index=%d\n", cmd->index);
 
@@ -610,10 +620,10 @@ static int destroy_bind(PVRDMADev *dev, union pvrdma_cmd_req *req,
         return -EINVAL;
     }
 
-    memset(dev->rdma_dev_res.ports[0].gid_tbl[cmd->index].raw, 0,
-           sizeof(dev->rdma_dev_res.ports[0].gid_tbl[cmd->index].raw));
+    rc = rdma_rm_del_gid(&dev->rdma_dev_res, &dev->backend_dev,
+                         dev->backend_eth_device_name, cmd->index);
 
-    return 0;
+    return rc;
 }
 
 static int create_uc(PVRDMADev *dev, union pvrdma_cmd_req *req,
@@ -621,18 +631,14 @@ static int create_uc(PVRDMADev *dev, union pvrdma_cmd_req *req,
 {
     struct pvrdma_cmd_create_uc *cmd = &req->create_uc;
     struct pvrdma_cmd_create_uc_resp *resp = &rsp->create_uc_resp;
+    int rc;
 
     pr_dbg("pfn=%d\n", cmd->pfn);
 
     memset(resp, 0, sizeof(*resp));
-    resp->hdr.response = cmd->hdr.response;
-    resp->hdr.ack = PVRDMA_CMD_CREATE_UC_RESP;
-    resp->hdr.err = rdma_rm_alloc_uc(&dev->rdma_dev_res, cmd->pfn,
-                                     &resp->ctx_handle);
-
-    pr_dbg("ret=%d\n", resp->hdr.err);
+    rc = rdma_rm_alloc_uc(&dev->rdma_dev_res, cmd->pfn, &resp->ctx_handle);
 
-    return 0;
+    return rc;
 }
 
 static int destroy_uc(PVRDMADev *dev, union pvrdma_cmd_req *req,
@@ -646,30 +652,32 @@ static int destroy_uc(PVRDMADev *dev, union pvrdma_cmd_req *req,
     return 0;
 }
 
+
 struct cmd_handler {
     uint32_t cmd;
+    uint32_t ack;
     int (*exec)(PVRDMADev *dev, union pvrdma_cmd_req *req,
             union pvrdma_cmd_resp *rsp);
 };
 
 static struct cmd_handler cmd_handlers[] = {
-    {PVRDMA_CMD_QUERY_PORT, query_port},
-    {PVRDMA_CMD_QUERY_PKEY, query_pkey},
-    {PVRDMA_CMD_CREATE_PD, create_pd},
-    {PVRDMA_CMD_DESTROY_PD, destroy_pd},
-    {PVRDMA_CMD_CREATE_MR, create_mr},
-    {PVRDMA_CMD_DESTROY_MR, destroy_mr},
-    {PVRDMA_CMD_CREATE_CQ, create_cq},
-    {PVRDMA_CMD_RESIZE_CQ, NULL},
-    {PVRDMA_CMD_DESTROY_CQ, destroy_cq},
-    {PVRDMA_CMD_CREATE_QP, create_qp},
-    {PVRDMA_CMD_MODIFY_QP, modify_qp},
-    {PVRDMA_CMD_QUERY_QP, query_qp},
-    {PVRDMA_CMD_DESTROY_QP, destroy_qp},
-    {PVRDMA_CMD_CREATE_UC, create_uc},
-    {PVRDMA_CMD_DESTROY_UC, destroy_uc},
-    {PVRDMA_CMD_CREATE_BIND, create_bind},
-    {PVRDMA_CMD_DESTROY_BIND, destroy_bind},
+    {PVRDMA_CMD_QUERY_PORT, PVRDMA_CMD_QUERY_PORT_RESP, query_port},
+    {PVRDMA_CMD_QUERY_PKEY, PVRDMA_CMD_QUERY_PKEY_RESP, query_pkey},
+    {PVRDMA_CMD_CREATE_PD, PVRDMA_CMD_CREATE_PD_RESP, create_pd},
+    {PVRDMA_CMD_DESTROY_PD, PVRDMA_CMD_DESTROY_PD_RESP_NOOP, destroy_pd},
+    {PVRDMA_CMD_CREATE_MR, PVRDMA_CMD_CREATE_MR_RESP, create_mr},
+    {PVRDMA_CMD_DESTROY_MR, PVRDMA_CMD_DESTROY_MR_RESP_NOOP, destroy_mr},
+    {PVRDMA_CMD_CREATE_CQ, PVRDMA_CMD_CREATE_CQ_RESP, create_cq},
+    {PVRDMA_CMD_RESIZE_CQ, PVRDMA_CMD_RESIZE_CQ_RESP, NULL},
+    {PVRDMA_CMD_DESTROY_CQ, PVRDMA_CMD_DESTROY_CQ_RESP_NOOP, destroy_cq},
+    {PVRDMA_CMD_CREATE_QP, PVRDMA_CMD_CREATE_QP_RESP, create_qp},
+    {PVRDMA_CMD_MODIFY_QP, PVRDMA_CMD_MODIFY_QP_RESP, modify_qp},
+    {PVRDMA_CMD_QUERY_QP, PVRDMA_CMD_QUERY_QP_RESP, query_qp},
+    {PVRDMA_CMD_DESTROY_QP, PVRDMA_CMD_DESTROY_QP_RESP, destroy_qp},
+    {PVRDMA_CMD_CREATE_UC, PVRDMA_CMD_CREATE_UC_RESP, create_uc},
+    {PVRDMA_CMD_DESTROY_UC, PVRDMA_CMD_DESTROY_UC_RESP_NOOP, destroy_uc},
+    {PVRDMA_CMD_CREATE_BIND, PVRDMA_CMD_CREATE_BIND_RESP_NOOP, create_bind},
+    {PVRDMA_CMD_DESTROY_BIND, PVRDMA_CMD_DESTROY_BIND_RESP_NOOP, destroy_bind},
 };
 
 int execute_command(PVRDMADev *dev)
@@ -692,7 +700,12 @@ int execute_command(PVRDMADev *dev)
     }
 
     err = cmd_handlers[dsr_info->req->hdr.cmd].exec(dev, dsr_info->req,
-            dsr_info->rsp);
+                                                    dsr_info->rsp);
+    dsr_info->rsp->hdr.response = dsr_info->req->hdr.response;
+    dsr_info->rsp->hdr.ack = cmd_handlers[dsr_info->req->hdr.cmd].ack;
+    dsr_info->rsp->hdr.err = err < 0 ? -err : 0;
+    pr_dbg("rsp->hdr.err=%d\n", dsr_info->rsp->hdr.err);
+
 out:
     set_reg_val(dev, PVRDMA_REG_ERR, err);
     post_interrupt(dev, INTR_VEC_CMD_RING);
diff --git a/hw/rdma/vmw/pvrdma_dev_ring.c b/hw/rdma/vmw/pvrdma_dev_ring.c
index 01247fc041..e8e5b502f6 100644
--- a/hw/rdma/vmw/pvrdma_dev_ring.c
+++ b/hw/rdma/vmw/pvrdma_dev_ring.c
@@ -73,23 +73,16 @@ out:
 
 void *pvrdma_ring_next_elem_read(PvrdmaRing *ring)
 {
+    int e;
     unsigned int idx = 0, offset;
 
-    /*
-    pr_dbg("%s: t=%d, h=%d\n", ring->name, ring->ring_state->prod_tail,
-           ring->ring_state->cons_head);
-    */
-
-    if (!pvrdma_idx_ring_has_data(ring->ring_state, ring->max_elems, &idx)) {
+    e = pvrdma_idx_ring_has_data(ring->ring_state, ring->max_elems, &idx);
+    if (e <= 0) {
         pr_dbg("No more data in ring\n");
         return NULL;
     }
 
     offset = idx * ring->elem_sz;
-    /*
-    pr_dbg("idx=%d\n", idx);
-    pr_dbg("offset=%d\n", offset);
-    */
     return ring->pages[offset / TARGET_PAGE_SIZE] + (offset % TARGET_PAGE_SIZE);
 }
 
@@ -105,20 +98,20 @@ void pvrdma_ring_read_inc(PvrdmaRing *ring)
 
 void *pvrdma_ring_next_elem_write(PvrdmaRing *ring)
 {
-    unsigned int idx, offset, tail;
+    int idx;
+    unsigned int offset, tail;
 
-    /*
-    pr_dbg("%s: t=%d, h=%d\n", ring->name, ring->ring_state->prod_tail,
-           ring->ring_state->cons_head);
-    */
-
-    if (!pvrdma_idx_ring_has_space(ring->ring_state, ring->max_elems, &tail)) {
+    idx = pvrdma_idx_ring_has_space(ring->ring_state, ring->max_elems, &tail);
+    if (idx <= 0) {
         pr_dbg("CQ is full\n");
         return NULL;
     }
 
     idx = pvrdma_idx(&ring->ring_state->prod_tail, ring->max_elems);
-    /* TODO: tail == idx */
+    if (idx < 0 || tail != idx) {
+        pr_dbg("invalid idx\n");
+        return NULL;
+    }
 
     offset = idx * ring->elem_sz;
     return ring->pages[offset / TARGET_PAGE_SIZE] + (offset % TARGET_PAGE_SIZE);
diff --git a/hw/rdma/vmw/pvrdma_dev_ring.h b/hw/rdma/vmw/pvrdma_dev_ring.h
index 411d244603..5f2a0cf9b9 100644
--- a/hw/rdma/vmw/pvrdma_dev_ring.h
+++ b/hw/rdma/vmw/pvrdma_dev_ring.h
@@ -16,7 +16,6 @@
 #ifndef PVRDMA_DEV_RING_H
 #define PVRDMA_DEV_RING_H
 
-#include "qemu/typedefs.h"
 
 #define MAX_RING_NAME_SZ 32
 
diff --git a/hw/rdma/vmw/pvrdma_main.c b/hw/rdma/vmw/pvrdma_main.c
index ca5fa8d981..838ad8a949 100644
--- a/hw/rdma/vmw/pvrdma_main.c
+++ b/hw/rdma/vmw/pvrdma_main.c
@@ -24,6 +24,7 @@
 #include "hw/qdev-properties.h"
 #include "cpu.h"
 #include "trace.h"
+#include "sysemu/sysemu.h"
 
 #include "../rdma_rm.h"
 #include "../rdma_backend.h"
@@ -36,9 +37,9 @@
 #include "pvrdma_qp_ops.h"
 
 static Property pvrdma_dev_properties[] = {
-    DEFINE_PROP_STRING("backend-dev", PVRDMADev, backend_device_name),
-    DEFINE_PROP_UINT8("backend-port", PVRDMADev, backend_port_num, 1),
-    DEFINE_PROP_UINT8("backend-gid-idx", PVRDMADev, backend_gid_idx, 0),
+    DEFINE_PROP_STRING("netdev", PVRDMADev, backend_eth_device_name),
+    DEFINE_PROP_STRING("ibdev", PVRDMADev, backend_device_name),
+    DEFINE_PROP_UINT8("ibport", PVRDMADev, backend_port_num, 1),
     DEFINE_PROP_UINT64("dev-caps-max-mr-size", PVRDMADev, dev_attr.max_mr_size,
                        MAX_MR_SIZE),
     DEFINE_PROP_INT32("dev-caps-max-qp", PVRDMADev, dev_attr.max_qp, MAX_QP),
@@ -51,6 +52,7 @@ static Property pvrdma_dev_properties[] = {
     DEFINE_PROP_INT32("dev-caps-max-qp-init-rd-atom", PVRDMADev,
                       dev_attr.max_qp_init_rd_atom, MAX_QP_INIT_RD_ATOM),
     DEFINE_PROP_INT32("dev-caps-max-ah", PVRDMADev, dev_attr.max_ah, MAX_AH),
+    DEFINE_PROP_CHR("mad-chardev", PVRDMADev, mad_chr),
     DEFINE_PROP_END_OF_LIST(),
 };
 
@@ -263,7 +265,7 @@ static void init_dsr_dev_caps(PVRDMADev *dev)
     dsr->caps.sys_image_guid = 0;
     pr_dbg("sys_image_guid=%" PRIx64 "\n", dsr->caps.sys_image_guid);
 
-    dsr->caps.node_guid = cpu_to_be64(dev->node_guid);
+    dsr->caps.node_guid = dev->node_guid;
     pr_dbg("node_guid=%" PRIx64 "\n", be64_to_cpu(dsr->caps.node_guid));
 
     dsr->caps.phys_port_cnt = MAX_PORTS;
@@ -275,17 +277,6 @@ static void init_dsr_dev_caps(PVRDMADev *dev)
     pr_dbg("Initialized\n");
 }
 
-static void init_ports(PVRDMADev *dev, Error **errp)
-{
-    int i;
-
-    memset(dev->rdma_dev_res.ports, 0, sizeof(dev->rdma_dev_res.ports));
-
-    for (i = 0; i < MAX_PORTS; i++) {
-        dev->rdma_dev_res.ports[i].state = IBV_PORT_DOWN;
-    }
-}
-
 static void uninit_msix(PCIDevice *pdev, int used_vectors)
 {
     PVRDMADev *dev = PVRDMA_DEV(pdev);
@@ -334,7 +325,8 @@ static void pvrdma_fini(PCIDevice *pdev)
 
     pvrdma_qp_ops_fini();
 
-    rdma_rm_fini(&dev->rdma_dev_res);
+    rdma_rm_fini(&dev->rdma_dev_res, &dev->backend_dev,
+                 dev->backend_eth_device_name);
 
     rdma_backend_fini(&dev->backend_dev);
 
@@ -343,6 +335,9 @@ static void pvrdma_fini(PCIDevice *pdev)
     if (msix_enabled(pdev)) {
         uninit_msix(pdev, RDMA_MAX_INTRS);
     }
+
+    pr_dbg("Device %s %x.%x is down\n", pdev->name, PCI_SLOT(pdev->devfn),
+           PCI_FUNC(pdev->devfn));
 }
 
 static void pvrdma_stop(PVRDMADev *dev)
@@ -368,13 +363,11 @@ static int unquiesce_device(PVRDMADev *dev)
     return 0;
 }
 
-static int reset_device(PVRDMADev *dev)
+static void reset_device(PVRDMADev *dev)
 {
     pvrdma_stop(dev);
 
     pr_dbg("Device reset complete\n");
-
-    return 0;
 }
 
 static uint64_t regs_read(void *opaque, hwaddr addr, unsigned size)
@@ -455,6 +448,11 @@ static const MemoryRegionOps regs_ops = {
     },
 };
 
+static uint64_t uar_read(void *opaque, hwaddr addr, unsigned size)
+{
+    return 0xffffffff;
+}
+
 static void uar_write(void *opaque, hwaddr addr, uint64_t val, unsigned size)
 {
     PVRDMADev *dev = opaque;
@@ -496,6 +494,7 @@ static void uar_write(void *opaque, hwaddr addr, uint64_t val, unsigned size)
 }
 
 static const MemoryRegionOps uar_ops = {
+    .read = uar_read,
     .write = uar_write,
     .endianness = DEVICE_LITTLE_ENDIAN,
     .impl = {
@@ -570,12 +569,21 @@ static int pvrdma_check_ram_shared(Object *obj, void *opaque)
     return 0;
 }
 
+static void pvrdma_shutdown_notifier(Notifier *n, void *opaque)
+{
+    PVRDMADev *dev = container_of(n, PVRDMADev, shutdown_notifier);
+    PCIDevice *pci_dev = PCI_DEVICE(dev);
+
+    pvrdma_fini(pci_dev);
+}
+
 static void pvrdma_realize(PCIDevice *pdev, Error **errp)
 {
-    int rc;
+    int rc = 0;
     PVRDMADev *dev = PVRDMA_DEV(pdev);
     Object *memdev_root;
     bool ram_shared = false;
+    PCIDevice *func0;
 
     init_pr_dbg();
 
@@ -587,6 +595,20 @@ static void pvrdma_realize(PCIDevice *pdev, Error **errp)
         return;
     }
 
+    func0 = pci_get_function_0(pdev);
+    /* Break if not vmxnet3 device in slot 0 */
+    if (strcmp(object_get_typename(&func0->qdev.parent_obj), TYPE_VMXNET3)) {
+        pr_dbg("func0 type is %s\n",
+               object_get_typename(&func0->qdev.parent_obj));
+        error_setg(errp, "Device on %x.0 must be %s", PCI_SLOT(pdev->devfn),
+                   TYPE_VMXNET3);
+        return;
+    }
+    dev->func0 = VMXNET3(func0);
+
+    addrconf_addr_eui48((unsigned char *)&dev->node_guid,
+                        (const char *)&dev->func0->conf.macaddr.a);
+
     memdev_root = object_resolve_path("/objects", NULL);
     if (memdev_root) {
         object_child_foreach(memdev_root, pvrdma_check_ram_shared, &ram_shared);
@@ -613,7 +635,7 @@ static void pvrdma_realize(PCIDevice *pdev, Error **errp)
 
     rc = rdma_backend_init(&dev->backend_dev, pdev, &dev->rdma_dev_res,
                            dev->backend_device_name, dev->backend_port_num,
-                           dev->backend_gid_idx, &dev->dev_attr, errp);
+                           &dev->dev_attr, &dev->mad_chr, errp);
     if (rc) {
         goto out;
     }
@@ -623,15 +645,17 @@ static void pvrdma_realize(PCIDevice *pdev, Error **errp)
         goto out;
     }
 
-    init_ports(dev, errp);
-
     rc = pvrdma_qp_ops_init();
     if (rc) {
         goto out;
     }
 
+    dev->shutdown_notifier.notify = pvrdma_shutdown_notifier;
+    qemu_register_shutdown_notifier(&dev->shutdown_notifier);
+
 out:
     if (rc) {
+        pvrdma_fini(pdev);
         error_append_hint(errp, "Device fail to load\n");
     }
 }
diff --git a/hw/rdma/vmw/pvrdma_qp_ops.c b/hw/rdma/vmw/pvrdma_qp_ops.c
index c668afd0ed..300471a4c9 100644
--- a/hw/rdma/vmw/pvrdma_qp_ops.c
+++ b/hw/rdma/vmw/pvrdma_qp_ops.c
@@ -47,7 +47,7 @@ typedef struct PvrdmaRqWqe {
  * 3. Interrupt host
  */
 static int pvrdma_post_cqe(PVRDMADev *dev, uint32_t cq_handle,
-                           struct pvrdma_cqe *cqe)
+                           struct pvrdma_cqe *cqe, struct ibv_wc *wc)
 {
     struct pvrdma_cqe *cqe1;
     struct pvrdma_cqne *cqne;
@@ -66,6 +66,7 @@ static int pvrdma_post_cqe(PVRDMADev *dev, uint32_t cq_handle,
     pr_dbg("Writing CQE\n");
     cqe1 = pvrdma_ring_next_elem_write(ring);
     if (unlikely(!cqe1)) {
+        pr_dbg("No CQEs in ring\n");
         return -EINVAL;
     }
 
@@ -73,8 +74,20 @@ static int pvrdma_post_cqe(PVRDMADev *dev, uint32_t cq_handle,
     cqe1->wr_id = cqe->wr_id;
     cqe1->qp = cqe->qp;
     cqe1->opcode = cqe->opcode;
-    cqe1->status = cqe->status;
-    cqe1->vendor_err = cqe->vendor_err;
+    cqe1->status = wc->status;
+    cqe1->byte_len = wc->byte_len;
+    cqe1->src_qp = wc->src_qp;
+    cqe1->wc_flags = wc->wc_flags;
+    cqe1->vendor_err = wc->vendor_err;
+
+    pr_dbg("wr_id=%" PRIx64 "\n", cqe1->wr_id);
+    pr_dbg("qp=0x%lx\n", cqe1->qp);
+    pr_dbg("opcode=%d\n", cqe1->opcode);
+    pr_dbg("status=%d\n", cqe1->status);
+    pr_dbg("byte_len=%d\n", cqe1->byte_len);
+    pr_dbg("src_qp=%d\n", cqe1->src_qp);
+    pr_dbg("wc_flags=%d\n", cqe1->wc_flags);
+    pr_dbg("vendor_err=%d\n", cqe1->vendor_err);
 
     pvrdma_ring_write_inc(ring);
 
@@ -89,26 +102,22 @@ static int pvrdma_post_cqe(PVRDMADev *dev, uint32_t cq_handle,
     pvrdma_ring_write_inc(&dev->dsr_info.cq);
 
     pr_dbg("cq->notify=%d\n", cq->notify);
-    if (cq->notify) {
-        cq->notify = false;
+    if (cq->notify != CNT_CLEAR) {
+        if (cq->notify == CNT_ARM) {
+            cq->notify = CNT_CLEAR;
+        }
         post_interrupt(dev, INTR_VEC_CMD_COMPLETION_Q);
     }
 
     return 0;
 }
 
-static void pvrdma_qp_ops_comp_handler(int status, unsigned int vendor_err,
-                                       void *ctx)
+static void pvrdma_qp_ops_comp_handler(void *ctx, struct ibv_wc *wc)
 {
     CompHandlerCtx *comp_ctx = (CompHandlerCtx *)ctx;
 
-    pr_dbg("cq_handle=%d\n", comp_ctx->cq_handle);
-    pr_dbg("wr_id=%" PRIx64 "\n", comp_ctx->cqe.wr_id);
-    pr_dbg("status=%d\n", status);
-    pr_dbg("vendor_err=0x%x\n", vendor_err);
-    comp_ctx->cqe.status = status;
-    comp_ctx->cqe.vendor_err = vendor_err;
-    pvrdma_post_cqe(comp_ctx->dev, comp_ctx->cq_handle, &comp_ctx->cqe);
+    pvrdma_post_cqe(comp_ctx->dev, comp_ctx->cq_handle, &comp_ctx->cqe, wc);
+
     g_free(ctx);
 }
 
@@ -129,6 +138,8 @@ int pvrdma_qp_send(PVRDMADev *dev, uint32_t qp_handle)
     RdmaRmQP *qp;
     PvrdmaSqWqe *wqe;
     PvrdmaRing *ring;
+    int sgid_idx;
+    union ibv_gid *sgid;
 
     pr_dbg("qp_handle=0x%x\n", qp_handle);
 
@@ -152,10 +163,28 @@ int pvrdma_qp_send(PVRDMADev *dev, uint32_t qp_handle)
         comp_ctx->cq_handle = qp->send_cq_handle;
         comp_ctx->cqe.wr_id = wqe->hdr.wr_id;
         comp_ctx->cqe.qp = qp_handle;
-        comp_ctx->cqe.opcode = wqe->hdr.opcode;
+        comp_ctx->cqe.opcode = IBV_WC_SEND;
+
+        sgid = rdma_rm_get_gid(&dev->rdma_dev_res, wqe->hdr.wr.ud.av.gid_index);
+        if (!sgid) {
+            pr_dbg("Fail to get gid for idx %d\n", wqe->hdr.wr.ud.av.gid_index);
+            return -EIO;
+        }
+        pr_dbg("sgid_id=%d, sgid=0x%llx\n", wqe->hdr.wr.ud.av.gid_index,
+               sgid->global.interface_id);
+
+        sgid_idx = rdma_rm_get_backend_gid_index(&dev->rdma_dev_res,
+                                                 &dev->backend_dev,
+                                                 wqe->hdr.wr.ud.av.gid_index);
+        if (sgid_idx <= 0) {
+            pr_dbg("Fail to get bk sgid_idx for sgid_idx %d\n",
+                   wqe->hdr.wr.ud.av.gid_index);
+            return -EIO;
+        }
 
         rdma_backend_post_send(&dev->backend_dev, &qp->backend_qp, qp->qp_type,
                                (struct ibv_sge *)&wqe->sge[0], wqe->hdr.num_sge,
+                               sgid_idx, sgid,
                                (union ibv_gid *)wqe->hdr.wr.ud.av.dgid,
                                wqe->hdr.wr.ud.remote_qpn,
                                wqe->hdr.wr.ud.remote_qkey, comp_ctx);
@@ -194,8 +223,9 @@ int pvrdma_qp_recv(PVRDMADev *dev, uint32_t qp_handle)
         comp_ctx = g_malloc(sizeof(CompHandlerCtx));
         comp_ctx->dev = dev;
         comp_ctx->cq_handle = qp->recv_cq_handle;
-        comp_ctx->cqe.qp = qp_handle;
         comp_ctx->cqe.wr_id = wqe->hdr.wr_id;
+        comp_ctx->cqe.qp = qp_handle;
+        comp_ctx->cqe.opcode = IBV_WC_RECV;
 
         rdma_backend_post_recv(&dev->backend_dev, &dev->rdma_dev_res,
                                &qp->backend_qp, qp->qp_type,
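The pvrdma_qp_ops.c hunk above also replaces the boolean cq->notify with a small state machine: anything other than the cleared state raises an interrupt, and the armed state disarms itself after firing, which leaves room for a one-shot mode alongside a possible always-notify value. Below is a standalone sketch of that logic; the DEMO_* names (including the assumed always-notify state) are illustrative only and are not the device's real CNT_* enum.

#include <stdio.h>

/* Hypothetical tri-state notify flag: CLEAR = no interrupt, ARM = interrupt
 * once then disarm, SET = interrupt on every completion. */
enum demo_notify { DEMO_NOTIFY_CLEAR, DEMO_NOTIFY_ARM, DEMO_NOTIFY_SET };

struct demo_cq {
    enum demo_notify notify;
    unsigned int interrupts;        /* counts simulated guest interrupts */
};

/* Post one completion and decide whether to raise an interrupt, mirroring
 * the shape of the notify handling in pvrdma_post_cqe() above. */
static void demo_post_completion(struct demo_cq *cq)
{
    if (cq->notify != DEMO_NOTIFY_CLEAR) {
        if (cq->notify == DEMO_NOTIFY_ARM) {
            cq->notify = DEMO_NOTIFY_CLEAR;   /* one-shot: disarm after use */
        }
        cq->interrupts++;           /* stand-in for post_interrupt() */
    }
}

int main(void)
{
    struct demo_cq cq = { .notify = DEMO_NOTIFY_ARM, .interrupts = 0 };

    demo_post_completion(&cq);      /* armed: raises an interrupt, disarms */
    demo_post_completion(&cq);      /* cleared: left for polling */

    cq.notify = DEMO_NOTIFY_SET;
    demo_post_completion(&cq);      /* always-notify: raises every time */

    printf("interrupts=%u\n", cq.interrupts);   /* prints 2 */
    return 0;
}

Run as-is, the sketch raises two interrupts: one for the armed completion and one for the always-notify completion, while the cleared completion is left for the consumer to poll.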