Diffstat (limited to 'net/vhost-vdpa.c')
-rw-r--r--  net/vhost-vdpa.c | 267
1 file changed, 199 insertions(+), 68 deletions(-)
diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index 37cdc84562..e19ab063fa 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -43,6 +43,10 @@ typedef struct VhostVDPAState {
     /* The device always have SVQ enabled */
     bool always_svq;
 
+    /* The device can isolate CVQ in its own ASID */
+    bool cvq_isolated;
+
     bool started;
 } VhostVDPAState;
 
@@ -54,6 +58,7 @@ const int vdpa_feature_bits[] = {
     VIRTIO_F_VERSION_1,
     VIRTIO_NET_F_CSUM,
     VIRTIO_NET_F_GUEST_CSUM,
+    VIRTIO_NET_F_CTRL_GUEST_OFFLOADS,
     VIRTIO_NET_F_GSO,
     VIRTIO_NET_F_GUEST_TSO4,
     VIRTIO_NET_F_GUEST_TSO6,
@@ -85,6 +90,7 @@ const int vdpa_feature_bits[] = {
 static const uint64_t vdpa_svq_device_features =
     BIT_ULL(VIRTIO_NET_F_CSUM) |
     BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) |
+    BIT_ULL(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) |
     BIT_ULL(VIRTIO_NET_F_MTU) |
     BIT_ULL(VIRTIO_NET_F_MAC) |
     BIT_ULL(VIRTIO_NET_F_GUEST_TSO4) |
@@ -116,6 +122,22 @@ VHostNetState *vhost_vdpa_get_vhost_net(NetClientState *nc)
     return s->vhost_net;
 }
 
+static size_t vhost_vdpa_net_cvq_cmd_len(void)
+{
+    /*
+     * MAC_TABLE_SET is the ctrl command that produces the longest out buffer.
+     * The in buffer is always 1 byte, so it fits here too.
+     */
+    return sizeof(struct virtio_net_ctrl_hdr) +
+           2 * sizeof(struct virtio_net_ctrl_mac) +
+           MAC_TABLE_ENTRIES * ETH_ALEN;
+}
+
+static size_t vhost_vdpa_net_cvq_cmd_page_len(void)
+{
+    return ROUND_UP(vhost_vdpa_net_cvq_cmd_len(), qemu_real_host_page_size());
+}
+
 static bool vhost_vdpa_net_valid_svq_features(uint64_t features, Error **errp)
 {
     uint64_t invalid_dev_features =
@@ -185,8 +207,16 @@ static void vhost_vdpa_cleanup(NetClientState *nc)
 {
     VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
 
-    qemu_vfree(s->cvq_cmd_out_buffer);
-    qemu_vfree(s->status);
+    /*
+     * If a peer NIC is attached, do not clean up anything.
+     * Cleanup will happen as a part of qemu_cleanup() -> net_cleanup()
+     * when the guest is shutting down.
+     */
+    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_NIC) {
+        return;
+    }
+    munmap(s->cvq_cmd_out_buffer, vhost_vdpa_net_cvq_cmd_page_len());
+    munmap(s->status, vhost_vdpa_net_cvq_cmd_page_len());
     if (s->vhost_net) {
         vhost_net_cleanup(s->vhost_net);
         g_free(s->vhost_net);
@@ -362,7 +392,8 @@ static NetClientInfo net_vhost_vdpa_info = {
     .check_peer_type = vhost_vdpa_check_peer_type,
 };
 
-static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index)
+static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index,
+                                          Error **errp)
 {
     struct vhost_vring_state state = {
         .index = vq_index,
@@ -370,8 +401,8 @@ static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index)
     int r = ioctl(device_fd, VHOST_VDPA_GET_VRING_GROUP, &state);
 
     if (unlikely(r < 0)) {
-        error_report("Cannot get VQ %u group: %s", vq_index,
-                     g_strerror(errno));
+        r = -errno;
+        error_setg_errno(errp, errno, "Cannot get VQ %u group", vq_index);
         return r;
     }
 
@@ -422,22 +453,6 @@ static void vhost_vdpa_cvq_unmap_buf(struct vhost_vdpa *v, void *addr)
     vhost_iova_tree_remove(tree, *map);
 }
 
-static size_t vhost_vdpa_net_cvq_cmd_len(void)
-{
-    /*
-     * MAC_TABLE_SET is the ctrl command that produces the longer out buffer.
-     * In buffer is always 1 byte, so it should fit here
-     */
-    return sizeof(struct virtio_net_ctrl_hdr) +
-           2 * sizeof(struct virtio_net_ctrl_mac) +
-           MAC_TABLE_ENTRIES * ETH_ALEN;
-}
-
-static size_t vhost_vdpa_net_cvq_cmd_page_len(void)
-{
-    return ROUND_UP(vhost_vdpa_net_cvq_cmd_len(), qemu_real_host_page_size());
-}
-
 /** Map CVQ buffer. */
 static int vhost_vdpa_cvq_map_buf(struct vhost_vdpa *v, void *buf, size_t size,
                                   bool write)
@@ -471,9 +486,9 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc)
 {
     VhostVDPAState *s, *s0;
     struct vhost_vdpa *v;
-    uint64_t backend_features;
     int64_t cvq_group;
-    int cvq_index, r;
+    int r;
+    Error *err = NULL;
 
     assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
 
@@ -493,41 +508,22 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc)
     /*
      * If we early return in these cases SVQ will not be enabled. The migration
     * will be blocked as long as vhost-vdpa backends will not offer _F_LOG.
-     *
-     * Calling VHOST_GET_BACKEND_FEATURES as they are not available in v->dev
-     * yet.
     */
-    r = ioctl(v->device_fd, VHOST_GET_BACKEND_FEATURES, &backend_features);
-    if (unlikely(r < 0)) {
-        error_report("Cannot get vdpa backend_features: %s(%d)",
-                     g_strerror(errno), errno);
-        return -1;
+    if (!vhost_vdpa_net_valid_svq_features(v->dev->features, NULL)) {
+        return 0;
     }
-    if (!(backend_features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID)) ||
-        !vhost_vdpa_net_valid_svq_features(v->dev->features, NULL)) {
+
+    if (!s->cvq_isolated) {
         return 0;
     }
 
-    /*
-     * Check if all the virtqueues of the virtio device are in a different vq
-     * than the last vq. VQ group of last group passed in cvq_group.
-     */
-    cvq_index = v->dev->vq_index_end - 1;
-    cvq_group = vhost_vdpa_get_vring_group(v->device_fd, cvq_index);
+    cvq_group = vhost_vdpa_get_vring_group(v->device_fd,
+                                           v->dev->vq_index_end - 1,
+                                           &err);
     if (unlikely(cvq_group < 0)) {
+        error_report_err(err);
         return cvq_group;
     }
-    for (int i = 0; i < cvq_index; ++i) {
-        int64_t group = vhost_vdpa_get_vring_group(v->device_fd, i);
-
-        if (unlikely(group < 0)) {
-            return group;
-        }
-
-        if (group == cvq_group) {
-            return 0;
-        }
-    }
 
     r = vhost_vdpa_set_address_space_id(v, cvq_group, VHOST_VDPA_NET_CVQ_ASID);
     if (unlikely(r < 0)) {
@@ -643,8 +639,7 @@ static ssize_t vhost_vdpa_net_load_cmd(VhostVDPAState *s, uint8_t class,
 
 static int vhost_vdpa_net_load_mac(VhostVDPAState *s, const VirtIONet *n)
 {
-    uint64_t features = n->parent_obj.guest_features;
-    if (features & BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR)) {
+    if (virtio_vdev_has_feature(&n->parent_obj, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
         ssize_t dev_written = vhost_vdpa_net_load_cmd(s, VIRTIO_NET_CTRL_MAC,
                                                   VIRTIO_NET_CTRL_MAC_ADDR_SET,
                                                   n->mac, sizeof(n->mac));
@@ -662,10 +657,9 @@ static int vhost_vdpa_net_load_mq(VhostVDPAState *s,
                                   const VirtIONet *n)
 {
     struct virtio_net_ctrl_mq mq;
-    uint64_t features = n->parent_obj.guest_features;
     ssize_t dev_written;
 
-    if (!(features & BIT_ULL(VIRTIO_NET_F_MQ))) {
+    if (!virtio_vdev_has_feature(&n->parent_obj, VIRTIO_NET_F_MQ)) {
         return 0;
     }
 
@@ -680,6 +674,44 @@ static int vhost_vdpa_net_load_mq(VhostVDPAState *s,
     return *s->status != VIRTIO_NET_OK;
 }
 
+static int vhost_vdpa_net_load_offloads(VhostVDPAState *s,
+                                        const VirtIONet *n)
+{
+    uint64_t offloads;
+    ssize_t dev_written;
+
+    if (!virtio_vdev_has_feature(&n->parent_obj,
+                                 VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
+        return 0;
+    }
+
+    if (n->curr_guest_offloads == virtio_net_supported_guest_offloads(n)) {
+        /*
+         * According to VirtIO standard, "Upon feature negotiation
+         * corresponding offload gets enabled to preserve
+         * backward compatibility.".
+         *
+         * Therefore, there is no need to send this CVQ command if the
+         * driver also enables all supported offloads, which aligns with
+         * the device's defaults.
+         *
+         * Note that the device's defaults can mismatch the driver's
+         * configuration only at live migration.
+         */
+        return 0;
+    }
+
+    offloads = cpu_to_le64(n->curr_guest_offloads);
+    dev_written = vhost_vdpa_net_load_cmd(s, VIRTIO_NET_CTRL_GUEST_OFFLOADS,
+                                          VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET,
+                                          &offloads, sizeof(offloads));
+    if (unlikely(dev_written < 0)) {
+        return dev_written;
+    }
+
+    return *s->status != VIRTIO_NET_OK;
+}
+
 static int vhost_vdpa_net_load(NetClientState *nc)
 {
     VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
@@ -702,6 +734,10 @@ static int vhost_vdpa_net_load(NetClientState *nc)
     if (unlikely(r)) {
         return r;
     }
+    r = vhost_vdpa_net_load_offloads(s, n);
+    if (unlikely(r)) {
+        return r;
+    }
 
     return 0;
 }
@@ -766,7 +802,7 @@ static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq,
     }
 
     if (*s->status != VIRTIO_NET_OK) {
-        return VIRTIO_NET_ERR;
+        goto out;
     }
 
     status = VIRTIO_NET_ERR;
@@ -790,6 +826,87 @@ static const VhostShadowVirtqueueOps vhost_vdpa_net_svq_ops = {
     .avail_handler = vhost_vdpa_net_handle_ctrl_avail,
 };
 
+/**
+ * Probe if CVQ is isolated
+ *
+ * @device_fd  The vdpa device fd
+ * @features   Features offered by the device.
+ * @cvq_index  The control vq pair index
+ *
+ * Returns <0 in case of failure, 0 if false and 1 if true.
+ */
+static int vhost_vdpa_probe_cvq_isolation(int device_fd, uint64_t features,
+                                          int cvq_index, Error **errp)
+{
+    uint64_t backend_features;
+    int64_t cvq_group;
+    uint8_t status = VIRTIO_CONFIG_S_ACKNOWLEDGE |
+                     VIRTIO_CONFIG_S_DRIVER |
+                     VIRTIO_CONFIG_S_FEATURES_OK;
+    int r;
+
+    ERRP_GUARD();
+
+    r = ioctl(device_fd, VHOST_GET_BACKEND_FEATURES, &backend_features);
+    if (unlikely(r < 0)) {
+        error_setg_errno(errp, errno, "Cannot get vdpa backend_features");
+        return r;
+    }
+
+    if (!(backend_features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID))) {
+        return 0;
+    }
+
+    r = ioctl(device_fd, VHOST_SET_FEATURES, &features);
+    if (unlikely(r)) {
+        error_setg_errno(errp, errno, "Cannot set features");
+        goto out;
+    }
+
+    r = ioctl(device_fd, VHOST_VDPA_SET_STATUS, &status);
+    if (unlikely(r)) {
+        error_setg_errno(errp, errno, "Cannot set device status");
+        goto out;
+    }
+
+    cvq_group = vhost_vdpa_get_vring_group(device_fd, cvq_index, errp);
+    if (unlikely(cvq_group < 0)) {
+        if (cvq_group != -ENOTSUP) {
+            r = cvq_group;
+            goto out;
+        }
+
+        /*
+         * The kernel reports VHOST_BACKEND_F_IOTLB_ASID if the vdpa frontend
+         * supports ASID even if the parent driver does not. The CVQ cannot be
+         * isolated in this case.
+         */
+        error_free(*errp);
+        *errp = NULL;
+        r = 0;
+        goto out;
+    }
+
+    for (int i = 0; i < cvq_index; ++i) {
+        int64_t group = vhost_vdpa_get_vring_group(device_fd, i, errp);
+        if (unlikely(group < 0)) {
+            r = group;
+            goto out;
+        }
+
+        if (group == (int64_t)cvq_group) {
+            r = 0;
+            goto out;
+        }
+    }
+
+    r = 1;
+
+out:
+    status = 0;
+    ioctl(device_fd, VHOST_VDPA_SET_STATUS, &status);
+    return r;
+}
+
 static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
                                            const char *device,
                                            const char *name,
@@ -799,16 +916,26 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
                                        bool is_datapath,
                                        bool svq,
                                        struct vhost_vdpa_iova_range iova_range,
-                                       uint64_t features)
+                                       uint64_t features,
+                                       Error **errp)
 {
     NetClientState *nc = NULL;
     VhostVDPAState *s;
     int ret = 0;
     assert(name);
+    int cvq_isolated;
+
     if (is_datapath) {
         nc = qemu_new_net_client(&net_vhost_vdpa_info, peer, device,
                                  name);
     } else {
+        cvq_isolated = vhost_vdpa_probe_cvq_isolation(vdpa_device_fd, features,
+                                                      queue_pair_index * 2,
+                                                      errp);
+        if (unlikely(cvq_isolated < 0)) {
+            return NULL;
+        }
+
         nc = qemu_new_net_control_client(&net_vhost_vdpa_cvq_info, peer,
                                          device, name);
     }
@@ -826,24 +953,28 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
         vhost_vdpa_net_valid_svq_features(features,
                                           &s->vhost_vdpa.migration_blocker);
     } else if (!is_datapath) {
-        s->cvq_cmd_out_buffer = qemu_memalign(qemu_real_host_page_size(),
-                                            vhost_vdpa_net_cvq_cmd_page_len());
-        memset(s->cvq_cmd_out_buffer, 0, vhost_vdpa_net_cvq_cmd_page_len());
-        s->status = qemu_memalign(qemu_real_host_page_size(),
-                                  vhost_vdpa_net_cvq_cmd_page_len());
-        memset(s->status, 0, vhost_vdpa_net_cvq_cmd_page_len());
+        s->cvq_cmd_out_buffer = mmap(NULL, vhost_vdpa_net_cvq_cmd_page_len(),
+                                     PROT_READ | PROT_WRITE,
+                                     MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+        s->status = mmap(NULL, vhost_vdpa_net_cvq_cmd_page_len(),
+                         PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS,
+                         -1, 0);
 
         s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops;
         s->vhost_vdpa.shadow_vq_ops_opaque = s;
+        s->cvq_isolated = cvq_isolated;
 
         /*
-         * TODO: We cannot migrate devices with CVQ as there is no way to set
-         * the device state (MAC, MQ, etc) before starting the datapath.
+         * TODO: We cannot migrate devices with CVQ unless x-svq is enabled,
+         * as there is no way to set the device state (MAC, MQ, etc) before
+         * starting the datapath.
          *
         * Migration blocker ownership now belongs to s->vhost_vdpa.
         */
-        error_setg(&s->vhost_vdpa.migration_blocker,
-                   "net vdpa cannot migrate with CVQ feature");
+        if (!svq) {
+            error_setg(&s->vhost_vdpa.migration_blocker,
+                       "net vdpa cannot migrate with CVQ feature");
+        }
     }
     ret = vhost_vdpa_add(nc, (void *)&s->vhost_vdpa, queue_pair_index, nvqs);
     if (ret) {
@@ -963,7 +1094,7 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name,
     for (i = 0; i < queue_pairs; i++) {
         ncs[i] = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name,
                                      vdpa_device_fd, i, 2, true, opts->x_svq,
-                                     iova_range, features);
+                                     iova_range, features, errp);
         if (!ncs[i])
             goto err;
     }
@@ -971,7 +1102,7 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name,
     if (has_cvq) {
         nc = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name,
                                  vdpa_device_fd, i, 1, false,
                                  opts->x_svq, iova_range, features, errp);
        if (!nc)
            goto err;
    }
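
Editor's note: the vhost_vdpa_net_cvq_cmd_len()/vhost_vdpa_net_cvq_cmd_page_len() pair sizes the CVQ bounce buffers for the largest possible control command (MAC_TABLE_SET), and the new allocation maps them as anonymous shared pages. Below is a minimal standalone sketch of the same sizing and mapping pattern, using the Linux UAPI virtio-net structures; MAC_TABLE_ENTRIES is an assumption here (64, matching QEMU's virtio-net definition), and sysconf() stands in for qemu_real_host_page_size()/ROUND_UP:

/* cvq_buf_sketch.c: size and map a CVQ command buffer (illustrative only). */
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <unistd.h>
#include <linux/virtio_net.h>   /* struct virtio_net_ctrl_hdr, struct virtio_net_ctrl_mac */
#include <linux/if_ether.h>     /* ETH_ALEN */

#define MAC_TABLE_ENTRIES 64    /* assumption: QEMU's MAC filter table size */

static size_t cvq_cmd_len(void)
{
    /* Header + unicast table + multicast table, as in the patch. */
    return sizeof(struct virtio_net_ctrl_hdr) +
           2 * sizeof(struct virtio_net_ctrl_mac) +
           MAC_TABLE_ENTRIES * ETH_ALEN;
}

static size_t cvq_cmd_page_len(void)
{
    size_t page = (size_t)sysconf(_SC_PAGESIZE);
    return (cvq_cmd_len() + page - 1) / page * page;   /* ROUND_UP */
}

int main(void)
{
    /* Anonymous shared mapping, like the patch uses for s->cvq_cmd_out_buffer.
     * Anonymous pages come back zero-filled, which is why the old
     * qemu_memalign() + memset() pair is no longer needed. */
    void *buf = mmap(NULL, cvq_cmd_page_len(), PROT_READ | PROT_WRITE,
                     MAP_SHARED | MAP_ANONYMOUS, -1, 0);
    if (buf == MAP_FAILED) {
        perror("mmap");
        return EXIT_FAILURE;
    }
    printf("cmd len = %zu, mapped = %zu bytes\n",
           cvq_cmd_len(), cvq_cmd_page_len());
    munmap(buf, cvq_cmd_page_len());
    return EXIT_SUCCESS;
}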
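
The loop at the heart of vhost_vdpa_probe_cvq_isolation() reduces to a simple predicate: the CVQ is isolated if and only if no data virtqueue reports the same vring group. A hedged sketch of just that predicate, with the VHOST_VDPA_GET_VRING_GROUP results assumed to be pre-collected into an array (groups[] and cvq_is_isolated() are illustrative names, not part of the patch):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* groups[i] holds the vring group of virtqueue i; cvq_index is the CVQ. */
static bool cvq_is_isolated(const int64_t *groups, int cvq_index)
{
    int64_t cvq_group = groups[cvq_index];

    for (int i = 0; i < cvq_index; ++i) {
        if (groups[i] == cvq_group) {
            /* A data vq shares the CVQ's group: no private ASID possible. */
            return false;
        }
    }
    return true;
}

int main(void)
{
    /* Two data queue pairs in group 0, CVQ alone in group 1: isolated. */
    int64_t groups[] = { 0, 0, 0, 0, 1 };
    printf("isolated: %d\n", cvq_is_isolated(groups, 4));
    return 0;
}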
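
Note also the probe's enter/exit discipline: the device has to reach FEATURES_OK before the vring groups can be queried, and the probe resets the status byte on every exit path so the real initialization later starts from a clean device. A condensed sketch of that discipline under the vhost UAPI headers (with_features_ok() is a hypothetical helper name, not part of the patch):

#include <errno.h>
#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/vhost.h>          /* VHOST_SET_FEATURES, VHOST_VDPA_SET_STATUS */
#include <linux/virtio_config.h>  /* VIRTIO_CONFIG_S_* */

/* Bring the device to FEATURES_OK, run cb(fd), then always reset status.
 * Usage: r = with_features_ok(vdpa_fd, features, probe_groups); */
int with_features_ok(int fd, uint64_t features, int (*cb)(int fd))
{
    uint8_t status = VIRTIO_CONFIG_S_ACKNOWLEDGE |
                     VIRTIO_CONFIG_S_DRIVER |
                     VIRTIO_CONFIG_S_FEATURES_OK;
    int r = ioctl(fd, VHOST_SET_FEATURES, &features);
    if (r < 0) {
        return -errno;
    }
    r = ioctl(fd, VHOST_VDPA_SET_STATUS, &status);
    if (r < 0) {
        r = -errno;
        goto out;
    }
    r = cb(fd);
out:
    status = 0;                   /* always leave the device reset */
    ioctl(fd, VHOST_VDPA_SET_STATUS, &status);
    return r;
}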