diff options
Diffstat (limited to 'hw/virtio')
| -rw-r--r-- | hw/virtio/trace-events | 10 | ||||
| -rw-r--r-- | hw/virtio/vhost-stub.c | 4 | ||||
| -rw-r--r-- | hw/virtio/vhost-user.c | 163 | ||||
| -rw-r--r-- | hw/virtio/vhost.c | 96 | ||||
| -rw-r--r-- | hw/virtio/virtio-balloon.c | 78 |
5 files changed, 320 insertions, 31 deletions
diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events index 07bcbe9e85..60c649c4bc 100644 --- a/hw/virtio/trace-events +++ b/hw/virtio/trace-events @@ -1,6 +1,6 @@ # See docs/devel/tracing.txt for syntax documentation. -# hw/virtio/vhost.c +# vhost.c vhost_commit(bool started, bool changed) "Started: %d Changed: %d" vhost_region_add_section(const char *name, uint64_t gpa, uint64_t size, uint64_t host) "%s: 0x%"PRIx64"+0x%"PRIx64" @ 0x%"PRIx64 vhost_region_add_section_merge(const char *name, uint64_t new_size, uint64_t gpa, uint64_t owr) "%s: size: 0x%"PRIx64 " gpa: 0x%"PRIx64 " owr: 0x%"PRIx64 @@ -8,7 +8,7 @@ vhost_region_add_section_aligned(const char *name, uint64_t gpa, uint64_t size, vhost_section(const char *name, int r) "%s:%d" vhost_iotlb_miss(void *dev, int step) "%p step %d" -# hw/virtio/vhost-user.c +# vhost-user.c vhost_user_postcopy_end_entry(void) "" vhost_user_postcopy_end_exit(void) "" vhost_user_postcopy_fault_handler(const char *name, uint64_t fault_address, int nregions) "%s: @0x%"PRIx64" nregions:%d" @@ -21,7 +21,7 @@ vhost_user_postcopy_waker(const char *rb, uint64_t rb_offset) "%s + 0x%"PRIx64 vhost_user_postcopy_waker_found(uint64_t client_addr) "0x%"PRIx64 vhost_user_postcopy_waker_nomatch(const char *rb, uint64_t rb_offset) "%s + 0x%"PRIx64 -# hw/virtio/virtio.c +# virtio.c virtqueue_alloc_element(void *elem, size_t sz, unsigned in_num, unsigned out_num) "elem %p size %zd in_num %u out_num %u" virtqueue_fill(void *vq, const void *elem, unsigned int len, unsigned int idx) "vq %p elem %p len %u idx %u" virtqueue_flush(void *vq, unsigned int count) "vq %p count %u" @@ -31,7 +31,7 @@ virtio_notify_irqfd(void *vdev, void *vq) "vdev %p vq %p" virtio_notify(void *vdev, void *vq) "vdev %p vq %p" virtio_set_status(void *vdev, uint8_t val) "vdev %p val %u" -# hw/virtio/virtio-rng.c +# virtio-rng.c virtio_rng_guest_not_ready(void *rng) "rng %p: guest not ready" virtio_rng_cpu_is_stopped(void *rng, int size) "rng %p: cpu is stopped, dropping %d bytes" virtio_rng_popped(void *rng) "rng %p: elem popped" @@ -39,7 +39,7 @@ virtio_rng_pushed(void *rng, size_t len) "rng %p: %zd bytes pushed" virtio_rng_request(void *rng, size_t size, unsigned quota) "rng %p: %zd bytes requested, %u bytes quota left" virtio_rng_vm_state_change(void *rng, int running, int state) "rng %p: state change to running %d state %d" -# hw/virtio/virtio-balloon.c +# virtio-balloon.c # virtio_balloon_bad_addr(uint64_t gpa) "0x%"PRIx64 virtio_balloon_handle_output(const char *name, uint64_t gpa) "section name: %s gpa: 0x%"PRIx64 diff --git a/hw/virtio/vhost-stub.c b/hw/virtio/vhost-stub.c index 049089b5e2..c175148fce 100644 --- a/hw/virtio/vhost-stub.c +++ b/hw/virtio/vhost-stub.c @@ -7,9 +7,9 @@ bool vhost_has_free_slot(void) return true; } -VhostUserState *vhost_user_init(void) +bool vhost_user_init(VhostUserState *user, CharBackend *chr, Error **errp) { - return NULL; + return false; } void vhost_user_cleanup(VhostUserState *user) diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index 0d6c64e5ca..553319c7ac 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -56,6 +56,7 @@ enum VhostUserProtocolFeature { VHOST_USER_PROTOCOL_F_CONFIG = 9, VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD = 10, VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11, + VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12, VHOST_USER_PROTOCOL_F_MAX }; @@ -93,6 +94,8 @@ typedef enum VhostUserRequest { VHOST_USER_POSTCOPY_ADVISE = 28, VHOST_USER_POSTCOPY_LISTEN = 29, VHOST_USER_POSTCOPY_END = 30, + VHOST_USER_GET_INFLIGHT_FD = 31, + VHOST_USER_SET_INFLIGHT_FD = 32, VHOST_USER_MAX } VhostUserRequest; @@ -151,6 +154,13 @@ typedef struct VhostUserVringArea { uint64_t offset; } VhostUserVringArea; +typedef struct VhostUserInflight { + uint64_t mmap_size; + uint64_t mmap_offset; + uint16_t num_queues; + uint16_t queue_size; +} VhostUserInflight; + typedef struct { VhostUserRequest request; @@ -173,6 +183,7 @@ typedef union { VhostUserConfig config; VhostUserCryptoSession session; VhostUserVringArea area; + VhostUserInflight inflight; } VhostUserPayload; typedef struct VhostUserMsg { @@ -214,7 +225,7 @@ static bool ioeventfd_enabled(void) return !kvm_enabled() || kvm_eventfds_enabled(); } -static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg) +static int vhost_user_read_header(struct vhost_dev *dev, VhostUserMsg *msg) { struct vhost_user *u = dev->opaque; CharBackend *chr = u->user->chr; @@ -225,7 +236,7 @@ static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg) if (r != size) { error_report("Failed to read msg header. Read %d instead of %d." " Original request %d.", r, size, msg->hdr.request); - goto fail; + return -1; } /* validate received flags */ @@ -233,7 +244,21 @@ static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg) error_report("Failed to read msg header." " Flags 0x%x instead of 0x%x.", msg->hdr.flags, VHOST_USER_REPLY_MASK | VHOST_USER_VERSION); - goto fail; + return -1; + } + + return 0; +} + +static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg) +{ + struct vhost_user *u = dev->opaque; + CharBackend *chr = u->user->chr; + uint8_t *p = (uint8_t *) msg; + int r, size; + + if (vhost_user_read_header(dev, msg) < 0) { + return -1; } /* validate message size is sane */ @@ -241,7 +266,7 @@ static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg) error_report("Failed to read msg header." " Size %d exceeds the maximum %zu.", msg->hdr.size, VHOST_USER_PAYLOAD_SIZE); - goto fail; + return -1; } if (msg->hdr.size) { @@ -251,14 +276,11 @@ static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg) if (r != size) { error_report("Failed to read msg payload." " Read %d instead of %d.", r, msg->hdr.size); - goto fail; + return -1; } } return 0; - -fail: - return -1; } static int process_message_reply(struct vhost_dev *dev, @@ -968,7 +990,10 @@ static void slave_read(void *opaque) iov.iov_base = &hdr; iov.iov_len = VHOST_USER_HDR_SIZE; - size = recvmsg(u->slave_fd, &msgh, 0); + do { + size = recvmsg(u->slave_fd, &msgh, 0); + } while (size < 0 && (errno == EINTR || errno == EAGAIN)); + if (size != VHOST_USER_HDR_SIZE) { error_report("Failed to read from slave."); goto err; @@ -997,7 +1022,10 @@ static void slave_read(void *opaque) } /* Read payload */ - size = read(u->slave_fd, &payload, hdr.size); + do { + size = read(u->slave_fd, &payload, hdr.size); + } while (size < 0 && (errno == EINTR || errno == EAGAIN)); + if (size != hdr.size) { error_report("Failed to read payload from slave."); goto err; @@ -1045,7 +1073,10 @@ static void slave_read(void *opaque) iovec[1].iov_base = &payload; iovec[1].iov_len = hdr.size; - size = writev(u->slave_fd, iovec, ARRAY_SIZE(iovec)); + do { + size = writev(u->slave_fd, iovec, ARRAY_SIZE(iovec)); + } while (size < 0 && (errno == EINTR || errno == EAGAIN)); + if (size != VHOST_USER_HDR_SIZE + hdr.size) { error_report("Failed to send msg reply to slave."); goto err; @@ -1750,17 +1781,118 @@ static bool vhost_user_mem_section_filter(struct vhost_dev *dev, return result; } -VhostUserState *vhost_user_init(void) +static int vhost_user_get_inflight_fd(struct vhost_dev *dev, + uint16_t queue_size, + struct vhost_inflight *inflight) +{ + void *addr; + int fd; + struct vhost_user *u = dev->opaque; + CharBackend *chr = u->user->chr; + VhostUserMsg msg = { + .hdr.request = VHOST_USER_GET_INFLIGHT_FD, + .hdr.flags = VHOST_USER_VERSION, + .payload.inflight.num_queues = dev->nvqs, + .payload.inflight.queue_size = queue_size, + .hdr.size = sizeof(msg.payload.inflight), + }; + + if (!virtio_has_feature(dev->protocol_features, + VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) { + return 0; + } + + if (vhost_user_write(dev, &msg, NULL, 0) < 0) { + return -1; + } + + if (vhost_user_read(dev, &msg) < 0) { + return -1; + } + + if (msg.hdr.request != VHOST_USER_GET_INFLIGHT_FD) { + error_report("Received unexpected msg type. " + "Expected %d received %d", + VHOST_USER_GET_INFLIGHT_FD, msg.hdr.request); + return -1; + } + + if (msg.hdr.size != sizeof(msg.payload.inflight)) { + error_report("Received bad msg size."); + return -1; + } + + if (!msg.payload.inflight.mmap_size) { + return 0; + } + + fd = qemu_chr_fe_get_msgfd(chr); + if (fd < 0) { + error_report("Failed to get mem fd"); + return -1; + } + + addr = mmap(0, msg.payload.inflight.mmap_size, PROT_READ | PROT_WRITE, + MAP_SHARED, fd, msg.payload.inflight.mmap_offset); + + if (addr == MAP_FAILED) { + error_report("Failed to mmap mem fd"); + close(fd); + return -1; + } + + inflight->addr = addr; + inflight->fd = fd; + inflight->size = msg.payload.inflight.mmap_size; + inflight->offset = msg.payload.inflight.mmap_offset; + inflight->queue_size = queue_size; + + return 0; +} + +static int vhost_user_set_inflight_fd(struct vhost_dev *dev, + struct vhost_inflight *inflight) { - VhostUserState *user = g_new0(struct VhostUserState, 1); + VhostUserMsg msg = { + .hdr.request = VHOST_USER_SET_INFLIGHT_FD, + .hdr.flags = VHOST_USER_VERSION, + .payload.inflight.mmap_size = inflight->size, + .payload.inflight.mmap_offset = inflight->offset, + .payload.inflight.num_queues = dev->nvqs, + .payload.inflight.queue_size = inflight->queue_size, + .hdr.size = sizeof(msg.payload.inflight), + }; - return user; + if (!virtio_has_feature(dev->protocol_features, + VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) { + return 0; + } + + if (vhost_user_write(dev, &msg, &inflight->fd, 1) < 0) { + return -1; + } + + return 0; +} + +bool vhost_user_init(VhostUserState *user, CharBackend *chr, Error **errp) +{ + if (user->chr) { + error_setg(errp, "Cannot initialize vhost-user state"); + return false; + } + user->chr = chr; + return true; } void vhost_user_cleanup(VhostUserState *user) { int i; + if (!user->chr) { + return; + } + for (i = 0; i < VIRTIO_QUEUE_MAX; i++) { if (user->notifier[i].addr) { object_unparent(OBJECT(&user->notifier[i].mr)); @@ -1768,6 +1900,7 @@ void vhost_user_cleanup(VhostUserState *user) user->notifier[i].addr = NULL; } } + user->chr = NULL; } const VhostOps user_ops = { @@ -1801,4 +1934,6 @@ const VhostOps user_ops = { .vhost_crypto_create_session = vhost_user_crypto_create_session, .vhost_crypto_close_session = vhost_user_crypto_close_session, .vhost_backend_mem_section_filter = vhost_user_mem_section_filter, + .vhost_get_inflight_fd = vhost_user_get_inflight_fd, + .vhost_set_inflight_fd = vhost_user_set_inflight_fd, }; diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index 311432f190..7f61018f2a 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -1481,6 +1481,102 @@ void vhost_dev_set_config_notifier(struct vhost_dev *hdev, hdev->config_ops = ops; } +void vhost_dev_free_inflight(struct vhost_inflight *inflight) +{ + if (inflight->addr) { + qemu_memfd_free(inflight->addr, inflight->size, inflight->fd); + inflight->addr = NULL; + inflight->fd = -1; + } +} + +static int vhost_dev_resize_inflight(struct vhost_inflight *inflight, + uint64_t new_size) +{ + Error *err = NULL; + int fd = -1; + void *addr = qemu_memfd_alloc("vhost-inflight", new_size, + F_SEAL_GROW | F_SEAL_SHRINK | F_SEAL_SEAL, + &fd, &err); + + if (err) { + error_report_err(err); + return -1; + } + + vhost_dev_free_inflight(inflight); + inflight->offset = 0; + inflight->addr = addr; + inflight->fd = fd; + inflight->size = new_size; + + return 0; +} + +void vhost_dev_save_inflight(struct vhost_inflight *inflight, QEMUFile *f) +{ + if (inflight->addr) { + qemu_put_be64(f, inflight->size); + qemu_put_be16(f, inflight->queue_size); + qemu_put_buffer(f, inflight->addr, inflight->size); + } else { + qemu_put_be64(f, 0); + } +} + +int vhost_dev_load_inflight(struct vhost_inflight *inflight, QEMUFile *f) +{ + uint64_t size; + + size = qemu_get_be64(f); + if (!size) { + return 0; + } + + if (inflight->size != size) { + if (vhost_dev_resize_inflight(inflight, size)) { + return -1; + } + } + inflight->queue_size = qemu_get_be16(f); + + qemu_get_buffer(f, inflight->addr, size); + + return 0; +} + +int vhost_dev_set_inflight(struct vhost_dev *dev, + struct vhost_inflight *inflight) +{ + int r; + + if (dev->vhost_ops->vhost_set_inflight_fd && inflight->addr) { + r = dev->vhost_ops->vhost_set_inflight_fd(dev, inflight); + if (r) { + VHOST_OPS_DEBUG("vhost_set_inflight_fd failed"); + return -errno; + } + } + + return 0; +} + +int vhost_dev_get_inflight(struct vhost_dev *dev, uint16_t queue_size, + struct vhost_inflight *inflight) +{ + int r; + + if (dev->vhost_ops->vhost_get_inflight_fd) { + r = dev->vhost_ops->vhost_get_inflight_fd(dev, queue_size, inflight); + if (r) { + VHOST_OPS_DEBUG("vhost_get_inflight_fd failed"); + return -errno; + } + } + + return 0; +} + /* Host notifiers must be enabled at this point. */ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev) { diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c index e3a65940ef..2112874055 100644 --- a/hw/virtio/virtio-balloon.c +++ b/hw/virtio/virtio-balloon.c @@ -82,7 +82,7 @@ static void balloon_inflate_page(VirtIOBalloon *balloon, /* We've partially ballooned part of a host page, but now * we're trying to balloon part of a different one. Too hard, * give up on the old partial page */ - free(balloon->pbp); + g_free(balloon->pbp); balloon->pbp = NULL; } @@ -107,11 +107,61 @@ static void balloon_inflate_page(VirtIOBalloon *balloon, * has already reported them, and failing to discard a balloon * page is not fatal */ - free(balloon->pbp); + g_free(balloon->pbp); balloon->pbp = NULL; } } +static void balloon_deflate_page(VirtIOBalloon *balloon, + MemoryRegion *mr, hwaddr offset) +{ + void *addr = memory_region_get_ram_ptr(mr) + offset; + RAMBlock *rb; + size_t rb_page_size; + ram_addr_t ram_offset, host_page_base; + void *host_addr; + int ret; + + /* XXX is there a better way to get to the RAMBlock than via a + * host address? */ + rb = qemu_ram_block_from_host(addr, false, &ram_offset); + rb_page_size = qemu_ram_pagesize(rb); + host_page_base = ram_offset & ~(rb_page_size - 1); + + if (balloon->pbp + && rb == balloon->pbp->rb + && host_page_base == balloon->pbp->base) { + int subpages = rb_page_size / BALLOON_PAGE_SIZE; + + /* + * This means the guest has asked to discard some of the 4kiB + * subpages of a host page, but then changed its mind and + * asked to keep them after all. It's exceedingly unlikely + * for a guest to do this in practice, but handle it anyway, + * since getting it wrong could mean discarding memory the + * guest is still using. */ + bitmap_clear(balloon->pbp->bitmap, + (ram_offset - balloon->pbp->base) / BALLOON_PAGE_SIZE, + subpages); + + if (bitmap_empty(balloon->pbp->bitmap, subpages)) { + g_free(balloon->pbp); + balloon->pbp = NULL; + } + } + + host_addr = (void *)((uintptr_t)addr & ~(rb_page_size - 1)); + + /* When a page is deflated, we hint the whole host page it lives + * on, since we can't do anything smaller */ + ret = qemu_madvise(host_addr, rb_page_size, QEMU_MADV_WILLNEED); + if (ret != 0) { + warn_report("Couldn't MADV_WILLNEED on balloon deflate: %s", + strerror(errno)); + /* Otherwise ignore, failing to page hint shouldn't be fatal */ + } +} + static const char *balloon_stat_names[] = { [VIRTIO_BALLOON_S_SWAP_IN] = "stat-swap-in", [VIRTIO_BALLOON_S_SWAP_OUT] = "stat-swap-out", @@ -315,8 +365,15 @@ static void virtio_balloon_handle_output(VirtIODevice *vdev, VirtQueue *vq) trace_virtio_balloon_handle_output(memory_region_name(section.mr), pa); - if (!qemu_balloon_is_inhibited() && vq != s->dvq) { - balloon_inflate_page(s, section.mr, section.offset_within_region); + if (!qemu_balloon_is_inhibited()) { + if (vq == s->ivq) { + balloon_inflate_page(s, section.mr, + section.offset_within_region); + } else if (vq == s->dvq) { + balloon_deflate_page(s, section.mr, section.offset_within_region); + } else { + g_assert_not_reached(); + } } memory_region_unref(section.mr); } @@ -391,6 +448,7 @@ static bool get_free_page_hints(VirtIOBalloon *dev) VirtQueueElement *elem; VirtIODevice *vdev = VIRTIO_DEVICE(dev); VirtQueue *vq = dev->free_page_vq; + bool ret = true; while (dev->block_iothread) { qemu_cond_wait(&dev->free_page_cond, &dev->free_page_lock); @@ -405,13 +463,12 @@ static bool get_free_page_hints(VirtIOBalloon *dev) uint32_t id; size_t size = iov_to_buf(elem->out_sg, elem->out_num, 0, &id, sizeof(id)); - virtqueue_push(vq, elem, size); - g_free(elem); virtio_tswap32s(vdev, &id); if (unlikely(size != sizeof(id))) { virtio_error(vdev, "received an incorrect cmd id"); - return false; + ret = false; + goto out; } if (id == dev->free_page_report_cmd_id) { dev->free_page_report_status = FREE_PAGE_REPORT_S_START; @@ -431,11 +488,12 @@ static bool get_free_page_hints(VirtIOBalloon *dev) qemu_guest_free_page_hint(elem->in_sg[0].iov_base, elem->in_sg[0].iov_len); } - virtqueue_push(vq, elem, 1); - g_free(elem); } - return true; +out: + virtqueue_push(vq, elem, 1); + g_free(elem); + return ret; } static void virtio_ballloon_get_free_page_hints(void *opaque) |