Diffstat (limited to 'hw/i386/kvm/xen_xenstore.c')
| -rw-r--r-- | hw/i386/kvm/xen_xenstore.c | 500 |
1 file changed, 500 insertions, 0 deletions
diff --git a/hw/i386/kvm/xen_xenstore.c b/hw/i386/kvm/xen_xenstore.c
new file mode 100644
index 0000000000..14193ef3f9
--- /dev/null
+++ b/hw/i386/kvm/xen_xenstore.c
@@ -0,0 +1,500 @@
+/*
+ * QEMU Xen emulation: Shared/overlay pages support
+ *
+ * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ *
+ * Authors: David Woodhouse <dwmw2@infradead.org>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+
+#include "qemu/host-utils.h"
+#include "qemu/module.h"
+#include "qemu/main-loop.h"
+#include "qemu/cutils.h"
+#include "qapi/error.h"
+#include "qom/object.h"
+#include "migration/vmstate.h"
+
+#include "hw/sysbus.h"
+#include "hw/xen/xen.h"
+#include "xen_overlay.h"
+#include "xen_evtchn.h"
+#include "xen_xenstore.h"
+
+#include "sysemu/kvm.h"
+#include "sysemu/kvm_xen.h"
+
+#include "hw/xen/interface/io/xs_wire.h"
+#include "hw/xen/interface/event_channel.h"
+
+#define TYPE_XEN_XENSTORE "xen-xenstore"
+OBJECT_DECLARE_SIMPLE_TYPE(XenXenstoreState, XEN_XENSTORE)
+
+#define XEN_PAGE_SHIFT 12
+#define XEN_PAGE_SIZE (1ULL << XEN_PAGE_SHIFT)
+
+#define ENTRIES_PER_FRAME_V1 (XEN_PAGE_SIZE / sizeof(grant_entry_v1_t))
+#define ENTRIES_PER_FRAME_V2 (XEN_PAGE_SIZE / sizeof(grant_entry_v2_t))
+
+#define XENSTORE_HEADER_SIZE ((unsigned int)sizeof(struct xsd_sockmsg))
+
+struct XenXenstoreState {
+    /*< private >*/
+    SysBusDevice busdev;
+    /*< public >*/
+
+    MemoryRegion xenstore_page;
+    struct xenstore_domain_interface *xs;
+    uint8_t req_data[XENSTORE_HEADER_SIZE + XENSTORE_PAYLOAD_MAX];
+    uint8_t rsp_data[XENSTORE_HEADER_SIZE + XENSTORE_PAYLOAD_MAX];
+    uint32_t req_offset;
+    uint32_t rsp_offset;
+    bool rsp_pending;
+    bool fatal_error;
+
+    evtchn_port_t guest_port;
+    evtchn_port_t be_port;
+    struct xenevtchn_handle *eh;
+};
+
+struct XenXenstoreState *xen_xenstore_singleton;
+
+static void xen_xenstore_event(void *opaque);
+
+static void xen_xenstore_realize(DeviceState *dev, Error **errp)
+{
+    XenXenstoreState *s = XEN_XENSTORE(dev);
+
+    if (xen_mode != XEN_EMULATE) {
+        error_setg(errp, "Xen xenstore support is for Xen emulation");
+        return;
+    }
+    memory_region_init_ram(&s->xenstore_page, OBJECT(dev), "xen:xenstore_page",
+                           XEN_PAGE_SIZE, &error_abort);
+    memory_region_set_enabled(&s->xenstore_page, true);
+    s->xs = memory_region_get_ram_ptr(&s->xenstore_page);
+    memset(s->xs, 0, XEN_PAGE_SIZE);
+
+    /* We can't map it this early as KVM isn't ready */
+    xen_xenstore_singleton = s;
+
+    s->eh = xen_be_evtchn_open();
+    if (!s->eh) {
+        error_setg(errp, "Xenstore evtchn port init failed");
+        return;
+    }
+    aio_set_fd_handler(qemu_get_aio_context(), xen_be_evtchn_fd(s->eh), true,
+                       xen_xenstore_event, NULL, NULL, NULL, s);
+}
+
+static bool xen_xenstore_is_needed(void *opaque)
+{
+    return xen_mode == XEN_EMULATE;
+}
+
+static int xen_xenstore_pre_save(void *opaque)
+{
+    XenXenstoreState *s = opaque;
+
+    if (s->eh) {
+        s->guest_port = xen_be_evtchn_get_guest_port(s->eh);
+    }
+    return 0;
+}
+
+static int xen_xenstore_post_load(void *opaque, int ver)
+{
+    XenXenstoreState *s = opaque;
+
+    /*
+     * As qemu/dom0, rebind to the guest's port. The Windows drivers may
+     * unbind the XenStore evtchn and rebind to it, having obtained the
+     * "remote" port through EVTCHNOP_status. In the case that migration
+     * occurs while it's unbound, the "remote" port needs to be the same
+     * as before so that the guest can find it, but should remain unbound.
+     */
+    if (s->guest_port) {
+        int be_port = xen_be_evtchn_bind_interdomain(s->eh, xen_domid,
+                                                     s->guest_port);
+        if (be_port < 0) {
+            return be_port;
+        }
+        s->be_port = be_port;
+    }
+    return 0;
+}
+
+static const VMStateDescription xen_xenstore_vmstate = {
+    .name = "xen_xenstore",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = xen_xenstore_is_needed,
+    .pre_save = xen_xenstore_pre_save,
+    .post_load = xen_xenstore_post_load,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT8_ARRAY(req_data, XenXenstoreState,
+                            sizeof_field(XenXenstoreState, req_data)),
+        VMSTATE_UINT8_ARRAY(rsp_data, XenXenstoreState,
+                            sizeof_field(XenXenstoreState, rsp_data)),
+        VMSTATE_UINT32(req_offset, XenXenstoreState),
+        VMSTATE_UINT32(rsp_offset, XenXenstoreState),
+        VMSTATE_BOOL(rsp_pending, XenXenstoreState),
+        VMSTATE_UINT32(guest_port, XenXenstoreState),
+        VMSTATE_BOOL(fatal_error, XenXenstoreState),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static void xen_xenstore_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+
+    dc->realize = xen_xenstore_realize;
+    dc->vmsd = &xen_xenstore_vmstate;
+}
+
+static const TypeInfo xen_xenstore_info = {
+    .name = TYPE_XEN_XENSTORE,
+    .parent = TYPE_SYS_BUS_DEVICE,
+    .instance_size = sizeof(XenXenstoreState),
+    .class_init = xen_xenstore_class_init,
+};
+
+void xen_xenstore_create(void)
+{
+    DeviceState *dev = sysbus_create_simple(TYPE_XEN_XENSTORE, -1, NULL);
+
+    xen_xenstore_singleton = XEN_XENSTORE(dev);
+
+    /*
+     * Defer the init (xen_xenstore_reset()) until KVM is set up and the
+     * overlay page can be mapped.
+     */
+}
+
+static void xen_xenstore_register_types(void)
+{
+    type_register_static(&xen_xenstore_info);
+}
+
+type_init(xen_xenstore_register_types)
+
+uint16_t xen_xenstore_get_port(void)
+{
+    XenXenstoreState *s = xen_xenstore_singleton;
+    if (!s) {
+        return 0;
+    }
+    return s->guest_port;
+}
+
+static bool req_pending(XenXenstoreState *s)
+{
+    struct xsd_sockmsg *req = (struct xsd_sockmsg *)s->req_data;
+
+    return s->req_offset == XENSTORE_HEADER_SIZE + req->len;
+}
+
+static void reset_req(XenXenstoreState *s)
+{
+    memset(s->req_data, 0, sizeof(s->req_data));
+    s->req_offset = 0;
+}
+
+static void reset_rsp(XenXenstoreState *s)
+{
+    s->rsp_pending = false;
+
+    memset(s->rsp_data, 0, sizeof(s->rsp_data));
+    s->rsp_offset = 0;
+}
+
+static void process_req(XenXenstoreState *s)
+{
+    struct xsd_sockmsg *req = (struct xsd_sockmsg *)s->req_data;
+    struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
+    const char enosys[] = "ENOSYS";
+
+    assert(req_pending(s));
+    assert(!s->rsp_pending);
+
+    rsp->type = XS_ERROR;
+    rsp->req_id = req->req_id;
+    rsp->tx_id = req->tx_id;
+    rsp->len = sizeof(enosys);
+    memcpy((void *)&rsp[1], enosys, sizeof(enosys));
+
+    s->rsp_pending = true;
+    reset_req(s);
+}
+
+static unsigned int copy_from_ring(XenXenstoreState *s, uint8_t *ptr,
+                                   unsigned int len)
+{
+    if (!len) {
+        return 0;
+    }
+
+    XENSTORE_RING_IDX prod = qatomic_read(&s->xs->req_prod);
+    XENSTORE_RING_IDX cons = qatomic_read(&s->xs->req_cons);
+    unsigned int copied = 0;
+
+    /* Ensure the ring contents don't cross the req_prod access. */
+    smp_rmb();
+
+    while (len) {
+        unsigned int avail = prod - cons;
+        unsigned int offset = MASK_XENSTORE_IDX(cons);
+        unsigned int copylen = avail;
+
+        if (avail > XENSTORE_RING_SIZE) {
+            error_report("XenStore ring handling error");
+            s->fatal_error = true;
+            break;
+        } else if (avail == 0) {
+            break;
+        }
+
+        if (copylen > len) {
+            copylen = len;
+        }
+        if (copylen > XENSTORE_RING_SIZE - offset) {
+            copylen = XENSTORE_RING_SIZE - offset;
+        }
+
+        memcpy(ptr, &s->xs->req[offset], copylen);
+        copied += copylen;
+
+        ptr += copylen;
+        len -= copylen;
+
+        cons += copylen;
+    }
+
+    /*
+     * Not sure this ever mattered except on Alpha, but this barrier
+     * is to ensure that the update to req_cons is globally visible
+     * only after we have consumed all the data from the ring, and we
+     * don't end up seeing data written to the ring *after* the other
+     * end sees the update and writes more to the ring. Xen's own
+     * xenstored has the same barrier here (although with no comment
+     * at all, obviously, because it's Xen code).
+     */
+    smp_mb();
+
+    qatomic_set(&s->xs->req_cons, cons);
+
+    return copied;
+}
+
+static unsigned int copy_to_ring(XenXenstoreState *s, uint8_t *ptr,
+                                 unsigned int len)
+{
+    if (!len) {
+        return 0;
+    }
+
+    XENSTORE_RING_IDX cons = qatomic_read(&s->xs->rsp_cons);
+    XENSTORE_RING_IDX prod = qatomic_read(&s->xs->rsp_prod);
+    unsigned int copied = 0;
+
+    /*
+     * This matches the barrier in copy_to_ring() (or the guest's
+     * equivalent) between writing the data to the ring and updating
+     * rsp_prod. It protects against the pathological case (which
+     * again I think never happened except on Alpha) where our
+     * subsequent writes to the ring could *cross* the read of
+     * rsp_cons and the guest could see the new data when it was
+     * intending to read the old.
+     */
+    smp_mb();
+
+    while (len) {
+        unsigned int avail = cons + XENSTORE_RING_SIZE - prod;
+        unsigned int offset = MASK_XENSTORE_IDX(prod);
+        unsigned int copylen = len;
+
+        if (avail > XENSTORE_RING_SIZE) {
+            error_report("XenStore ring handling error");
+            s->fatal_error = true;
+            break;
+        } else if (avail == 0) {
+            break;
+        }
+
+        if (copylen > avail) {
+            copylen = avail;
+        }
+        if (copylen > XENSTORE_RING_SIZE - offset) {
+            copylen = XENSTORE_RING_SIZE - offset;
+        }
+
+
+        memcpy(&s->xs->rsp[offset], ptr, copylen);
+        copied += copylen;
+
+        ptr += copylen;
+        len -= copylen;
+
+        prod += copylen;
+    }
+
+    /* Ensure the ring contents are seen before rsp_prod update. */
+    smp_wmb();
+
+    qatomic_set(&s->xs->rsp_prod, prod);
+
+    return copied;
+}
+
+static unsigned int get_req(XenXenstoreState *s)
+{
+    unsigned int copied = 0;
+
+    if (s->fatal_error) {
+        return 0;
+    }
+
+    assert(!req_pending(s));
+
+    if (s->req_offset < XENSTORE_HEADER_SIZE) {
+        void *ptr = s->req_data + s->req_offset;
+        unsigned int len = XENSTORE_HEADER_SIZE;
+        unsigned int copylen = copy_from_ring(s, ptr, len);
+
+        copied += copylen;
+        s->req_offset += copylen;
+    }
+
+    if (s->req_offset >= XENSTORE_HEADER_SIZE) {
+        struct xsd_sockmsg *req = (struct xsd_sockmsg *)s->req_data;
+
+        if (req->len > (uint32_t)XENSTORE_PAYLOAD_MAX) {
+            error_report("Illegal XenStore request");
+            s->fatal_error = true;
+            return 0;
+        }
+
+        void *ptr = s->req_data + s->req_offset;
+        unsigned int len = XENSTORE_HEADER_SIZE + req->len - s->req_offset;
+        unsigned int copylen = copy_from_ring(s, ptr, len);
+
+        copied += copylen;
+        s->req_offset += copylen;
+    }
+
+    return copied;
+}
+
+static unsigned int put_rsp(XenXenstoreState *s)
+{
+    if (s->fatal_error) {
+        return 0;
+    }
+
+    assert(s->rsp_pending);
+
+    struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
+    assert(s->rsp_offset < XENSTORE_HEADER_SIZE + rsp->len);
+
+    void *ptr = s->rsp_data + s->rsp_offset;
+    unsigned int len = XENSTORE_HEADER_SIZE + rsp->len - s->rsp_offset;
+    unsigned int copylen = copy_to_ring(s, ptr, len);
+
+    s->rsp_offset += copylen;
+
+    /* Have we produced a complete response? */
+    if (s->rsp_offset == XENSTORE_HEADER_SIZE + rsp->len) {
+        reset_rsp(s);
+    }
+
+    return copylen;
+}
+
+static void xen_xenstore_event(void *opaque)
+{
+    XenXenstoreState *s = opaque;
+    evtchn_port_t port = xen_be_evtchn_pending(s->eh);
+    unsigned int copied_to, copied_from;
+    bool processed, notify = false;
+
+    if (port != s->be_port) {
+        return;
+    }
+
+    /* We know this is a no-op. */
+    xen_be_evtchn_unmask(s->eh, port);
+
+    do {
+        copied_to = copied_from = 0;
+        processed = false;
+
+        if (s->rsp_pending) {
+            copied_to = put_rsp(s);
+        }
+
+        if (!req_pending(s)) {
+            copied_from = get_req(s);
+        }
+
+        if (req_pending(s) && !s->rsp_pending) {
+            process_req(s);
+            processed = true;
+        }
+
+        notify |= copied_to || copied_from;
+    } while (copied_to || copied_from || processed);
+
+    if (notify) {
+        xen_be_evtchn_notify(s->eh, s->be_port);
+    }
+}
+
+static void alloc_guest_port(XenXenstoreState *s)
+{
+    struct evtchn_alloc_unbound alloc = {
+        .dom = DOMID_SELF,
+        .remote_dom = DOMID_QEMU,
+    };
+
+    if (!xen_evtchn_alloc_unbound_op(&alloc)) {
+        s->guest_port = alloc.port;
+    }
+}
+
+int xen_xenstore_reset(void)
+{
+    XenXenstoreState *s = xen_xenstore_singleton;
+    int err;
+
+    if (!s) {
+        return -ENOTSUP;
+    }
+
+    s->req_offset = s->rsp_offset = 0;
+    s->rsp_pending = false;
+
+    if (!memory_region_is_mapped(&s->xenstore_page)) {
+        uint64_t gpa = XEN_SPECIAL_PFN(XENSTORE) << TARGET_PAGE_BITS;
+        xen_overlay_do_map_page(&s->xenstore_page, gpa);
+    }
+
+    alloc_guest_port(s);
+
+    /*
+     * As qemu/dom0, bind to the guest's port. For incoming migration, this
+     * will be unbound as the guest's evtchn table is overwritten. We then
+     * rebind to the correct guest port in xen_xenstore_post_load().
+     */
+    err = xen_be_evtchn_bind_interdomain(s->eh, xen_domid, s->guest_port);
+    if (err < 0) {
+        return err;
+    }
+    s->be_port = err;
+
+    return 0;
+}
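
Editor's note, not part of the patch: copy_from_ring() and copy_to_ring() above rely on free-running 32-bit ring indices, so "bytes readable", "space writable" and the buffer offset are all computed with unsigned wraparound arithmetic against a power-of-two ring size. The following minimal standalone sketch illustrates that arithmetic; RING_SIZE and MASK_IDX are hypothetical stand-ins for the xs_wire.h definitions, and the index values are made up for the example.

/* Sketch of the XenStore ring index arithmetic (illustrative only). */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define RING_SIZE 1024u                       /* stand-in for XENSTORE_RING_SIZE */
#define MASK_IDX(idx) ((idx) & (RING_SIZE - 1))  /* stand-in for MASK_XENSTORE_IDX */

int main(void)
{
    /* Free-running counters; they are allowed to wrap around 2^32. */
    uint32_t prod = UINT32_C(0xfffffff8);     /* total bytes ever produced */
    uint32_t cons = UINT32_C(0xfffffff0);     /* total bytes ever consumed */

    uint32_t avail  = prod - cons;            /* readable bytes: 8, despite wraparound */
    uint32_t space  = cons + RING_SIZE - prod;/* writable bytes: 1016 */
    uint32_t offset = MASK_IDX(cons);         /* next read offset in the buffer: 1008 */

    printf("avail=%" PRIu32 " space=%" PRIu32 " offset=%" PRIu32 "\n",
           avail, space, offset);
    return 0;
}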