summary refs log tree commit diff stats
path: root/hw/hyperv/hyperv.c
diff options
context:
space:
mode:
Diffstat (limited to 'hw/hyperv/hyperv.c')
-rw-r--r--hw/hyperv/hyperv.c654
1 files changed, 654 insertions, 0 deletions
diff --git a/hw/hyperv/hyperv.c b/hw/hyperv/hyperv.c
new file mode 100644
index 0000000000..a28e7249d8
--- /dev/null
+++ b/hw/hyperv/hyperv.c
@@ -0,0 +1,654 @@
+/*
+ * Hyper-V guest/hypervisor interaction
+ *
+ * Copyright (c) 2015-2018 Virtuozzo International GmbH.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/main-loop.h"
+#include "qapi/error.h"
+#include "exec/address-spaces.h"
+#include "sysemu/kvm.h"
+#include "qemu/bitops.h"
+#include "qemu/error-report.h"
+#include "qemu/queue.h"
+#include "qemu/rcu.h"
+#include "qemu/rcu_queue.h"
+#include "hw/hyperv/hyperv.h"
+
+/*
+ * Per-vCPU synthetic interrupt controller (SynIC) device state.  An instance
+ * is attached to its CPU object as the "synic" child (see hyperv_synic_add
+ * and get_synic).
+ */
+typedef struct SynICState {
+    DeviceState parent_obj;
+
+    /* vCPU this SynIC belongs to; assigned in hyperv_synic_add */
+    CPUState *cs;
+
+    /* last enable state passed to synic_update */
+    bool enabled;
+    /*
+     * addresses at which the pages are currently mapped into system memory;
+     * 0 means "not mapped" (see synic_update)
+     */
+    hwaddr msg_page_addr;
+    hwaddr event_page_addr;
+    /* RAM regions backing the two pages, created in synic_realize */
+    MemoryRegion msg_page_mr;
+    MemoryRegion event_page_mr;
+    /* host pointers into the RAM regions above */
+    struct hyperv_message_page *msg_page;
+    struct hyperv_event_flags_page *event_page;
+} SynICState;
+
+/* QOM type name of the SynIC device and the matching checked cast */
+#define TYPE_SYNIC "hyperv-synic"
+#define SYNIC(obj) OBJECT_CHECK(SynICState, (obj), TYPE_SYNIC)
+
+/*
+ * Look up the SynIC device attached to @cs as its "synic" child.  Callers
+ * such as hyperv_synic_update treat a NULL result as "this vCPU has no SynIC".
+ */
+static SynICState *get_synic(CPUState *cs)
+{
+    Object *child = object_resolve_path_component(OBJECT(cs), "synic");
+
+    return SYNIC(child);
+}
+
+/*
+ * Re-map one SynIC page region: if the requested address differs from the
+ * recorded one, remove the region from system memory (when it was mapped,
+ * i.e. the recorded address is non-zero), add it at @new_addr (when that is
+ * non-zero), and record the new address in *@cur_addr.
+ */
+static void synic_remap_page(MemoryRegion *mr, hwaddr *cur_addr,
+                             hwaddr new_addr)
+{
+    if (*cur_addr == new_addr) {
+        return;
+    }
+
+    if (*cur_addr) {
+        memory_region_del_subregion(get_system_memory(), mr);
+    }
+    if (new_addr) {
+        memory_region_add_subregion(get_system_memory(), new_addr, mr);
+    }
+    *cur_addr = new_addr;
+}
+
+/*
+ * Record the new enable state of @synic and move its message and event flag
+ * pages to the given addresses; an address of 0 leaves the page unmapped.
+ */
+static void synic_update(SynICState *synic, bool enable,
+                         hwaddr msg_page_addr, hwaddr event_page_addr)
+{
+    synic->enabled = enable;
+
+    synic_remap_page(&synic->msg_page_mr, &synic->msg_page_addr,
+                     msg_page_addr);
+    synic_remap_page(&synic->event_page_mr, &synic->event_page_addr,
+                     event_page_addr);
+}
+
+/*
+ * Public entry point for updating the SynIC of @cs: forwards to synic_update
+ * when the vCPU has a SynIC child and does nothing otherwise.
+ */
+void hyperv_synic_update(CPUState *cs, bool enable,
+                         hwaddr msg_page_addr, hwaddr event_page_addr)
+{
+    SynICState *synic = get_synic(cs);
+
+    if (synic) {
+        synic_update(synic, enable, msg_page_addr, event_page_addr);
+    }
+}
+
+/*
+ * Realize handler: create the RAM regions backing the message and event flag
+ * pages and cache host pointers to their contents.  The regions are only
+ * created here; mapping them into system memory is done by synic_update.
+ */
+static void synic_realize(DeviceState *dev, Error **errp)
+{
+    SynICState *synic = SYNIC(dev);
+    Object *owner = OBJECT(dev);
+    /* memory region names have to be globally unique */
+    uint32_t vp_index = hyperv_vp_index(synic->cs);
+    char *msg_name = g_strdup_printf("synic-%u-msg-page", vp_index);
+    char *event_name = g_strdup_printf("synic-%u-event-page", vp_index);
+
+    memory_region_init_ram(&synic->msg_page_mr, owner, msg_name,
+                           sizeof(*synic->msg_page), &error_abort);
+    memory_region_init_ram(&synic->event_page_mr, owner, event_name,
+                           sizeof(*synic->event_page), &error_abort);
+
+    synic->msg_page = memory_region_get_ram_ptr(&synic->msg_page_mr);
+    synic->event_page = memory_region_get_ram_ptr(&synic->event_page_mr);
+
+    g_free(msg_name);
+    g_free(event_name);
+}
+
+/*
+ * Reset handler: zero the contents of both pages, then disable the SynIC and
+ * unmap the pages by updating it with zero addresses.
+ */
+static void synic_reset(DeviceState *dev)
+{
+    SynICState *s = SYNIC(dev);
+
+    memset(s->event_page, 0, sizeof(*s->event_page));
+    memset(s->msg_page, 0, sizeof(*s->msg_page));
+    synic_update(s, false, 0, 0);
+}
+
+/*
+ * Class initializer: install the realize/reset handlers and mark the device
+ * as not user-creatable (instances are created by hyperv_synic_add).
+ */
+static void synic_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dev_class = DEVICE_CLASS(klass);
+
+    dev_class->user_creatable = false;
+    dev_class->reset = synic_reset;
+    dev_class->realize = synic_realize;
+}
+
+/*
+ * Create a SynIC device for @cs, attach it to the CPU object as the "synic"
+ * child and realize it.  Any failure aborts via error_abort.
+ */
+void hyperv_synic_add(CPUState *cs)
+{
+    Object *synic_obj = object_new(TYPE_SYNIC);
+
+    SYNIC(synic_obj)->cs = cs;
+    object_property_add_child(OBJECT(cs), "synic", synic_obj, &error_abort);
+    /*
+     * Drop the reference obtained from object_new(); the object is still used
+     * below, so the child property is expected to keep it alive.
+     */
+    object_unref(synic_obj);
+    object_property_set_bool(synic_obj, true, "realized", &error_abort);
+}
+
+/*
+ * Reset the SynIC device of @cs.  Like hyperv_synic_update, this is a no-op
+ * when the vCPU has no SynIC child, instead of handing a NULL device to
+ * device_reset().
+ */
+void hyperv_synic_reset(CPUState *cs)
+{
+    SynICState *synic = get_synic(cs);
+
+    if (!synic) {
+        return;
+    }
+
+    device_reset(DEVICE(synic));
+}
+
+/* QOM type description of the SynIC device: a plain TYPE_DEVICE subclass */
+static const TypeInfo synic_type_info = {
+    .parent = TYPE_DEVICE,
+    .name = TYPE_SYNIC,
+    .class_init = synic_class_init,
+    .instance_size = sizeof(SynICState),
+};
+
+/* Register the SynIC type; hooked up through type_init() below */
+static void synic_register_types(void)
+{
+    type_register_static(&synic_type_info);
+}
+
+type_init(synic_register_types)
+
+/*
+ * KVM has its own message producers (SynIC timers).  To guarantee
+ * serialization with both KVM vcpu and the guest cpu, the messages are first
+ * staged in an intermediate area and then posted to the SynIC message page in
+ * the vcpu thread.
+ *
+ * Each HvSintRoute created with a callback owns one such staging area, so at
+ * most one message per route is in flight; hyperv_post_msg returns -EAGAIN
+ * while the area is not FREE.
+ */
+typedef struct HvSintStagedMessage {
+    /* message content staged by hyperv_post_msg */
+    struct hyperv_message msg;
+    /* callback + data (r/o) to complete the processing in a BH */
+    HvSintMsgCb cb;
+    void *cb_data;
+    /* message posting status filled by cpu_post_msg */
+    int status;
+    /* passing the buck: */
+    enum {
+        /* initial state */
+        HV_STAGED_MSG_FREE,
+        /*
+         * hyperv_post_msg (e.g. in main loop) grabs the staged area (FREE ->
+         * BUSY), copies msg, and schedules cpu_post_msg on the assigned cpu
+         */
+        HV_STAGED_MSG_BUSY,
+        /*
+         * cpu_post_msg (vcpu thread) tries to copy staged msg to msg slot,
+         * notifies the guest, records the status, marks the posting done
+         * (BUSY -> POSTED), and schedules sint_msg_bh BH
+         */
+        HV_STAGED_MSG_POSTED,
+        /*
+         * sint_msg_bh (BH) verifies that the posting is done, runs the
+         * callback, and starts over (POSTED -> FREE)
+         */
+    } state;
+} HvSintStagedMessage;
+
+/*
+ * Route for signalling one synthetic interrupt (SINT) of one vCPU's SynIC.
+ * Created by hyperv_sint_route_new and reference counted through
+ * hyperv_sint_route_ref / hyperv_sint_route_unref.
+ */
+struct HvSintRoute {
+    /* SINT number; also indexes the per-SINT slots of the SynIC pages */
+    uint32_t sint;
+    /* SynIC of the target vCPU */
+    SynICState *synic;
+    /* GSI obtained from kvm_irqchip_add_hv_sint_route for this route */
+    int gsi;
+    /* set by hyperv_sint_route_set_sint to raise the SINT */
+    EventNotifier sint_set_notifier;
+    /*
+     * handled by sint_ack_handler; only initialized when the route was
+     * created with a message callback
+     */
+    EventNotifier sint_ack_notifier;
+
+    /* message staging area; NULL when no callback was supplied */
+    HvSintStagedMessage *staged_msg;
+
+    /* number of references; the route is torn down when it drops to 0 */
+    unsigned refcount;
+};
+
+/*
+ * Find the vCPU with the given Hyper-V VP index.
+ *
+ * Returns the CPU, or NULL when qemu_get_cpu() finds none for @vp_index (the
+ * caller, hyperv_sint_route_new, checks for NULL).  The lookup relies on the
+ * VP index matching the index understood by qemu_get_cpu(); this is asserted
+ * for any CPU that is found.
+ */
+static CPUState *hyperv_find_vcpu(uint32_t vp_index)
+{
+    CPUState *cs = qemu_get_cpu(vp_index);
+
+    if (!cs) {
+        /* do not hand a NULL CPU to hyperv_vp_index() in the assert below */
+        return NULL;
+    }
+
+    assert(hyperv_vp_index(cs) == vp_index);
+    return cs;
+}
+
+/*
+ * Bottom half that finishes a staged message: once cpu_post_msg has marked
+ * the message POSTED, report the recorded status to the originator's callback
+ * and release the staging area.
+ */
+static void sint_msg_bh(void *opaque)
+{
+    HvSintRoute *route = opaque;
+    HvSintStagedMessage *staged = route->staged_msg;
+
+    if (atomic_read(&staged->state) == HV_STAGED_MSG_POSTED) {
+        staged->cb(staged->cb_data, staged->status);
+        staged->status = 0;
+
+        /* staged message processing finished, ready to start over */
+        atomic_set(&staged->state, HV_STAGED_MSG_FREE);
+        /* drop the reference taken in hyperv_post_msg */
+        hyperv_sint_route_unref(route);
+    }
+    /* otherwise: status not ready yet (spurious ack from guest?), ignore */
+}
+
+/*
+ * Worker to transfer the message from the staging area into the SynIC message
+ * page in vcpu context.  Scheduled by hyperv_post_msg with the route as the
+ * host pointer in @data.
+ *
+ * The outcome is recorded in staged_msg->status:
+ *   -ENXIO   the SynIC is disabled or its message page is not mapped
+ *   -EAGAIN  the slot for this SINT still holds a message
+ *   else     the result of hyperv_sint_route_set_sint after copying the
+ *            message into the slot
+ */
+static void cpu_post_msg(CPUState *cs, run_on_cpu_data data)
+{
+    HvSintRoute *sint_route = data.host_ptr;
+    HvSintStagedMessage *staged_msg = sint_route->staged_msg;
+    SynICState *synic = sint_route->synic;
+    struct hyperv_message *dst_msg;
+    bool wait_for_sint_ack = false;
+
+    /* hyperv_post_msg moved the area FREE -> BUSY before scheduling us */
+    assert(staged_msg->state == HV_STAGED_MSG_BUSY);
+
+    if (!synic->enabled || !synic->msg_page_addr) {
+        staged_msg->status = -ENXIO;
+        goto posted;
+    }
+
+    dst_msg = &synic->msg_page->slot[sint_route->sint];
+
+    if (dst_msg->header.message_type != HV_MESSAGE_NONE) {
+        /* slot occupied: flag it as pending and defer completion */
+        dst_msg->header.message_flags |= HV_MESSAGE_FLAG_PENDING;
+        staged_msg->status = -EAGAIN;
+        wait_for_sint_ack = true;
+    } else {
+        /* slot free: deliver the message and raise the SINT */
+        memcpy(dst_msg, &staged_msg->msg, sizeof(*dst_msg));
+        staged_msg->status = hyperv_sint_route_set_sint(sint_route);
+    }
+
+    /* the page was written through the host pointer; mark it dirty */
+    memory_region_set_dirty(&synic->msg_page_mr, 0, sizeof(*synic->msg_page));
+
+posted:
+    atomic_set(&staged_msg->state, HV_STAGED_MSG_POSTED);
+    /*
+     * Notify the msg originator of the progress made; if the slot was busy we
+     * set msg_pending flag in it so it will be the guest who will do EOM and
+     * trigger the notification from KVM via sint_ack_notifier
+     */
+    if (!wait_for_sint_ack) {
+        aio_bh_schedule_oneshot(qemu_get_aio_context(), sint_msg_bh,
+                                sint_route);
+    }
+}
+
+/*
+ * Stage a Hyper-V message on @sint_route for delivery to the guest from the
+ * vcpu thread.  The route must have been created with a message callback.
+ *
+ * Returns 0 when the message was staged and cpu_post_msg was scheduled, or
+ * -EAGAIN when the staging area is still occupied by an earlier message.
+ */
+int hyperv_post_msg(HvSintRoute *sint_route, struct hyperv_message *src_msg)
+{
+    HvSintStagedMessage *staged = sint_route->staged_msg;
+    int prev_state;
+
+    assert(staged);
+
+    /* grab the staging area (FREE -> BUSY) */
+    prev_state = atomic_cmpxchg(&staged->state, HV_STAGED_MSG_FREE,
+                                HV_STAGED_MSG_BUSY);
+    if (prev_state != HV_STAGED_MSG_FREE) {
+        return -EAGAIN;
+    }
+
+    memcpy(&staged->msg, src_msg, sizeof(*src_msg));
+
+    /* keep the route alive until sint_msg_bh has run the callback */
+    hyperv_sint_route_ref(sint_route);
+
+    /* schedule message posting attempt in vcpu thread */
+    async_run_on_cpu(sint_route->synic->cs, cpu_post_msg,
+                     RUN_ON_CPU_HOST_PTR(sint_route));
+    return 0;
+}
+
+/*
+ * Handler for the route's sint_ack_notifier: clears the notifier and
+ * schedules sint_msg_bh for the owning route.
+ */
+static void sint_ack_handler(EventNotifier *notifier)
+{
+    HvSintRoute *route;
+
+    route = container_of(notifier, HvSintRoute, sint_ack_notifier);
+    event_notifier_test_and_clear(notifier);
+
+    /*
+     * the guest consumed the previous message so complete the current one with
+     * -EAGAIN and let the msg originator retry
+     */
+    aio_bh_schedule_oneshot(qemu_get_aio_context(), sint_msg_bh, route);
+}
+
+/*
+ * Set given event flag for a given sint on a given vcpu, and signal the sint.
+ *
+ * Returns -EINVAL when @eventno is out of range, -ENXIO when the SynIC is
+ * disabled or its event flag page is not mapped, 0 when the flag was already
+ * set, and otherwise the result of hyperv_sint_route_set_sint.
+ */
+int hyperv_set_event_flag(HvSintRoute *sint_route, unsigned eventno)
+{
+    int ret;
+    SynICState *synic = sint_route->synic;
+    unsigned long *flags, set_mask;
+    unsigned set_idx;
+
+    /*
+     * Valid flag numbers are 0 .. HV_EVENT_FLAGS_COUNT - 1; accepting
+     * eventno == HV_EVENT_FLAGS_COUNT would touch the bit just past the end
+     * of the per-SINT flags bitmap.
+     */
+    if (eventno >= HV_EVENT_FLAGS_COUNT) {
+        return -EINVAL;
+    }
+    if (!synic->enabled || !synic->event_page_addr) {
+        return -ENXIO;
+    }
+
+    set_idx = BIT_WORD(eventno);
+    set_mask = BIT_MASK(eventno);
+    flags = synic->event_page->slot[sint_route->sint].flags;
+
+    /* only dirty the page and signal the SINT on a 0 -> 1 transition */
+    if ((atomic_fetch_or(&flags[set_idx], set_mask) & set_mask) != set_mask) {
+        memory_region_set_dirty(&synic->event_page_mr, 0,
+                                sizeof(*synic->event_page));
+        ret = hyperv_sint_route_set_sint(sint_route);
+    } else {
+        ret = 0;
+    }
+    return ret;
+}
+
+/*
+ * Create a route for signalling SINT @sint on the vCPU with VP index
+ * @vp_index.
+ *
+ * When @cb is non-NULL the route also gets a message staging area and an ack
+ * notifier, so that it can be used with hyperv_post_msg; @cb is then invoked
+ * with @cb_data and the posting status from sint_msg_bh.
+ *
+ * Returns the new route holding one reference, or NULL when the vCPU or its
+ * SynIC cannot be found or any of the setup steps fails.
+ */
+HvSintRoute *hyperv_sint_route_new(uint32_t vp_index, uint32_t sint,
+                                   HvSintMsgCb cb, void *cb_data)
+{
+    HvSintRoute *sint_route;
+    EventNotifier *ack_notifier;
+    int r, gsi;
+    CPUState *cs;
+    SynICState *synic;
+
+    cs = hyperv_find_vcpu(vp_index);
+    if (!cs) {
+        return NULL;
+    }
+
+    synic = get_synic(cs);
+    if (!synic) {
+        return NULL;
+    }
+
+    sint_route = g_new0(HvSintRoute, 1);
+    r = event_notifier_init(&sint_route->sint_set_notifier, false);
+    if (r) {
+        goto err;
+    }
+
+    ack_notifier = cb ? &sint_route->sint_ack_notifier : NULL;
+    if (ack_notifier) {
+        sint_route->staged_msg = g_new0(HvSintStagedMessage, 1);
+        sint_route->staged_msg->cb = cb;
+        sint_route->staged_msg->cb_data = cb_data;
+
+        r = event_notifier_init(ack_notifier, false);
+        if (r) {
+            goto err_sint_set_notifier;
+        }
+
+        event_notifier_set_handler(ack_notifier, sint_ack_handler);
+    }
+
+    gsi = kvm_irqchip_add_hv_sint_route(kvm_state, vp_index, sint);
+    if (gsi < 0) {
+        goto err_gsi;
+    }
+
+    r = kvm_irqchip_add_irqfd_notifier_gsi(kvm_state,
+                                           &sint_route->sint_set_notifier,
+                                           ack_notifier, gsi);
+    if (r) {
+        goto err_irqfd;
+    }
+    sint_route->gsi = gsi;
+    sint_route->synic = synic;
+    sint_route->sint = sint;
+    sint_route->refcount = 1;
+
+    return sint_route;
+
+err_irqfd:
+    kvm_irqchip_release_virq(kvm_state, gsi);
+err_gsi:
+    if (ack_notifier) {
+        event_notifier_set_handler(ack_notifier, NULL);
+        event_notifier_cleanup(ack_notifier);
+    }
+err_sint_set_notifier:
+    /*
+     * The staging area is allocated before the ack notifier is initialized,
+     * so it must also be released when that initialization fails and jumps
+     * straight here.  staged_msg is NULL (and g_free a no-op) when no
+     * callback was supplied.
+     */
+    g_free(sint_route->staged_msg);
+    event_notifier_cleanup(&sint_route->sint_set_notifier);
+err:
+    g_free(sint_route);
+
+    return NULL;
+}
+
+/* Take an additional reference on @sint_route. */
+void hyperv_sint_route_ref(HvSintRoute *sint_route)
+{
+    sint_route->refcount += 1;
+}
+
+/*
+ * Drop one reference on @sint_route (NULL is accepted and ignored).  When the
+ * last reference goes away, the irqfd and GSI are released, the notifiers are
+ * cleaned up and the route, including its staging area, is freed.
+ */
+void hyperv_sint_route_unref(HvSintRoute *sint_route)
+{
+    if (!sint_route) {
+        return;
+    }
+
+    assert(sint_route->refcount > 0);
+
+    sint_route->refcount--;
+    if (sint_route->refcount != 0) {
+        /* still referenced elsewhere */
+        return;
+    }
+
+    kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state,
+                                          &sint_route->sint_set_notifier,
+                                          sint_route->gsi);
+    kvm_irqchip_release_virq(kvm_state, sint_route->gsi);
+
+    /* the ack notifier exists only for routes created with a callback */
+    if (sint_route->staged_msg) {
+        event_notifier_set_handler(&sint_route->sint_ack_notifier, NULL);
+        event_notifier_cleanup(&sint_route->sint_ack_notifier);
+        g_free(sint_route->staged_msg);
+    }
+
+    event_notifier_cleanup(&sint_route->sint_set_notifier);
+    g_free(sint_route);
+}
+
+/*
+ * Signal the SINT of @sint_route by setting its sint_set_notifier; returns
+ * the result of event_notifier_set().
+ */
+int hyperv_sint_route_set_sint(HvSintRoute *sint_route)
+{
+    EventNotifier *set_notifier = &sint_route->sint_set_notifier;
+
+    return event_notifier_set(set_notifier);
+}
+
+/*
+ * Registration of a message handler for one connection id; an element of the
+ * msg_handlers list.
+ */
+typedef struct MsgHandler {
+    /* used by g_free_rcu() when the entry is removed */
+    struct rcu_head rcu;
+    QLIST_ENTRY(MsgHandler) link;
+    /* connection id this handler is registered for */
+    uint32_t conn_id;
+    /* called from hyperv_hcall_post_message with the message and @data */
+    HvMsgHandler handler;
+    void *data;
+} MsgHandler;
+
+/*
+ * Registration of an event notifier for one connection id; an element of the
+ * event_flag_handlers list.
+ */
+typedef struct EventFlagHandler {
+    /* used by g_free_rcu() when the entry is removed */
+    struct rcu_head rcu;
+    QLIST_ENTRY(EventFlagHandler) link;
+    /* connection id this notifier is registered for */
+    uint32_t conn_id;
+    /* set from hyperv_hcall_signal_event when the connection is signalled */
+    EventNotifier *notifier;
+} EventFlagHandler;
+
+/*
+ * Registered handlers.  The lists are modified with the RCU list macros while
+ * holding handlers_mutex and walked under rcu_read_lock() by the hypercall
+ * paths.
+ */
+static QLIST_HEAD(, MsgHandler) msg_handlers;
+static QLIST_HEAD(, EventFlagHandler) event_flag_handlers;
+static QemuMutex handlers_mutex;
+
+/*
+ * Constructor: set up the handler lists and the mutex guarding their updates.
+ */
+static void __attribute__((constructor)) hv_init(void)
+{
+    qemu_mutex_init(&handlers_mutex);
+    QLIST_INIT(&event_flag_handlers);
+    QLIST_INIT(&msg_handlers);
+}
+
+/*
+ * Register (@handler != NULL) or unregister (@handler == NULL) the message
+ * handler for connection @conn_id.
+ *
+ * Returns 0 on success, -EEXIST when registering a connection id that already
+ * has a handler, and -ENOENT when unregistering one that has none.
+ */
+int hyperv_set_msg_handler(uint32_t conn_id, HvMsgHandler handler, void *data)
+{
+    MsgHandler *cur, *existing = NULL;
+    int ret;
+
+    qemu_mutex_lock(&handlers_mutex);
+
+    QLIST_FOREACH(cur, &msg_handlers, link) {
+        if (cur->conn_id == conn_id) {
+            existing = cur;
+            break;
+        }
+    }
+
+    if (existing && handler) {
+        ret = -EEXIST;
+    } else if (existing) {
+        /* unregister: unlink now, free after the RCU grace period */
+        QLIST_REMOVE_RCU(existing, link);
+        g_free_rcu(existing, rcu);
+        ret = 0;
+    } else if (handler) {
+        MsgHandler *fresh = g_new(MsgHandler, 1);
+
+        fresh->conn_id = conn_id;
+        fresh->handler = handler;
+        fresh->data = data;
+        QLIST_INSERT_HEAD_RCU(&msg_handlers, fresh, link);
+        ret = 0;
+    } else {
+        ret = -ENOENT;
+    }
+
+    qemu_mutex_unlock(&handlers_mutex);
+    return ret;
+}
+
+/*
+ * Handle the guest's "post message" hypercall.
+ *
+ * @param is the guest physical address of a struct hyperv_post_message_input;
+ * the fast calling convention (@fast) is rejected.  The message is handed to
+ * the handler registered for its connection id.
+ *
+ * Returns a HV_STATUS_* code: the handler's return value when a handler is
+ * found, HV_STATUS_INVALID_CONNECTION_ID when none is registered, or an error
+ * status when the input is misaligned, cannot be mapped completely, or
+ * declares a payload larger than the payload buffer.
+ */
+uint16_t hyperv_hcall_post_message(uint64_t param, bool fast)
+{
+    uint16_t ret;
+    hwaddr len;
+    struct hyperv_post_message_input *msg;
+    MsgHandler *mh;
+
+    if (fast) {
+        return HV_STATUS_INVALID_HYPERCALL_CODE;
+    }
+    if (param & (__alignof__(*msg) - 1)) {
+        return HV_STATUS_INVALID_ALIGNMENT;
+    }
+
+    len = sizeof(*msg);
+    msg = cpu_physical_memory_map(param, &len, 0);
+    if (!msg) {
+        /*
+         * Nothing was mapped: there is nothing to unmap, and the message must
+         * not be dereferenced below.
+         */
+        return HV_STATUS_INSUFFICIENT_MEMORY;
+    }
+    if (len < sizeof(*msg)) {
+        /* only part of the input could be mapped */
+        ret = HV_STATUS_INSUFFICIENT_MEMORY;
+        goto unmap;
+    }
+    if (msg->payload_size > sizeof(msg->payload)) {
+        ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
+        goto unmap;
+    }
+
+    ret = HV_STATUS_INVALID_CONNECTION_ID;
+    rcu_read_lock();
+    QLIST_FOREACH_RCU(mh, &msg_handlers, link) {
+        if (mh->conn_id == (msg->connection_id & HV_CONNECTION_ID_MASK)) {
+            ret = mh->handler(msg, mh->data);
+            break;
+        }
+    }
+    rcu_read_unlock();
+
+unmap:
+    cpu_physical_memory_unmap(msg, len, 0, 0);
+    return ret;
+}
+
+/*
+ * Register (@notifier != NULL) or unregister (@notifier == NULL) the event
+ * notifier for connection @conn_id in the userspace handler list.
+ *
+ * Returns 0 on success, -EEXIST when registering a connection id that already
+ * has a notifier, and -ENOENT when unregistering one that has none.
+ */
+static int set_event_flag_handler(uint32_t conn_id, EventNotifier *notifier)
+{
+    EventFlagHandler *cur, *existing = NULL;
+    int ret;
+
+    qemu_mutex_lock(&handlers_mutex);
+
+    QLIST_FOREACH(cur, &event_flag_handlers, link) {
+        if (cur->conn_id == conn_id) {
+            existing = cur;
+            break;
+        }
+    }
+
+    if (existing && notifier) {
+        ret = -EEXIST;
+    } else if (existing) {
+        /* unregister: unlink now, free after the RCU grace period */
+        QLIST_REMOVE_RCU(existing, link);
+        g_free_rcu(existing, rcu);
+        ret = 0;
+    } else if (notifier) {
+        EventFlagHandler *fresh = g_new(EventFlagHandler, 1);
+
+        fresh->conn_id = conn_id;
+        fresh->notifier = notifier;
+        QLIST_INSERT_HEAD_RCU(&event_flag_handlers, fresh, link);
+        ret = 0;
+    } else {
+        ret = -ENOENT;
+    }
+
+    qemu_mutex_unlock(&handlers_mutex);
+    return ret;
+}
+
+/*
+ * Set once KVM is found to lack KVM_CAP_HYPERV_EVENTFD; from then on event
+ * flag handlers are kept in the userspace list instead of being passed to KVM.
+ */
+static bool process_event_flags_userspace;
+
+/*
+ * Register (@notifier != NULL) or unregister (@notifier == NULL) the event
+ * notifier for connection @conn_id.
+ *
+ * With KVM_CAP_HYPERV_EVENTFD the request goes to KVM through the
+ * KVM_HYPERV_EVENTFD ioctl and its result is returned; otherwise a warning is
+ * printed once and the notifier is tracked by set_event_flag_handler.
+ */
+int hyperv_set_event_flag_handler(uint32_t conn_id, EventNotifier *notifier)
+{
+    if (!process_event_flags_userspace) {
+        if (kvm_check_extension(kvm_state, KVM_CAP_HYPERV_EVENTFD)) {
+            struct kvm_hyperv_eventfd hvevfd = {
+                .conn_id = conn_id,
+                .fd = notifier ? event_notifier_get_fd(notifier) : -1,
+                .flags = notifier ? 0 : KVM_HYPERV_EVENTFD_DEASSIGN,
+            };
+
+            return kvm_vm_ioctl(kvm_state, KVM_HYPERV_EVENTFD, &hvevfd);
+        }
+
+        /* no kernel support: switch to userspace processing for good */
+        process_event_flags_userspace = true;
+
+        warn_report("Hyper-V event signaling is not supported by this kernel; "
+                    "using slower userspace hypercall processing");
+    }
+
+    return set_event_flag_handler(conn_id, notifier);
+}
+
+/*
+ * Handle the guest's "signal event" hypercall in userspace.
+ *
+ * With the fast calling convention @param is the input value itself;
+ * otherwise it is the guest physical address the 64-bit input is loaded from.
+ *
+ * Returns 0 after setting the notifier registered for the connection id, or
+ * a HV_STATUS_* error code when the input is misaligned or malformed or no
+ * notifier is registered for it.
+ */
+uint16_t hyperv_hcall_signal_event(uint64_t param, bool fast)
+{
+    EventFlagHandler *handler;
+    uint16_t status = HV_STATUS_INVALID_CONNECTION_ID;
+
+    if (unlikely(!fast)) {
+        hwaddr addr = param;
+
+        if (addr & (__alignof__(addr) - 1)) {
+            return HV_STATUS_INVALID_ALIGNMENT;
+        }
+
+        param = ldq_phys(&address_space_memory, addr);
+    }
+
+    /*
+     * Per spec, bits 32-47 contain the extra "flag number".  However, we
+     * have no use for it, and in all known usecases it is zero, so just
+     * report lookup failure if it isn't.
+     */
+    if (param & 0xffff00000000ULL) {
+        return HV_STATUS_INVALID_PORT_ID;
+    }
+    /* remaining bits are reserved-zero */
+    if (param & ~HV_CONNECTION_ID_MASK) {
+        return HV_STATUS_INVALID_HYPERCALL_INPUT;
+    }
+
+    rcu_read_lock();
+    QLIST_FOREACH_RCU(handler, &event_flag_handlers, link) {
+        if (handler->conn_id != param) {
+            continue;
+        }
+        event_notifier_set(handler->notifier);
+        status = 0;
+        break;
+    }
+    rcu_read_unlock();
+
+    return status;
+}