summary refs log tree commit diff stats
path: root/hw
diff options
context:
space:
mode:
Diffstat (limited to 'hw')
-rw-r--r--hw/apic.c3
-rw-r--r--hw/kvm/apic.c34
-rw-r--r--hw/msi.h5
-rw-r--r--hw/msix.c121
-rw-r--r--hw/msix.h6
-rw-r--r--hw/pc.c9
-rw-r--r--hw/pc_piix.c14
-rw-r--r--hw/pci.h8
-rw-r--r--hw/virtio-pci.c126
-rw-r--r--hw/virtio-pci.h6
-rw-r--r--hw/xen.h10
-rw-r--r--hw/xen_apic.c5
12 files changed, 311 insertions, 36 deletions
diff --git a/hw/apic.c b/hw/apic.c
index 4eeaf8801c..5fbf01c278 100644
--- a/hw/apic.c
+++ b/hw/apic.c
@@ -19,6 +19,7 @@
 #include "apic_internal.h"
 #include "apic.h"
 #include "ioapic.h"
+#include "msi.h"
 #include "host-utils.h"
 #include "trace.h"
 #include "pc.h"
@@ -862,6 +863,8 @@ static void apic_init(APICCommonState *s)
 
     s->timer = qemu_new_timer_ns(vm_clock, apic_timer, s);
     local_apics[s->idx] = s;
+
+    msi_supported = true;
 }
 
 static void apic_class_init(ObjectClass *klass, void *data)
diff --git a/hw/kvm/apic.c b/hw/kvm/apic.c
index ffe7a521b7..8ba4079025 100644
--- a/hw/kvm/apic.c
+++ b/hw/kvm/apic.c
@@ -10,6 +10,7 @@
  * See the COPYING file in the top-level directory.
  */
 #include "hw/apic_internal.h"
+#include "hw/msi.h"
 #include "kvm.h"
 
 static inline void kvm_apic_set_reg(struct kvm_lapic_state *kapic,
@@ -145,10 +146,39 @@ static void kvm_apic_external_nmi(APICCommonState *s)
     run_on_cpu(s->cpu_env, do_inject_external_nmi, s);
 }
 
+static uint64_t kvm_apic_mem_read(void *opaque, target_phys_addr_t addr,
+                                  unsigned size)
+{
+    return ~(uint64_t)0;
+}
+
+static void kvm_apic_mem_write(void *opaque, target_phys_addr_t addr,
+                               uint64_t data, unsigned size)
+{
+    MSIMessage msg = { .address = addr, .data = data };
+    int ret;
+
+    ret = kvm_irqchip_send_msi(kvm_state, msg);
+    if (ret < 0) {
+        fprintf(stderr, "KVM: injection failed, MSI lost (%s)\n",
+                strerror(-ret));
+    }
+}
+
+static const MemoryRegionOps kvm_apic_io_ops = {
+    .read = kvm_apic_mem_read,
+    .write = kvm_apic_mem_write,
+    .endianness = DEVICE_NATIVE_ENDIAN,
+};
+
 static void kvm_apic_init(APICCommonState *s)
 {
-    memory_region_init_reservation(&s->io_memory, "kvm-apic-msi",
-                                   MSI_SPACE_SIZE);
+    memory_region_init_io(&s->io_memory, &kvm_apic_io_ops, s, "kvm-apic-msi",
+                          MSI_SPACE_SIZE);
+
+    if (kvm_has_gsi_routing()) {
+        msi_supported = true;
+    }
 }
 
 static void kvm_apic_class_init(ObjectClass *klass, void *data)
diff --git a/hw/msi.h b/hw/msi.h
index 3040bb0b43..75747abc25 100644
--- a/hw/msi.h
+++ b/hw/msi.h
@@ -24,6 +24,11 @@
 #include "qemu-common.h"
 #include "pci.h"
 
+struct MSIMessage {
+    uint64_t address;
+    uint32_t data;
+};
+
 extern bool msi_supported;
 
 bool msi_enabled(const PCIDevice *dev);
diff --git a/hw/msix.c b/hw/msix.c
index 3835eaaf28..59c7a8388f 100644
--- a/hw/msix.c
+++ b/hw/msix.c
@@ -35,6 +35,15 @@
 #define MSIX_PAGE_PENDING (MSIX_PAGE_SIZE / 2)
 #define MSIX_MAX_ENTRIES 32
 
+static MSIMessage msix_get_message(PCIDevice *dev, unsigned vector)
+{
+    uint8_t *table_entry = dev->msix_table_page + vector * PCI_MSIX_ENTRY_SIZE;
+    MSIMessage msg;
+
+    msg.address = pci_get_quad(table_entry + PCI_MSIX_ENTRY_LOWER_ADDR);
+    msg.data = pci_get_long(table_entry + PCI_MSIX_ENTRY_DATA);
+    return msg;
+}
 
 /* Add MSI-X capability to the config space for the device. */
 /* Given a bar and its size, add MSI-X table on top of it
@@ -130,13 +139,34 @@ static bool msix_is_masked(PCIDevice *dev, int vector)
     return msix_vector_masked(dev, vector, dev->msix_function_masked);
 }
 
+static void msix_fire_vector_notifier(PCIDevice *dev,
+                                      unsigned int vector, bool is_masked)
+{
+    MSIMessage msg;
+    int ret;
+
+    if (!dev->msix_vector_use_notifier) {
+        return;
+    }
+    if (is_masked) {
+        dev->msix_vector_release_notifier(dev, vector);
+    } else {
+        msg = msix_get_message(dev, vector);
+        ret = dev->msix_vector_use_notifier(dev, vector, msg);
+        assert(ret >= 0);
+    }
+}
+
 static void msix_handle_mask_update(PCIDevice *dev, int vector, bool was_masked)
 {
     bool is_masked = msix_is_masked(dev, vector);
+
     if (is_masked == was_masked) {
         return;
     }
 
+    msix_fire_vector_notifier(dev, vector, is_masked);
+
     if (!is_masked && msix_is_pending(dev, vector)) {
         msix_clr_pending(dev, vector);
         msix_notify(dev, vector);
@@ -222,10 +252,14 @@ static void msix_mmio_setup(PCIDevice *d, MemoryRegion *bar)
 static void msix_mask_all(struct PCIDevice *dev, unsigned nentries)
 {
     int vector;
+
     for (vector = 0; vector < nentries; ++vector) {
         unsigned offset =
             vector * PCI_MSIX_ENTRY_SIZE + PCI_MSIX_ENTRY_VECTOR_CTRL;
+        bool was_masked = msix_is_masked(dev, vector);
+
         dev->msix_table_page[offset] |= PCI_MSIX_ENTRY_CTRL_MASKBIT;
+        msix_handle_mask_update(dev, vector, was_masked);
     }
 }
 
@@ -317,6 +351,7 @@ void msix_save(PCIDevice *dev, QEMUFile *f)
 void msix_load(PCIDevice *dev, QEMUFile *f)
 {
     unsigned n = dev->msix_entries_nr;
+    unsigned int vector;
 
     if (!(dev->cap_present & QEMU_PCI_CAP_MSIX)) {
         return;
@@ -326,6 +361,10 @@ void msix_load(PCIDevice *dev, QEMUFile *f)
     qemu_get_buffer(f, dev->msix_table_page, n * PCI_MSIX_ENTRY_SIZE);
     qemu_get_buffer(f, dev->msix_table_page + MSIX_PAGE_PENDING, (n + 7) / 8);
     msix_update_function_masked(dev);
+
+    for (vector = 0; vector < n; vector++) {
+        msix_handle_mask_update(dev, vector, true);
+    }
 }
 
 /* Does device support MSI-X? */
@@ -352,9 +391,7 @@ uint32_t msix_bar_size(PCIDevice *dev)
 /* Send an MSI-X message */
 void msix_notify(PCIDevice *dev, unsigned vector)
 {
-    uint8_t *table_entry = dev->msix_table_page + vector * PCI_MSIX_ENTRY_SIZE;
-    uint64_t address;
-    uint32_t data;
+    MSIMessage msg;
 
     if (vector >= dev->msix_entries_nr || !dev->msix_entry_used[vector])
         return;
@@ -363,9 +400,9 @@ void msix_notify(PCIDevice *dev, unsigned vector)
         return;
     }
 
-    address = pci_get_quad(table_entry + PCI_MSIX_ENTRY_LOWER_ADDR);
-    data = pci_get_long(table_entry + PCI_MSIX_ENTRY_DATA);
-    stl_le_phys(address, data);
+    msg = msix_get_message(dev, vector);
+
+    stl_le_phys(msg.address, msg.data);
 }
 
 void msix_reset(PCIDevice *dev)
@@ -414,3 +451,75 @@ void msix_unuse_all_vectors(PCIDevice *dev)
         return;
     msix_free_irq_entries(dev);
 }
+
+unsigned int msix_nr_vectors_allocated(const PCIDevice *dev)
+{
+    return dev->msix_entries_nr;
+}
+
+static int msix_set_notifier_for_vector(PCIDevice *dev, unsigned int vector)
+{
+    MSIMessage msg;
+
+    if (msix_is_masked(dev, vector)) {
+        return 0;
+    }
+    msg = msix_get_message(dev, vector);
+    return dev->msix_vector_use_notifier(dev, vector, msg);
+}
+
+static void msix_unset_notifier_for_vector(PCIDevice *dev, unsigned int vector)
+{
+    if (msix_is_masked(dev, vector)) {
+        return;
+    }
+    dev->msix_vector_release_notifier(dev, vector);
+}
+
+int msix_set_vector_notifiers(PCIDevice *dev,
+                              MSIVectorUseNotifier use_notifier,
+                              MSIVectorReleaseNotifier release_notifier)
+{
+    int vector, ret;
+
+    assert(use_notifier && release_notifier);
+
+    dev->msix_vector_use_notifier = use_notifier;
+    dev->msix_vector_release_notifier = release_notifier;
+
+    if ((dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] &
+        (MSIX_ENABLE_MASK | MSIX_MASKALL_MASK)) == MSIX_ENABLE_MASK) {
+        for (vector = 0; vector < dev->msix_entries_nr; vector++) {
+            ret = msix_set_notifier_for_vector(dev, vector);
+            if (ret < 0) {
+                goto undo;
+            }
+        }
+    }
+    return 0;
+
+undo:
+    while (--vector >= 0) {
+        msix_unset_notifier_for_vector(dev, vector);
+    }
+    dev->msix_vector_use_notifier = NULL;
+    dev->msix_vector_release_notifier = NULL;
+    return ret;
+}
+
+void msix_unset_vector_notifiers(PCIDevice *dev)
+{
+    int vector;
+
+    assert(dev->msix_vector_use_notifier &&
+           dev->msix_vector_release_notifier);
+
+    if ((dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] &
+        (MSIX_ENABLE_MASK | MSIX_MASKALL_MASK)) == MSIX_ENABLE_MASK) {
+        for (vector = 0; vector < dev->msix_entries_nr; vector++) {
+            msix_unset_notifier_for_vector(dev, vector);
+        }
+    }
+    dev->msix_vector_use_notifier = NULL;
+    dev->msix_vector_release_notifier = NULL;
+}
diff --git a/hw/msix.h b/hw/msix.h
index 5aba22b858..50aee8221a 100644
--- a/hw/msix.h
+++ b/hw/msix.h
@@ -13,6 +13,8 @@ void msix_write_config(PCIDevice *pci_dev, uint32_t address,
 
 int msix_uninit(PCIDevice *d, MemoryRegion *bar);
 
+unsigned int msix_nr_vectors_allocated(const PCIDevice *dev);
+
 void msix_save(PCIDevice *dev, QEMUFile *f);
 void msix_load(PCIDevice *dev, QEMUFile *f);
 
@@ -29,4 +31,8 @@ void msix_notify(PCIDevice *dev, unsigned vector);
 
 void msix_reset(PCIDevice *dev);
 
+int msix_set_vector_notifiers(PCIDevice *dev,
+                              MSIVectorUseNotifier use_notifier,
+                              MSIVectorReleaseNotifier release_notifier);
+void msix_unset_vector_notifiers(PCIDevice *dev);
 #endif
diff --git a/hw/pc.c b/hw/pc.c
index e81a06c161..c790bcbfd7 100644
--- a/hw/pc.c
+++ b/hw/pc.c
@@ -912,15 +912,6 @@ static DeviceState *apic_init(void *env, uint8_t apic_id)
         apic_mapped = 1;
     }
 
-    /* KVM does not support MSI yet. */
-    if (!kvm_irqchip_in_kernel()) {
-        msi_supported = true;
-    }
-
-    if (xen_msi_support()) {
-        msi_supported = true;
-    }
-
     return dev;
 }
 
diff --git a/hw/pc_piix.c b/hw/pc_piix.c
index a7aad4b022..f49b0aaf89 100644
--- a/hw/pc_piix.c
+++ b/hw/pc_piix.c
@@ -56,31 +56,27 @@ static void kvm_piix3_setup_irq_routing(bool pci_enabled)
 {
 #ifdef CONFIG_KVM
     KVMState *s = kvm_state;
-    int ret, i;
+    int i;
 
     if (kvm_check_extension(s, KVM_CAP_IRQ_ROUTING)) {
         for (i = 0; i < 8; ++i) {
             if (i == 2) {
                 continue;
             }
-            kvm_irqchip_add_route(s, i, KVM_IRQCHIP_PIC_MASTER, i);
+            kvm_irqchip_add_irq_route(s, i, KVM_IRQCHIP_PIC_MASTER, i);
         }
         for (i = 8; i < 16; ++i) {
-            kvm_irqchip_add_route(s, i, KVM_IRQCHIP_PIC_SLAVE, i - 8);
+            kvm_irqchip_add_irq_route(s, i, KVM_IRQCHIP_PIC_SLAVE, i - 8);
         }
         if (pci_enabled) {
             for (i = 0; i < 24; ++i) {
                 if (i == 0) {
-                    kvm_irqchip_add_route(s, i, KVM_IRQCHIP_IOAPIC, 2);
+                    kvm_irqchip_add_irq_route(s, i, KVM_IRQCHIP_IOAPIC, 2);
                 } else if (i != 2) {
-                    kvm_irqchip_add_route(s, i, KVM_IRQCHIP_IOAPIC, i);
+                    kvm_irqchip_add_irq_route(s, i, KVM_IRQCHIP_IOAPIC, i);
                 }
             }
         }
-        ret = kvm_irqchip_commit_routes(s);
-        if (ret < 0) {
-            hw_error("KVM IRQ routing setup failed");
-        }
     }
 #endif /* CONFIG_KVM */
 }
diff --git a/hw/pci.h b/hw/pci.h
index 8d0aa498e5..c3cacce046 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -173,6 +173,10 @@ typedef struct PCIDeviceClass {
     const char *romfile;
 } PCIDeviceClass;
 
+typedef int (*MSIVectorUseNotifier)(PCIDevice *dev, unsigned int vector,
+                                      MSIMessage msg);
+typedef void (*MSIVectorReleaseNotifier)(PCIDevice *dev, unsigned int vector);
+
 struct PCIDevice {
     DeviceState qdev;
     /* PCI config space */
@@ -243,6 +247,10 @@ struct PCIDevice {
     bool has_rom;
     MemoryRegion rom;
     uint32_t rom_bar;
+
+    /* MSI-X notifiers */
+    MSIVectorUseNotifier msix_vector_use_notifier;
+    MSIVectorReleaseNotifier msix_vector_release_notifier;
 };
 
 void pci_register_bar(PCIDevice *pci_dev, int region_num,
diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
index 79b86f1aad..d08c1590d2 100644
--- a/hw/virtio-pci.c
+++ b/hw/virtio-pci.c
@@ -24,6 +24,7 @@
 #include "virtio-scsi.h"
 #include "pci.h"
 #include "qemu-error.h"
+#include "msi.h"
 #include "msix.h"
 #include "net.h"
 #include "loader.h"
@@ -539,6 +540,107 @@ static void virtio_pci_guest_notifier_read(void *opaque)
     }
 }
 
+static int kvm_virtio_pci_vq_vector_use(VirtIOPCIProxy *proxy,
+                                        unsigned int queue_no,
+                                        unsigned int vector,
+                                        MSIMessage msg)
+{
+    VirtQueue *vq = virtio_get_queue(proxy->vdev, queue_no);
+    VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
+    int fd, ret;
+
+    fd = event_notifier_get_fd(virtio_queue_get_guest_notifier(vq));
+
+    if (irqfd->users == 0) {
+        ret = kvm_irqchip_add_msi_route(kvm_state, msg);
+        if (ret < 0) {
+            return ret;
+        }
+        irqfd->virq = ret;
+    }
+    irqfd->users++;
+
+    ret = kvm_irqchip_add_irqfd(kvm_state, fd, irqfd->virq);
+    if (ret < 0) {
+        if (--irqfd->users == 0) {
+            kvm_irqchip_release_virq(kvm_state, irqfd->virq);
+        }
+        return ret;
+    }
+
+    qemu_set_fd_handler(fd, NULL, NULL, NULL);
+
+    return 0;
+}
+
+static void kvm_virtio_pci_vq_vector_release(VirtIOPCIProxy *proxy,
+                                             unsigned int queue_no,
+                                             unsigned int vector)
+{
+    VirtQueue *vq = virtio_get_queue(proxy->vdev, queue_no);
+    VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
+    int fd, ret;
+
+    fd = event_notifier_get_fd(virtio_queue_get_guest_notifier(vq));
+
+    ret = kvm_irqchip_remove_irqfd(kvm_state, fd, irqfd->virq);
+    assert(ret == 0);
+
+    if (--irqfd->users == 0) {
+        kvm_irqchip_release_virq(kvm_state, irqfd->virq);
+    }
+
+    qemu_set_fd_handler(fd, virtio_pci_guest_notifier_read, NULL, vq);
+}
+
+static int kvm_virtio_pci_vector_use(PCIDevice *dev, unsigned vector,
+                                     MSIMessage msg)
+{
+    VirtIOPCIProxy *proxy = container_of(dev, VirtIOPCIProxy, pci_dev);
+    VirtIODevice *vdev = proxy->vdev;
+    int ret, queue_no;
+
+    for (queue_no = 0; queue_no < VIRTIO_PCI_QUEUE_MAX; queue_no++) {
+        if (!virtio_queue_get_num(vdev, queue_no)) {
+            break;
+        }
+        if (virtio_queue_vector(vdev, queue_no) != vector) {
+            continue;
+        }
+        ret = kvm_virtio_pci_vq_vector_use(proxy, queue_no, vector, msg);
+        if (ret < 0) {
+            goto undo;
+        }
+    }
+    return 0;
+
+undo:
+    while (--queue_no >= 0) {
+        if (virtio_queue_vector(vdev, queue_no) != vector) {
+            continue;
+        }
+        kvm_virtio_pci_vq_vector_release(proxy, queue_no, vector);
+    }
+    return ret;
+}
+
+static void kvm_virtio_pci_vector_release(PCIDevice *dev, unsigned vector)
+{
+    VirtIOPCIProxy *proxy = container_of(dev, VirtIOPCIProxy, pci_dev);
+    VirtIODevice *vdev = proxy->vdev;
+    int queue_no;
+
+    for (queue_no = 0; queue_no < VIRTIO_PCI_QUEUE_MAX; queue_no++) {
+        if (!virtio_queue_get_num(vdev, queue_no)) {
+            break;
+        }
+        if (virtio_queue_vector(vdev, queue_no) != vector) {
+            continue;
+        }
+        kvm_virtio_pci_vq_vector_release(proxy, queue_no, vector);
+    }
+}
+
 static int virtio_pci_set_guest_notifier(void *opaque, int n, bool assign)
 {
     VirtIOPCIProxy *proxy = opaque;
@@ -555,6 +657,9 @@ static int virtio_pci_set_guest_notifier(void *opaque, int n, bool assign)
     } else {
         qemu_set_fd_handler(event_notifier_get_fd(notifier),
                             NULL, NULL, NULL);
+        /* Test and clear notifier before closing it,
+         * in case poll callback didn't have time to run. */
+        virtio_pci_guest_notifier_read(vq);
         event_notifier_cleanup(notifier);
     }
 
@@ -573,6 +678,13 @@ static int virtio_pci_set_guest_notifiers(void *opaque, bool assign)
     VirtIODevice *vdev = proxy->vdev;
     int r, n;
 
+    /* Must unset vector notifier while guest notifier is still assigned */
+    if (kvm_irqchip_in_kernel() && !assign) {
+        msix_unset_vector_notifiers(&proxy->pci_dev);
+        g_free(proxy->vector_irqfd);
+        proxy->vector_irqfd = NULL;
+    }
+
     for (n = 0; n < VIRTIO_PCI_QUEUE_MAX; n++) {
         if (!virtio_queue_get_num(vdev, n)) {
             break;
@@ -584,10 +696,24 @@ static int virtio_pci_set_guest_notifiers(void *opaque, bool assign)
         }
     }
 
+    /* Must set vector notifier after guest notifier has been assigned */
+    if (kvm_irqchip_in_kernel() && assign) {
+        proxy->vector_irqfd =
+            g_malloc0(sizeof(*proxy->vector_irqfd) *
+                      msix_nr_vectors_allocated(&proxy->pci_dev));
+        r = msix_set_vector_notifiers(&proxy->pci_dev,
+                                      kvm_virtio_pci_vector_use,
+                                      kvm_virtio_pci_vector_release);
+        if (r < 0) {
+            goto assign_error;
+        }
+    }
+
     return 0;
 
 assign_error:
     /* We get here on assignment failure. Recover by undoing for VQs 0 .. n. */
+    assert(assign);
     while (--n >= 0) {
         virtio_pci_set_guest_notifier(opaque, n, !assign);
     }
diff --git a/hw/virtio-pci.h b/hw/virtio-pci.h
index 889e59e421..91b791ba9d 100644
--- a/hw/virtio-pci.h
+++ b/hw/virtio-pci.h
@@ -26,6 +26,11 @@
 #define VIRTIO_PCI_FLAG_USE_IOEVENTFD   (1 << VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT)
 
 typedef struct {
+    int virq;
+    unsigned int users;
+} VirtIOIRQFD;
+
+typedef struct {
     PCIDevice pci_dev;
     VirtIODevice *vdev;
     MemoryRegion bar;
@@ -44,6 +49,7 @@ typedef struct {
     VirtIOSCSIConf scsi;
     bool ioeventfd_disabled;
     bool ioeventfd_started;
+    VirtIOIRQFD *vector_irqfd;
 } VirtIOPCIProxy;
 
 void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev);
diff --git a/hw/xen.h b/hw/xen.h
index 3ae4cd0f5c..e5926b7b8a 100644
--- a/hw/xen.h
+++ b/hw/xen.h
@@ -57,14 +57,4 @@ void xen_register_framebuffer(struct MemoryRegion *mr);
 #  define HVM_MAX_VCPUS 32
 #endif
 
-static inline int xen_msi_support(void)
-{
-#if defined(CONFIG_XEN_CTRL_INTERFACE_VERSION) \
-    && CONFIG_XEN_CTRL_INTERFACE_VERSION >= 420
-    return xen_enabled();
-#else
-    return 0;
-#endif
-}
-
 #endif /* QEMU_HW_XEN_H */
diff --git a/hw/xen_apic.c b/hw/xen_apic.c
index 1725ff67dd..a9e101f315 100644
--- a/hw/xen_apic.c
+++ b/hw/xen_apic.c
@@ -40,6 +40,11 @@ static void xen_apic_init(APICCommonState *s)
 {
     memory_region_init_io(&s->io_memory, &xen_apic_io_ops, s, "xen-apic-msi",
                           MSI_SPACE_SIZE);
+
+#if defined(CONFIG_XEN_CTRL_INTERFACE_VERSION) \
+    && CONFIG_XEN_CTRL_INTERFACE_VERSION >= 420
+    msi_supported = true;
+#endif
 }
 
 static void xen_apic_set_base(APICCommonState *s, uint64_t val)