summary refs log tree commit diff stats
path: root/accel/kvm/kvm-all.c
diff options
context:
space:
mode:
authorPeter Maydell <peter.maydell@linaro.org>2020-06-12 23:06:22 +0100
committerPeter Maydell <peter.maydell@linaro.org>2020-06-12 23:06:22 +0100
commit7d3660e79830a069f1848bb4fa1cdf8f666424fb (patch)
tree5651ddf02414086c31d0bfca7713e1800d4f0fc8 /accel/kvm/kvm-all.c
parent9e3903136d9acde2fb2dd9e967ba928050a6cb4a (diff)
parent3575b0aea983ad57804c9af739ed8ff7bc168393 (diff)
downloadfocaccia-qemu-7d3660e79830a069f1848bb4fa1cdf8f666424fb.tar.gz
focaccia-qemu-7d3660e79830a069f1848bb4fa1cdf8f666424fb.zip
Merge remote-tracking branch 'remotes/bonzini/tags/for-upstream' into staging
* Miscellaneous fixes and feature enablement (many)
* SEV refactoring (David)
* Hyper-V initial support (Jon)
* i386 TCG fixes (x87 and SSE, Joseph)
* vmport cleanup and improvements (Philippe, Liran)
* Use-after-free with vCPU hot-unplug (Nengyuan)
* run-coverity-scan improvements (myself)
* Record/replay fixes (Pavel)
* -machine kernel_irqchip=split improvements for INTx (Peter)
* Code cleanups (Philippe)
* Crash and security fixes (PJP)
* HVF cleanups (Roman)

# gpg: Signature made Fri 12 Jun 2020 16:57:04 BST
# gpg:                using RSA key F13338574B662389866C7682BFFBD25F78C7AE83
# gpg:                issuer "pbonzini@redhat.com"
# gpg: Good signature from "Paolo Bonzini <bonzini@gnu.org>" [full]
# gpg:                 aka "Paolo Bonzini <pbonzini@redhat.com>" [full]
# Primary key fingerprint: 46F5 9FBD 57D6 12E7 BFD4  E2F7 7E15 100C CD36 69B1
#      Subkey fingerprint: F133 3857 4B66 2389 866C  7682 BFFB D25F 78C7 AE83

* remotes/bonzini/tags/for-upstream: (116 commits)
  target/i386: Remove obsolete TODO file
  stubs: move Xen stubs to accel/
  replay: fix replay shutdown for console mode
  exec/cpu-common: Move MUSB specific typedefs to 'hw/usb/hcd-musb.h'
  hw/usb: Move device-specific declarations to new 'hcd-musb.h' header
  exec/memory: Remove unused MemoryRegionMmio type
  checkpatch: reversed logic with acpi test checks
  target/i386: sev: Unify SEVState and SevGuestState
  target/i386: sev: Remove redundant handle field
  target/i386: sev: Remove redundant policy field
  target/i386: sev: Remove redundant cbitpos and reduced_phys_bits fields
  target/i386: sev: Partial cleanup to sev_state global
  target/i386: sev: Embed SEVState in SevGuestState
  target/i386: sev: Rename QSevGuestInfo
  target/i386: sev: Move local structure definitions into .c file
  target/i386: sev: Remove unused QSevGuestInfoClass
  xen: fix build without pci passthrough
  i386: hvf: Drop HVFX86EmulatorState
  i386: hvf: Move mmio_buf into CPUX86State
  i386: hvf: Move lazy_flags into CPUX86State
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>

# Conflicts:
#	hw/i386/acpi-build.c
Diffstat (limited to 'accel/kvm/kvm-all.c')
-rw-r--r--accel/kvm/kvm-all.c95
1 files changed, 87 insertions, 8 deletions
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index d06cc04079..f24d7da783 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -160,9 +160,59 @@ static const KVMCapabilityInfo kvm_required_capabilites[] = {
 static NotifierList kvm_irqchip_change_notifiers =
     NOTIFIER_LIST_INITIALIZER(kvm_irqchip_change_notifiers);
 
+struct KVMResampleFd {
+    int gsi;
+    EventNotifier *resample_event;
+    QLIST_ENTRY(KVMResampleFd) node;
+};
+typedef struct KVMResampleFd KVMResampleFd;
+
+/*
+ * Only used with split irqchip where we need to do the resample fd
+ * kick for the kernel from userspace.
+ */
+static QLIST_HEAD(, KVMResampleFd) kvm_resample_fd_list =
+    QLIST_HEAD_INITIALIZER(kvm_resample_fd_list);
+
 #define kvm_slots_lock(kml)      qemu_mutex_lock(&(kml)->slots_lock)
 #define kvm_slots_unlock(kml)    qemu_mutex_unlock(&(kml)->slots_lock)
 
+static inline void kvm_resample_fd_remove(int gsi)
+{
+    KVMResampleFd *rfd;
+
+    QLIST_FOREACH(rfd, &kvm_resample_fd_list, node) {
+        if (rfd->gsi == gsi) {
+            QLIST_REMOVE(rfd, node);
+            g_free(rfd);
+            break;
+        }
+    }
+}
+
+static inline void kvm_resample_fd_insert(int gsi, EventNotifier *event)
+{
+    KVMResampleFd *rfd = g_new0(KVMResampleFd, 1);
+
+    rfd->gsi = gsi;
+    rfd->resample_event = event;
+
+    QLIST_INSERT_HEAD(&kvm_resample_fd_list, rfd, node);
+}
+
+void kvm_resample_fd_notify(int gsi)
+{
+    KVMResampleFd *rfd;
+
+    QLIST_FOREACH(rfd, &kvm_resample_fd_list, node) {
+        if (rfd->gsi == gsi) {
+            event_notifier_set(rfd->resample_event);
+            trace_kvm_resample_fd_notify(gsi);
+            return;
+        }
+    }
+}
+
 int kvm_get_max_memslots(void)
 {
     KVMState *s = KVM_STATE(current_accel());
@@ -1662,9 +1712,13 @@ int kvm_irqchip_update_msi_route(KVMState *s, int virq, MSIMessage msg,
     return kvm_update_routing_entry(s, &kroute);
 }
 
-static int kvm_irqchip_assign_irqfd(KVMState *s, int fd, int rfd, int virq,
+static int kvm_irqchip_assign_irqfd(KVMState *s, EventNotifier *event,
+                                    EventNotifier *resample, int virq,
                                     bool assign)
 {
+    int fd = event_notifier_get_fd(event);
+    int rfd = resample ? event_notifier_get_fd(resample) : -1;
+
     struct kvm_irqfd irqfd = {
         .fd = fd,
         .gsi = virq,
@@ -1672,8 +1726,33 @@ static int kvm_irqchip_assign_irqfd(KVMState *s, int fd, int rfd, int virq,
     };
 
     if (rfd != -1) {
-        irqfd.flags |= KVM_IRQFD_FLAG_RESAMPLE;
-        irqfd.resamplefd = rfd;
+        assert(assign);
+        if (kvm_irqchip_is_split()) {
+            /*
+             * When the slow irqchip (e.g. IOAPIC) is in the
+             * userspace, KVM kernel resamplefd will not work because
+             * the EOI of the interrupt will be delivered to userspace
+             * instead, so the KVM kernel resamplefd kick will be
+             * skipped.  The userspace here mimics what the kernel
+             * provides with resamplefd, remember the resamplefd and
+             * kick it when we receive EOI of this IRQ.
+             *
+             * This is hackery because IOAPIC is mostly bypassed
+             * (except EOI broadcasts) when irqfd is used.  However
+             * this can bring much performance back for split irqchip
+             * with INTx IRQs (for VFIO, this gives 93% perf of the
+             * full fast path, which is 46% perf boost comparing to
+             * the INTx slow path).
+             */
+            kvm_resample_fd_insert(virq, resample);
+        } else {
+            irqfd.flags |= KVM_IRQFD_FLAG_RESAMPLE;
+            irqfd.resamplefd = rfd;
+        }
+    } else if (!assign) {
+        if (kvm_irqchip_is_split()) {
+            kvm_resample_fd_remove(virq);
+        }
     }
 
     if (!kvm_irqfds_enabled()) {
@@ -1769,7 +1848,9 @@ int kvm_irqchip_add_hv_sint_route(KVMState *s, uint32_t vcpu, uint32_t sint)
     return -ENOSYS;
 }
 
-static int kvm_irqchip_assign_irqfd(KVMState *s, int fd, int virq, bool assign)
+static int kvm_irqchip_assign_irqfd(KVMState *s, EventNotifier *event,
+                                    EventNotifier *resample, int virq,
+                                    bool assign)
 {
     abort();
 }
@@ -1783,15 +1864,13 @@ int kvm_irqchip_update_msi_route(KVMState *s, int virq, MSIMessage msg)
 int kvm_irqchip_add_irqfd_notifier_gsi(KVMState *s, EventNotifier *n,
                                        EventNotifier *rn, int virq)
 {
-    return kvm_irqchip_assign_irqfd(s, event_notifier_get_fd(n),
-           rn ? event_notifier_get_fd(rn) : -1, virq, true);
+    return kvm_irqchip_assign_irqfd(s, n, rn, virq, true);
 }
 
 int kvm_irqchip_remove_irqfd_notifier_gsi(KVMState *s, EventNotifier *n,
                                           int virq)
 {
-    return kvm_irqchip_assign_irqfd(s, event_notifier_get_fd(n), -1, virq,
-           false);
+    return kvm_irqchip_assign_irqfd(s, n, NULL, virq, false);
 }
 
 int kvm_irqchip_add_irqfd_notifier(KVMState *s, EventNotifier *n,