summary refs log tree commit diff stats
diff options
context:
space:
mode:
authorAnthony Liguori <aliguori@us.ibm.com>2012-07-18 14:44:43 -0500
committerAnthony Liguori <aliguori@us.ibm.com>2012-07-18 14:44:43 -0500
commit09f06a6c603ce64284287d32f6ffadaaa5064850 (patch)
tree176bbc325a87eb4961ec8605e75d326746e7c9bb
parent513e6bde4fa551c293e7d346e27b618b57a4087a (diff)
parent15b2bd1847239fe0b4a1041b69a631741d2e273a (diff)
downloadfocaccia-qemu-09f06a6c603ce64284287d32f6ffadaaa5064850.tar.gz
focaccia-qemu-09f06a6c603ce64284287d32f6ffadaaa5064850.zip
Merge remote-tracking branch 'qemu-kvm/uq/master' into staging
* qemu-kvm/uq/master:
  virtio: move common irqfd handling out of virtio-pci
  virtio: move common ioeventfd handling out of virtio-pci
  event_notifier: add event_notifier_set_handler
  memory: pass EventNotifier, not eventfd
  ivshmem: wrap ivshmem_del_eventfd loops with transaction
  ivshmem: use EventNotifier and memory API
  event_notifier: add event_notifier_init_fd
  event_notifier: remove event_notifier_test
  event_notifier: add event_notifier_set
  apic: Defer interrupt updates to VCPU thread
  apic: Reevaluate pending interrupts on LVT_LINT0 changes
  apic: Resolve potential endless loop around apic_update_irq
  kvm: expose tsc deadline timer feature to guest
  kvm_pv_eoi: add flag support
  kvm: Don't abort on kvm_irqchip_add_msi_route()
-rw-r--r--cpu-exec.c6
-rw-r--r--event_notifier.c30
-rw-r--r--event_notifier.h8
-rw-r--r--exec.c8
-rw-r--r--hw/apic.c27
-rw-r--r--hw/apic.h1
-rw-r--r--hw/apic_internal.h1
-rw-r--r--hw/ivshmem.c67
-rw-r--r--hw/vhost.c4
-rw-r--r--hw/virtio-pci.c77
-rw-r--r--hw/virtio.c46
-rw-r--r--hw/virtio.h3
-rw-r--r--hw/xen_pt.c2
-rw-r--r--kvm-all.c31
-rw-r--r--kvm-stub.c10
-rw-r--r--kvm.h2
-rw-r--r--memory.c18
-rw-r--r--memory.h9
-rw-r--r--target-i386/cpu.c2
-rw-r--r--target-i386/cpu.h5
-rw-r--r--target-i386/kvm.c9
-rw-r--r--xen-all.c6
22 files changed, 230 insertions, 142 deletions
diff --git a/cpu-exec.c b/cpu-exec.c
index 08c35f72d4..fc185a4f04 100644
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -288,6 +288,12 @@ int cpu_exec(CPUArchState *env)
                     }
 #endif
 #if defined(TARGET_I386)
+#if !defined(CONFIG_USER_ONLY)
+                    if (interrupt_request & CPU_INTERRUPT_POLL) {
+                        env->interrupt_request &= ~CPU_INTERRUPT_POLL;
+                        apic_poll_irq(env->apic_state);
+                    }
+#endif
                     if (interrupt_request & CPU_INTERRUPT_INIT) {
                             cpu_svm_check_intercept_param(env, SVM_EXIT_INIT,
                                                           0);
diff --git a/event_notifier.c b/event_notifier.c
index 0b829813d3..2c207e1399 100644
--- a/event_notifier.c
+++ b/event_notifier.c
@@ -10,11 +10,19 @@
  * See the COPYING file in the top-level directory.
  */
 
+#include "qemu-common.h"
 #include "event_notifier.h"
+#include "qemu-char.h"
+
 #ifdef CONFIG_EVENTFD
 #include <sys/eventfd.h>
 #endif
 
+void event_notifier_init_fd(EventNotifier *e, int fd)
+{
+    e->fd = fd;
+}
+
 int event_notifier_init(EventNotifier *e, int active)
 {
 #ifdef CONFIG_EVENTFD
@@ -38,24 +46,22 @@ int event_notifier_get_fd(EventNotifier *e)
     return e->fd;
 }
 
-int event_notifier_test_and_clear(EventNotifier *e)
+int event_notifier_set_handler(EventNotifier *e,
+                               EventNotifierHandler *handler)
 {
-    uint64_t value;
-    int r = read(e->fd, &value, sizeof(value));
+    return qemu_set_fd_handler(e->fd, (IOHandler *)handler, NULL, e);
+}
+
+int event_notifier_set(EventNotifier *e)
+{
+    uint64_t value = 1;
+    int r = write(e->fd, &value, sizeof(value));
     return r == sizeof(value);
 }
 
-int event_notifier_test(EventNotifier *e)
+int event_notifier_test_and_clear(EventNotifier *e)
 {
     uint64_t value;
     int r = read(e->fd, &value, sizeof(value));
-    if (r == sizeof(value)) {
-        /* restore previous value. */
-        int s = write(e->fd, &value, sizeof(value));
-        /* never blocks because we use EFD_SEMAPHORE.
-         * If we didn't we'd get EAGAIN on overflow
-         * and we'd have to write code to ignore it. */
-        assert(s == sizeof(value));
-    }
     return r == sizeof(value);
 }
diff --git a/event_notifier.h b/event_notifier.h
index 886222cb36..f0ec2f2171 100644
--- a/event_notifier.h
+++ b/event_notifier.h
@@ -16,13 +16,17 @@
 #include "qemu-common.h"
 
 struct EventNotifier {
-	int fd;
+    int fd;
 };
 
+typedef void EventNotifierHandler(EventNotifier *);
+
+void event_notifier_init_fd(EventNotifier *, int fd);
 int event_notifier_init(EventNotifier *, int active);
 void event_notifier_cleanup(EventNotifier *);
 int event_notifier_get_fd(EventNotifier *);
+int event_notifier_set(EventNotifier *);
 int event_notifier_test_and_clear(EventNotifier *);
-int event_notifier_test(EventNotifier *);
+int event_notifier_set_handler(EventNotifier *, EventNotifierHandler *);
 
 #endif
diff --git a/exec.c b/exec.c
index c9fa17de6f..feb4795525 100644
--- a/exec.c
+++ b/exec.c
@@ -3210,13 +3210,13 @@ static void core_log_global_stop(MemoryListener *listener)
 
 static void core_eventfd_add(MemoryListener *listener,
                              MemoryRegionSection *section,
-                             bool match_data, uint64_t data, int fd)
+                             bool match_data, uint64_t data, EventNotifier *e)
 {
 }
 
 static void core_eventfd_del(MemoryListener *listener,
                              MemoryRegionSection *section,
-                             bool match_data, uint64_t data, int fd)
+                             bool match_data, uint64_t data, EventNotifier *e)
 {
 }
 
@@ -3276,13 +3276,13 @@ static void io_log_global_stop(MemoryListener *listener)
 
 static void io_eventfd_add(MemoryListener *listener,
                            MemoryRegionSection *section,
-                           bool match_data, uint64_t data, int fd)
+                           bool match_data, uint64_t data, EventNotifier *e)
 {
 }
 
 static void io_eventfd_del(MemoryListener *listener,
                            MemoryRegionSection *section,
-                           bool match_data, uint64_t data, int fd)
+                           bool match_data, uint64_t data, EventNotifier *e)
 {
 }
 
diff --git a/hw/apic.c b/hw/apic.c
index 60552df619..385555eb43 100644
--- a/hw/apic.c
+++ b/hw/apic.c
@@ -16,6 +16,7 @@
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, see <http://www.gnu.org/licenses/>
  */
+#include "qemu-thread.h"
 #include "apic_internal.h"
 #include "apic.h"
 #include "ioapic.h"
@@ -361,11 +362,10 @@ static void apic_update_irq(APICCommonState *s)
     if (!(s->spurious_vec & APIC_SV_ENABLE)) {
         return;
     }
-    if (apic_irq_pending(s) > 0) {
+    if (!qemu_cpu_is_self(s->cpu_env)) {
+        cpu_interrupt(s->cpu_env, CPU_INTERRUPT_POLL);
+    } else if (apic_irq_pending(s) > 0) {
         cpu_interrupt(s->cpu_env, CPU_INTERRUPT_HARD);
-    } else if (apic_accept_pic_intr(&s->busdev.qdev) &&
-               pic_get_output(isa_pic)) {
-        apic_deliver_pic_intr(&s->busdev.qdev, 1);
     }
 }
 
@@ -535,6 +535,15 @@ static void apic_deliver(DeviceState *d, uint8_t dest, uint8_t dest_mode,
     apic_bus_deliver(deliver_bitmask, delivery_mode, vector_num, trigger_mode);
 }
 
+static bool apic_check_pic(APICCommonState *s)
+{
+    if (!apic_accept_pic_intr(&s->busdev.qdev) || !pic_get_output(isa_pic)) {
+        return false;
+    }
+    apic_deliver_pic_intr(&s->busdev.qdev, 1);
+    return true;
+}
+
 int apic_get_interrupt(DeviceState *d)
 {
     APICCommonState *s = DO_UPCAST(APICCommonState, busdev.qdev, d);
@@ -560,7 +569,12 @@ int apic_get_interrupt(DeviceState *d)
     reset_bit(s->irr, intno);
     set_bit(s->isr, intno);
     apic_sync_vapic(s, SYNC_TO_VAPIC);
+
+    /* re-inject if there is still a pending PIC interrupt */
+    apic_check_pic(s);
+
     apic_update_irq(s);
+
     return intno;
 }
 
@@ -800,8 +814,11 @@ static void apic_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
         {
             int n = index - 0x32;
             s->lvt[n] = val;
-            if (n == APIC_LVT_TIMER)
+            if (n == APIC_LVT_TIMER) {
                 apic_timer_update(s, qemu_get_clock_ns(vm_clock));
+            } else if (n == APIC_LVT_LINT0 && apic_check_pic(s)) {
+                apic_update_irq(s);
+            }
         }
         break;
     case 0x38:
diff --git a/hw/apic.h b/hw/apic.h
index 62179cebee..a89542b231 100644
--- a/hw/apic.h
+++ b/hw/apic.h
@@ -20,6 +20,7 @@ void apic_init_reset(DeviceState *s);
 void apic_sipi(DeviceState *s);
 void apic_handle_tpr_access_report(DeviceState *d, target_ulong ip,
                                    TPRAccess access);
+void apic_poll_irq(DeviceState *d);
 
 /* pc.c */
 int cpu_is_bsp(CPUX86State *env);
diff --git a/hw/apic_internal.h b/hw/apic_internal.h
index 60a6a8bdae..4d8ff490ce 100644
--- a/hw/apic_internal.h
+++ b/hw/apic_internal.h
@@ -141,7 +141,6 @@ void apic_report_irq_delivered(int delivered);
 bool apic_next_timer(APICCommonState *s, int64_t current_time);
 void apic_enable_tpr_access_reporting(DeviceState *d, bool enable);
 void apic_enable_vapic(DeviceState *d, target_phys_addr_t paddr);
-void apic_poll_irq(DeviceState *d);
 
 void vapic_report_tpr_access(DeviceState *dev, void *cpu, target_ulong ip,
                              TPRAccess access);
diff --git a/hw/ivshmem.c b/hw/ivshmem.c
index 05559b639c..bba21c55e2 100644
--- a/hw/ivshmem.c
+++ b/hw/ivshmem.c
@@ -23,6 +23,7 @@
 #include "kvm.h"
 #include "migration.h"
 #include "qerror.h"
+#include "event_notifier.h"
 
 #include <sys/mman.h>
 #include <sys/types.h>
@@ -45,7 +46,7 @@
 
 typedef struct Peer {
     int nb_eventfds;
-    int *eventfds;
+    EventNotifier *eventfds;
 } Peer;
 
 typedef struct EventfdEntry {
@@ -63,7 +64,6 @@ typedef struct IVShmemState {
     CharDriverState *server_chr;
     MemoryRegion ivshmem_mmio;
 
-    pcibus_t mmio_addr;
     /* We might need to register the BAR before we actually have the memory.
      * So prepare a container MemoryRegion for the BAR immediately and
      * add a subregion when we have the memory.
@@ -168,7 +168,6 @@ static void ivshmem_io_write(void *opaque, target_phys_addr_t addr,
 {
     IVShmemState *s = opaque;
 
-    uint64_t write_one = 1;
     uint16_t dest = val >> 16;
     uint16_t vector = val & 0xff;
 
@@ -194,12 +193,8 @@ static void ivshmem_io_write(void *opaque, target_phys_addr_t addr,
 
             /* check doorbell range */
             if (vector < s->peers[dest].nb_eventfds) {
-                IVSHMEM_DPRINTF("Writing %" PRId64 " to VM %d on vector %d\n",
-                                                    write_one, dest, vector);
-                if (write(s->peers[dest].eventfds[vector],
-                                                    &(write_one), 8) != 8) {
-                    IVSHMEM_DPRINTF("error writing to eventfd\n");
-                }
+                IVSHMEM_DPRINTF("Notifying VM %d on vector %d\n", dest, vector);
+                event_notifier_set(&s->peers[dest].eventfds[vector]);
             }
             break;
         default:
@@ -279,12 +274,13 @@ static void fake_irqfd(void *opaque, const uint8_t *buf, int size) {
     msix_notify(pdev, entry->vector);
 }
 
-static CharDriverState* create_eventfd_chr_device(void * opaque, int eventfd,
-                                                                    int vector)
+static CharDriverState* create_eventfd_chr_device(void * opaque, EventNotifier *n,
+                                                  int vector)
 {
     /* create a event character device based on the passed eventfd */
     IVShmemState *s = opaque;
     CharDriverState * chr;
+    int eventfd = event_notifier_get_fd(n);
 
     chr = qemu_chr_open_eventfd(eventfd);
 
@@ -347,16 +343,39 @@ static void create_shared_memory_BAR(IVShmemState *s, int fd) {
     pci_register_bar(&s->dev, 2, PCI_BASE_ADDRESS_SPACE_MEMORY, &s->bar);
 }
 
+static void ivshmem_add_eventfd(IVShmemState *s, int posn, int i)
+{
+    memory_region_add_eventfd(&s->ivshmem_mmio,
+                              DOORBELL,
+                              4,
+                              true,
+                              (posn << 16) | i,
+                              &s->peers[posn].eventfds[i]);
+}
+
+static void ivshmem_del_eventfd(IVShmemState *s, int posn, int i)
+{
+    memory_region_del_eventfd(&s->ivshmem_mmio,
+                              DOORBELL,
+                              4,
+                              true,
+                              (posn << 16) | i,
+                              &s->peers[posn].eventfds[i]);
+}
+
 static void close_guest_eventfds(IVShmemState *s, int posn)
 {
     int i, guest_curr_max;
 
     guest_curr_max = s->peers[posn].nb_eventfds;
 
+    memory_region_transaction_begin();
+    for (i = 0; i < guest_curr_max; i++) {
+        ivshmem_del_eventfd(s, posn, i);
+    }
+    memory_region_transaction_commit();
     for (i = 0; i < guest_curr_max; i++) {
-        kvm_set_ioeventfd_mmio(s->peers[posn].eventfds[i],
-                    s->mmio_addr + DOORBELL, (posn << 16) | i, 0, 4);
-        close(s->peers[posn].eventfds[i]);
+        event_notifier_cleanup(&s->peers[posn].eventfds[i]);
     }
 
     g_free(s->peers[posn].eventfds);
@@ -369,12 +388,7 @@ static void setup_ioeventfds(IVShmemState *s) {
 
     for (i = 0; i <= s->max_peer; i++) {
         for (j = 0; j < s->peers[i].nb_eventfds; j++) {
-            memory_region_add_eventfd(&s->ivshmem_mmio,
-                                      DOORBELL,
-                                      4,
-                                      true,
-                                      (i << 16) | j,
-                                      s->peers[i].eventfds[j]);
+            ivshmem_add_eventfd(s, i, j);
         }
     }
 }
@@ -476,14 +490,14 @@ static void ivshmem_read(void *opaque, const uint8_t * buf, int flags)
 
     if (guest_max_eventfd == 0) {
         /* one eventfd per MSI vector */
-        s->peers[incoming_posn].eventfds = (int *) g_malloc(s->vectors *
-                                                                sizeof(int));
+        s->peers[incoming_posn].eventfds = g_new(EventNotifier, s->vectors);
     }
 
     /* this is an eventfd for a particular guest VM */
     IVSHMEM_DPRINTF("eventfds[%ld][%d] = %d\n", incoming_posn,
                                             guest_max_eventfd, incoming_fd);
-    s->peers[incoming_posn].eventfds[guest_max_eventfd] = incoming_fd;
+    event_notifier_init_fd(&s->peers[incoming_posn].eventfds[guest_max_eventfd],
+                           incoming_fd);
 
     /* increment count for particular guest */
     s->peers[incoming_posn].nb_eventfds++;
@@ -495,15 +509,12 @@ static void ivshmem_read(void *opaque, const uint8_t * buf, int flags)
 
     if (incoming_posn == s->vm_id) {
         s->eventfd_chr[guest_max_eventfd] = create_eventfd_chr_device(s,
-                   s->peers[s->vm_id].eventfds[guest_max_eventfd],
+                   &s->peers[s->vm_id].eventfds[guest_max_eventfd],
                    guest_max_eventfd);
     }
 
     if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD)) {
-        if (kvm_set_ioeventfd_mmio(incoming_fd, s->mmio_addr + DOORBELL,
-                        (incoming_posn << 16) | guest_max_eventfd, 1, 4) < 0) {
-            fprintf(stderr, "ivshmem: ioeventfd not available\n");
-        }
+        ivshmem_add_eventfd(s, incoming_posn, guest_max_eventfd);
     }
 
     return;
diff --git a/hw/vhost.c b/hw/vhost.c
index 43664e7f4d..0fd8da84e2 100644
--- a/hw/vhost.c
+++ b/hw/vhost.c
@@ -737,13 +737,13 @@ static void vhost_virtqueue_cleanup(struct vhost_dev *dev,
 
 static void vhost_eventfd_add(MemoryListener *listener,
                               MemoryRegionSection *section,
-                              bool match_data, uint64_t data, int fd)
+                              bool match_data, uint64_t data, EventNotifier *e)
 {
 }
 
 static void vhost_eventfd_del(MemoryListener *listener,
                               MemoryRegionSection *section,
-                              bool match_data, uint64_t data, int fd)
+                              bool match_data, uint64_t data, EventNotifier *e)
 {
 }
 
diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
index 9342eed070..a66c946f48 100644
--- a/hw/virtio-pci.c
+++ b/hw/virtio-pci.c
@@ -173,46 +173,18 @@ static int virtio_pci_set_host_notifier_internal(VirtIOPCIProxy *proxy,
                          __func__, r);
             return r;
         }
+        virtio_queue_set_host_notifier_fd_handler(vq, true);
         memory_region_add_eventfd(&proxy->bar, VIRTIO_PCI_QUEUE_NOTIFY, 2,
-                                  true, n, event_notifier_get_fd(notifier));
+                                  true, n, notifier);
     } else {
         memory_region_del_eventfd(&proxy->bar, VIRTIO_PCI_QUEUE_NOTIFY, 2,
-                                  true, n, event_notifier_get_fd(notifier));
-        /* Handle the race condition where the guest kicked and we deassigned
-         * before we got around to handling the kick.
-         */
-        if (event_notifier_test_and_clear(notifier)) {
-            virtio_queue_notify_vq(vq);
-        }
-
+                                  true, n, notifier);
+        virtio_queue_set_host_notifier_fd_handler(vq, false);
         event_notifier_cleanup(notifier);
     }
     return r;
 }
 
-static void virtio_pci_host_notifier_read(void *opaque)
-{
-    VirtQueue *vq = opaque;
-    EventNotifier *n = virtio_queue_get_host_notifier(vq);
-    if (event_notifier_test_and_clear(n)) {
-        virtio_queue_notify_vq(vq);
-    }
-}
-
-static void virtio_pci_set_host_notifier_fd_handler(VirtIOPCIProxy *proxy,
-                                                    int n, bool assign)
-{
-    VirtQueue *vq = virtio_get_queue(proxy->vdev, n);
-    EventNotifier *notifier = virtio_queue_get_host_notifier(vq);
-    if (assign) {
-        qemu_set_fd_handler(event_notifier_get_fd(notifier),
-                            virtio_pci_host_notifier_read, NULL, vq);
-    } else {
-        qemu_set_fd_handler(event_notifier_get_fd(notifier),
-                            NULL, NULL, NULL);
-    }
-}
-
 static void virtio_pci_start_ioeventfd(VirtIOPCIProxy *proxy)
 {
     int n, r;
@@ -232,8 +204,6 @@ static void virtio_pci_start_ioeventfd(VirtIOPCIProxy *proxy)
         if (r < 0) {
             goto assign_error;
         }
-
-        virtio_pci_set_host_notifier_fd_handler(proxy, n, true);
     }
     proxy->ioeventfd_started = true;
     return;
@@ -244,7 +214,6 @@ assign_error:
             continue;
         }
 
-        virtio_pci_set_host_notifier_fd_handler(proxy, n, false);
         r = virtio_pci_set_host_notifier_internal(proxy, n, false);
         assert(r >= 0);
     }
@@ -266,7 +235,6 @@ static void virtio_pci_stop_ioeventfd(VirtIOPCIProxy *proxy)
             continue;
         }
 
-        virtio_pci_set_host_notifier_fd_handler(proxy, n, false);
         r = virtio_pci_set_host_notifier_internal(proxy, n, false);
         assert(r >= 0);
     }
@@ -528,25 +496,15 @@ static unsigned virtio_pci_get_features(void *opaque)
     return proxy->host_features;
 }
 
-static void virtio_pci_guest_notifier_read(void *opaque)
-{
-    VirtQueue *vq = opaque;
-    EventNotifier *n = virtio_queue_get_guest_notifier(vq);
-    if (event_notifier_test_and_clear(n)) {
-        virtio_irq(vq);
-    }
-}
-
 static int kvm_virtio_pci_vq_vector_use(VirtIOPCIProxy *proxy,
                                         unsigned int queue_no,
                                         unsigned int vector,
                                         MSIMessage msg)
 {
     VirtQueue *vq = virtio_get_queue(proxy->vdev, queue_no);
+    EventNotifier *n = virtio_queue_get_guest_notifier(vq);
     VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
-    int fd, ret;
-
-    fd = event_notifier_get_fd(virtio_queue_get_guest_notifier(vq));
+    int ret;
 
     if (irqfd->users == 0) {
         ret = kvm_irqchip_add_msi_route(kvm_state, msg);
@@ -557,7 +515,7 @@ static int kvm_virtio_pci_vq_vector_use(VirtIOPCIProxy *proxy,
     }
     irqfd->users++;
 
-    ret = kvm_irqchip_add_irqfd(kvm_state, fd, irqfd->virq);
+    ret = kvm_irqchip_add_irq_notifier(kvm_state, n, irqfd->virq);
     if (ret < 0) {
         if (--irqfd->users == 0) {
             kvm_irqchip_release_virq(kvm_state, irqfd->virq);
@@ -565,8 +523,7 @@ static int kvm_virtio_pci_vq_vector_use(VirtIOPCIProxy *proxy,
         return ret;
     }
 
-    qemu_set_fd_handler(fd, NULL, NULL, NULL);
-
+    virtio_queue_set_guest_notifier_fd_handler(vq, true, true);
     return 0;
 }
 
@@ -575,19 +532,18 @@ static void kvm_virtio_pci_vq_vector_release(VirtIOPCIProxy *proxy,
                                              unsigned int vector)
 {
     VirtQueue *vq = virtio_get_queue(proxy->vdev, queue_no);
+    EventNotifier *n = virtio_queue_get_guest_notifier(vq);
     VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
-    int fd, ret;
-
-    fd = event_notifier_get_fd(virtio_queue_get_guest_notifier(vq));
+    int ret;
 
-    ret = kvm_irqchip_remove_irqfd(kvm_state, fd, irqfd->virq);
+    ret = kvm_irqchip_remove_irq_notifier(kvm_state, n, irqfd->virq);
     assert(ret == 0);
 
     if (--irqfd->users == 0) {
         kvm_irqchip_release_virq(kvm_state, irqfd->virq);
     }
 
-    qemu_set_fd_handler(fd, virtio_pci_guest_notifier_read, NULL, vq);
+    virtio_queue_set_guest_notifier_fd_handler(vq, true, false);
 }
 
 static int kvm_virtio_pci_vector_use(PCIDevice *dev, unsigned vector,
@@ -649,14 +605,9 @@ static int virtio_pci_set_guest_notifier(void *opaque, int n, bool assign)
         if (r < 0) {
             return r;
         }
-        qemu_set_fd_handler(event_notifier_get_fd(notifier),
-                            virtio_pci_guest_notifier_read, NULL, vq);
+        virtio_queue_set_guest_notifier_fd_handler(vq, true, false);
     } else {
-        qemu_set_fd_handler(event_notifier_get_fd(notifier),
-                            NULL, NULL, NULL);
-        /* Test and clear notifier before closing it,
-         * in case poll callback didn't have time to run. */
-        virtio_pci_guest_notifier_read(vq);
+        virtio_queue_set_guest_notifier_fd_handler(vq, false, false);
         event_notifier_cleanup(notifier);
     }
 
diff --git a/hw/virtio.c b/hw/virtio.c
index 168abe4864..d146f86f13 100644
--- a/hw/virtio.c
+++ b/hw/virtio.c
@@ -984,10 +984,56 @@ VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n)
     return vdev->vq + n;
 }
 
+static void virtio_queue_guest_notifier_read(EventNotifier *n)
+{
+    VirtQueue *vq = container_of(n, VirtQueue, guest_notifier);
+    if (event_notifier_test_and_clear(n)) {
+        virtio_irq(vq);
+    }
+}
+
+void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign,
+                                                bool with_irqfd)
+{
+    if (assign && !with_irqfd) {
+        event_notifier_set_handler(&vq->guest_notifier,
+                                   virtio_queue_guest_notifier_read);
+    } else {
+        event_notifier_set_handler(&vq->guest_notifier, NULL);
+    }
+    if (!assign) {
+        /* Test and clear notifier before closing it,
+         * in case poll callback didn't have time to run. */
+        virtio_queue_guest_notifier_read(&vq->guest_notifier);
+    }
+}
+
 EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq)
 {
     return &vq->guest_notifier;
 }
+
+static void virtio_queue_host_notifier_read(EventNotifier *n)
+{
+    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
+    if (event_notifier_test_and_clear(n)) {
+        virtio_queue_notify_vq(vq);
+    }
+}
+
+void virtio_queue_set_host_notifier_fd_handler(VirtQueue *vq, bool assign)
+{
+    if (assign) {
+        event_notifier_set_handler(&vq->host_notifier,
+                                   virtio_queue_host_notifier_read);
+    } else {
+        event_notifier_set_handler(&vq->host_notifier, NULL);
+        /* Test and clear notifier before after disabling event,
+         * in case poll callback didn't have time to run. */
+        virtio_queue_host_notifier_read(&vq->host_notifier);
+    }
+}
+
 EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq)
 {
     return &vq->host_notifier;
diff --git a/hw/virtio.h b/hw/virtio.h
index 85aabe53d8..96f4dbb8d9 100644
--- a/hw/virtio.h
+++ b/hw/virtio.h
@@ -231,7 +231,10 @@ void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, uint16_t idx);
 VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n);
 int virtio_queue_get_id(VirtQueue *vq);
 EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq);
+void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign,
+                                                bool with_irqfd);
 EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq);
+void virtio_queue_set_host_notifier_fd_handler(VirtQueue *vq, bool assign);
 void virtio_queue_notify_vq(VirtQueue *vq);
 void virtio_irq(VirtQueue *vq);
 #endif
diff --git a/hw/xen_pt.c b/hw/xen_pt.c
index 3b6d1867ab..fdf68aa564 100644
--- a/hw/xen_pt.c
+++ b/hw/xen_pt.c
@@ -634,7 +634,7 @@ static void xen_pt_log_global_fns(MemoryListener *l)
 }
 
 static void xen_pt_eventfd_fns(MemoryListener *l, MemoryRegionSection *s,
-                               bool match_data, uint64_t data, int fd)
+                               bool match_data, uint64_t data, EventNotifier *n)
 {
 }
 
diff --git a/kvm-all.c b/kvm-all.c
index 1016ca49b1..2148b20bdb 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -32,6 +32,7 @@
 #include "bswap.h"
 #include "memory.h"
 #include "exec-memory.h"
+#include "event_notifier.h"
 
 /* This check must be after config-host.h is included */
 #ifdef CONFIG_EVENTFD
@@ -800,23 +801,29 @@ static void kvm_io_ioeventfd_del(MemoryRegionSection *section,
 
 static void kvm_eventfd_add(MemoryListener *listener,
                             MemoryRegionSection *section,
-                            bool match_data, uint64_t data, int fd)
+                            bool match_data, uint64_t data,
+                            EventNotifier *e)
 {
     if (section->address_space == get_system_memory()) {
-        kvm_mem_ioeventfd_add(section, match_data, data, fd);
+        kvm_mem_ioeventfd_add(section, match_data, data,
+			      event_notifier_get_fd(e));
     } else {
-        kvm_io_ioeventfd_add(section, match_data, data, fd);
+        kvm_io_ioeventfd_add(section, match_data, data,
+			     event_notifier_get_fd(e));
     }
 }
 
 static void kvm_eventfd_del(MemoryListener *listener,
                             MemoryRegionSection *section,
-                            bool match_data, uint64_t data, int fd)
+                            bool match_data, uint64_t data,
+                            EventNotifier *e)
 {
     if (section->address_space == get_system_memory()) {
-        kvm_mem_ioeventfd_del(section, match_data, data, fd);
+        kvm_mem_ioeventfd_del(section, match_data, data,
+			      event_notifier_get_fd(e));
     } else {
-        kvm_io_ioeventfd_del(section, match_data, data, fd);
+        kvm_io_ioeventfd_del(section, match_data, data,
+			     event_notifier_get_fd(e));
     }
 }
 
@@ -1142,7 +1149,7 @@ int kvm_irqchip_send_msi(KVMState *s, MSIMessage msg)
 
 int kvm_irqchip_add_msi_route(KVMState *s, MSIMessage msg)
 {
-    abort();
+    return -ENOSYS;
 }
 
 static int kvm_irqchip_assign_irqfd(KVMState *s, int fd, int virq, bool assign)
@@ -1156,11 +1163,21 @@ int kvm_irqchip_add_irqfd(KVMState *s, int fd, int virq)
     return kvm_irqchip_assign_irqfd(s, fd, virq, true);
 }
 
+int kvm_irqchip_add_irq_notifier(KVMState *s, EventNotifier *n, int virq)
+{
+    return kvm_irqchip_add_irqfd(s, event_notifier_get_fd(n), virq);
+}
+
 int kvm_irqchip_remove_irqfd(KVMState *s, int fd, int virq)
 {
     return kvm_irqchip_assign_irqfd(s, fd, virq, false);
 }
 
+int kvm_irqchip_remove_irq_notifier(KVMState *s, EventNotifier *n, int virq)
+{
+    return kvm_irqchip_remove_irqfd(s, event_notifier_get_fd(n), virq);
+}
+
 static int kvm_irqchip_create(KVMState *s)
 {
     QemuOptsList *list = qemu_find_opts("machine");
diff --git a/kvm-stub.c b/kvm-stub.c
index ec9a36454d..d23b11c020 100644
--- a/kvm-stub.c
+++ b/kvm-stub.c
@@ -147,7 +147,17 @@ int kvm_irqchip_add_irqfd(KVMState *s, int fd, int virq)
     return -ENOSYS;
 }
 
+int kvm_irqchip_add_irq_notifier(KVMState *s, EventNotifier *n, int virq)
+{
+    return -ENOSYS;
+}
+
 int kvm_irqchip_remove_irqfd(KVMState *s, int fd, int virq)
 {
     return -ENOSYS;
 }
+
+int kvm_irqchip_remove_irq_notifier(KVMState *s, EventNotifier *n, int virq)
+{
+    return -ENOSYS;
+}
diff --git a/kvm.h b/kvm.h
index ddc7c53cb0..2617dd5acd 100644
--- a/kvm.h
+++ b/kvm.h
@@ -220,4 +220,6 @@ void kvm_irqchip_release_virq(KVMState *s, int virq);
 
 int kvm_irqchip_add_irqfd(KVMState *s, int fd, int virq);
 int kvm_irqchip_remove_irqfd(KVMState *s, int fd, int virq);
+int kvm_irqchip_add_irq_notifier(KVMState *s, EventNotifier *n, int virq);
+int kvm_irqchip_remove_irq_notifier(KVMState *s, EventNotifier *n, int virq);
 #endif
diff --git a/memory.c b/memory.c
index aab4a31323..643871bafa 100644
--- a/memory.c
+++ b/memory.c
@@ -156,7 +156,7 @@ struct MemoryRegionIoeventfd {
     AddrRange addr;
     bool match_data;
     uint64_t data;
-    int fd;
+    EventNotifier *e;
 };
 
 static bool memory_region_ioeventfd_before(MemoryRegionIoeventfd a,
@@ -181,9 +181,9 @@ static bool memory_region_ioeventfd_before(MemoryRegionIoeventfd a,
             return false;
         }
     }
-    if (a.fd < b.fd) {
+    if (a.e < b.e) {
         return true;
-    } else if (a.fd > b.fd) {
+    } else if (a.e > b.e) {
         return false;
     }
     return false;
@@ -597,7 +597,7 @@ static void address_space_add_del_ioeventfds(AddressSpace *as,
                 .size = int128_get64(fd->addr.size),
             };
             MEMORY_LISTENER_CALL(eventfd_del, Forward, &section,
-                                 fd->match_data, fd->data, fd->fd);
+                                 fd->match_data, fd->data, fd->e);
             ++iold;
         } else if (inew < fds_new_nb
                    && (iold == fds_old_nb
@@ -610,7 +610,7 @@ static void address_space_add_del_ioeventfds(AddressSpace *as,
                 .size = int128_get64(fd->addr.size),
             };
             MEMORY_LISTENER_CALL(eventfd_add, Reverse, &section,
-                                 fd->match_data, fd->data, fd->fd);
+                                 fd->match_data, fd->data, fd->e);
             ++inew;
         } else {
             ++iold;
@@ -1195,14 +1195,14 @@ void memory_region_add_eventfd(MemoryRegion *mr,
                                unsigned size,
                                bool match_data,
                                uint64_t data,
-                               int fd)
+                               EventNotifier *e)
 {
     MemoryRegionIoeventfd mrfd = {
         .addr.start = int128_make64(addr),
         .addr.size = int128_make64(size),
         .match_data = match_data,
         .data = data,
-        .fd = fd,
+        .e = e,
     };
     unsigned i;
 
@@ -1225,14 +1225,14 @@ void memory_region_del_eventfd(MemoryRegion *mr,
                                unsigned size,
                                bool match_data,
                                uint64_t data,
-                               int fd)
+                               EventNotifier *e)
 {
     MemoryRegionIoeventfd mrfd = {
         .addr.start = int128_make64(addr),
         .addr.size = int128_make64(size),
         .match_data = match_data,
         .data = data,
-        .fd = fd,
+        .e = e,
     };
     unsigned i;
 
diff --git a/memory.h b/memory.h
index 740c48e8e5..bd1bbaeabe 100644
--- a/memory.h
+++ b/memory.h
@@ -198,9 +198,9 @@ struct MemoryListener {
     void (*log_global_start)(MemoryListener *listener);
     void (*log_global_stop)(MemoryListener *listener);
     void (*eventfd_add)(MemoryListener *listener, MemoryRegionSection *section,
-                        bool match_data, uint64_t data, int fd);
+                        bool match_data, uint64_t data, EventNotifier *e);
     void (*eventfd_del)(MemoryListener *listener, MemoryRegionSection *section,
-                        bool match_data, uint64_t data, int fd);
+                        bool match_data, uint64_t data, EventNotifier *e);
     /* Lower = earlier (during add), later (during del) */
     unsigned priority;
     MemoryRegion *address_space_filter;
@@ -541,7 +541,7 @@ void memory_region_add_eventfd(MemoryRegion *mr,
                                unsigned size,
                                bool match_data,
                                uint64_t data,
-                               int fd);
+                               EventNotifier *e);
 
 /**
  * memory_region_del_eventfd: Cancel an eventfd.
@@ -561,7 +561,8 @@ void memory_region_del_eventfd(MemoryRegion *mr,
                                unsigned size,
                                bool match_data,
                                uint64_t data,
-                               int fd);
+                               EventNotifier *e);
+
 /**
  * memory_region_add_subregion: Add a subregion to a container.
  *
diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index 5521709240..b3bcbacd76 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -77,7 +77,7 @@ static const char *ext3_feature_name[] = {
 };
 
 static const char *kvm_feature_name[] = {
-    "kvmclock", "kvm_nopiodelay", "kvm_mmu", "kvmclock", "kvm_asyncpf", NULL, NULL, NULL,
+    "kvmclock", "kvm_nopiodelay", "kvm_mmu", "kvmclock", "kvm_asyncpf", NULL, "kvm_pv_eoi", NULL,
     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index f257c972fb..2a61c810bb 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -400,6 +400,7 @@
 #define CPUID_EXT_X2APIC   (1 << 21)
 #define CPUID_EXT_MOVBE    (1 << 22)
 #define CPUID_EXT_POPCNT   (1 << 23)
+#define CPUID_EXT_TSC_DEADLINE_TIMER (1 << 24)
 #define CPUID_EXT_XSAVE    (1 << 26)
 #define CPUID_EXT_OSXSAVE  (1 << 27)
 #define CPUID_EXT_HYPERVISOR  (1 << 31)
@@ -477,6 +478,7 @@
                                  for syscall instruction */
 
 /* i386-specific interrupt pending bits.  */
+#define CPU_INTERRUPT_POLL      CPU_INTERRUPT_TGT_EXT_1
 #define CPU_INTERRUPT_SMI       CPU_INTERRUPT_TGT_EXT_2
 #define CPU_INTERRUPT_NMI       CPU_INTERRUPT_TGT_EXT_3
 #define CPU_INTERRUPT_MCE       CPU_INTERRUPT_TGT_EXT_4
@@ -1047,7 +1049,8 @@ static inline void cpu_clone_regs(CPUX86State *env, target_ulong newsp)
 
 static inline bool cpu_has_work(CPUX86State *env)
 {
-    return ((env->interrupt_request & CPU_INTERRUPT_HARD) &&
+    return ((env->interrupt_request & (CPU_INTERRUPT_HARD |
+                                       CPU_INTERRUPT_POLL)) &&
             (env->eflags & IF_MASK)) ||
            (env->interrupt_request & (CPU_INTERRUPT_NMI |
                                       CPU_INTERRUPT_INIT |
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index 0d0d8f69d3..e53c2f6bdf 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -361,8 +361,13 @@ int kvm_arch_init_vcpu(CPUX86State *env)
     env->cpuid_features &= kvm_arch_get_supported_cpuid(s, 1, 0, R_EDX);
 
     i = env->cpuid_ext_features & CPUID_EXT_HYPERVISOR;
+    j = env->cpuid_ext_features & CPUID_EXT_TSC_DEADLINE_TIMER;
     env->cpuid_ext_features &= kvm_arch_get_supported_cpuid(s, 1, 0, R_ECX);
     env->cpuid_ext_features |= i;
+    if (j && kvm_irqchip_in_kernel() &&
+        kvm_check_extension(s, KVM_CAP_TSC_DEADLINE_TIMER)) {
+        env->cpuid_ext_features |= CPUID_EXT_TSC_DEADLINE_TIMER;
+    }
 
     env->cpuid_ext2_features &= kvm_arch_get_supported_cpuid(s, 0x80000001,
                                                              0, R_EDX);
@@ -1727,6 +1732,10 @@ int kvm_arch_process_async_events(CPUX86State *env)
         return 0;
     }
 
+    if (env->interrupt_request & CPU_INTERRUPT_POLL) {
+        env->interrupt_request &= ~CPU_INTERRUPT_POLL;
+        apic_poll_irq(env->apic_state);
+    }
     if (((env->interrupt_request & CPU_INTERRUPT_HARD) &&
          (env->eflags & IF_MASK)) ||
         (env->interrupt_request & CPU_INTERRUPT_NMI)) {
diff --git a/xen-all.c b/xen-all.c
index 59f232395e..61def2ec8f 100644
--- a/xen-all.c
+++ b/xen-all.c
@@ -560,13 +560,15 @@ static void xen_log_global_stop(MemoryListener *listener)
 
 static void xen_eventfd_add(MemoryListener *listener,
                             MemoryRegionSection *section,
-                            bool match_data, uint64_t data, int fd)
+                            bool match_data, uint64_t data,
+                            EventNotifier *e)
 {
 }
 
 static void xen_eventfd_del(MemoryListener *listener,
                             MemoryRegionSection *section,
-                            bool match_data, uint64_t data, int fd)
+                            bool match_data, uint64_t data,
+                            EventNotifier *e)
 {
 }