summary refs log tree commit diff stats
path: root/hw
diff options
context:
space:
mode:
Diffstat (limited to 'hw')
-rw-r--r--hw/9pfs/virtio-9p-device.c14
-rw-r--r--hw/alpha/typhoon.c2
-rw-r--r--hw/block/virtio-blk.c18
-rw-r--r--hw/char/virtio-serial-bus.c62
-rw-r--r--hw/display/virtio-gpu.c36
-rw-r--r--hw/i386/Makefile.objs2
-rw-r--r--hw/i386/acpi-build.c43
-rw-r--r--hw/i386/intel_iommu.c437
-rw-r--r--hw/i386/intel_iommu_internal.h50
-rw-r--r--hw/i386/kvm/pci-assign.c10
-rw-r--r--hw/i386/pc.c3
-rw-r--r--hw/i386/trace-events3
-rw-r--r--hw/i386/x86-iommu.c128
-rw-r--r--hw/input/virtio-input.c26
-rw-r--r--hw/intc/ioapic.c135
-rw-r--r--hw/mem/nvdimm.c1
-rw-r--r--hw/mips/gt64xxx_pci.c2
-rw-r--r--hw/misc/ivshmem.c4
-rw-r--r--hw/net/virtio-net.c102
-rw-r--r--hw/pci-host/apb.c15
-rw-r--r--hw/pci-host/grackle.c2
-rw-r--r--hw/pci-host/prep.c1
-rw-r--r--hw/pci-host/versatile.c1
-rw-r--r--hw/pci/pci.c15
-rw-r--r--hw/scsi/virtio-scsi.c35
-rw-r--r--hw/vfio/pci.c12
-rw-r--r--hw/virtio/virtio-balloon.c19
-rw-r--r--hw/virtio/virtio-pci.c10
-rw-r--r--hw/virtio/virtio-rng.c20
-rw-r--r--hw/virtio/virtio.c51
30 files changed, 941 insertions, 318 deletions
diff --git a/hw/9pfs/virtio-9p-device.c b/hw/9pfs/virtio-9p-device.c
index 494e85e029..009b43f6d0 100644
--- a/hw/9pfs/virtio-9p-device.c
+++ b/hw/9pfs/virtio-9p-device.c
@@ -97,14 +97,9 @@ static void virtio_9p_get_config(VirtIODevice *vdev, uint8_t *config)
     g_free(cfg);
 }
 
-static void virtio_9p_save(QEMUFile *f, void *opaque)
+static int virtio_9p_load(QEMUFile *f, void *opaque, size_t size)
 {
-    virtio_save(VIRTIO_DEVICE(opaque), f);
-}
-
-static int virtio_9p_load(QEMUFile *f, void *opaque, int version_id)
-{
-    return virtio_load(VIRTIO_DEVICE(opaque), f, version_id);
+    return virtio_load(VIRTIO_DEVICE(opaque), f, 1);
 }
 
 static void virtio_9p_device_realize(DeviceState *dev, Error **errp)
@@ -120,7 +115,6 @@ static void virtio_9p_device_realize(DeviceState *dev, Error **errp)
     v->config_size = sizeof(struct virtio_9p_config) + strlen(s->fsconf.tag);
     virtio_init(vdev, "virtio-9p", VIRTIO_ID_9P, v->config_size);
     v->vq = virtio_add_queue(vdev, MAX_REQ, handle_9p_output);
-    register_savevm(dev, "virtio-9p", -1, 1, virtio_9p_save, virtio_9p_load, v);
 
 out:
     return;
@@ -133,7 +127,6 @@ static void virtio_9p_device_unrealize(DeviceState *dev, Error **errp)
     V9fsState *s = &v->state;
 
     virtio_cleanup(vdev);
-    unregister_savevm(dev, "virtio-9p", v);
     v9fs_device_unrealize_common(s, errp);
 }
 
@@ -175,6 +168,8 @@ void virtio_init_iov_from_pdu(V9fsPDU *pdu, struct iovec **piov,
 
 /* virtio-9p device */
 
+VMSTATE_VIRTIO_DEVICE(9p, 1, virtio_9p_load, virtio_vmstate_save);
+
 static Property virtio_9p_properties[] = {
     DEFINE_PROP_STRING("mount_tag", V9fsVirtioState, state.fsconf.tag),
     DEFINE_PROP_STRING("fsdev", V9fsVirtioState, state.fsconf.fsdev_id),
@@ -187,6 +182,7 @@ static void virtio_9p_class_init(ObjectClass *klass, void *data)
     VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
 
     dc->props = virtio_9p_properties;
+    dc->vmsd = &vmstate_virtio_9p;
     set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
     vdc->realize = virtio_9p_device_realize;
     vdc->unrealize = virtio_9p_device_unrealize;
diff --git a/hw/alpha/typhoon.c b/hw/alpha/typhoon.c
index 97721b535d..883db13f96 100644
--- a/hw/alpha/typhoon.c
+++ b/hw/alpha/typhoon.c
@@ -824,7 +824,6 @@ PCIBus *typhoon_init(ram_addr_t ram_size, ISABus **isa_bus,
     int i;
 
     dev = qdev_create(NULL, TYPE_TYPHOON_PCI_HOST_BRIDGE);
-    qdev_init_nofail(dev);
 
     s = TYPHOON_PCI_HOST_BRIDGE(dev);
     phb = PCI_HOST_BRIDGE(dev);
@@ -889,6 +888,7 @@ PCIBus *typhoon_init(ram_addr_t ram_size, ISABus **isa_bus,
                          &s->pchip.reg_mem, &s->pchip.reg_io,
                          0, 64, TYPE_PCI_BUS);
     phb->bus = b;
+    qdev_init_nofail(dev);
 
     /* Host memory as seen from the PCI side, via the IOMMU.  */
     memory_region_init_iommu(&s->pchip.iommu, OBJECT(s), &typhoon_iommu_ops,
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index 357ff9081e..475a822f5a 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -798,7 +798,7 @@ static void virtio_blk_set_status(VirtIODevice *vdev, uint8_t status)
     }
 }
 
-static void virtio_blk_save(QEMUFile *f, void *opaque)
+static void virtio_blk_save(QEMUFile *f, void *opaque, size_t size)
 {
     VirtIODevice *vdev = VIRTIO_DEVICE(opaque);
 
@@ -823,15 +823,12 @@ static void virtio_blk_save_device(VirtIODevice *vdev, QEMUFile *f)
     qemu_put_sbyte(f, 0);
 }
 
-static int virtio_blk_load(QEMUFile *f, void *opaque, int version_id)
+static int virtio_blk_load(QEMUFile *f, void *opaque, size_t size)
 {
     VirtIOBlock *s = opaque;
     VirtIODevice *vdev = VIRTIO_DEVICE(s);
 
-    if (version_id != 2)
-        return -EINVAL;
-
-    return virtio_load(vdev, f, version_id);
+    return virtio_load(vdev, f, 2);
 }
 
 static int virtio_blk_load_device(VirtIODevice *vdev, QEMUFile *f,
@@ -880,7 +877,6 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp)
     VirtIOBlock *s = VIRTIO_BLK(dev);
     VirtIOBlkConf *conf = &s->conf;
     Error *err = NULL;
-    static int virtio_blk_id;
     unsigned i;
 
     if (!conf->conf.blk) {
@@ -914,7 +910,7 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp)
     s->sector_mask = (s->conf.conf.logical_block_size / BDRV_SECTOR_SIZE) - 1;
 
     for (i = 0; i < conf->num_queues; i++) {
-        virtio_add_queue(vdev, 128, virtio_blk_handle_output);
+        virtio_add_queue_aio(vdev, 128, virtio_blk_handle_output);
     }
     virtio_blk_data_plane_create(vdev, conf, &s->dataplane, &err);
     if (err != NULL) {
@@ -924,8 +920,6 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp)
     }
 
     s->change = qemu_add_vm_change_state_handler(virtio_blk_dma_restart_cb, s);
-    register_savevm(dev, "virtio-blk", virtio_blk_id++, 2,
-                    virtio_blk_save, virtio_blk_load, s);
     blk_set_dev_ops(s->blk, &virtio_block_ops, s);
     blk_set_guest_block_size(s->blk, s->conf.conf.logical_block_size);
 
@@ -940,7 +934,6 @@ static void virtio_blk_device_unrealize(DeviceState *dev, Error **errp)
     virtio_blk_data_plane_destroy(s->dataplane);
     s->dataplane = NULL;
     qemu_del_vm_change_state_handler(s->change);
-    unregister_savevm(dev, "virtio-blk", s);
     blockdev_mark_auto_del(s->blk);
     virtio_cleanup(vdev);
 }
@@ -958,6 +951,8 @@ static void virtio_blk_instance_init(Object *obj)
                                   DEVICE(obj), NULL);
 }
 
+VMSTATE_VIRTIO_DEVICE(blk, 2, virtio_blk_load, virtio_blk_save);
+
 static Property virtio_blk_properties[] = {
     DEFINE_BLOCK_PROPERTIES(VirtIOBlock, conf.conf),
     DEFINE_BLOCK_ERROR_PROPERTIES(VirtIOBlock, conf.conf),
@@ -979,6 +974,7 @@ static void virtio_blk_class_init(ObjectClass *klass, void *data)
     VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
 
     dc->props = virtio_blk_properties;
+    dc->vmsd = &vmstate_virtio_blk;
     set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
     vdc->realize = virtio_blk_device_realize;
     vdc->unrealize = virtio_blk_device_unrealize;
diff --git a/hw/char/virtio-serial-bus.c b/hw/char/virtio-serial-bus.c
index 6e5de6dec2..db57a38546 100644
--- a/hw/char/virtio-serial-bus.c
+++ b/hw/char/virtio-serial-bus.c
@@ -594,12 +594,6 @@ static void vser_reset(VirtIODevice *vdev)
     guest_reset(vser);
 }
 
-static void virtio_serial_save(QEMUFile *f, void *opaque)
-{
-    /* The virtio device */
-    virtio_save(VIRTIO_DEVICE(opaque), f);
-}
-
 static void virtio_serial_save_device(VirtIODevice *vdev, QEMUFile *f)
 {
     VirtIOSerial *s = VIRTIO_SERIAL(vdev);
@@ -685,7 +679,7 @@ static void virtio_serial_post_load_timer_cb(void *opaque)
     s->post_load = NULL;
 }
 
-static int fetch_active_ports_list(QEMUFile *f, int version_id,
+static int fetch_active_ports_list(QEMUFile *f,
                                    VirtIOSerial *s, uint32_t nr_active_ports)
 {
     uint32_t i;
@@ -702,6 +696,7 @@ static int fetch_active_ports_list(QEMUFile *f, int version_id,
     /* Items in struct VirtIOSerialPort */
     for (i = 0; i < nr_active_ports; i++) {
         VirtIOSerialPort *port;
+        uint32_t elem_popped;
         uint32_t id;
 
         id = qemu_get_be32(f);
@@ -714,37 +709,29 @@ static int fetch_active_ports_list(QEMUFile *f, int version_id,
         s->post_load->connected[i].port = port;
         s->post_load->connected[i].host_connected = qemu_get_byte(f);
 
-        if (version_id > 2) {
-            uint32_t elem_popped;
-
-            qemu_get_be32s(f, &elem_popped);
-            if (elem_popped) {
-                qemu_get_be32s(f, &port->iov_idx);
-                qemu_get_be64s(f, &port->iov_offset);
+        qemu_get_be32s(f, &elem_popped);
+        if (elem_popped) {
+            qemu_get_be32s(f, &port->iov_idx);
+            qemu_get_be64s(f, &port->iov_offset);
 
-                port->elem =
-                    qemu_get_virtqueue_element(f, sizeof(VirtQueueElement));
+            port->elem =
+                qemu_get_virtqueue_element(f, sizeof(VirtQueueElement));
 
-                /*
-                 *  Port was throttled on source machine.  Let's
-                 *  unthrottle it here so data starts flowing again.
-                 */
-                virtio_serial_throttle_port(port, false);
-            }
+            /*
+             *  Port was throttled on source machine.  Let's
+             *  unthrottle it here so data starts flowing again.
+             */
+            virtio_serial_throttle_port(port, false);
         }
     }
     timer_mod(s->post_load->timer, 1);
     return 0;
 }
 
-static int virtio_serial_load(QEMUFile *f, void *opaque, int version_id)
+static int virtio_serial_load(QEMUFile *f, void *opaque, size_t size)
 {
-    if (version_id > 3) {
-        return -EINVAL;
-    }
-
     /* The virtio device */
-    return virtio_load(VIRTIO_DEVICE(opaque), f, version_id);
+    return virtio_load(VIRTIO_DEVICE(opaque), f, 3);
 }
 
 static int virtio_serial_load_device(VirtIODevice *vdev, QEMUFile *f,
@@ -756,10 +743,6 @@ static int virtio_serial_load_device(VirtIODevice *vdev, QEMUFile *f,
     int ret;
     uint32_t tmp;
 
-    if (version_id < 2) {
-        return 0;
-    }
-
     /* Unused */
     qemu_get_be16s(f, (uint16_t *) &tmp);
     qemu_get_be16s(f, (uint16_t *) &tmp);
@@ -781,7 +764,7 @@ static int virtio_serial_load_device(VirtIODevice *vdev, QEMUFile *f,
     qemu_get_be32s(f, &nr_active_ports);
 
     if (nr_active_ports) {
-        ret = fetch_active_ports_list(f, version_id, s, nr_active_ports);
+        ret = fetch_active_ports_list(f, s, nr_active_ports);
         if (ret) {
             return ret;
         }
@@ -1049,13 +1032,6 @@ static void virtio_serial_device_realize(DeviceState *dev, Error **errp)
 
     vser->post_load = NULL;
 
-    /*
-     * Register for the savevm section with the virtio-console name
-     * to preserve backward compat
-     */
-    register_savevm(dev, "virtio-console", -1, 3, virtio_serial_save,
-                    virtio_serial_load, vser);
-
     QLIST_INSERT_HEAD(&vserdevices.devices, vser, next);
 }
 
@@ -1086,8 +1062,6 @@ static void virtio_serial_device_unrealize(DeviceState *dev, Error **errp)
 
     QLIST_REMOVE(vser, next);
 
-    unregister_savevm(dev, "virtio-console", vser);
-
     g_free(vser->ivqs);
     g_free(vser->ovqs);
     g_free(vser->ports_map);
@@ -1100,6 +1074,9 @@ static void virtio_serial_device_unrealize(DeviceState *dev, Error **errp)
     virtio_cleanup(vdev);
 }
 
+/* Note: 'console' is used for backwards compatibility */
+VMSTATE_VIRTIO_DEVICE(console, 3, virtio_serial_load, virtio_vmstate_save);
+
 static Property virtio_serial_properties[] = {
     DEFINE_PROP_UINT32("max_ports", VirtIOSerial, serial.max_virtserial_ports,
                                                   31),
@@ -1115,6 +1092,7 @@ static void virtio_serial_class_init(ObjectClass *klass, void *data)
     QLIST_INIT(&vserdevices.devices);
 
     dc->props = virtio_serial_properties;
+    dc->vmsd = &vmstate_virtio_console;
     set_bit(DEVICE_CATEGORY_INPUT, dc->categories);
     vdc->realize = virtio_serial_device_realize;
     vdc->unrealize = virtio_serial_device_unrealize;
diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c
index 929c3c8b8a..7fe6ed8bf0 100644
--- a/hw/display/virtio-gpu.c
+++ b/hw/display/virtio-gpu.c
@@ -19,6 +19,7 @@
 #include "hw/virtio/virtio.h"
 #include "hw/virtio/virtio-gpu.h"
 #include "hw/virtio/virtio-bus.h"
+#include "migration/migration.h"
 #include "qemu/log.h"
 #include "qapi/error.h"
 
@@ -986,12 +987,7 @@ static const VMStateDescription vmstate_virtio_gpu_scanouts = {
     },
 };
 
-static const VMStateDescription vmstate_virtio_gpu_unmigratable = {
-    .name = "virtio-gpu-with-virgl",
-    .unmigratable = 1,
-};
-
-static void virtio_gpu_save(QEMUFile *f, void *opaque)
+static void virtio_gpu_save(QEMUFile *f, void *opaque, size_t size)
 {
     VirtIOGPU *g = opaque;
     VirtIODevice *vdev = VIRTIO_DEVICE(g);
@@ -1021,7 +1017,7 @@ static void virtio_gpu_save(QEMUFile *f, void *opaque)
     vmstate_save_state(f, &vmstate_virtio_gpu_scanouts, g, NULL);
 }
 
-static int virtio_gpu_load(QEMUFile *f, void *opaque, int version_id)
+static int virtio_gpu_load(QEMUFile *f, void *opaque, size_t size)
 {
     VirtIOGPU *g = opaque;
     VirtIODevice *vdev = VIRTIO_DEVICE(g);
@@ -1030,11 +1026,7 @@ static int virtio_gpu_load(QEMUFile *f, void *opaque, int version_id)
     uint32_t resource_id, pformat;
     int i, ret;
 
-    if (version_id != VIRTIO_GPU_VM_VERSION) {
-        return -EINVAL;
-    }
-
-    ret = virtio_load(vdev, f, version_id);
+    ret = virtio_load(vdev, f, VIRTIO_GPU_VM_VERSION);
     if (ret) {
         return ret;
     }
@@ -1169,10 +1161,17 @@ static void virtio_gpu_device_realize(DeviceState *qdev, Error **errp)
     }
 
     if (virtio_gpu_virgl_enabled(g->conf)) {
-        vmstate_register(qdev, -1, &vmstate_virtio_gpu_unmigratable, g);
-    } else {
-        register_savevm(qdev, "virtio-gpu", -1, VIRTIO_GPU_VM_VERSION,
-                        virtio_gpu_save, virtio_gpu_load, g);
+        error_setg(&g->migration_blocker, "virgl is not yet migratable");
+        migrate_add_blocker(g->migration_blocker);
+    }
+}
+
+static void virtio_gpu_device_unrealize(DeviceState *qdev, Error **errp)
+{
+    VirtIOGPU *g = VIRTIO_GPU(qdev);
+    if (g->migration_blocker) {
+        migrate_del_blocker(g->migration_blocker);
+        error_free(g->migration_blocker);
     }
 }
 
@@ -1220,6 +1219,9 @@ static void virtio_gpu_reset(VirtIODevice *vdev)
 #endif
 }
 
+VMSTATE_VIRTIO_DEVICE(gpu, VIRTIO_GPU_VM_VERSION, virtio_gpu_load,
+                      virtio_gpu_save);
+
 static Property virtio_gpu_properties[] = {
     DEFINE_PROP_UINT32("max_outputs", VirtIOGPU, conf.max_outputs, 1),
 #ifdef CONFIG_VIRGL
@@ -1237,6 +1239,7 @@ static void virtio_gpu_class_init(ObjectClass *klass, void *data)
     VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
 
     vdc->realize = virtio_gpu_device_realize;
+    vdc->unrealize = virtio_gpu_device_unrealize;
     vdc->get_config = virtio_gpu_get_config;
     vdc->set_config = virtio_gpu_set_config;
     vdc->get_features = virtio_gpu_get_features;
@@ -1245,6 +1248,7 @@ static void virtio_gpu_class_init(ObjectClass *klass, void *data)
     vdc->reset = virtio_gpu_reset;
 
     dc->props = virtio_gpu_properties;
+    dc->vmsd = &vmstate_virtio_gpu;
 }
 
 static const TypeInfo virtio_gpu_info = {
diff --git a/hw/i386/Makefile.objs b/hw/i386/Makefile.objs
index b52d5b8756..90e94ffefd 100644
--- a/hw/i386/Makefile.objs
+++ b/hw/i386/Makefile.objs
@@ -2,7 +2,7 @@ obj-$(CONFIG_KVM) += kvm/
 obj-y += multiboot.o
 obj-y += pc.o pc_piix.o pc_q35.o
 obj-y += pc_sysfw.o
-obj-y += intel_iommu.o
+obj-y += x86-iommu.o intel_iommu.o
 obj-$(CONFIG_XEN) += ../xenpv/ xen/
 
 obj-y += kvmvapic.o
diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index fbba461a87..77c40d92e2 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -52,13 +52,14 @@
 #include "hw/i386/ich9.h"
 #include "hw/pci/pci_bus.h"
 #include "hw/pci-host/q35.h"
-#include "hw/i386/intel_iommu.h"
+#include "hw/i386/x86-iommu.h"
 #include "hw/timer/hpet.h"
 
 #include "hw/acpi/aml-build.h"
 
 #include "qapi/qmp/qint.h"
 #include "qom/qom-qobject.h"
+#include "hw/i386/x86-iommu.h"
 
 #include "hw/acpi/ipmi.h"
 
@@ -80,6 +81,9 @@
 #define ACPI_BUILD_DPRINTF(fmt, ...)
 #endif
 
+/* Default IOAPIC ID */
+#define ACPI_BUILD_IOAPIC_ID 0x0
+
 typedef struct AcpiMcfgInfo {
     uint64_t mcfg_base;
     uint32_t mcfg_size;
@@ -383,7 +387,6 @@ build_madt(GArray *table_data, BIOSLinker *linker, PCMachineState *pcms)
     io_apic = acpi_data_push(table_data, sizeof *io_apic);
     io_apic->type = ACPI_APIC_IO;
     io_apic->length = sizeof(*io_apic);
-#define ACPI_BUILD_IOAPIC_ID 0x0
     io_apic->io_apic_id = ACPI_BUILD_IOAPIC_ID;
     io_apic->address = cpu_to_le32(IO_APIC_DEFAULT_ADDRESS);
     io_apic->interrupt = cpu_to_le32(0);
@@ -2454,6 +2457,10 @@ build_mcfg_q35(GArray *table_data, BIOSLinker *linker, AcpiMcfgInfo *info)
     build_header(linker, table_data, (void *)mcfg, sig, len, 1, NULL, NULL);
 }
 
+/*
+ * VT-d spec 8.1 DMA Remapping Reporting Structure
+ * (version Oct. 2014 or later)
+ */
 static void
 build_dmar_q35(GArray *table_data, BIOSLinker *linker)
 {
@@ -2461,19 +2468,38 @@ build_dmar_q35(GArray *table_data, BIOSLinker *linker)
 
     AcpiTableDmar *dmar;
     AcpiDmarHardwareUnit *drhd;
+    uint8_t dmar_flags = 0;
+    X86IOMMUState *iommu = x86_iommu_get_default();
+    AcpiDmarDeviceScope *scope = NULL;
+    /* Root complex IOAPIC use one path[0] only */
+    size_t ioapic_scope_size = sizeof(*scope) + sizeof(scope->path[0]);
+
+    assert(iommu);
+    if (iommu->intr_supported) {
+        dmar_flags |= 0x1;      /* Flags: 0x1: INT_REMAP */
+    }
 
     dmar = acpi_data_push(table_data, sizeof(*dmar));
     dmar->host_address_width = VTD_HOST_ADDRESS_WIDTH - 1;
-    dmar->flags = 0;    /* No intr_remap for now */
+    dmar->flags = dmar_flags;
 
     /* DMAR Remapping Hardware Unit Definition structure */
-    drhd = acpi_data_push(table_data, sizeof(*drhd));
+    drhd = acpi_data_push(table_data, sizeof(*drhd) + ioapic_scope_size);
     drhd->type = cpu_to_le16(ACPI_DMAR_TYPE_HARDWARE_UNIT);
-    drhd->length = cpu_to_le16(sizeof(*drhd));   /* No device scope now */
+    drhd->length = cpu_to_le16(sizeof(*drhd) + ioapic_scope_size);
     drhd->flags = ACPI_DMAR_INCLUDE_PCI_ALL;
     drhd->pci_segment = cpu_to_le16(0);
     drhd->address = cpu_to_le64(Q35_HOST_BRIDGE_IOMMU_ADDR);
 
+    /* Scope definition for the root-complex IOAPIC. See VT-d spec
+     * 8.3.1 (version Oct. 2014 or later). */
+    scope = &drhd->scope[0];
+    scope->entry_type = 0x03;   /* Type: 0x03 for IOAPIC */
+    scope->length = ioapic_scope_size;
+    scope->enumeration_id = ACPI_BUILD_IOAPIC_ID;
+    scope->bus = Q35_PSEUDO_BUS_PLATFORM;
+    scope->path[0] = cpu_to_le16(Q35_PSEUDO_DEVFN_IOAPIC);
+
     build_header(linker, table_data, (void *)(table_data->data + dmar_start),
                  "DMAR", table_data->len - dmar_start, 1, NULL, NULL);
 }
@@ -2539,12 +2565,7 @@ static bool acpi_get_mcfg(AcpiMcfgInfo *mcfg)
 
 static bool acpi_has_iommu(void)
 {
-    bool ambiguous;
-    Object *intel_iommu;
-
-    intel_iommu = object_resolve_path_type("", TYPE_INTEL_IOMMU_DEVICE,
-                                           &ambiguous);
-    return intel_iommu && !ambiguous;
+    return !!x86_iommu_get_default();
 }
 
 static
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 464f2a0518..28c31a2cdf 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -20,18 +20,23 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/error-report.h"
 #include "hw/sysbus.h"
 #include "exec/address-spaces.h"
 #include "intel_iommu_internal.h"
 #include "hw/pci/pci.h"
 #include "hw/pci/pci_bus.h"
 #include "hw/i386/pc.h"
+#include "hw/boards.h"
+#include "hw/i386/x86-iommu.h"
+#include "hw/pci-host/q35.h"
+#include "sysemu/kvm.h"
 
 /*#define DEBUG_INTEL_IOMMU*/
 #ifdef DEBUG_INTEL_IOMMU
 enum {
     DEBUG_GENERAL, DEBUG_CSR, DEBUG_INV, DEBUG_MMU, DEBUG_FLOG,
-    DEBUG_CACHE,
+    DEBUG_CACHE, DEBUG_IR,
 };
 #define VTD_DBGBIT(x)   (1 << DEBUG_##x)
 static int vtd_dbgflags = VTD_DBGBIT(GENERAL) | VTD_DBGBIT(CSR);
@@ -192,7 +197,7 @@ static void vtd_reset_context_cache(IntelIOMMUState *s)
 
     VTD_DPRINTF(CACHE, "global context_cache_gen=1");
     while (g_hash_table_iter_next (&bus_it, NULL, (void**)&vtd_bus)) {
-        for (devfn_it = 0; devfn_it < VTD_PCI_DEVFN_MAX; ++devfn_it) {
+        for (devfn_it = 0; devfn_it < X86_IOMMU_PCI_DEVFN_MAX; ++devfn_it) {
             vtd_as = vtd_bus->dev_as[devfn_it];
             if (!vtd_as) {
                 continue;
@@ -901,6 +906,27 @@ static void vtd_root_table_setup(IntelIOMMUState *s)
                 (s->root_extended ? "(extended)" : ""));
 }
 
+static void vtd_iec_notify_all(IntelIOMMUState *s, bool global,
+                               uint32_t index, uint32_t mask)
+{
+    x86_iommu_iec_notify_all(X86_IOMMU_DEVICE(s), global, index, mask);
+}
+
+static void vtd_interrupt_remap_table_setup(IntelIOMMUState *s)
+{
+    uint64_t value = 0;
+    value = vtd_get_quad_raw(s, DMAR_IRTA_REG);
+    s->intr_size = 1UL << ((value & VTD_IRTA_SIZE_MASK) + 1);
+    s->intr_root = value & VTD_IRTA_ADDR_MASK;
+    s->intr_eime = value & VTD_IRTA_EIME;
+
+    /* Notify global invalidation */
+    vtd_iec_notify_all(s, true, 0, 0);
+
+    VTD_DPRINTF(CSR, "int remap table addr 0x%"PRIx64 " size %"PRIu32,
+                s->intr_root, s->intr_size);
+}
+
 static void vtd_context_global_invalidate(IntelIOMMUState *s)
 {
     s->context_cache_gen++;
@@ -964,7 +990,7 @@ static void vtd_context_device_invalidate(IntelIOMMUState *s,
     vtd_bus = vtd_find_as_from_bus_num(s, VTD_SID_TO_BUS(source_id));
     if (vtd_bus) {
         devfn = VTD_SID_TO_DEVFN(source_id);
-        for (devfn_it = 0; devfn_it < VTD_PCI_DEVFN_MAX; ++devfn_it) {
+        for (devfn_it = 0; devfn_it < X86_IOMMU_PCI_DEVFN_MAX; ++devfn_it) {
             vtd_as = vtd_bus->dev_as[devfn_it];
             if (vtd_as && ((devfn_it & mask) == (devfn & mask))) {
                 VTD_DPRINTF(INV, "invalidate context-cahce of devfn 0x%"PRIx16,
@@ -1139,6 +1165,16 @@ static void vtd_handle_gcmd_srtp(IntelIOMMUState *s)
     vtd_set_clear_mask_long(s, DMAR_GSTS_REG, 0, VTD_GSTS_RTPS);
 }
 
+/* Set Interrupt Remap Table Pointer */
+static void vtd_handle_gcmd_sirtp(IntelIOMMUState *s)
+{
+    VTD_DPRINTF(CSR, "set Interrupt Remap Table Pointer");
+
+    vtd_interrupt_remap_table_setup(s);
+    /* Ok - report back to driver */
+    vtd_set_clear_mask_long(s, DMAR_GSTS_REG, 0, VTD_GSTS_IRTPS);
+}
+
 /* Handle Translation Enable/Disable */
 static void vtd_handle_gcmd_te(IntelIOMMUState *s, bool en)
 {
@@ -1158,6 +1194,22 @@ static void vtd_handle_gcmd_te(IntelIOMMUState *s, bool en)
     }
 }
 
+/* Handle Interrupt Remap Enable/Disable */
+static void vtd_handle_gcmd_ire(IntelIOMMUState *s, bool en)
+{
+    VTD_DPRINTF(CSR, "Interrupt Remap Enable %s", (en ? "on" : "off"));
+
+    if (en) {
+        s->intr_enabled = true;
+        /* Ok - report back to driver */
+        vtd_set_clear_mask_long(s, DMAR_GSTS_REG, 0, VTD_GSTS_IRES);
+    } else {
+        s->intr_enabled = false;
+        /* Ok - report back to driver */
+        vtd_set_clear_mask_long(s, DMAR_GSTS_REG, VTD_GSTS_IRES, 0);
+    }
+}
+
 /* Handle write to Global Command Register */
 static void vtd_handle_gcmd_write(IntelIOMMUState *s)
 {
@@ -1178,6 +1230,14 @@ static void vtd_handle_gcmd_write(IntelIOMMUState *s)
         /* Queued Invalidation Enable */
         vtd_handle_gcmd_qie(s, val & VTD_GCMD_QIE);
     }
+    if (val & VTD_GCMD_SIRTP) {
+        /* Set/update the interrupt remapping root-table pointer */
+        vtd_handle_gcmd_sirtp(s);
+    }
+    if (changed & VTD_GCMD_IRE) {
+        /* Interrupt remap enable/disable */
+        vtd_handle_gcmd_ire(s, val & VTD_GCMD_IRE);
+    }
 }
 
 /* Handle write to Context Command Register */
@@ -1363,6 +1423,21 @@ static bool vtd_process_iotlb_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc)
     return true;
 }
 
+static bool vtd_process_inv_iec_desc(IntelIOMMUState *s,
+                                     VTDInvDesc *inv_desc)
+{
+    VTD_DPRINTF(INV, "inv ir glob %d index %d mask %d",
+                inv_desc->iec.granularity,
+                inv_desc->iec.index,
+                inv_desc->iec.index_mask);
+
+    vtd_iec_notify_all(s, !inv_desc->iec.granularity,
+                       inv_desc->iec.index,
+                       inv_desc->iec.index_mask);
+
+    return true;
+}
+
 static bool vtd_process_inv_desc(IntelIOMMUState *s)
 {
     VTDInvDesc inv_desc;
@@ -1402,6 +1477,15 @@ static bool vtd_process_inv_desc(IntelIOMMUState *s)
         }
         break;
 
+    case VTD_INV_DESC_IEC:
+        VTD_DPRINTF(INV, "Invalidation Interrupt Entry Cache "
+                    "Descriptor hi 0x%"PRIx64 " lo 0x%"PRIx64,
+                    inv_desc.hi, inv_desc.lo);
+        if (!vtd_process_inv_iec_desc(s, &inv_desc)) {
+            return false;
+        }
+        break;
+
     default:
         VTD_DPRINTF(GENERAL, "error: unkonw Invalidation Descriptor type "
                     "hi 0x%"PRIx64 " lo 0x%"PRIx64 " type %"PRIu8,
@@ -1830,6 +1914,23 @@ static void vtd_mem_write(void *opaque, hwaddr addr,
         vtd_update_fsts_ppf(s);
         break;
 
+    case DMAR_IRTA_REG:
+        VTD_DPRINTF(IR, "DMAR_IRTA_REG write addr 0x%"PRIx64
+                    ", size %d, val 0x%"PRIx64, addr, size, val);
+        if (size == 4) {
+            vtd_set_long(s, addr, val);
+        } else {
+            vtd_set_quad(s, addr, val);
+        }
+        break;
+
+    case DMAR_IRTA_REG_HI:
+        VTD_DPRINTF(IR, "DMAR_IRTA_REG_HI write addr 0x%"PRIx64
+                    ", size %d, val 0x%"PRIx64, addr, size, val);
+        assert(size == 4);
+        vtd_set_long(s, addr, val);
+        break;
+
     default:
         VTD_DPRINTF(GENERAL, "error: unhandled reg write addr 0x%"PRIx64
                     ", size %d, val 0x%"PRIx64, addr, size, val);
@@ -1907,6 +2008,295 @@ static Property vtd_properties[] = {
     DEFINE_PROP_END_OF_LIST(),
 };
 
+/* Read IRTE entry with specific index */
+static int vtd_irte_get(IntelIOMMUState *iommu, uint16_t index,
+                        VTD_IR_TableEntry *entry, uint16_t sid)
+{
+    static const uint16_t vtd_svt_mask[VTD_SQ_MAX] = \
+        {0xffff, 0xfffb, 0xfff9, 0xfff8};
+    dma_addr_t addr = 0x00;
+    uint16_t mask, source_id;
+    uint8_t bus, bus_max, bus_min;
+
+    addr = iommu->intr_root + index * sizeof(*entry);
+    if (dma_memory_read(&address_space_memory, addr, entry,
+                        sizeof(*entry))) {
+        VTD_DPRINTF(GENERAL, "error: fail to access IR root at 0x%"PRIx64
+                    " + %"PRIu16, iommu->intr_root, index);
+        return -VTD_FR_IR_ROOT_INVAL;
+    }
+
+    if (!entry->irte.present) {
+        VTD_DPRINTF(GENERAL, "error: present flag not set in IRTE"
+                    " entry index %u value 0x%"PRIx64 " 0x%"PRIx64,
+                    index, le64_to_cpu(entry->data[1]),
+                    le64_to_cpu(entry->data[0]));
+        return -VTD_FR_IR_ENTRY_P;
+    }
+
+    if (entry->irte.__reserved_0 || entry->irte.__reserved_1 ||
+        entry->irte.__reserved_2) {
+        VTD_DPRINTF(GENERAL, "error: IRTE entry index %"PRIu16
+                    " reserved fields non-zero: 0x%"PRIx64 " 0x%"PRIx64,
+                    index, le64_to_cpu(entry->data[1]),
+                    le64_to_cpu(entry->data[0]));
+        return -VTD_FR_IR_IRTE_RSVD;
+    }
+
+    if (sid != X86_IOMMU_SID_INVALID) {
+        /* Validate IRTE SID */
+        source_id = le32_to_cpu(entry->irte.source_id);
+        switch (entry->irte.sid_vtype) {
+        case VTD_SVT_NONE:
+            VTD_DPRINTF(IR, "No SID validation for IRTE index %d", index);
+            break;
+
+        case VTD_SVT_ALL:
+            mask = vtd_svt_mask[entry->irte.sid_q];
+            if ((source_id & mask) != (sid & mask)) {
+                VTD_DPRINTF(GENERAL, "SID validation for IRTE index "
+                            "%d failed (reqid 0x%04x sid 0x%04x)", index,
+                            sid, source_id);
+                return -VTD_FR_IR_SID_ERR;
+            }
+            break;
+
+        case VTD_SVT_BUS:
+            bus_max = source_id >> 8;
+            bus_min = source_id & 0xff;
+            bus = sid >> 8;
+            if (bus > bus_max || bus < bus_min) {
+                VTD_DPRINTF(GENERAL, "SID validation for IRTE index %d "
+                            "failed (bus %d outside %d-%d)", index, bus,
+                            bus_min, bus_max);
+                return -VTD_FR_IR_SID_ERR;
+            }
+            break;
+
+        default:
+            VTD_DPRINTF(GENERAL, "Invalid SVT bits (0x%x) in IRTE index "
+                        "%d", entry->irte.sid_vtype, index);
+            /* Take this as verification failure. */
+            return -VTD_FR_IR_SID_ERR;
+            break;
+        }
+    }
+
+    return 0;
+}
+
+/* Fetch IRQ information of specific IR index */
+static int vtd_remap_irq_get(IntelIOMMUState *iommu, uint16_t index,
+                             VTDIrq *irq, uint16_t sid)
+{
+    VTD_IR_TableEntry irte = {};
+    int ret = 0;
+
+    ret = vtd_irte_get(iommu, index, &irte, sid);
+    if (ret) {
+        return ret;
+    }
+
+    irq->trigger_mode = irte.irte.trigger_mode;
+    irq->vector = irte.irte.vector;
+    irq->delivery_mode = irte.irte.delivery_mode;
+    irq->dest = le32_to_cpu(irte.irte.dest_id);
+    if (!iommu->intr_eime) {
+#define  VTD_IR_APIC_DEST_MASK         (0xff00ULL)
+#define  VTD_IR_APIC_DEST_SHIFT        (8)
+        irq->dest = (irq->dest & VTD_IR_APIC_DEST_MASK) >>
+            VTD_IR_APIC_DEST_SHIFT;
+    }
+    irq->dest_mode = irte.irte.dest_mode;
+    irq->redir_hint = irte.irte.redir_hint;
+
+    VTD_DPRINTF(IR, "remapping interrupt index %d: trig:%u,vec:%u,"
+                "deliver:%u,dest:%u,dest_mode:%u", index,
+                irq->trigger_mode, irq->vector, irq->delivery_mode,
+                irq->dest, irq->dest_mode);
+
+    return 0;
+}
+
+/* Generate one MSI message from VTDIrq info */
+static void vtd_generate_msi_message(VTDIrq *irq, MSIMessage *msg_out)
+{
+    VTD_MSIMessage msg = {};
+
+    /* Generate address bits */
+    msg.dest_mode = irq->dest_mode;
+    msg.redir_hint = irq->redir_hint;
+    msg.dest = irq->dest;
+    msg.__addr_head = cpu_to_le32(0xfee);
+    /* Keep this from original MSI address bits */
+    msg.__not_used = irq->msi_addr_last_bits;
+
+    /* Generate data bits */
+    msg.vector = irq->vector;
+    msg.delivery_mode = irq->delivery_mode;
+    msg.level = 1;
+    msg.trigger_mode = irq->trigger_mode;
+
+    msg_out->address = msg.msi_addr;
+    msg_out->data = msg.msi_data;
+}
+
+/* Interrupt remapping for MSI/MSI-X entry */
+static int vtd_interrupt_remap_msi(IntelIOMMUState *iommu,
+                                   MSIMessage *origin,
+                                   MSIMessage *translated,
+                                   uint16_t sid)
+{
+    int ret = 0;
+    VTD_IR_MSIAddress addr;
+    uint16_t index;
+    VTDIrq irq = {};
+
+    assert(origin && translated);
+
+    if (!iommu || !iommu->intr_enabled) {
+        goto do_not_translate;
+    }
+
+    if (origin->address & VTD_MSI_ADDR_HI_MASK) {
+        VTD_DPRINTF(GENERAL, "error: MSI addr high 32 bits nonzero"
+                    " during interrupt remapping: 0x%"PRIx32,
+                    (uint32_t)((origin->address & VTD_MSI_ADDR_HI_MASK) >> \
+                    VTD_MSI_ADDR_HI_SHIFT));
+        return -VTD_FR_IR_REQ_RSVD;
+    }
+
+    addr.data = origin->address & VTD_MSI_ADDR_LO_MASK;
+    if (le16_to_cpu(addr.addr.__head) != 0xfee) {
+        VTD_DPRINTF(GENERAL, "error: MSI addr low 32 bits invalid: "
+                    "0x%"PRIx32, addr.data);
+        return -VTD_FR_IR_REQ_RSVD;
+    }
+
+    /* This is compatible mode. */
+    if (addr.addr.int_mode != VTD_IR_INT_FORMAT_REMAP) {
+        goto do_not_translate;
+    }
+
+    index = addr.addr.index_h << 15 | le16_to_cpu(addr.addr.index_l);
+
+#define  VTD_IR_MSI_DATA_SUBHANDLE       (0x0000ffff)
+#define  VTD_IR_MSI_DATA_RESERVED        (0xffff0000)
+
+    if (addr.addr.sub_valid) {
+        /* See VT-d spec 5.1.2.2 and 5.1.3 on subhandle */
+        index += origin->data & VTD_IR_MSI_DATA_SUBHANDLE;
+    }
+
+    ret = vtd_remap_irq_get(iommu, index, &irq, sid);
+    if (ret) {
+        return ret;
+    }
+
+    if (addr.addr.sub_valid) {
+        VTD_DPRINTF(IR, "received MSI interrupt");
+        if (origin->data & VTD_IR_MSI_DATA_RESERVED) {
+            VTD_DPRINTF(GENERAL, "error: MSI data bits non-zero for "
+                        "interrupt remappable entry: 0x%"PRIx32,
+                        origin->data);
+            return -VTD_FR_IR_REQ_RSVD;
+        }
+    } else {
+        uint8_t vector = origin->data & 0xff;
+        VTD_DPRINTF(IR, "received IOAPIC interrupt");
+        /* IOAPIC entry vector should be aligned with IRTE vector
+         * (see vt-d spec 5.1.5.1). */
+        if (vector != irq.vector) {
+            VTD_DPRINTF(GENERAL, "IOAPIC vector inconsistent: "
+                        "entry: %d, IRTE: %d, index: %d",
+                        vector, irq.vector, index);
+        }
+    }
+
+    /*
+     * We'd better keep the last two bits, assuming that guest OS
+     * might modify it. Keep it does not hurt after all.
+     */
+    irq.msi_addr_last_bits = addr.addr.__not_care;
+
+    /* Translate VTDIrq to MSI message */
+    vtd_generate_msi_message(&irq, translated);
+
+    VTD_DPRINTF(IR, "mapping MSI 0x%"PRIx64":0x%"PRIx32 " -> "
+                "0x%"PRIx64":0x%"PRIx32, origin->address, origin->data,
+                translated->address, translated->data);
+    return 0;
+
+do_not_translate:
+    memcpy(translated, origin, sizeof(*origin));
+    return 0;
+}
+
+static int vtd_int_remap(X86IOMMUState *iommu, MSIMessage *src,
+                         MSIMessage *dst, uint16_t sid)
+{
+    return vtd_interrupt_remap_msi(INTEL_IOMMU_DEVICE(iommu),
+                                   src, dst, sid);
+}
+
+static MemTxResult vtd_mem_ir_read(void *opaque, hwaddr addr,
+                                   uint64_t *data, unsigned size,
+                                   MemTxAttrs attrs)
+{
+    return MEMTX_OK;
+}
+
+static MemTxResult vtd_mem_ir_write(void *opaque, hwaddr addr,
+                                    uint64_t value, unsigned size,
+                                    MemTxAttrs attrs)
+{
+    int ret = 0;
+    MSIMessage from = {}, to = {};
+    uint16_t sid = X86_IOMMU_SID_INVALID;
+
+    from.address = (uint64_t) addr + VTD_INTERRUPT_ADDR_FIRST;
+    from.data = (uint32_t) value;
+
+    if (!attrs.unspecified) {
+        /* We have explicit Source ID */
+        sid = attrs.requester_id;
+    }
+
+    ret = vtd_interrupt_remap_msi(opaque, &from, &to, sid);
+    if (ret) {
+        /* TODO: report error */
+        VTD_DPRINTF(GENERAL, "int remap fail for addr 0x%"PRIx64
+                    " data 0x%"PRIx32, from.address, from.data);
+        /* Drop this interrupt */
+        return MEMTX_ERROR;
+    }
+
+    VTD_DPRINTF(IR, "delivering MSI 0x%"PRIx64":0x%"PRIx32
+                " for device sid 0x%04x",
+                to.address, to.data, sid);
+
+    if (dma_memory_write(&address_space_memory, to.address,
+                         &to.data, size)) {
+        VTD_DPRINTF(GENERAL, "error: fail to write 0x%"PRIx64
+                    " value 0x%"PRIx32, to.address, to.data);
+    }
+
+    return MEMTX_OK;
+}
+
+static const MemoryRegionOps vtd_mem_ir_ops = {
+    .read_with_attrs = vtd_mem_ir_read,
+    .write_with_attrs = vtd_mem_ir_write,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+    .impl = {
+        .min_access_size = 4,
+        .max_access_size = 4,
+    },
+    .valid = {
+        .min_access_size = 4,
+        .max_access_size = 4,
+    },
+};
 
 VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn)
 {
@@ -1916,7 +2306,8 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn)
 
     if (!vtd_bus) {
         /* No corresponding free() */
-        vtd_bus = g_malloc0(sizeof(VTDBus) + sizeof(VTDAddressSpace *) * VTD_PCI_DEVFN_MAX);
+        vtd_bus = g_malloc0(sizeof(VTDBus) + sizeof(VTDAddressSpace *) * \
+                            X86_IOMMU_PCI_DEVFN_MAX);
         vtd_bus->bus = bus;
         key = (uintptr_t)bus;
         g_hash_table_insert(s->vtd_as_by_busptr, &key, vtd_bus);
@@ -1933,6 +2324,11 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn)
         vtd_dev_as->context_cache_entry.context_cache_gen = 0;
         memory_region_init_iommu(&vtd_dev_as->iommu, OBJECT(s),
                                  &s->iommu_ops, "intel_iommu", UINT64_MAX);
+        memory_region_init_io(&vtd_dev_as->iommu_ir, OBJECT(s),
+                              &vtd_mem_ir_ops, s, "intel_iommu_ir",
+                              VTD_INTERRUPT_ADDR_SIZE);
+        memory_region_add_subregion(&vtd_dev_as->iommu, VTD_INTERRUPT_ADDR_FIRST,
+                                    &vtd_dev_as->iommu_ir);
         address_space_init(&vtd_dev_as->as,
                            &vtd_dev_as->iommu, "intel_iommu");
     }
@@ -1944,6 +2340,8 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn)
  */
 static void vtd_init(IntelIOMMUState *s)
 {
+    X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s);
+
     memset(s->csr, 0, DMAR_REG_SIZE);
     memset(s->wmask, 0, DMAR_REG_SIZE);
     memset(s->w1cmask, 0, DMAR_REG_SIZE);
@@ -1965,6 +2363,10 @@ static void vtd_init(IntelIOMMUState *s)
              VTD_CAP_SAGAW | VTD_CAP_MAMV | VTD_CAP_PSI | VTD_CAP_SLLPS;
     s->ecap = VTD_ECAP_QI | VTD_ECAP_IRO;
 
+    if (x86_iommu->intr_supported) {
+        s->ecap |= VTD_ECAP_IR | VTD_ECAP_EIM | VTD_ECAP_MHMV;
+    }
+
     vtd_reset_context_cache(s);
     vtd_reset_iotlb(s);
 
@@ -2014,6 +2416,11 @@ static void vtd_init(IntelIOMMUState *s)
     /* Fault Recording Registers, 128-bit */
     vtd_define_quad(s, DMAR_FRCD_REG_0_0, 0, 0, 0);
     vtd_define_quad(s, DMAR_FRCD_REG_0_2, 0, 0, 0x8000000000000000ULL);
+
+    /*
+     * Interrupt remapping registers.
+     */
+    vtd_define_quad(s, DMAR_IRTA_REG, 0, 0xfffffffffffff80fULL, 0);
 }
 
 /* Should not reset address_spaces when reset because devices will still use
@@ -2032,7 +2439,7 @@ static AddressSpace *vtd_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
     IntelIOMMUState *s = opaque;
     VTDAddressSpace *vtd_as;
 
-    assert(0 <= devfn && devfn <= VTD_PCI_DEVFN_MAX);
+    assert(0 <= devfn && devfn <= X86_IOMMU_PCI_DEVFN_MAX);
 
     vtd_as = vtd_find_add_as(s, bus, devfn);
     return &vtd_as->as;
@@ -2040,8 +2447,10 @@ static AddressSpace *vtd_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
 
 static void vtd_realize(DeviceState *dev, Error **errp)
 {
-    PCIBus *bus = PC_MACHINE(qdev_get_machine())->bus;
+    PCMachineState *pcms = PC_MACHINE(qdev_get_machine());
+    PCIBus *bus = pcms->bus;
     IntelIOMMUState *s = INTEL_IOMMU_DEVICE(dev);
+    X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(dev);
 
     VTD_DPRINTF(GENERAL, "");
     memset(s->vtd_as_by_bus_num, 0, sizeof(s->vtd_as_by_bus_num));
@@ -2056,22 +2465,34 @@ static void vtd_realize(DeviceState *dev, Error **errp)
     vtd_init(s);
     sysbus_mmio_map(SYS_BUS_DEVICE(s), 0, Q35_HOST_BRIDGE_IOMMU_ADDR);
     pci_setup_iommu(bus, vtd_host_dma_iommu, dev);
+    /* Pseudo address space under root PCI bus. */
+    pcms->ioapic_as = vtd_host_dma_iommu(bus, s, Q35_PSEUDO_DEVFN_IOAPIC);
+
+    /* Currently Intel IOMMU IR only support "kernel-irqchip={off|split}" */
+    if (x86_iommu->intr_supported && kvm_irqchip_in_kernel() &&
+        !kvm_irqchip_is_split()) {
+        error_report("Intel Interrupt Remapping cannot work with "
+                     "kernel-irqchip=on, please use 'split|off'.");
+        exit(1);
+    }
 }
 
 static void vtd_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
+    X86IOMMUClass *x86_class = X86_IOMMU_CLASS(klass);
 
     dc->reset = vtd_reset;
-    dc->realize = vtd_realize;
     dc->vmsd = &vtd_vmstate;
     dc->props = vtd_properties;
     dc->hotpluggable = false;
+    x86_class->realize = vtd_realize;
+    x86_class->int_remap = vtd_int_remap;
 }
 
 static const TypeInfo vtd_info = {
     .name          = TYPE_INTEL_IOMMU_DEVICE,
-    .parent        = TYPE_SYS_BUS_DEVICE,
+    .parent        = TYPE_X86_IOMMU_DEVICE,
     .instance_size = sizeof(IntelIOMMUState),
     .class_init    = vtd_class_init,
 };
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index e5f514c6e3..0829a5064f 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -110,6 +110,8 @@
 /* Interrupt Address Range */
 #define VTD_INTERRUPT_ADDR_FIRST    0xfee00000ULL
 #define VTD_INTERRUPT_ADDR_LAST     0xfeefffffULL
+#define VTD_INTERRUPT_ADDR_SIZE     (VTD_INTERRUPT_ADDR_LAST - \
+                                     VTD_INTERRUPT_ADDR_FIRST + 1)
 
 /* The shift of source_id in the key of IOTLB hash table */
 #define VTD_IOTLB_SID_SHIFT         36
@@ -172,10 +174,19 @@
 #define VTD_RTADDR_RTT              (1ULL << 11)
 #define VTD_RTADDR_ADDR_MASK        (VTD_HAW_MASK ^ 0xfffULL)
 
+/* IRTA_REG */
+#define VTD_IRTA_ADDR_MASK          (VTD_HAW_MASK ^ 0xfffULL)
+#define VTD_IRTA_EIME               (1ULL << 11)
+#define VTD_IRTA_SIZE_MASK          (0xfULL)
+
 /* ECAP_REG */
 /* (offset >> 4) << 8 */
 #define VTD_ECAP_IRO                (DMAR_IOTLB_REG_OFFSET << 4)
 #define VTD_ECAP_QI                 (1ULL << 1)
+/* Interrupt Remapping support */
+#define VTD_ECAP_IR                 (1ULL << 3)
+#define VTD_ECAP_EIM                (1ULL << 4)
+#define VTD_ECAP_MHMV               (15ULL << 20)
 
 /* CAP_REG */
 /* (offset >> 4) << 24 */
@@ -265,6 +276,19 @@ typedef enum VTDFaultReason {
      * context-entry.
      */
     VTD_FR_CONTEXT_ENTRY_TT,
+
+    /* Interrupt remapping transition faults */
+    VTD_FR_IR_REQ_RSVD = 0x20, /* One or more IR request reserved
+                                * fields set */
+    VTD_FR_IR_INDEX_OVER = 0x21, /* Index value greater than max */
+    VTD_FR_IR_ENTRY_P = 0x22,    /* Present (P) not set in IRTE */
+    VTD_FR_IR_ROOT_INVAL = 0x23, /* IR Root table invalid */
+    VTD_FR_IR_IRTE_RSVD = 0x24,  /* IRTE Rsvd field non-zero with
+                                  * Present flag set */
+    VTD_FR_IR_REQ_COMPAT = 0x25, /* Encountered compatible IR
+                                  * request while disabled */
+    VTD_FR_IR_SID_ERR = 0x26,   /* Invalid Source-ID */
+
     /* This is not a normal fault reason. We use this to indicate some faults
      * that are not referenced by the VT-d specification.
      * Fault event with such reason should not be recorded.
@@ -275,17 +299,35 @@ typedef enum VTDFaultReason {
 
 #define VTD_CONTEXT_CACHE_GEN_MAX       0xffffffffUL
 
+/* Interrupt Entry Cache Invalidation Descriptor: VT-d 6.5.2.7. */
+struct VTDInvDescIEC {
+    uint32_t type:4;            /* Should always be 0x4 */
+    uint32_t granularity:1;     /* If set, it's global IR invalidation */
+    uint32_t resved_1:22;
+    uint32_t index_mask:5;      /* 2^N for continuous int invalidation */
+    uint32_t index:16;          /* Start index to invalidate */
+    uint32_t reserved_2:16;
+};
+typedef struct VTDInvDescIEC VTDInvDescIEC;
+
 /* Queued Invalidation Descriptor */
-struct VTDInvDesc {
-    uint64_t lo;
-    uint64_t hi;
+union VTDInvDesc {
+    struct {
+        uint64_t lo;
+        uint64_t hi;
+    };
+    union {
+        VTDInvDescIEC iec;
+    };
 };
-typedef struct VTDInvDesc VTDInvDesc;
+typedef union VTDInvDesc VTDInvDesc;
 
 /* Masks for struct VTDInvDesc */
 #define VTD_INV_DESC_TYPE               0xf
 #define VTD_INV_DESC_CC                 0x1 /* Context-cache Invalidate Desc */
 #define VTD_INV_DESC_IOTLB              0x2
+#define VTD_INV_DESC_IEC                0x4 /* Interrupt Entry Cache
+                                               Invalidate Descriptor */
 #define VTD_INV_DESC_WAIT               0x5 /* Invalidation Wait Descriptor */
 #define VTD_INV_DESC_NONE               0   /* Not an Invalidate Descriptor */
 
diff --git a/hw/i386/kvm/pci-assign.c b/hw/i386/kvm/pci-assign.c
index 1a429e5402..8238fbc630 100644
--- a/hw/i386/kvm/pci-assign.c
+++ b/hw/i386/kvm/pci-assign.c
@@ -974,10 +974,9 @@ static void assigned_dev_update_msi(PCIDevice *pci_dev)
     }
 
     if (ctrl_byte & PCI_MSI_FLAGS_ENABLE) {
-        MSIMessage msg = msi_get_message(pci_dev, 0);
         int virq;
 
-        virq = kvm_irqchip_add_msi_route(kvm_state, msg, pci_dev);
+        virq = kvm_irqchip_add_msi_route(kvm_state, 0, pci_dev);
         if (virq < 0) {
             perror("assigned_dev_update_msi: kvm_irqchip_add_msi_route");
             return;
@@ -1016,6 +1015,7 @@ static void assigned_dev_update_msi_msg(PCIDevice *pci_dev)
 
     kvm_irqchip_update_msi_route(kvm_state, assigned_dev->msi_virq[0],
                                  msi_get_message(pci_dev, 0), pci_dev);
+    kvm_irqchip_commit_routes(kvm_state);
 }
 
 static bool assigned_dev_msix_masked(MSIXTableEntry *entry)
@@ -1042,7 +1042,6 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
     uint16_t entries_nr = 0;
     int i, r = 0;
     MSIXTableEntry *entry = adev->msix_table;
-    MSIMessage msg;
 
     /* Get the usable entry number for allocating */
     for (i = 0; i < adev->msix_max; i++, entry++) {
@@ -1079,9 +1078,7 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
             continue;
         }
 
-        msg.address = entry->addr_lo | ((uint64_t)entry->addr_hi << 32);
-        msg.data = entry->data;
-        r = kvm_irqchip_add_msi_route(kvm_state, msg, pci_dev);
+        r = kvm_irqchip_add_msi_route(kvm_state, i, pci_dev);
         if (r < 0) {
             return r;
         }
@@ -1606,6 +1603,7 @@ static void assigned_dev_msix_mmio_write(void *opaque, hwaddr addr,
                 if (ret) {
                     error_report("Error updating irq routing entry (%d)", ret);
                 }
+                kvm_irqchip_commit_routes(kvm_state);
             }
         }
     }
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index ac7a4d5a47..9e3c70fb23 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -1473,6 +1473,9 @@ void pc_memory_init(PCMachineState *pcms,
         rom_add_option(option_rom[i].name, option_rom[i].bootindex);
     }
     pcms->fw_cfg = fw_cfg;
+
+    /* Init default IOAPIC address space */
+    pcms->ioapic_as = &address_space_memory;
 }
 
 qemu_irq pc_allocate_cpu_irq(void)
diff --git a/hw/i386/trace-events b/hw/i386/trace-events
index ea77bc24c3..b4882c1157 100644
--- a/hw/i386/trace-events
+++ b/hw/i386/trace-events
@@ -10,3 +10,6 @@ xen_pv_mmio_write(uint64_t addr) "WARNING: write to Xen PV Device MMIO space (ad
 # hw/i386/pc.c
 mhp_pc_dimm_assigned_slot(int slot) "0x%d"
 mhp_pc_dimm_assigned_address(uint64_t addr) "0x%"PRIx64
+
+# hw/i386/x86-iommu.c
+x86_iommu_iec_notify(bool global, uint32_t index, uint32_t mask) "Notify IEC invalidation: global=%d index=%" PRIu32 " mask=%" PRIu32
diff --git a/hw/i386/x86-iommu.c b/hw/i386/x86-iommu.c
new file mode 100644
index 0000000000..ce26b2a71d
--- /dev/null
+++ b/hw/i386/x86-iommu.c
@@ -0,0 +1,128 @@
+/*
+ * QEMU emulation of common X86 IOMMU
+ *
+ * Copyright (C) 2016 Peter Xu, Red Hat <peterx@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "hw/sysbus.h"
+#include "hw/boards.h"
+#include "hw/i386/x86-iommu.h"
+#include "qemu/error-report.h"
+#include "trace.h"
+
+void x86_iommu_iec_register_notifier(X86IOMMUState *iommu,
+                                     iec_notify_fn fn, void *data)
+{
+    IEC_Notifier *notifier = g_new0(IEC_Notifier, 1);
+
+    notifier->iec_notify = fn;
+    notifier->private = data;
+
+    QLIST_INSERT_HEAD(&iommu->iec_notifiers, notifier, list);
+}
+
+void x86_iommu_iec_notify_all(X86IOMMUState *iommu, bool global,
+                              uint32_t index, uint32_t mask)
+{
+    IEC_Notifier *notifier;
+
+    trace_x86_iommu_iec_notify(global, index, mask);
+
+    QLIST_FOREACH(notifier, &iommu->iec_notifiers, list) {
+        if (notifier->iec_notify) {
+            notifier->iec_notify(notifier->private, global,
+                                 index, mask);
+        }
+    }
+}
+
+/* Default X86 IOMMU device */
+static X86IOMMUState *x86_iommu_default = NULL;
+
+static void x86_iommu_set_default(X86IOMMUState *x86_iommu)
+{
+    assert(x86_iommu);
+
+    if (x86_iommu_default) {
+        error_report("QEMU does not support multiple vIOMMUs "
+                     "for x86 yet.");
+        exit(1);
+    }
+
+    x86_iommu_default = x86_iommu;
+}
+
+X86IOMMUState *x86_iommu_get_default(void)
+{
+    return x86_iommu_default;
+}
+
+static void x86_iommu_realize(DeviceState *dev, Error **errp)
+{
+    X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(dev);
+    X86IOMMUClass *x86_class = X86_IOMMU_GET_CLASS(dev);
+    QLIST_INIT(&x86_iommu->iec_notifiers);
+    if (x86_class->realize) {
+        x86_class->realize(dev, errp);
+    }
+    x86_iommu_set_default(X86_IOMMU_DEVICE(dev));
+}
+
+static void x86_iommu_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    dc->realize = x86_iommu_realize;
+}
+
+static bool x86_iommu_intremap_prop_get(Object *o, Error **errp)
+{
+    X86IOMMUState *s = X86_IOMMU_DEVICE(o);
+    return s->intr_supported;
+}
+
+static void x86_iommu_intremap_prop_set(Object *o, bool value, Error **errp)
+{
+    X86IOMMUState *s = X86_IOMMU_DEVICE(o);
+    s->intr_supported = value;
+}
+
+static void x86_iommu_instance_init(Object *o)
+{
+    X86IOMMUState *s = X86_IOMMU_DEVICE(o);
+
+    /* By default, do not support IR */
+    s->intr_supported = false;
+    object_property_add_bool(o, "intremap", x86_iommu_intremap_prop_get,
+                             x86_iommu_intremap_prop_set, NULL);
+}
+
+static const TypeInfo x86_iommu_info = {
+    .name          = TYPE_X86_IOMMU_DEVICE,
+    .parent        = TYPE_SYS_BUS_DEVICE,
+    .instance_init = x86_iommu_instance_init,
+    .instance_size = sizeof(X86IOMMUState),
+    .class_init    = x86_iommu_class_init,
+    .class_size    = sizeof(X86IOMMUClass),
+    .abstract      = true,
+};
+
+static void x86_iommu_register_types(void)
+{
+    type_register_static(&x86_iommu_info);
+}
+
+type_init(x86_iommu_register_types)
diff --git a/hw/input/virtio-input.c b/hw/input/virtio-input.c
index edf69903a6..a87fd6862e 100644
--- a/hw/input/virtio-input.c
+++ b/hw/input/virtio-input.c
@@ -217,26 +217,14 @@ static void virtio_input_reset(VirtIODevice *vdev)
     }
 }
 
-static void virtio_input_save(QEMUFile *f, void *opaque)
-{
-    VirtIOInput *vinput = opaque;
-    VirtIODevice *vdev = VIRTIO_DEVICE(vinput);
-
-    virtio_save(vdev, f);
-}
-
-static int virtio_input_load(QEMUFile *f, void *opaque, int version_id)
+static int virtio_input_load(QEMUFile *f, void *opaque, size_t size)
 {
     VirtIOInput *vinput = opaque;
     VirtIOInputClass *vic = VIRTIO_INPUT_GET_CLASS(vinput);
     VirtIODevice *vdev = VIRTIO_DEVICE(vinput);
     int ret;
 
-    if (version_id != VIRTIO_INPUT_VM_VERSION) {
-        return -EINVAL;
-    }
-
-    ret = virtio_load(vdev, f, version_id);
+    ret = virtio_load(vdev, f, VIRTIO_INPUT_VM_VERSION);
     if (ret) {
         return ret;
     }
@@ -280,20 +268,14 @@ static void virtio_input_device_realize(DeviceState *dev, Error **errp)
                 vinput->cfg_size);
     vinput->evt = virtio_add_queue(vdev, 64, virtio_input_handle_evt);
     vinput->sts = virtio_add_queue(vdev, 64, virtio_input_handle_sts);
-
-    register_savevm(dev, "virtio-input", -1, VIRTIO_INPUT_VM_VERSION,
-                    virtio_input_save, virtio_input_load, vinput);
 }
 
 static void virtio_input_device_unrealize(DeviceState *dev, Error **errp)
 {
     VirtIOInputClass *vic = VIRTIO_INPUT_GET_CLASS(dev);
     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
-    VirtIOInput *vinput = VIRTIO_INPUT(dev);
     Error *local_err = NULL;
 
-    unregister_savevm(dev, "virtio-input", vinput);
-
     if (vic->unrealize) {
         vic->unrealize(dev, &local_err);
         if (local_err) {
@@ -304,6 +286,9 @@ static void virtio_input_device_unrealize(DeviceState *dev, Error **errp)
     virtio_cleanup(vdev);
 }
 
+VMSTATE_VIRTIO_DEVICE(input, VIRTIO_INPUT_VM_VERSION, virtio_input_load,
+                      virtio_vmstate_save);
+
 static Property virtio_input_properties[] = {
     DEFINE_PROP_STRING("serial", VirtIOInput, serial),
     DEFINE_PROP_END_OF_LIST(),
@@ -315,6 +300,7 @@ static void virtio_input_class_init(ObjectClass *klass, void *data)
     VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
 
     dc->props          = virtio_input_properties;
+    dc->vmsd           = &vmstate_virtio_input;
     set_bit(DEVICE_CATEGORY_INPUT, dc->categories);
     vdc->realize      = virtio_input_device_realize;
     vdc->unrealize    = virtio_input_device_unrealize;
diff --git a/hw/intc/ioapic.c b/hw/intc/ioapic.c
index 273bb0854c..2d3282a864 100644
--- a/hw/intc/ioapic.c
+++ b/hw/intc/ioapic.c
@@ -29,6 +29,9 @@
 #include "hw/i386/ioapic_internal.h"
 #include "include/hw/pci/msi.h"
 #include "sysemu/kvm.h"
+#include "target-i386/cpu.h"
+#include "hw/i386/apic-msidef.h"
+#include "hw/i386/x86-iommu.h"
 
 //#define DEBUG_IOAPIC
 
@@ -48,16 +51,56 @@ static IOAPICCommonState *ioapics[MAX_IOAPICS];
 /* global variable from ioapic_common.c */
 extern int ioapic_no;
 
+struct ioapic_entry_info {
+    /* fields parsed from IOAPIC entries */
+    uint8_t masked;
+    uint8_t trig_mode;
+    uint16_t dest_idx;
+    uint8_t dest_mode;
+    uint8_t delivery_mode;
+    uint8_t vector;
+
+    /* MSI message generated from above parsed fields */
+    uint32_t addr;
+    uint32_t data;
+};
+
+static void ioapic_entry_parse(uint64_t entry, struct ioapic_entry_info *info)
+{
+    memset(info, 0, sizeof(*info));
+    info->masked = (entry >> IOAPIC_LVT_MASKED_SHIFT) & 1;
+    info->trig_mode = (entry >> IOAPIC_LVT_TRIGGER_MODE_SHIFT) & 1;
+    /*
+     * By default, this would be dest_id[8] + reserved[8]. When IR
+     * is enabled, this would be interrupt_index[15] +
+     * interrupt_format[1]. This field never means anything, but
+     * only used to generate corresponding MSI.
+     */
+    info->dest_idx = (entry >> IOAPIC_LVT_DEST_IDX_SHIFT) & 0xffff;
+    info->dest_mode = (entry >> IOAPIC_LVT_DEST_MODE_SHIFT) & 1;
+    info->delivery_mode = (entry >> IOAPIC_LVT_DELIV_MODE_SHIFT) \
+        & IOAPIC_DM_MASK;
+    if (info->delivery_mode == IOAPIC_DM_EXTINT) {
+        info->vector = pic_read_irq(isa_pic);
+    } else {
+        info->vector = entry & IOAPIC_VECTOR_MASK;
+    }
+
+    info->addr = APIC_DEFAULT_ADDRESS | \
+        (info->dest_idx << MSI_ADDR_DEST_IDX_SHIFT) | \
+        (info->dest_mode << MSI_ADDR_DEST_MODE_SHIFT);
+    info->data = (info->vector << MSI_DATA_VECTOR_SHIFT) | \
+        (info->trig_mode << MSI_DATA_TRIGGER_SHIFT) | \
+        (info->delivery_mode << MSI_DATA_DELIVERY_MODE_SHIFT);
+}
+
 static void ioapic_service(IOAPICCommonState *s)
 {
+    AddressSpace *ioapic_as = PC_MACHINE(qdev_get_machine())->ioapic_as;
+    struct ioapic_entry_info info;
     uint8_t i;
-    uint8_t trig_mode;
-    uint8_t vector;
-    uint8_t delivery_mode;
     uint32_t mask;
     uint64_t entry;
-    uint8_t dest;
-    uint8_t dest_mode;
 
     for (i = 0; i < IOAPIC_NUM_PINS; i++) {
         mask = 1 << i;
@@ -65,26 +108,18 @@ static void ioapic_service(IOAPICCommonState *s)
             int coalesce = 0;
 
             entry = s->ioredtbl[i];
-            if (!(entry & IOAPIC_LVT_MASKED)) {
-                trig_mode = ((entry >> IOAPIC_LVT_TRIGGER_MODE_SHIFT) & 1);
-                dest = entry >> IOAPIC_LVT_DEST_SHIFT;
-                dest_mode = (entry >> IOAPIC_LVT_DEST_MODE_SHIFT) & 1;
-                delivery_mode =
-                    (entry >> IOAPIC_LVT_DELIV_MODE_SHIFT) & IOAPIC_DM_MASK;
-                if (trig_mode == IOAPIC_TRIGGER_EDGE) {
+            ioapic_entry_parse(entry, &info);
+            if (!info.masked) {
+                if (info.trig_mode == IOAPIC_TRIGGER_EDGE) {
                     s->irr &= ~mask;
                 } else {
                     coalesce = s->ioredtbl[i] & IOAPIC_LVT_REMOTE_IRR;
                     s->ioredtbl[i] |= IOAPIC_LVT_REMOTE_IRR;
                 }
-                if (delivery_mode == IOAPIC_DM_EXTINT) {
-                    vector = pic_read_irq(isa_pic);
-                } else {
-                    vector = entry & IOAPIC_VECTOR_MASK;
-                }
+
 #ifdef CONFIG_KVM
                 if (kvm_irqchip_is_split()) {
-                    if (trig_mode == IOAPIC_TRIGGER_EDGE) {
+                    if (info.trig_mode == IOAPIC_TRIGGER_EDGE) {
                         kvm_set_irq(kvm_state, i, 1);
                         kvm_set_irq(kvm_state, i, 0);
                     } else {
@@ -97,8 +132,11 @@ static void ioapic_service(IOAPICCommonState *s)
 #else
                 (void)coalesce;
 #endif
-                apic_deliver_irq(dest, dest_mode, delivery_mode, vector,
-                                 trig_mode);
+                /* No matter whether IR is enabled, we translate
+                 * the IOAPIC message into a MSI one, and its
+                 * address space will decide whether we need a
+                 * translation. */
+                stl_le_phys(ioapic_as, info.addr, info.data);
             }
         }
     }
@@ -149,30 +187,11 @@ static void ioapic_update_kvm_routes(IOAPICCommonState *s)
 
     if (kvm_irqchip_is_split()) {
         for (i = 0; i < IOAPIC_NUM_PINS; i++) {
-            uint64_t entry = s->ioredtbl[i];
-            uint8_t trig_mode;
-            uint8_t delivery_mode;
-            uint8_t dest;
-            uint8_t dest_mode;
-            uint64_t pin_polarity;
             MSIMessage msg;
-
-            trig_mode = ((entry >> IOAPIC_LVT_TRIGGER_MODE_SHIFT) & 1);
-            dest = entry >> IOAPIC_LVT_DEST_SHIFT;
-            dest_mode = (entry >> IOAPIC_LVT_DEST_MODE_SHIFT) & 1;
-            pin_polarity = (entry >> IOAPIC_LVT_TRIGGER_MODE_SHIFT) & 1;
-            delivery_mode =
-                (entry >> IOAPIC_LVT_DELIV_MODE_SHIFT) & IOAPIC_DM_MASK;
-
-            msg.address = APIC_DEFAULT_ADDRESS;
-            msg.address |= dest_mode << 2;
-            msg.address |= dest << 12;
-
-            msg.data = entry & IOAPIC_VECTOR_MASK;
-            msg.data |= delivery_mode << APIC_DELIVERY_MODE_SHIFT;
-            msg.data |= pin_polarity << APIC_POLARITY_SHIFT;
-            msg.data |= trig_mode << APIC_TRIG_MODE_SHIFT;
-
+            struct ioapic_entry_info info;
+            ioapic_entry_parse(s->ioredtbl[i], &info);
+            msg.address = info.addr;
+            msg.data = info.data;
             kvm_irqchip_update_msi_route(kvm_state, i, msg, NULL);
         }
         kvm_irqchip_commit_routes(kvm_state);
@@ -180,6 +199,16 @@ static void ioapic_update_kvm_routes(IOAPICCommonState *s)
 #endif
 }
 
+#ifdef CONFIG_KVM
+static void ioapic_iec_notifier(void *private, bool global,
+                                uint32_t index, uint32_t mask)
+{
+    IOAPICCommonState *s = (IOAPICCommonState *)private;
+    /* For simplicity, we just update all the routes */
+    ioapic_update_kvm_routes(s);
+}
+#endif
+
 void ioapic_eoi_broadcast(int vector)
 {
     IOAPICCommonState *s;
@@ -336,6 +365,24 @@ static const MemoryRegionOps ioapic_io_ops = {
     .endianness = DEVICE_NATIVE_ENDIAN,
 };
 
+static void ioapic_machine_done_notify(Notifier *notifier, void *data)
+{
+#ifdef CONFIG_KVM
+    IOAPICCommonState *s = container_of(notifier, IOAPICCommonState,
+                                        machine_done);
+
+    if (kvm_irqchip_is_split()) {
+        X86IOMMUState *iommu = x86_iommu_get_default();
+        if (iommu) {
+            /* Register this IOAPIC with IOMMU IEC notifier, so that
+             * when there are IR invalidates, we can be notified to
+             * update kernel IR cache. */
+            x86_iommu_iec_register_notifier(iommu, ioapic_iec_notifier, s);
+        }
+    }
+#endif
+}
+
 static void ioapic_realize(DeviceState *dev, Error **errp)
 {
     IOAPICCommonState *s = IOAPIC_COMMON(dev);
@@ -346,6 +393,8 @@ static void ioapic_realize(DeviceState *dev, Error **errp)
     qdev_init_gpio_in(dev, ioapic_set_irq, IOAPIC_NUM_PINS);
 
     ioapics[ioapic_no] = s;
+    s->machine_done.notify = ioapic_machine_done_notify;
+    qemu_add_machine_init_done_notifier(&s->machine_done);
 }
 
 static void ioapic_class_init(ObjectClass *klass, void *data)
diff --git a/hw/mem/nvdimm.c b/hw/mem/nvdimm.c
index 81896c0e84..7895805a23 100644
--- a/hw/mem/nvdimm.c
+++ b/hw/mem/nvdimm.c
@@ -98,6 +98,7 @@ static void nvdimm_realize(PCDIMMDevice *dimm, Error **errp)
                    "small to contain nvdimm label (0x%" PRIx64 ") and "
                    "aligned PMEM (0x%" PRIx64 ")",
                    path, memory_region_size(mr), nvdimm->label_size, align);
+        g_free(path);
         return;
     }
 
diff --git a/hw/mips/gt64xxx_pci.c b/hw/mips/gt64xxx_pci.c
index 3f4523df22..4811843ab6 100644
--- a/hw/mips/gt64xxx_pci.c
+++ b/hw/mips/gt64xxx_pci.c
@@ -1167,7 +1167,6 @@ PCIBus *gt64120_register(qemu_irq *pic)
     DeviceState *dev;
 
     dev = qdev_create(NULL, TYPE_GT64120_PCI_HOST_BRIDGE);
-    qdev_init_nofail(dev);
     d = GT64120_PCI_HOST_BRIDGE(dev);
     phb = PCI_HOST_BRIDGE(dev);
     memory_region_init(&d->pci0_mem, OBJECT(dev), "pci0-mem", UINT32_MAX);
@@ -1178,6 +1177,7 @@ PCIBus *gt64120_register(qemu_irq *pic)
                                 &d->pci0_mem,
                                 get_system_io(),
                                 PCI_DEVFN(18, 0), 4, TYPE_PCI_BUS);
+    qdev_init_nofail(dev);
     memory_region_init_io(&d->ISD_mem, OBJECT(dev), &isd_mem_ops, d, "isd-mem", 0x1000);
 
     pci_create_simple(phb->bus, PCI_DEVFN(0, 0), "gt64120_pci");
diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c
index 7e7c843b32..40a2ebca20 100644
--- a/hw/misc/ivshmem.c
+++ b/hw/misc/ivshmem.c
@@ -322,6 +322,7 @@ static int ivshmem_vector_unmask(PCIDevice *dev, unsigned vector,
     if (ret < 0) {
         return ret;
     }
+    kvm_irqchip_commit_routes(kvm_state);
 
     return kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, NULL, v->virq);
 }
@@ -441,13 +442,12 @@ static void ivshmem_add_kvm_msi_virq(IVShmemState *s, int vector,
                                      Error **errp)
 {
     PCIDevice *pdev = PCI_DEVICE(s);
-    MSIMessage msg = msix_get_message(pdev, vector);
     int ret;
 
     IVSHMEM_DPRINTF("ivshmem_add_kvm_msi_virq vector:%d\n", vector);
     assert(!s->msi_vectors[vector].pdev);
 
-    ret = kvm_irqchip_add_msi_route(kvm_state, msg, pdev);
+    ret = kvm_irqchip_add_msi_route(kvm_state, vector, pdev);
     if (ret < 0) {
         error_setg(errp, "kvm_irqchip_add_msi_route failed");
         return;
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 56d85062ff..01f1351554 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -1492,7 +1492,7 @@ static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
     virtio_net_set_queues(n);
 }
 
-static void virtio_net_save(QEMUFile *f, void *opaque)
+static void virtio_net_save(QEMUFile *f, void *opaque, size_t size)
 {
     VirtIONet *n = opaque;
     VirtIODevice *vdev = VIRTIO_DEVICE(n);
@@ -1538,15 +1538,12 @@ static void virtio_net_save_device(VirtIODevice *vdev, QEMUFile *f)
     }
 }
 
-static int virtio_net_load(QEMUFile *f, void *opaque, int version_id)
+static int virtio_net_load(QEMUFile *f, void *opaque, size_t size)
 {
     VirtIONet *n = opaque;
     VirtIODevice *vdev = VIRTIO_DEVICE(n);
 
-    if (version_id < 2 || version_id > VIRTIO_NET_VM_VERSION)
-        return -EINVAL;
-
-    return virtio_load(vdev, f, version_id);
+    return virtio_load(vdev, f, VIRTIO_NET_VM_VERSION);
 }
 
 static int virtio_net_load_device(VirtIODevice *vdev, QEMUFile *f,
@@ -1562,68 +1559,49 @@ static int virtio_net_load_device(VirtIODevice *vdev, QEMUFile *f,
                                virtio_vdev_has_feature(vdev,
                                                        VIRTIO_F_VERSION_1));
 
-    if (version_id >= 3)
-        n->status = qemu_get_be16(f);
+    n->status = qemu_get_be16(f);
 
-    if (version_id >= 4) {
-        if (version_id < 8) {
-            n->promisc = qemu_get_be32(f);
-            n->allmulti = qemu_get_be32(f);
-        } else {
-            n->promisc = qemu_get_byte(f);
-            n->allmulti = qemu_get_byte(f);
-        }
-    }
+    n->promisc = qemu_get_byte(f);
+    n->allmulti = qemu_get_byte(f);
 
-    if (version_id >= 5) {
-        n->mac_table.in_use = qemu_get_be32(f);
-        /* MAC_TABLE_ENTRIES may be different from the saved image */
-        if (n->mac_table.in_use <= MAC_TABLE_ENTRIES) {
-            qemu_get_buffer(f, n->mac_table.macs,
-                            n->mac_table.in_use * ETH_ALEN);
-        } else {
-            int64_t i;
-
-            /* Overflow detected - can happen if source has a larger MAC table.
-             * We simply set overflow flag so there's no need to maintain the
-             * table of addresses, discard them all.
-             * Note: 64 bit math to avoid integer overflow.
-             */
-            for (i = 0; i < (int64_t)n->mac_table.in_use * ETH_ALEN; ++i) {
-                qemu_get_byte(f);
-            }
-            n->mac_table.multi_overflow = n->mac_table.uni_overflow = 1;
-            n->mac_table.in_use = 0;
+    n->mac_table.in_use = qemu_get_be32(f);
+    /* MAC_TABLE_ENTRIES may be different from the saved image */
+    if (n->mac_table.in_use <= MAC_TABLE_ENTRIES) {
+        qemu_get_buffer(f, n->mac_table.macs,
+                        n->mac_table.in_use * ETH_ALEN);
+    } else {
+        int64_t i;
+
+        /* Overflow detected - can happen if source has a larger MAC table.
+         * We simply set overflow flag so there's no need to maintain the
+         * table of addresses, discard them all.
+         * Note: 64 bit math to avoid integer overflow.
+         */
+        for (i = 0; i < (int64_t)n->mac_table.in_use * ETH_ALEN; ++i) {
+            qemu_get_byte(f);
         }
+        n->mac_table.multi_overflow = n->mac_table.uni_overflow = 1;
+        n->mac_table.in_use = 0;
     }
  
-    if (version_id >= 6)
-        qemu_get_buffer(f, (uint8_t *)n->vlans, MAX_VLAN >> 3);
+    qemu_get_buffer(f, (uint8_t *)n->vlans, MAX_VLAN >> 3);
 
-    if (version_id >= 7) {
-        if (qemu_get_be32(f) && !peer_has_vnet_hdr(n)) {
-            error_report("virtio-net: saved image requires vnet_hdr=on");
-            return -1;
-        }
+    if (qemu_get_be32(f) && !peer_has_vnet_hdr(n)) {
+        error_report("virtio-net: saved image requires vnet_hdr=on");
+        return -1;
     }
 
-    if (version_id >= 9) {
-        n->mac_table.multi_overflow = qemu_get_byte(f);
-        n->mac_table.uni_overflow = qemu_get_byte(f);
-    }
+    n->mac_table.multi_overflow = qemu_get_byte(f);
+    n->mac_table.uni_overflow = qemu_get_byte(f);
 
-    if (version_id >= 10) {
-        n->alluni = qemu_get_byte(f);
-        n->nomulti = qemu_get_byte(f);
-        n->nouni = qemu_get_byte(f);
-        n->nobcast = qemu_get_byte(f);
-    }
+    n->alluni = qemu_get_byte(f);
+    n->nomulti = qemu_get_byte(f);
+    n->nouni = qemu_get_byte(f);
+    n->nobcast = qemu_get_byte(f);
 
-    if (version_id >= 11) {
-        if (qemu_get_byte(f) && !peer_has_ufo(n)) {
-            error_report("virtio-net: saved image requires TUN_F_UFO support");
-            return -1;
-        }
+    if (qemu_get_byte(f) && !peer_has_ufo(n)) {
+        error_report("virtio-net: saved image requires TUN_F_UFO support");
+        return -1;
     }
 
     if (n->max_queues > 1) {
@@ -1809,8 +1787,6 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp)
     nc->rxfilter_notify_enabled = 1;
 
     n->qdev = dev;
-    register_savevm(dev, "virtio-net", -1, VIRTIO_NET_VM_VERSION,
-                    virtio_net_save, virtio_net_load, n);
 }
 
 static void virtio_net_device_unrealize(DeviceState *dev, Error **errp)
@@ -1822,8 +1798,6 @@ static void virtio_net_device_unrealize(DeviceState *dev, Error **errp)
     /* This will stop vhost backend if appropriate. */
     virtio_net_set_status(vdev, 0);
 
-    unregister_savevm(dev, "virtio-net", n);
-
     g_free(n->netclient_name);
     n->netclient_name = NULL;
     g_free(n->netclient_type);
@@ -1858,6 +1832,9 @@ static void virtio_net_instance_init(Object *obj)
                                   DEVICE(n), NULL);
 }
 
+VMSTATE_VIRTIO_DEVICE(net, VIRTIO_NET_VM_VERSION, virtio_net_load,
+                      virtio_net_save);
+
 static Property virtio_net_properties[] = {
     DEFINE_PROP_BIT("csum", VirtIONet, host_features, VIRTIO_NET_F_CSUM, true),
     DEFINE_PROP_BIT("guest_csum", VirtIONet, host_features,
@@ -1912,6 +1889,7 @@ static void virtio_net_class_init(ObjectClass *klass, void *data)
     VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
 
     dc->props = virtio_net_properties;
+    dc->vmsd = &vmstate_virtio_net;
     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
     vdc->realize = virtio_net_device_realize;
     vdc->unrealize = virtio_net_device_unrealize;
diff --git a/hw/pci-host/apb.c b/hw/pci-host/apb.c
index babbbef0c2..16587f8373 100644
--- a/hw/pci-host/apb.c
+++ b/hw/pci-host/apb.c
@@ -670,6 +670,13 @@ PCIBus *pci_apb_init(hwaddr special_base,
 
     /* Ultrasparc PBM main bus */
     dev = qdev_create(NULL, TYPE_APB);
+    d = APB_DEVICE(dev);
+    phb = PCI_HOST_BRIDGE(dev);
+    phb->bus = pci_register_bus(DEVICE(phb), "pci",
+                                pci_apb_set_irq, pci_pbm_map_irq, d,
+                                &d->pci_mmio,
+                                get_system_io(),
+                                0, 32, TYPE_PCI_BUS);
     qdev_init_nofail(dev);
     s = SYS_BUS_DEVICE(dev);
     /* apb_config */
@@ -678,18 +685,10 @@ PCIBus *pci_apb_init(hwaddr special_base,
     sysbus_mmio_map(s, 1, special_base + 0x1000000ULL);
     /* pci_ioport */
     sysbus_mmio_map(s, 2, special_base + 0x2000000ULL);
-    d = APB_DEVICE(dev);
 
     memory_region_init(&d->pci_mmio, OBJECT(s), "pci-mmio", 0x100000000ULL);
     memory_region_add_subregion(get_system_memory(), mem_base, &d->pci_mmio);
 
-    phb = PCI_HOST_BRIDGE(dev);
-    phb->bus = pci_register_bus(DEVICE(phb), "pci",
-                                pci_apb_set_irq, pci_pbm_map_irq, d,
-                                &d->pci_mmio,
-                                get_system_io(),
-                                0, 32, TYPE_PCI_BUS);
-
     *pbm_irqs = d->pbm_irqs;
     d->ivec_irqs = ivec_irqs;
 
diff --git a/hw/pci-host/grackle.c b/hw/pci-host/grackle.c
index 8f91216157..2c8acdaaca 100644
--- a/hw/pci-host/grackle.c
+++ b/hw/pci-host/grackle.c
@@ -72,7 +72,6 @@ PCIBus *pci_grackle_init(uint32_t base, qemu_irq *pic,
     GrackleState *d;
 
     dev = qdev_create(NULL, TYPE_GRACKLE_PCI_HOST_BRIDGE);
-    qdev_init_nofail(dev);
     s = SYS_BUS_DEVICE(dev);
     phb = PCI_HOST_BRIDGE(dev);
     d = GRACKLE_PCI_HOST_BRIDGE(dev);
@@ -92,6 +91,7 @@ PCIBus *pci_grackle_init(uint32_t base, qemu_irq *pic,
                                 0, 4, TYPE_PCI_BUS);
 
     pci_create_simple(phb->bus, 0, "grackle");
+    qdev_init_nofail(dev);
 
     sysbus_mmio_map(s, 0, base);
     sysbus_mmio_map(s, 1, base + 0x00200000);
diff --git a/hw/pci-host/prep.c b/hw/pci-host/prep.c
index 487e32ecbf..5580293f93 100644
--- a/hw/pci-host/prep.c
+++ b/hw/pci-host/prep.c
@@ -247,6 +247,7 @@ static void raven_pcihost_realizefn(DeviceState *d, Error **errp)
     memory_region_add_subregion(address_space_mem, 0xbffffff0, &s->pci_intack);
 
     /* TODO Remove once realize propagates to child devices. */
+    object_property_set_bool(OBJECT(&s->pci_bus), true, "realized", errp);
     object_property_set_bool(OBJECT(&s->pci_dev), true, "realized", errp);
 }
 
diff --git a/hw/pci-host/versatile.c b/hw/pci-host/versatile.c
index 0792c4501c..467cbb9cb8 100644
--- a/hw/pci-host/versatile.c
+++ b/hw/pci-host/versatile.c
@@ -455,6 +455,7 @@ static void pci_vpb_realize(DeviceState *dev, Error **errp)
     }
 
     /* TODO Remove once realize propagates to child devices. */
+    object_property_set_bool(OBJECT(&s->pci_bus), true, "realized", errp);
     object_property_set_bool(OBJECT(&s->pci_dev), true, "realized", errp);
 }
 
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 149994b815..728c6d4b3b 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -2596,6 +2596,21 @@ PCIDevice *pci_get_function_0(PCIDevice *pci_dev)
     }
 }
 
+MSIMessage pci_get_msi_message(PCIDevice *dev, int vector)
+{
+    MSIMessage msg;
+    if (msix_enabled(dev)) {
+        msg = msix_get_message(dev, vector);
+    } else if (msi_enabled(dev)) {
+        msg = msi_get_message(dev, vector);
+    } else {
+        /* Should never happen */
+        error_report("%s: unknown interrupt type", __func__);
+        abort();
+    }
+    return msg;
+}
+
 static const TypeInfo pci_device_type_info = {
     .name = TYPE_PCI_DEVICE,
     .parent = TYPE_DEVICE,
diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c
index 722c93e5fc..ce57ef6248 100644
--- a/hw/scsi/virtio-scsi.c
+++ b/hw/scsi/virtio-scsi.c
@@ -663,22 +663,17 @@ static void virtio_scsi_reset(VirtIODevice *vdev)
 /* The device does not have anything to save beyond the virtio data.
  * Request data is saved with callbacks from SCSI devices.
  */
-static void virtio_scsi_save(QEMUFile *f, void *opaque)
+static void virtio_scsi_save(QEMUFile *f, void *opaque, size_t size)
 {
     VirtIODevice *vdev = VIRTIO_DEVICE(opaque);
     virtio_save(vdev, f);
 }
 
-static int virtio_scsi_load(QEMUFile *f, void *opaque, int version_id)
+static int virtio_scsi_load(QEMUFile *f, void *opaque, size_t size)
 {
     VirtIODevice *vdev = VIRTIO_DEVICE(opaque);
-    int ret;
 
-    ret = virtio_load(vdev, f, version_id);
-    if (ret) {
-        return ret;
-    }
-    return 0;
+    return virtio_load(vdev, f, 1);
 }
 
 void virtio_scsi_push_event(VirtIOSCSI *s, SCSIDevice *dev,
@@ -824,8 +819,9 @@ static struct SCSIBusInfo virtio_scsi_scsi_info = {
 };
 
 void virtio_scsi_common_realize(DeviceState *dev, Error **errp,
-                                HandleOutput ctrl, HandleOutput evt,
-                                HandleOutput cmd)
+                                VirtIOHandleOutput ctrl,
+                                VirtIOHandleOutput evt,
+                                VirtIOHandleOutput cmd)
 {
     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
     VirtIOSCSICommon *s = VIRTIO_SCSI_COMMON(dev);
@@ -846,13 +842,10 @@ void virtio_scsi_common_realize(DeviceState *dev, Error **errp,
     s->sense_size = VIRTIO_SCSI_SENSE_DEFAULT_SIZE;
     s->cdb_size = VIRTIO_SCSI_CDB_DEFAULT_SIZE;
 
-    s->ctrl_vq = virtio_add_queue(vdev, VIRTIO_SCSI_VQ_SIZE,
-                                  ctrl);
-    s->event_vq = virtio_add_queue(vdev, VIRTIO_SCSI_VQ_SIZE,
-                                   evt);
+    s->ctrl_vq = virtio_add_queue_aio(vdev, VIRTIO_SCSI_VQ_SIZE, ctrl);
+    s->event_vq = virtio_add_queue_aio(vdev, VIRTIO_SCSI_VQ_SIZE, evt);
     for (i = 0; i < s->conf.num_queues; i++) {
-        s->cmd_vqs[i] = virtio_add_queue(vdev, VIRTIO_SCSI_VQ_SIZE,
-                                         cmd);
+        s->cmd_vqs[i] = virtio_add_queue_aio(vdev, VIRTIO_SCSI_VQ_SIZE, cmd);
     }
 
     if (s->conf.iothread) {
@@ -864,7 +857,6 @@ static void virtio_scsi_device_realize(DeviceState *dev, Error **errp)
 {
     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
     VirtIOSCSI *s = VIRTIO_SCSI(dev);
-    static int virtio_scsi_id;
     Error *err = NULL;
 
     virtio_scsi_common_realize(dev, &err, virtio_scsi_handle_ctrl,
@@ -887,9 +879,6 @@ static void virtio_scsi_device_realize(DeviceState *dev, Error **errp)
             return;
         }
     }
-
-    register_savevm(dev, "virtio-scsi", virtio_scsi_id++, 1,
-                    virtio_scsi_save, virtio_scsi_load, s);
 }
 
 static void virtio_scsi_instance_init(Object *obj)
@@ -913,9 +902,6 @@ void virtio_scsi_common_unrealize(DeviceState *dev, Error **errp)
 
 static void virtio_scsi_device_unrealize(DeviceState *dev, Error **errp)
 {
-    VirtIOSCSI *s = VIRTIO_SCSI(dev);
-
-    unregister_savevm(dev, "virtio-scsi", s);
     virtio_scsi_common_unrealize(dev, errp);
 }
 
@@ -932,6 +918,8 @@ static Property virtio_scsi_properties[] = {
     DEFINE_PROP_END_OF_LIST(),
 };
 
+VMSTATE_VIRTIO_DEVICE(scsi, 1, virtio_scsi_load, virtio_scsi_save);
+
 static void virtio_scsi_common_class_init(ObjectClass *klass, void *data)
 {
     VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
@@ -948,6 +936,7 @@ static void virtio_scsi_class_init(ObjectClass *klass, void *data)
     HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(klass);
 
     dc->props = virtio_scsi_properties;
+    dc->vmsd = &vmstate_virtio_scsi;
     set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
     vdc->realize = virtio_scsi_device_realize;
     vdc->unrealize = virtio_scsi_device_unrealize;
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index c8436a19d6..7bfa17ce38 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -417,11 +417,11 @@ static int vfio_enable_vectors(VFIOPCIDevice *vdev, bool msix)
 }
 
 static void vfio_add_kvm_msi_virq(VFIOPCIDevice *vdev, VFIOMSIVector *vector,
-                                  MSIMessage *msg, bool msix)
+                                  int vector_n, bool msix)
 {
     int virq;
 
-    if ((msix && vdev->no_kvm_msix) || (!msix && vdev->no_kvm_msi) || !msg) {
+    if ((msix && vdev->no_kvm_msix) || (!msix && vdev->no_kvm_msi)) {
         return;
     }
 
@@ -429,7 +429,7 @@ static void vfio_add_kvm_msi_virq(VFIOPCIDevice *vdev, VFIOMSIVector *vector,
         return;
     }
 
-    virq = kvm_irqchip_add_msi_route(kvm_state, *msg, &vdev->pdev);
+    virq = kvm_irqchip_add_msi_route(kvm_state, vector_n, &vdev->pdev);
     if (virq < 0) {
         event_notifier_cleanup(&vector->kvm_interrupt);
         return;
@@ -458,6 +458,7 @@ static void vfio_update_kvm_msi_virq(VFIOMSIVector *vector, MSIMessage msg,
                                      PCIDevice *pdev)
 {
     kvm_irqchip_update_msi_route(kvm_state, vector->virq, msg, pdev);
+    kvm_irqchip_commit_routes(kvm_state);
 }
 
 static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
@@ -495,7 +496,7 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
             vfio_update_kvm_msi_virq(vector, *msg, pdev);
         }
     } else {
-        vfio_add_kvm_msi_virq(vdev, vector, msg, true);
+        vfio_add_kvm_msi_virq(vdev, vector, nr, true);
     }
 
     /*
@@ -639,7 +640,6 @@ retry:
 
     for (i = 0; i < vdev->nr_vectors; i++) {
         VFIOMSIVector *vector = &vdev->msi_vectors[i];
-        MSIMessage msg = msi_get_message(&vdev->pdev, i);
 
         vector->vdev = vdev;
         vector->virq = -1;
@@ -656,7 +656,7 @@ retry:
          * Attempt to enable route through KVM irqchip,
          * default to userspace handling if unavailable.
          */
-        vfio_add_kvm_msi_virq(vdev, vector, &msg, false);
+        vfio_add_kvm_msi_virq(vdev, vector, i, false);
     }
 
     /* Set interrupt type prior to possible interrupts */
diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c
index 1a22e6d993..5af429a58a 100644
--- a/hw/virtio/virtio-balloon.c
+++ b/hw/virtio/virtio-balloon.c
@@ -396,11 +396,6 @@ static void virtio_balloon_to_target(void *opaque, ram_addr_t target)
     trace_virtio_balloon_to_target(target, dev->num_pages);
 }
 
-static void virtio_balloon_save(QEMUFile *f, void *opaque)
-{
-    virtio_save(VIRTIO_DEVICE(opaque), f);
-}
-
 static void virtio_balloon_save_device(VirtIODevice *vdev, QEMUFile *f)
 {
     VirtIOBalloon *s = VIRTIO_BALLOON(vdev);
@@ -409,12 +404,9 @@ static void virtio_balloon_save_device(VirtIODevice *vdev, QEMUFile *f)
     qemu_put_be32(f, s->actual);
 }
 
-static int virtio_balloon_load(QEMUFile *f, void *opaque, int version_id)
+static int virtio_balloon_load(QEMUFile *f, void *opaque, size_t size)
 {
-    if (version_id != 1)
-        return -EINVAL;
-
-    return virtio_load(VIRTIO_DEVICE(opaque), f, version_id);
+    return virtio_load(VIRTIO_DEVICE(opaque), f, 1);
 }
 
 static int virtio_balloon_load_device(VirtIODevice *vdev, QEMUFile *f,
@@ -454,9 +446,6 @@ static void virtio_balloon_device_realize(DeviceState *dev, Error **errp)
     s->svq = virtio_add_queue(vdev, 128, virtio_balloon_receive_stats);
 
     reset_stats(s);
-
-    register_savevm(dev, "virtio-balloon", -1, 1,
-                    virtio_balloon_save, virtio_balloon_load, s);
 }
 
 static void virtio_balloon_device_unrealize(DeviceState *dev, Error **errp)
@@ -466,7 +455,6 @@ static void virtio_balloon_device_unrealize(DeviceState *dev, Error **errp)
 
     balloon_stats_destroy_timer(s);
     qemu_remove_balloon_handler(s);
-    unregister_savevm(dev, "virtio-balloon", s);
     virtio_cleanup(vdev);
 }
 
@@ -493,6 +481,8 @@ static void virtio_balloon_instance_init(Object *obj)
                         NULL, s, NULL);
 }
 
+VMSTATE_VIRTIO_DEVICE(balloon, 1, virtio_balloon_load, virtio_vmstate_save);
+
 static Property virtio_balloon_properties[] = {
     DEFINE_PROP_BIT("deflate-on-oom", VirtIOBalloon, host_features,
                     VIRTIO_BALLOON_F_DEFLATE_ON_OOM, false),
@@ -505,6 +495,7 @@ static void virtio_balloon_class_init(ObjectClass *klass, void *data)
     VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
 
     dc->props = virtio_balloon_properties;
+    dc->vmsd = &vmstate_virtio_balloon;
     set_bit(DEVICE_CATEGORY_MISC, dc->categories);
     vdc->realize = virtio_balloon_device_realize;
     vdc->unrealize = virtio_balloon_device_unrealize;
diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index 2b34b43060..f0677b73d8 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -699,14 +699,13 @@ static uint32_t virtio_read_config(PCIDevice *pci_dev,
 
 static int kvm_virtio_pci_vq_vector_use(VirtIOPCIProxy *proxy,
                                         unsigned int queue_no,
-                                        unsigned int vector,
-                                        MSIMessage msg)
+                                        unsigned int vector)
 {
     VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
     int ret;
 
     if (irqfd->users == 0) {
-        ret = kvm_irqchip_add_msi_route(kvm_state, msg, &proxy->pci_dev);
+        ret = kvm_irqchip_add_msi_route(kvm_state, vector, &proxy->pci_dev);
         if (ret < 0) {
             return ret;
         }
@@ -757,7 +756,6 @@ static int kvm_virtio_pci_vector_use(VirtIOPCIProxy *proxy, int nvqs)
     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
     unsigned int vector;
     int ret, queue_no;
-    MSIMessage msg;
 
     for (queue_no = 0; queue_no < nvqs; queue_no++) {
         if (!virtio_queue_get_num(vdev, queue_no)) {
@@ -767,8 +765,7 @@ static int kvm_virtio_pci_vector_use(VirtIOPCIProxy *proxy, int nvqs)
         if (vector >= msix_nr_vectors_allocated(dev)) {
             continue;
         }
-        msg = msix_get_message(dev, vector);
-        ret = kvm_virtio_pci_vq_vector_use(proxy, queue_no, vector, msg);
+        ret = kvm_virtio_pci_vq_vector_use(proxy, queue_no, vector);
         if (ret < 0) {
             goto undo;
         }
@@ -845,6 +842,7 @@ static int virtio_pci_vq_vector_unmask(VirtIOPCIProxy *proxy,
             if (ret < 0) {
                 return ret;
             }
+            kvm_irqchip_commit_routes(kvm_state);
         }
     }
 
diff --git a/hw/virtio/virtio-rng.c b/hw/virtio/virtio-rng.c
index 6b991a7642..cd8ca10177 100644
--- a/hw/virtio/virtio-rng.c
+++ b/hw/virtio/virtio-rng.c
@@ -120,22 +120,12 @@ static uint64_t get_features(VirtIODevice *vdev, uint64_t f, Error **errp)
     return f;
 }
 
-static void virtio_rng_save(QEMUFile *f, void *opaque)
-{
-    VirtIODevice *vdev = opaque;
-
-    virtio_save(vdev, f);
-}
-
-static int virtio_rng_load(QEMUFile *f, void *opaque, int version_id)
+static int virtio_rng_load(QEMUFile *f, void *opaque, size_t size)
 {
     VirtIORNG *vrng = opaque;
     int ret;
 
-    if (version_id != 1) {
-        return -EINVAL;
-    }
-    ret = virtio_load(VIRTIO_DEVICE(vrng), f, version_id);
+    ret = virtio_load(VIRTIO_DEVICE(vrng), f, 1);
     if (ret != 0) {
         return ret;
     }
@@ -214,8 +204,6 @@ static void virtio_rng_device_realize(DeviceState *dev, Error **errp)
     vrng->rate_limit_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
                                                check_rate_limit, vrng);
     vrng->activate_timer = true;
-    register_savevm(dev, "virtio-rng", -1, 1, virtio_rng_save,
-                    virtio_rng_load, vrng);
 }
 
 static void virtio_rng_device_unrealize(DeviceState *dev, Error **errp)
@@ -225,10 +213,11 @@ static void virtio_rng_device_unrealize(DeviceState *dev, Error **errp)
 
     timer_del(vrng->rate_limit_timer);
     timer_free(vrng->rate_limit_timer);
-    unregister_savevm(dev, "virtio-rng", vrng);
     virtio_cleanup(vdev);
 }
 
+VMSTATE_VIRTIO_DEVICE(rng, 1, virtio_rng_load, virtio_vmstate_save);
+
 static Property virtio_rng_properties[] = {
     /* Set a default rate limit of 2^47 bytes per minute or roughly 2TB/s.  If
      * you have an entropy source capable of generating more entropy than this
@@ -246,6 +235,7 @@ static void virtio_rng_class_init(ObjectClass *klass, void *data)
     VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
 
     dc->props = virtio_rng_properties;
+    dc->vmsd = &vmstate_virtio_rng;
     set_bit(DEVICE_CATEGORY_MISC, dc->categories);
     vdc->realize = virtio_rng_device_realize;
     vdc->unrealize = virtio_rng_device_unrealize;
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 18153d5a39..752b2715d0 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -95,8 +95,9 @@ struct VirtQueue
     int inuse;
 
     uint16_t vector;
-    void (*handle_output)(VirtIODevice *vdev, VirtQueue *vq);
-    void (*handle_aio_output)(VirtIODevice *vdev, VirtQueue *vq);
+    VirtIOHandleOutput handle_output;
+    VirtIOHandleOutput handle_aio_output;
+    bool use_aio;
     VirtIODevice *vdev;
     EventNotifier guest_notifier;
     EventNotifier host_notifier;
@@ -1130,8 +1131,9 @@ void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector)
     }
 }
 
-VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
-                            void (*handle_output)(VirtIODevice *, VirtQueue *))
+static VirtQueue *virtio_add_queue_internal(VirtIODevice *vdev, int queue_size,
+                                            VirtIOHandleOutput handle_output,
+                                            bool use_aio)
 {
     int i;
 
@@ -1148,10 +1150,28 @@ VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
     vdev->vq[i].vring.align = VIRTIO_PCI_VRING_ALIGN;
     vdev->vq[i].handle_output = handle_output;
     vdev->vq[i].handle_aio_output = NULL;
+    vdev->vq[i].use_aio = use_aio;
 
     return &vdev->vq[i];
 }
 
+/* Add a virt queue and mark AIO.
+ * An AIO queue will use the AioContext based event interface instead of the
+ * default IOHandler and EventNotifier interface.
+ */
+VirtQueue *virtio_add_queue_aio(VirtIODevice *vdev, int queue_size,
+                                VirtIOHandleOutput handle_output)
+{
+    return virtio_add_queue_internal(vdev, queue_size, handle_output, true);
+}
+
+/* Add a normal virt queue (on the contrary to the AIO version above. */
+VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
+                            VirtIOHandleOutput handle_output)
+{
+    return virtio_add_queue_internal(vdev, queue_size, handle_output, false);
+}
+
 void virtio_del_queue(VirtIODevice *vdev, int n)
 {
     if (n < 0 || n >= VIRTIO_QUEUE_MAX) {
@@ -1444,6 +1464,12 @@ void virtio_save(VirtIODevice *vdev, QEMUFile *f)
     vmstate_save_state(f, &vmstate_virtio, vdev, NULL);
 }
 
+/* A wrapper for use as a VMState .put function */
+void virtio_vmstate_save(QEMUFile *f, void *opaque, size_t size)
+{
+    virtio_save(VIRTIO_DEVICE(opaque), f);
+}
+
 static int virtio_set_features_nocheck(VirtIODevice *vdev, uint64_t val)
 {
     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
@@ -1804,8 +1830,7 @@ static void virtio_queue_host_notifier_aio_read(EventNotifier *n)
 }
 
 void virtio_queue_aio_set_host_notifier_handler(VirtQueue *vq, AioContext *ctx,
-                                                void (*handle_output)(VirtIODevice *,
-                                                                      VirtQueue *))
+                                                VirtIOHandleOutput handle_output)
 {
     if (handle_output) {
         vq->handle_aio_output = handle_output;
@@ -1831,11 +1856,21 @@ static void virtio_queue_host_notifier_read(EventNotifier *n)
 void virtio_queue_set_host_notifier_fd_handler(VirtQueue *vq, bool assign,
                                                bool set_handler)
 {
+    AioContext *ctx = qemu_get_aio_context();
     if (assign && set_handler) {
-        event_notifier_set_handler(&vq->host_notifier, true,
+        if (vq->use_aio) {
+            aio_set_event_notifier(ctx, &vq->host_notifier, true,
                                    virtio_queue_host_notifier_read);
+        } else {
+            event_notifier_set_handler(&vq->host_notifier, true,
+                                       virtio_queue_host_notifier_read);
+        }
     } else {
-        event_notifier_set_handler(&vq->host_notifier, true, NULL);
+        if (vq->use_aio) {
+            aio_set_event_notifier(ctx, &vq->host_notifier, true, NULL);
+        } else {
+            event_notifier_set_handler(&vq->host_notifier, true, NULL);
+        }
     }
     if (!assign) {
         /* Test and clear notifier before after disabling event,