summary refs log tree commit diff stats
diff options
context:
space:
mode:
authorPeter Maydell <peter.maydell@linaro.org>2016-09-26 19:47:00 +0100
committerPeter Maydell <peter.maydell@linaro.org>2016-09-26 19:47:00 +0100
commit7cfdc02dae0d2ff58c897496cfdbbafc0eda0f3f (patch)
tree4ab986d748b8e4f914a388fe449366f9812b0f7d
parent3b71ec8516bb50e9a743645bf139571de0b39f61 (diff)
parentfb9f592623b0f9bb82a88d68d7921fb581918ef5 (diff)
downloadfocaccia-qemu-7cfdc02dae0d2ff58c897496cfdbbafc0eda0f3f.tar.gz
focaccia-qemu-7cfdc02dae0d2ff58c897496cfdbbafc0eda0f3f.zip
Merge remote-tracking branch 'remotes/mst/tags/for_upstream' into staging
virtio, pc: fixes and features

beginning of guest error handling for virtio devices
amd iommu
pc compat fixes

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

# gpg: Signature made Fri 23 Sep 2016 23:02:09 BST
# gpg:                using RSA key 0x281F0DB8D28D5469
# gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>"
# gpg:                 aka "Michael S. Tsirkin <mst@redhat.com>"
# Primary key fingerprint: 0270 606B 6F3C DF3D 0B17  0970 C350 3912 AFBE 8E67
#      Subkey fingerprint: 5D09 FD08 71C8 F85B 94CA  8A0D 281F 0DB8 D28D 5469

* remotes/mst/tags/for_upstream:
  hw/i386: AMD IOMMU IVRS table
  hw/i386: Introduce AMD IOMMU
  hw/i386/trace-events: Add AMD IOMMU trace events
  hw/pci: Prepare for AMD IOMMU
  virtio: handle virtqueue_get_head() errors
  virtio: handle virtqueue_num_heads() errors
  virtio: handle virtqueue_read_next_desc() errors
  virtio: use unsigned int for virtqueue_get_avail_bytes() index
  virtio: handle virtqueue_get_avail_bytes() errors
  virtio: handle virtqueue_map_desc() errors
  virtio: migrate vdev->broken flag
  virtio: stop virtqueue processing if device is broken
  virtio: fix stray tab character
  target-i386: turn off CPU.l3-cache only for 2.7 and older machine types
  pc: clean up COMPAT macro chaining
  virtio: add check for descriptor's mapped address
  tests: add /vhost-user/flags-mismatch test
  tests: add a simple /vhost-user/multiqueue test
  tests: add /vhost-user/connect-fail test

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r--hw/acpi/aml-build.c2
-rw-r--r--hw/i386/Makefile.objs1
-rw-r--r--hw/i386/acpi-build.c76
-rw-r--r--hw/i386/amd_iommu.c1202
-rw-r--r--hw/i386/amd_iommu.h289
-rw-r--r--hw/i386/intel_iommu.c1
-rw-r--r--hw/i386/trace-events29
-rw-r--r--hw/i386/x86-iommu.c6
-rw-r--r--hw/virtio/virtio.c237
-rw-r--r--include/hw/acpi/aml-build.h1
-rw-r--r--include/hw/i386/pc.h9
-rw-r--r--include/hw/i386/x86-iommu.h12
-rw-r--r--include/hw/pci/pci.h3
-rw-r--r--include/hw/virtio/virtio.h3
-rw-r--r--tests/Makefile.include2
-rw-r--r--tests/vhost-user-test.c208
16 files changed, 2009 insertions, 72 deletions
diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c
index db3e914fb4..b2a1e4033b 100644
--- a/hw/acpi/aml-build.c
+++ b/hw/acpi/aml-build.c
@@ -226,7 +226,7 @@ static void build_extop_package(GArray *package, uint8_t op)
     build_prepend_byte(package, 0x5B); /* ExtOpPrefix */
 }
 
-static void build_append_int_noprefix(GArray *table, uint64_t value, int size)
+void build_append_int_noprefix(GArray *table, uint64_t value, int size)
 {
     int i;
 
diff --git a/hw/i386/Makefile.objs b/hw/i386/Makefile.objs
index 90e94ffefd..909ead6a77 100644
--- a/hw/i386/Makefile.objs
+++ b/hw/i386/Makefile.objs
@@ -3,6 +3,7 @@ obj-y += multiboot.o
 obj-y += pc.o pc_piix.o pc_q35.o
 obj-y += pc_sysfw.o
 obj-y += x86-iommu.o intel_iommu.o
+obj-y += amd_iommu.o
 obj-$(CONFIG_XEN) += ../xenpv/ xen/
 
 obj-y += kvmvapic.o
diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index 433febafdd..c20bc71a67 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -59,7 +59,8 @@
 
 #include "qapi/qmp/qint.h"
 #include "qom/qom-qobject.h"
-#include "hw/i386/x86-iommu.h"
+#include "hw/i386/amd_iommu.h"
+#include "hw/i386/intel_iommu.h"
 
 #include "hw/acpi/ipmi.h"
 
@@ -2562,6 +2563,62 @@ build_dmar_q35(GArray *table_data, BIOSLinker *linker)
     build_header(linker, table_data, (void *)(table_data->data + dmar_start),
                  "DMAR", table_data->len - dmar_start, 1, NULL, NULL);
 }
+/*
+ *   IVRS table as specified in AMD IOMMU Specification v2.62, Section 5.2
+ *   accessible here http://support.amd.com/TechDocs/48882_IOMMU.pdf
+ */
+static void
+build_amd_iommu(GArray *table_data, BIOSLinker *linker)
+{
+    int iommu_start = table_data->len;
+    AMDVIState *s = AMD_IOMMU_DEVICE(x86_iommu_get_default());
+
+    /* IVRS header */
+    acpi_data_push(table_data, sizeof(AcpiTableHeader));
+    /* IVinfo - IO virtualization information common to all
+     * IOMMU units in a system
+     */
+    build_append_int_noprefix(table_data, 40UL << 8/* PASize */, 4);
+    /* reserved */
+    build_append_int_noprefix(table_data, 0, 8);
+
+    /* IVHD definition - type 10h */
+    build_append_int_noprefix(table_data, 0x10, 1);
+    /* virtualization flags */
+    build_append_int_noprefix(table_data,
+                             (1UL << 0) | /* HtTunEn      */
+                             (1UL << 4) | /* iotblSup     */
+                             (1UL << 6) | /* PrefSup      */
+                             (1UL << 7),  /* PPRSup       */
+                             1);
+    /* IVHD length */
+    build_append_int_noprefix(table_data, 0x24, 2);
+    /* DeviceID */
+    build_append_int_noprefix(table_data, s->devid, 2);
+    /* Capability offset */
+    build_append_int_noprefix(table_data, s->capab_offset, 2);
+    /* IOMMU base address */
+    build_append_int_noprefix(table_data, s->mmio.addr, 8);
+    /* PCI Segment Group */
+    build_append_int_noprefix(table_data, 0, 2);
+    /* IOMMU info */
+    build_append_int_noprefix(table_data, 0, 2);
+    /* IOMMU Feature Reporting */
+    build_append_int_noprefix(table_data,
+                             (48UL << 30) | /* HATS   */
+                             (48UL << 28) | /* GATS   */
+                             (1UL << 2),    /* GTSup  */
+                             4);
+    /*
+     *   Type 1 device entry reporting all devices
+     *   These are 4-byte device entries currently reporting the range of
+     *   Refer to Spec - Table 95:IVHD Device Entry Type Codes(4-byte)
+     */
+    build_append_int_noprefix(table_data, 0x0000001, 4);
+
+    build_header(linker, table_data, (void *)(table_data->data + iommu_start),
+                 "IVRS", table_data->len - iommu_start, 1, NULL, NULL);
+}
 
 static GArray *
 build_rsdp(GArray *rsdp_table, BIOSLinker *linker, unsigned rsdt_tbl_offset)
@@ -2622,11 +2679,6 @@ static bool acpi_get_mcfg(AcpiMcfgInfo *mcfg)
     return true;
 }
 
-static bool acpi_has_iommu(void)
-{
-    return !!x86_iommu_get_default();
-}
-
 static
 void acpi_build(AcpiBuildTables *tables, MachineState *machine)
 {
@@ -2706,9 +2758,15 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine)
         acpi_add_table(table_offsets, tables_blob);
         build_mcfg_q35(tables_blob, tables->linker, &mcfg);
     }
-    if (acpi_has_iommu()) {
-        acpi_add_table(table_offsets, tables_blob);
-        build_dmar_q35(tables_blob, tables->linker);
+    if (x86_iommu_get_default()) {
+        IommuType IOMMUType = x86_iommu_get_type();
+        if (IOMMUType == TYPE_AMD) {
+            acpi_add_table(table_offsets, tables_blob);
+            build_amd_iommu(tables_blob, tables->linker);
+        } else if (IOMMUType == TYPE_INTEL) {
+            acpi_add_table(table_offsets, tables_blob);
+            build_dmar_q35(tables_blob, tables->linker);
+        }
     }
     if (pcms->acpi_nvdimm_state.is_enabled) {
         nvdimm_build_acpi(table_offsets, tables_blob, tables->linker,
diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c
new file mode 100644
index 0000000000..a91a1798cb
--- /dev/null
+++ b/hw/i386/amd_iommu.c
@@ -0,0 +1,1202 @@
+/*
+ * QEMU emulation of AMD IOMMU (AMD-Vi)
+ *
+ * Copyright (C) 2011 Eduard - Gabriel Munteanu
+ * Copyright (C) 2015 David Kiarie, <davidkiarie4@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Cache implementation inspired by hw/i386/intel_iommu.c
+ */
+#include "qemu/osdep.h"
+#include "hw/i386/amd_iommu.h"
+#include "trace.h"
+
+/* used AMD-Vi MMIO registers */
+const char *amdvi_mmio_low[] = {
+    "AMDVI_MMIO_DEVTAB_BASE",
+    "AMDVI_MMIO_CMDBUF_BASE",
+    "AMDVI_MMIO_EVTLOG_BASE",
+    "AMDVI_MMIO_CONTROL",
+    "AMDVI_MMIO_EXCL_BASE",
+    "AMDVI_MMIO_EXCL_LIMIT",
+    "AMDVI_MMIO_EXT_FEATURES",
+    "AMDVI_MMIO_PPR_BASE",
+    "UNHANDLED"
+};
+const char *amdvi_mmio_high[] = {
+    "AMDVI_MMIO_COMMAND_HEAD",
+    "AMDVI_MMIO_COMMAND_TAIL",
+    "AMDVI_MMIO_EVTLOG_HEAD",
+    "AMDVI_MMIO_EVTLOG_TAIL",
+    "AMDVI_MMIO_STATUS",
+    "AMDVI_MMIO_PPR_HEAD",
+    "AMDVI_MMIO_PPR_TAIL",
+    "UNHANDLED"
+};
+
+struct AMDVIAddressSpace {
+    uint8_t bus_num;            /* bus number                           */
+    uint8_t devfn;              /* device function                      */
+    AMDVIState *iommu_state;    /* AMDVI - one per machine              */
+    MemoryRegion iommu;         /* Device's address translation region  */
+    MemoryRegion iommu_ir;      /* Device's interrupt remapping region  */
+    AddressSpace as;            /* device's corresponding address space */
+};
+
+/* AMDVI cache entry */
+typedef struct AMDVIIOTLBEntry {
+    uint16_t domid;             /* assigned domain id  */
+    uint16_t devid;             /* device owning entry */
+    uint64_t perms;             /* access permissions  */
+    uint64_t translated_addr;   /* translated address  */
+    uint64_t page_mask;         /* physical page size  */
+} AMDVIIOTLBEntry;
+
+/* configure MMIO registers at startup/reset */
+static void amdvi_set_quad(AMDVIState *s, hwaddr addr, uint64_t val,
+                           uint64_t romask, uint64_t w1cmask)
+{
+    stq_le_p(&s->mmior[addr], val);
+    stq_le_p(&s->romask[addr], romask);
+    stq_le_p(&s->w1cmask[addr], w1cmask);
+}
+
+static uint16_t amdvi_readw(AMDVIState *s, hwaddr addr)
+{
+    return lduw_le_p(&s->mmior[addr]);
+}
+
+static uint32_t amdvi_readl(AMDVIState *s, hwaddr addr)
+{
+    return ldl_le_p(&s->mmior[addr]);
+}
+
+static uint64_t amdvi_readq(AMDVIState *s, hwaddr addr)
+{
+    return ldq_le_p(&s->mmior[addr]);
+}
+
+/* internal write */
+static void amdvi_writeq_raw(AMDVIState *s, uint64_t val, hwaddr addr)
+{
+    stq_le_p(&s->mmior[addr], val);
+}
+
+/* external write */
+static void amdvi_writew(AMDVIState *s, hwaddr addr, uint16_t val)
+{
+    uint16_t romask = lduw_le_p(&s->romask[addr]);
+    uint16_t w1cmask = lduw_le_p(&s->w1cmask[addr]);
+    uint16_t oldval = lduw_le_p(&s->mmior[addr]);
+    stw_le_p(&s->mmior[addr],
+            ((oldval & romask) | (val & ~romask)) & ~(val & w1cmask));
+}
+
+static void amdvi_writel(AMDVIState *s, hwaddr addr, uint32_t val)
+{
+    uint32_t romask = ldl_le_p(&s->romask[addr]);
+    uint32_t w1cmask = ldl_le_p(&s->w1cmask[addr]);
+    uint32_t oldval = ldl_le_p(&s->mmior[addr]);
+    stl_le_p(&s->mmior[addr],
+            ((oldval & romask) | (val & ~romask)) & ~(val & w1cmask));
+}
+
+static void amdvi_writeq(AMDVIState *s, hwaddr addr, uint64_t val)
+{
+    uint64_t romask = ldq_le_p(&s->romask[addr]);
+    uint64_t w1cmask = ldq_le_p(&s->w1cmask[addr]);
+    uint32_t oldval = ldq_le_p(&s->mmior[addr]);
+    stq_le_p(&s->mmior[addr],
+            ((oldval & romask) | (val & ~romask)) & ~(val & w1cmask));
+}
+
+/* OR a 64-bit register with a 64-bit value */
+static bool amdvi_test_mask(AMDVIState *s, hwaddr addr, uint64_t val)
+{
+    return amdvi_readq(s, addr) | val;
+}
+
+/* OR a 64-bit register with a 64-bit value storing result in the register */
+static void amdvi_assign_orq(AMDVIState *s, hwaddr addr, uint64_t val)
+{
+    amdvi_writeq_raw(s, addr, amdvi_readq(s, addr) | val);
+}
+
+/* AND a 64-bit register with a 64-bit value storing result in the register */
+static void amdvi_assign_andq(AMDVIState *s, hwaddr addr, uint64_t val)
+{
+   amdvi_writeq_raw(s, addr, amdvi_readq(s, addr) & val);
+}
+
+static void amdvi_generate_msi_interrupt(AMDVIState *s)
+{
+    MSIMessage msg;
+    MemTxAttrs attrs;
+
+    attrs.requester_id = pci_requester_id(&s->pci.dev);
+
+    if (msi_enabled(&s->pci.dev)) {
+        msg = msi_get_message(&s->pci.dev, 0);
+        address_space_stl_le(&address_space_memory, msg.address, msg.data,
+                             attrs, NULL);
+    }
+}
+
+static void amdvi_log_event(AMDVIState *s, uint64_t *evt)
+{
+    /* event logging not enabled */
+    if (!s->evtlog_enabled || amdvi_test_mask(s, AMDVI_MMIO_STATUS,
+        AMDVI_MMIO_STATUS_EVT_OVF)) {
+        return;
+    }
+
+    /* event log buffer full */
+    if (s->evtlog_tail >= s->evtlog_len) {
+        amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_EVT_OVF);
+        /* generate interrupt */
+        amdvi_generate_msi_interrupt(s);
+        return;
+    }
+
+    if (dma_memory_write(&address_space_memory, s->evtlog + s->evtlog_tail,
+        &evt, AMDVI_EVENT_LEN)) {
+        trace_amdvi_evntlog_fail(s->evtlog, s->evtlog_tail);
+    }
+
+    s->evtlog_tail += AMDVI_EVENT_LEN;
+    amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_COMP_INT);
+    amdvi_generate_msi_interrupt(s);
+}
+
+static void amdvi_setevent_bits(uint64_t *buffer, uint64_t value, int start,
+                                int length)
+{
+    int index = start / 64, bitpos = start % 64;
+    uint64_t mask = ((1 << length) - 1) << bitpos;
+    buffer[index] &= ~mask;
+    buffer[index] |= (value << bitpos) & mask;
+}
+/*
+ * AMDVi event structure
+ *    0:15   -> DeviceID
+ *    55:63  -> event type + miscellaneous info
+ *    63:127 -> related address
+ */
+static void amdvi_encode_event(uint64_t *evt, uint16_t devid, uint64_t addr,
+                               uint16_t info)
+{
+    amdvi_setevent_bits(evt, devid, 0, 16);
+    amdvi_setevent_bits(evt, info, 55, 8);
+    amdvi_setevent_bits(evt, addr, 63, 64);
+}
+/* log an error encountered during a page walk
+ *
+ * @addr: virtual address in translation request
+ */
+static void amdvi_page_fault(AMDVIState *s, uint16_t devid,
+                             hwaddr addr, uint16_t info)
+{
+    uint64_t evt[4];
+
+    info |= AMDVI_EVENT_IOPF_I | AMDVI_EVENT_IOPF;
+    amdvi_encode_event(evt, devid, addr, info);
+    amdvi_log_event(s, evt);
+    pci_word_test_and_set_mask(s->pci.dev.config + PCI_STATUS,
+            PCI_STATUS_SIG_TARGET_ABORT);
+}
+/*
+ * log a master abort accessing device table
+ *  @devtab : address of device table entry
+ *  @info : error flags
+ */
+static void amdvi_log_devtab_error(AMDVIState *s, uint16_t devid,
+                                   hwaddr devtab, uint16_t info)
+{
+    uint64_t evt[4];
+
+    info |= AMDVI_EVENT_DEV_TAB_HW_ERROR;
+
+    amdvi_encode_event(evt, devid, devtab, info);
+    amdvi_log_event(s, evt);
+    pci_word_test_and_set_mask(s->pci.dev.config + PCI_STATUS,
+            PCI_STATUS_SIG_TARGET_ABORT);
+}
+/* log an event trying to access command buffer
+ *   @addr : address that couldn't be accessed
+ */
+static void amdvi_log_command_error(AMDVIState *s, hwaddr addr)
+{
+    uint64_t evt[4], info = AMDVI_EVENT_COMMAND_HW_ERROR;
+
+    amdvi_encode_event(evt, 0, addr, info);
+    amdvi_log_event(s, evt);
+    pci_word_test_and_set_mask(s->pci.dev.config + PCI_STATUS,
+            PCI_STATUS_SIG_TARGET_ABORT);
+}
+/* log an illegal comand event
+ *   @addr : address of illegal command
+ */
+static void amdvi_log_illegalcom_error(AMDVIState *s, uint16_t info,
+                                       hwaddr addr)
+{
+    uint64_t evt[4];
+
+    info |= AMDVI_EVENT_ILLEGAL_COMMAND_ERROR;
+    amdvi_encode_event(evt, 0, addr, info);
+    amdvi_log_event(s, evt);
+}
+/* log an error accessing device table
+ *
+ *  @devid : device owning the table entry
+ *  @devtab : address of device table entry
+ *  @info : error flags
+ */
+static void amdvi_log_illegaldevtab_error(AMDVIState *s, uint16_t devid,
+                                          hwaddr addr, uint16_t info)
+{
+    uint64_t evt[4];
+
+    info |= AMDVI_EVENT_ILLEGAL_DEVTAB_ENTRY;
+    amdvi_encode_event(evt, devid, addr, info);
+    amdvi_log_event(s, evt);
+}
+/* log an error accessing a PTE entry
+ * @addr : address that couldn't be accessed
+ */
+static void amdvi_log_pagetab_error(AMDVIState *s, uint16_t devid,
+                                    hwaddr addr, uint16_t info)
+{
+    uint64_t evt[4];
+
+    info |= AMDVI_EVENT_PAGE_TAB_HW_ERROR;
+    amdvi_encode_event(evt, devid, addr, info);
+    amdvi_log_event(s, evt);
+    pci_word_test_and_set_mask(s->pci.dev.config + PCI_STATUS,
+             PCI_STATUS_SIG_TARGET_ABORT);
+}
+
+static gboolean amdvi_uint64_equal(gconstpointer v1, gconstpointer v2)
+{
+    return *((const uint64_t *)v1) == *((const uint64_t *)v2);
+}
+
+static guint amdvi_uint64_hash(gconstpointer v)
+{
+    return (guint)*(const uint64_t *)v;
+}
+
+static AMDVIIOTLBEntry *amdvi_iotlb_lookup(AMDVIState *s, hwaddr addr,
+                                           uint64_t devid)
+{
+    uint64_t key = (addr >> AMDVI_PAGE_SHIFT_4K) |
+                   ((uint64_t)(devid) << AMDVI_DEVID_SHIFT);
+    return g_hash_table_lookup(s->iotlb, &key);
+}
+
+static void amdvi_iotlb_reset(AMDVIState *s)
+{
+    assert(s->iotlb);
+    trace_amdvi_iotlb_reset();
+    g_hash_table_remove_all(s->iotlb);
+}
+
+static gboolean amdvi_iotlb_remove_by_devid(gpointer key, gpointer value,
+                                            gpointer user_data)
+{
+    AMDVIIOTLBEntry *entry = (AMDVIIOTLBEntry *)value;
+    uint16_t devid = *(uint16_t *)user_data;
+    return entry->devid == devid;
+}
+
+static void amdvi_iotlb_remove_page(AMDVIState *s, hwaddr addr,
+                                    uint64_t devid)
+{
+    uint64_t key = (addr >> AMDVI_PAGE_SHIFT_4K) |
+                   ((uint64_t)(devid) << AMDVI_DEVID_SHIFT);
+    g_hash_table_remove(s->iotlb, &key);
+}
+
+static void amdvi_update_iotlb(AMDVIState *s, uint16_t devid,
+                               uint64_t gpa, IOMMUTLBEntry to_cache,
+                               uint16_t domid)
+{
+    AMDVIIOTLBEntry *entry = g_malloc(sizeof(*entry));
+    uint64_t *key = g_malloc(sizeof(key));
+    uint64_t gfn = gpa >> AMDVI_PAGE_SHIFT_4K;
+
+    /* don't cache erroneous translations */
+    if (to_cache.perm != IOMMU_NONE) {
+        trace_amdvi_cache_update(domid, PCI_BUS_NUM(devid), PCI_SLOT(devid),
+                PCI_FUNC(devid), gpa, to_cache.translated_addr);
+
+        if (g_hash_table_size(s->iotlb) >= AMDVI_IOTLB_MAX_SIZE) {
+            amdvi_iotlb_reset(s);
+        }
+
+        entry->domid = domid;
+        entry->perms = to_cache.perm;
+        entry->translated_addr = to_cache.translated_addr;
+        entry->page_mask = to_cache.addr_mask;
+        *key = gfn | ((uint64_t)(devid) << AMDVI_DEVID_SHIFT);
+        g_hash_table_replace(s->iotlb, key, entry);
+    }
+}
+
+static void amdvi_completion_wait(AMDVIState *s, uint64_t *cmd)
+{
+    /* pad the last 3 bits */
+    hwaddr addr = cpu_to_le64(extract64(cmd[0], 3, 49)) << 3;
+    uint64_t data = cpu_to_le64(cmd[1]);
+
+    if (extract64(cmd[0], 51, 8)) {
+        amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
+                                   s->cmdbuf + s->cmdbuf_head);
+    }
+    if (extract64(cmd[0], 0, 1)) {
+        if (dma_memory_write(&address_space_memory, addr, &data,
+            AMDVI_COMPLETION_DATA_SIZE)) {
+            trace_amdvi_completion_wait_fail(addr);
+        }
+    }
+    /* set completion interrupt */
+    if (extract64(cmd[0], 1, 1)) {
+        amdvi_test_mask(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_COMP_INT);
+        /* generate interrupt */
+        amdvi_generate_msi_interrupt(s);
+    }
+    trace_amdvi_completion_wait(addr, data);
+}
+
+/* log error without aborting since linux seems to be using reserved bits */
+static void amdvi_inval_devtab_entry(AMDVIState *s, uint64_t *cmd)
+{
+    uint16_t devid = cpu_to_le16((uint16_t)extract64(cmd[0], 0, 16));
+
+    /* This command should invalidate internal caches of which there isn't */
+    if (extract64(cmd[0], 15, 16) || cmd[1]) {
+        amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
+                                   s->cmdbuf + s->cmdbuf_head);
+    }
+    trace_amdvi_devtab_inval(PCI_BUS_NUM(devid), PCI_SLOT(devid),
+                             PCI_FUNC(devid));
+}
+
+static void amdvi_complete_ppr(AMDVIState *s, uint64_t *cmd)
+{
+    if (extract64(cmd[0], 15, 16) ||  extract64(cmd[0], 19, 8) ||
+        extract64(cmd[1], 0, 2) || extract64(cmd[1], 3, 29)
+        || extract64(cmd[1], 47, 16)) {
+        amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
+                                   s->cmdbuf + s->cmdbuf_head);
+    }
+    trace_amdvi_ppr_exec();
+}
+
+static void amdvi_inval_all(AMDVIState *s, uint64_t *cmd)
+{
+    if (extract64(cmd[0], 0, 60) || cmd[1]) {
+        amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
+                                   s->cmdbuf + s->cmdbuf_head);
+    }
+
+    amdvi_iotlb_reset(s);
+    trace_amdvi_all_inval();
+}
+
+static gboolean amdvi_iotlb_remove_by_domid(gpointer key, gpointer value,
+                                            gpointer user_data)
+{
+    AMDVIIOTLBEntry *entry = (AMDVIIOTLBEntry *)value;
+    uint16_t domid = *(uint16_t *)user_data;
+    return entry->domid == domid;
+}
+
+/* we don't have devid - we can't remove pages by address */
+static void amdvi_inval_pages(AMDVIState *s, uint64_t *cmd)
+{
+    uint16_t domid = cpu_to_le16((uint16_t)extract64(cmd[0], 32, 16));
+
+    if (extract64(cmd[0], 20, 12) || extract64(cmd[0], 16, 12) ||
+        extract64(cmd[0], 3, 10)) {
+        amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
+                                   s->cmdbuf + s->cmdbuf_head);
+    }
+
+    g_hash_table_foreach_remove(s->iotlb, amdvi_iotlb_remove_by_domid,
+                                &domid);
+    trace_amdvi_pages_inval(domid);
+}
+
+static void amdvi_prefetch_pages(AMDVIState *s, uint64_t *cmd)
+{
+    if (extract64(cmd[0], 16, 8) || extract64(cmd[0], 20, 8) ||
+        extract64(cmd[1], 1, 1) || extract64(cmd[1], 3, 1) ||
+        extract64(cmd[1], 5, 7)) {
+        amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
+                                   s->cmdbuf + s->cmdbuf_head);
+    }
+
+    trace_amdvi_prefetch_pages();
+}
+
+static void amdvi_inval_inttable(AMDVIState *s, uint64_t *cmd)
+{
+    if (extract64(cmd[0], 16, 16) || cmd[1]) {
+        amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
+                                   s->cmdbuf + s->cmdbuf_head);
+        return;
+    }
+
+    trace_amdvi_intr_inval();
+}
+
+/* FIXME: Try to work with the specified size instead of all the pages
+ * when the S bit is on
+ */
+static void iommu_inval_iotlb(AMDVIState *s, uint64_t *cmd)
+{
+
+    uint16_t devid = extract64(cmd[0], 0, 16);
+    if (extract64(cmd[1], 1, 1) || extract64(cmd[1], 3, 9)) {
+        amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
+                                   s->cmdbuf + s->cmdbuf_head);
+        return;
+    }
+
+    if (extract64(cmd[1], 0, 1)) {
+        g_hash_table_foreach_remove(s->iotlb, amdvi_iotlb_remove_by_devid,
+                                    &devid);
+    } else {
+        amdvi_iotlb_remove_page(s, cpu_to_le64(extract64(cmd[1], 12, 52)) << 12,
+                                cpu_to_le16(extract64(cmd[1], 0, 16)));
+    }
+    trace_amdvi_iotlb_inval();
+}
+
+/* not honouring reserved bits is regarded as an illegal command */
+static void amdvi_cmdbuf_exec(AMDVIState *s)
+{
+    uint64_t cmd[2];
+
+    if (dma_memory_read(&address_space_memory, s->cmdbuf + s->cmdbuf_head,
+        cmd, AMDVI_COMMAND_SIZE)) {
+        trace_amdvi_command_read_fail(s->cmdbuf, s->cmdbuf_head);
+        amdvi_log_command_error(s, s->cmdbuf + s->cmdbuf_head);
+        return;
+    }
+
+    switch (extract64(cmd[0], 60, 4)) {
+    case AMDVI_CMD_COMPLETION_WAIT:
+        amdvi_completion_wait(s, cmd);
+        break;
+    case AMDVI_CMD_INVAL_DEVTAB_ENTRY:
+        amdvi_inval_devtab_entry(s, cmd);
+        break;
+    case AMDVI_CMD_INVAL_AMDVI_PAGES:
+        amdvi_inval_pages(s, cmd);
+        break;
+    case AMDVI_CMD_INVAL_IOTLB_PAGES:
+        iommu_inval_iotlb(s, cmd);
+        break;
+    case AMDVI_CMD_INVAL_INTR_TABLE:
+        amdvi_inval_inttable(s, cmd);
+        break;
+    case AMDVI_CMD_PREFETCH_AMDVI_PAGES:
+        amdvi_prefetch_pages(s, cmd);
+        break;
+    case AMDVI_CMD_COMPLETE_PPR_REQUEST:
+        amdvi_complete_ppr(s, cmd);
+        break;
+    case AMDVI_CMD_INVAL_AMDVI_ALL:
+        amdvi_inval_all(s, cmd);
+        break;
+    default:
+        trace_amdvi_unhandled_command(extract64(cmd[1], 60, 4));
+        /* log illegal command */
+        amdvi_log_illegalcom_error(s, extract64(cmd[1], 60, 4),
+                                   s->cmdbuf + s->cmdbuf_head);
+    }
+}
+
+static void amdvi_cmdbuf_run(AMDVIState *s)
+{
+    if (!s->cmdbuf_enabled) {
+        trace_amdvi_command_error(amdvi_readq(s, AMDVI_MMIO_CONTROL));
+        return;
+    }
+
+    /* check if there is work to do. */
+    while (s->cmdbuf_head != s->cmdbuf_tail) {
+        trace_amdvi_command_exec(s->cmdbuf_head, s->cmdbuf_tail, s->cmdbuf);
+        amdvi_cmdbuf_exec(s);
+        s->cmdbuf_head += AMDVI_COMMAND_SIZE;
+        amdvi_writeq_raw(s, s->cmdbuf_head, AMDVI_MMIO_COMMAND_HEAD);
+
+        /* wrap head pointer */
+        if (s->cmdbuf_head >= s->cmdbuf_len * AMDVI_COMMAND_SIZE) {
+            s->cmdbuf_head = 0;
+        }
+    }
+}
+
+static void amdvi_mmio_trace(hwaddr addr, unsigned size)
+{
+    uint8_t index = (addr & ~0x2000) / 8;
+
+    if ((addr & 0x2000)) {
+        /* high table */
+        index = index >= AMDVI_MMIO_REGS_HIGH ? AMDVI_MMIO_REGS_HIGH : index;
+        trace_amdvi_mmio_read(amdvi_mmio_high[index], addr, size, addr & ~0x07);
+    } else {
+        index = index >= AMDVI_MMIO_REGS_LOW ? AMDVI_MMIO_REGS_LOW : index;
+        trace_amdvi_mmio_read(amdvi_mmio_high[index], addr, size, addr & ~0x07);
+    }
+}
+
+static uint64_t amdvi_mmio_read(void *opaque, hwaddr addr, unsigned size)
+{
+    AMDVIState *s = opaque;
+
+    uint64_t val = -1;
+    if (addr + size > AMDVI_MMIO_SIZE) {
+        trace_amdvi_mmio_read("error: addr outside region: max ",
+                (uint64_t)AMDVI_MMIO_SIZE, addr, size);
+        return (uint64_t)-1;
+    }
+
+    if (size == 2) {
+        val = amdvi_readw(s, addr);
+    } else if (size == 4) {
+        val = amdvi_readl(s, addr);
+    } else if (size == 8) {
+        val = amdvi_readq(s, addr);
+    }
+    amdvi_mmio_trace(addr, size);
+
+    return val;
+}
+
+static void amdvi_handle_control_write(AMDVIState *s)
+{
+    unsigned long control = amdvi_readq(s, AMDVI_MMIO_CONTROL);
+    s->enabled = !!(control & AMDVI_MMIO_CONTROL_AMDVIEN);
+
+    s->ats_enabled = !!(control & AMDVI_MMIO_CONTROL_HTTUNEN);
+    s->evtlog_enabled = s->enabled && !!(control &
+                        AMDVI_MMIO_CONTROL_EVENTLOGEN);
+
+    s->evtlog_intr = !!(control & AMDVI_MMIO_CONTROL_EVENTINTEN);
+    s->completion_wait_intr = !!(control & AMDVI_MMIO_CONTROL_COMWAITINTEN);
+    s->cmdbuf_enabled = s->enabled && !!(control &
+                        AMDVI_MMIO_CONTROL_CMDBUFLEN);
+
+    /* update the flags depending on the control register */
+    if (s->cmdbuf_enabled) {
+        amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_CMDBUF_RUN);
+    } else {
+        amdvi_assign_andq(s, AMDVI_MMIO_STATUS, ~AMDVI_MMIO_STATUS_CMDBUF_RUN);
+    }
+    if (s->evtlog_enabled) {
+        amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_EVT_RUN);
+    } else {
+        amdvi_assign_andq(s, AMDVI_MMIO_STATUS, ~AMDVI_MMIO_STATUS_EVT_RUN);
+    }
+
+    trace_amdvi_control_status(control);
+    amdvi_cmdbuf_run(s);
+}
+
+static inline void amdvi_handle_devtab_write(AMDVIState *s)
+
+{
+    uint64_t val = amdvi_readq(s, AMDVI_MMIO_DEVICE_TABLE);
+    s->devtab = (val & AMDVI_MMIO_DEVTAB_BASE_MASK);
+
+    /* set device table length */
+    s->devtab_len = ((val & AMDVI_MMIO_DEVTAB_SIZE_MASK) + 1 *
+                    (AMDVI_MMIO_DEVTAB_SIZE_UNIT /
+                     AMDVI_MMIO_DEVTAB_ENTRY_SIZE));
+}
+
+static inline void amdvi_handle_cmdhead_write(AMDVIState *s)
+{
+    s->cmdbuf_head = amdvi_readq(s, AMDVI_MMIO_COMMAND_HEAD)
+                     & AMDVI_MMIO_CMDBUF_HEAD_MASK;
+    amdvi_cmdbuf_run(s);
+}
+
+static inline void amdvi_handle_cmdbase_write(AMDVIState *s)
+{
+    s->cmdbuf = amdvi_readq(s, AMDVI_MMIO_COMMAND_BASE)
+                & AMDVI_MMIO_CMDBUF_BASE_MASK;
+    s->cmdbuf_len = 1UL << (amdvi_readq(s, AMDVI_MMIO_CMDBUF_SIZE_BYTE)
+                    & AMDVI_MMIO_CMDBUF_SIZE_MASK);
+    s->cmdbuf_head = s->cmdbuf_tail = 0;
+}
+
+static inline void amdvi_handle_cmdtail_write(AMDVIState *s)
+{
+    s->cmdbuf_tail = amdvi_readq(s, AMDVI_MMIO_COMMAND_TAIL)
+                     & AMDVI_MMIO_CMDBUF_TAIL_MASK;
+    amdvi_cmdbuf_run(s);
+}
+
+static inline void amdvi_handle_excllim_write(AMDVIState *s)
+{
+    uint64_t val = amdvi_readq(s, AMDVI_MMIO_EXCL_LIMIT);
+    s->excl_limit = (val & AMDVI_MMIO_EXCL_LIMIT_MASK) |
+                    AMDVI_MMIO_EXCL_LIMIT_LOW;
+}
+
+static inline void amdvi_handle_evtbase_write(AMDVIState *s)
+{
+    uint64_t val = amdvi_readq(s, AMDVI_MMIO_EVENT_BASE);
+    s->evtlog = val & AMDVI_MMIO_EVTLOG_BASE_MASK;
+    s->evtlog_len = 1UL << (amdvi_readq(s, AMDVI_MMIO_EVTLOG_SIZE_BYTE)
+                    & AMDVI_MMIO_EVTLOG_SIZE_MASK);
+}
+
+static inline void amdvi_handle_evttail_write(AMDVIState *s)
+{
+    uint64_t val = amdvi_readq(s, AMDVI_MMIO_EVENT_TAIL);
+    s->evtlog_tail = val & AMDVI_MMIO_EVTLOG_TAIL_MASK;
+}
+
+static inline void amdvi_handle_evthead_write(AMDVIState *s)
+{
+    uint64_t val = amdvi_readq(s, AMDVI_MMIO_EVENT_HEAD);
+    s->evtlog_head = val & AMDVI_MMIO_EVTLOG_HEAD_MASK;
+}
+
+static inline void amdvi_handle_pprbase_write(AMDVIState *s)
+{
+    uint64_t val = amdvi_readq(s, AMDVI_MMIO_PPR_BASE);
+    s->ppr_log = val & AMDVI_MMIO_PPRLOG_BASE_MASK;
+    s->pprlog_len = 1UL << (amdvi_readq(s, AMDVI_MMIO_PPRLOG_SIZE_BYTE)
+                    & AMDVI_MMIO_PPRLOG_SIZE_MASK);
+}
+
+static inline void amdvi_handle_pprhead_write(AMDVIState *s)
+{
+    uint64_t val = amdvi_readq(s, AMDVI_MMIO_PPR_HEAD);
+    s->pprlog_head = val & AMDVI_MMIO_PPRLOG_HEAD_MASK;
+}
+
+static inline void amdvi_handle_pprtail_write(AMDVIState *s)
+{
+    uint64_t val = amdvi_readq(s, AMDVI_MMIO_PPR_TAIL);
+    s->pprlog_tail = val & AMDVI_MMIO_PPRLOG_TAIL_MASK;
+}
+
+/* FIXME: something might go wrong if System Software writes in chunks
+ * of one byte but linux writes in chunks of 4 bytes so currently it
+ * works correctly with linux but will definitely be busted if software
+ * reads/writes 8 bytes
+ */
+static void amdvi_mmio_reg_write(AMDVIState *s, unsigned size, uint64_t val,
+                                 hwaddr addr)
+{
+    if (size == 2) {
+        amdvi_writew(s, addr, val);
+    } else if (size == 4) {
+        amdvi_writel(s, addr, val);
+    } else if (size == 8) {
+        amdvi_writeq(s, addr, val);
+    }
+}
+
+static void amdvi_mmio_write(void *opaque, hwaddr addr, uint64_t val,
+                             unsigned size)
+{
+    AMDVIState *s = opaque;
+    unsigned long offset = addr & 0x07;
+
+    if (addr + size > AMDVI_MMIO_SIZE) {
+        trace_amdvi_mmio_write("error: addr outside region: max ",
+                (uint64_t)AMDVI_MMIO_SIZE, size, val, offset);
+        return;
+    }
+
+    amdvi_mmio_trace(addr, size);
+    switch (addr & ~0x07) {
+    case AMDVI_MMIO_CONTROL:
+        amdvi_mmio_reg_write(s, size, val, addr);
+        amdvi_handle_control_write(s);
+        break;
+    case AMDVI_MMIO_DEVICE_TABLE:
+        amdvi_mmio_reg_write(s, size, val, addr);
+       /*  set device table address
+        *   This also suffers from inability to tell whether software
+        *   is done writing
+        */
+        if (offset || (size == 8)) {
+            amdvi_handle_devtab_write(s);
+        }
+        break;
+    case AMDVI_MMIO_COMMAND_HEAD:
+        amdvi_mmio_reg_write(s, size, val, addr);
+        amdvi_handle_cmdhead_write(s);
+        break;
+    case AMDVI_MMIO_COMMAND_BASE:
+        amdvi_mmio_reg_write(s, size, val, addr);
+        /* FIXME - make sure System Software has finished writing incase
+         * it writes in chucks less than 8 bytes in a robust way.As for
+         * now, this hacks works for the linux driver
+         */
+        if (offset || (size == 8)) {
+            amdvi_handle_cmdbase_write(s);
+        }
+        break;
+    case AMDVI_MMIO_COMMAND_TAIL:
+        amdvi_mmio_reg_write(s, size, val, addr);
+        amdvi_handle_cmdtail_write(s);
+        break;
+    case AMDVI_MMIO_EVENT_BASE:
+        amdvi_mmio_reg_write(s, size, val, addr);
+        amdvi_handle_evtbase_write(s);
+        break;
+    case AMDVI_MMIO_EVENT_HEAD:
+        amdvi_mmio_reg_write(s, size, val, addr);
+        amdvi_handle_evthead_write(s);
+        break;
+    case AMDVI_MMIO_EVENT_TAIL:
+        amdvi_mmio_reg_write(s, size, val, addr);
+        amdvi_handle_evttail_write(s);
+        break;
+    case AMDVI_MMIO_EXCL_LIMIT:
+        amdvi_mmio_reg_write(s, size, val, addr);
+        amdvi_handle_excllim_write(s);
+        break;
+        /* PPR log base - unused for now */
+    case AMDVI_MMIO_PPR_BASE:
+        amdvi_mmio_reg_write(s, size, val, addr);
+        amdvi_handle_pprbase_write(s);
+        break;
+        /* PPR log head - also unused for now */
+    case AMDVI_MMIO_PPR_HEAD:
+        amdvi_mmio_reg_write(s, size, val, addr);
+        amdvi_handle_pprhead_write(s);
+        break;
+        /* PPR log tail - unused for now */
+    case AMDVI_MMIO_PPR_TAIL:
+        amdvi_mmio_reg_write(s, size, val, addr);
+        amdvi_handle_pprtail_write(s);
+        break;
+    }
+}
+
+static inline uint64_t amdvi_get_perms(uint64_t entry)
+{
+    return (entry & (AMDVI_DEV_PERM_READ | AMDVI_DEV_PERM_WRITE)) >>
+           AMDVI_DEV_PERM_SHIFT;
+}
+
+/* a valid entry should have V = 1 and reserved bits honoured */
+static bool amdvi_validate_dte(AMDVIState *s, uint16_t devid,
+                               uint64_t *dte)
+{
+    if ((dte[0] & AMDVI_DTE_LOWER_QUAD_RESERVED)
+        || (dte[1] & AMDVI_DTE_MIDDLE_QUAD_RESERVED)
+        || (dte[2] & AMDVI_DTE_UPPER_QUAD_RESERVED) || dte[3]) {
+        amdvi_log_illegaldevtab_error(s, devid,
+                                      s->devtab +
+                                      devid * AMDVI_DEVTAB_ENTRY_SIZE, 0);
+        return false;
+    }
+
+    return dte[0] & AMDVI_DEV_VALID;
+}
+
+/* get a device table entry given the devid */
+static bool amdvi_get_dte(AMDVIState *s, int devid, uint64_t *entry)
+{
+    uint32_t offset = devid * AMDVI_DEVTAB_ENTRY_SIZE;
+
+    if (dma_memory_read(&address_space_memory, s->devtab + offset, entry,
+        AMDVI_DEVTAB_ENTRY_SIZE)) {
+        trace_amdvi_dte_get_fail(s->devtab, offset);
+        /* log error accessing dte */
+        amdvi_log_devtab_error(s, devid, s->devtab + offset, 0);
+        return false;
+    }
+
+    *entry = le64_to_cpu(*entry);
+    if (!amdvi_validate_dte(s, devid, entry)) {
+        trace_amdvi_invalid_dte(entry[0]);
+        return false;
+    }
+
+    return true;
+}
+
+/* get pte translation mode */
+static inline uint8_t get_pte_translation_mode(uint64_t pte)
+{
+    return (pte >> AMDVI_DEV_MODE_RSHIFT) & AMDVI_DEV_MODE_MASK;
+}
+
+static inline uint64_t pte_override_page_mask(uint64_t pte)
+{
+    uint8_t page_mask = 12;
+    uint64_t addr = (pte & AMDVI_DEV_PT_ROOT_MASK) ^ AMDVI_DEV_PT_ROOT_MASK;
+    /* find the first zero bit */
+    while (addr & 1) {
+        page_mask++;
+        addr = addr >> 1;
+    }
+
+    return ~((1ULL << page_mask) - 1);
+}
+
+static inline uint64_t pte_get_page_mask(uint64_t oldlevel)
+{
+    return ~((1UL << ((oldlevel * 9) + 3)) - 1);
+}
+
+static inline uint64_t amdvi_get_pte_entry(AMDVIState *s, uint64_t pte_addr,
+                                          uint16_t devid)
+{
+    uint64_t pte;
+
+    if (dma_memory_read(&address_space_memory, pte_addr, &pte, sizeof(pte))) {
+        trace_amdvi_get_pte_hwerror(pte_addr);
+        amdvi_log_pagetab_error(s, devid, pte_addr, 0);
+        pte = 0;
+        return pte;
+    }
+
+    pte = le64_to_cpu(pte);
+    return pte;
+}
+
+static void amdvi_page_walk(AMDVIAddressSpace *as, uint64_t *dte,
+                            IOMMUTLBEntry *ret, unsigned perms,
+                            hwaddr addr)
+{
+    unsigned level, present, pte_perms, oldlevel;
+    uint64_t pte = dte[0], pte_addr, page_mask;
+
+    /* make sure the DTE has TV = 1 */
+    if (pte & AMDVI_DEV_TRANSLATION_VALID) {
+        level = get_pte_translation_mode(pte);
+        if (level >= 7) {
+            trace_amdvi_mode_invalid(level, addr);
+            return;
+        }
+        if (level == 0) {
+            goto no_remap;
+        }
+
+        /* we are at the leaf page table or page table encodes a huge page */
+        while (level > 0) {
+            pte_perms = amdvi_get_perms(pte);
+            present = pte & 1;
+            if (!present || perms != (perms & pte_perms)) {
+                amdvi_page_fault(as->iommu_state, as->devfn, addr, perms);
+                trace_amdvi_page_fault(addr);
+                return;
+            }
+
+            /* go to the next lower level */
+            pte_addr = pte & AMDVI_DEV_PT_ROOT_MASK;
+            /* add offset and load pte */
+            pte_addr += ((addr >> (3 + 9 * level)) & 0x1FF) << 3;
+            pte = amdvi_get_pte_entry(as->iommu_state, pte_addr, as->devfn);
+            if (!pte) {
+                return;
+            }
+            oldlevel = level;
+            level = get_pte_translation_mode(pte);
+            if (level == 0x7) {
+                break;
+            }
+        }
+
+        if (level == 0x7) {
+            page_mask = pte_override_page_mask(pte);
+        } else {
+            page_mask = pte_get_page_mask(oldlevel);
+        }
+
+        /* get access permissions from pte */
+        ret->iova = addr & page_mask;
+        ret->translated_addr = (pte & AMDVI_DEV_PT_ROOT_MASK) & page_mask;
+        ret->addr_mask = ~page_mask;
+        ret->perm = amdvi_get_perms(pte);
+        return;
+    }
+no_remap:
+    ret->iova = addr & AMDVI_PAGE_MASK_4K;
+    ret->translated_addr = addr & AMDVI_PAGE_MASK_4K;
+    ret->addr_mask = ~AMDVI_PAGE_MASK_4K;
+    ret->perm = amdvi_get_perms(pte);
+}
+
+static void amdvi_do_translate(AMDVIAddressSpace *as, hwaddr addr,
+                               bool is_write, IOMMUTLBEntry *ret)
+{
+    AMDVIState *s = as->iommu_state;
+    uint16_t devid = PCI_BUILD_BDF(as->bus_num, as->devfn);
+    AMDVIIOTLBEntry *iotlb_entry = amdvi_iotlb_lookup(s, addr, devid);
+    uint64_t entry[4];
+
+    if (iotlb_entry) {
+        trace_amdvi_iotlb_hit(PCI_BUS_NUM(devid), PCI_SLOT(devid),
+                PCI_FUNC(devid), addr, iotlb_entry->translated_addr);
+        ret->iova = addr & ~iotlb_entry->page_mask;
+        ret->translated_addr = iotlb_entry->translated_addr;
+        ret->addr_mask = iotlb_entry->page_mask;
+        ret->perm = iotlb_entry->perms;
+        return;
+    }
+
+    /* devices with V = 0 are not translated */
+    if (!amdvi_get_dte(s, devid, entry)) {
+        goto out;
+    }
+
+    amdvi_page_walk(as, entry, ret,
+                    is_write ? AMDVI_PERM_WRITE : AMDVI_PERM_READ, addr);
+
+    amdvi_update_iotlb(s, devid, addr, *ret,
+                       entry[1] & AMDVI_DEV_DOMID_ID_MASK);
+    return;
+
+out:
+    ret->iova = addr & AMDVI_PAGE_MASK_4K;
+    ret->translated_addr = addr & AMDVI_PAGE_MASK_4K;
+    ret->addr_mask = ~AMDVI_PAGE_MASK_4K;
+    ret->perm = IOMMU_RW;
+}
+
+static inline bool amdvi_is_interrupt_addr(hwaddr addr)
+{
+    return addr >= AMDVI_INT_ADDR_FIRST && addr <= AMDVI_INT_ADDR_LAST;
+}
+
+static IOMMUTLBEntry amdvi_translate(MemoryRegion *iommu, hwaddr addr,
+                                     bool is_write)
+{
+    AMDVIAddressSpace *as = container_of(iommu, AMDVIAddressSpace, iommu);
+    AMDVIState *s = as->iommu_state;
+    IOMMUTLBEntry ret = {
+        .target_as = &address_space_memory,
+        .iova = addr,
+        .translated_addr = 0,
+        .addr_mask = ~(hwaddr)0,
+        .perm = IOMMU_NONE
+    };
+
+    if (!s->enabled) {
+        /* AMDVI disabled - corresponds to iommu=off not
+         * failure to provide any parameter
+         */
+        ret.iova = addr & AMDVI_PAGE_MASK_4K;
+        ret.translated_addr = addr & AMDVI_PAGE_MASK_4K;
+        ret.addr_mask = ~AMDVI_PAGE_MASK_4K;
+        ret.perm = IOMMU_RW;
+        return ret;
+    } else if (amdvi_is_interrupt_addr(addr)) {
+        ret.iova = addr & AMDVI_PAGE_MASK_4K;
+        ret.translated_addr = addr & AMDVI_PAGE_MASK_4K;
+        ret.addr_mask = ~AMDVI_PAGE_MASK_4K;
+        ret.perm = IOMMU_WO;
+        return ret;
+    }
+
+    amdvi_do_translate(as, addr, is_write, &ret);
+    trace_amdvi_translation_result(as->bus_num, PCI_SLOT(as->devfn),
+            PCI_FUNC(as->devfn), addr, ret.translated_addr);
+    return ret;
+}
+
+static AddressSpace *amdvi_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
+{
+    AMDVIState *s = opaque;
+    AMDVIAddressSpace **iommu_as;
+    int bus_num = pci_bus_num(bus);
+
+    iommu_as = s->address_spaces[bus_num];
+
+    /* allocate memory during the first run */
+    if (!iommu_as) {
+        iommu_as = g_malloc0(sizeof(AMDVIAddressSpace *) * PCI_DEVFN_MAX);
+        s->address_spaces[bus_num] = iommu_as;
+    }
+
+    /* set up AMD-Vi region */
+    if (!iommu_as[devfn]) {
+        iommu_as[devfn] = g_malloc0(sizeof(AMDVIAddressSpace));
+        iommu_as[devfn]->bus_num = (uint8_t)bus_num;
+        iommu_as[devfn]->devfn = (uint8_t)devfn;
+        iommu_as[devfn]->iommu_state = s;
+
+        memory_region_init_iommu(&iommu_as[devfn]->iommu, OBJECT(s),
+                                 &s->iommu_ops, "amd-iommu", UINT64_MAX);
+        address_space_init(&iommu_as[devfn]->as, &iommu_as[devfn]->iommu,
+                           "amd-iommu");
+    }
+    return &iommu_as[devfn]->as;
+}
+
+static const MemoryRegionOps mmio_mem_ops = {
+    .read = amdvi_mmio_read,
+    .write = amdvi_mmio_write,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+    .impl = {
+        .min_access_size = 1,
+        .max_access_size = 8,
+        .unaligned = false,
+    },
+    .valid = {
+        .min_access_size = 1,
+        .max_access_size = 8,
+    }
+};
+
+static void amdvi_iommu_notify_started(MemoryRegion *iommu)
+{
+    AMDVIAddressSpace *as = container_of(iommu, AMDVIAddressSpace, iommu);
+
+    hw_error("device %02x.%02x.%x requires iommu notifier which is not "
+             "currently supported", as->bus_num, PCI_SLOT(as->devfn),
+             PCI_FUNC(as->devfn));
+}
+
+static void amdvi_init(AMDVIState *s)
+{
+    amdvi_iotlb_reset(s);
+
+    s->iommu_ops.translate = amdvi_translate;
+    s->iommu_ops.notify_started = amdvi_iommu_notify_started;
+    s->devtab_len = 0;
+    s->cmdbuf_len = 0;
+    s->cmdbuf_head = 0;
+    s->cmdbuf_tail = 0;
+    s->evtlog_head = 0;
+    s->evtlog_tail = 0;
+    s->excl_enabled = false;
+    s->excl_allow = false;
+    s->mmio_enabled = false;
+    s->enabled = false;
+    s->ats_enabled = false;
+    s->cmdbuf_enabled = false;
+
+    /* reset MMIO */
+    memset(s->mmior, 0, AMDVI_MMIO_SIZE);
+    amdvi_set_quad(s, AMDVI_MMIO_EXT_FEATURES, AMDVI_EXT_FEATURES,
+            0xffffffffffffffef, 0);
+    amdvi_set_quad(s, AMDVI_MMIO_STATUS, 0, 0x98, 0x67);
+
+    /* reset device ident */
+    pci_config_set_vendor_id(s->pci.dev.config, PCI_VENDOR_ID_AMD);
+    pci_config_set_prog_interface(s->pci.dev.config, 00);
+    pci_config_set_device_id(s->pci.dev.config, s->devid);
+    pci_config_set_class(s->pci.dev.config, 0x0806);
+
+    /* reset AMDVI specific capabilities, all r/o */
+    pci_set_long(s->pci.dev.config + s->capab_offset, AMDVI_CAPAB_FEATURES);
+    pci_set_long(s->pci.dev.config + s->capab_offset + AMDVI_CAPAB_BAR_LOW,
+                 s->mmio.addr & ~(0xffff0000));
+    pci_set_long(s->pci.dev.config + s->capab_offset + AMDVI_CAPAB_BAR_HIGH,
+                (s->mmio.addr & ~(0xffff)) >> 16);
+    pci_set_long(s->pci.dev.config + s->capab_offset + AMDVI_CAPAB_RANGE,
+                 0xff000000);
+    pci_set_long(s->pci.dev.config + s->capab_offset + AMDVI_CAPAB_MISC, 0);
+    pci_set_long(s->pci.dev.config + s->capab_offset + AMDVI_CAPAB_MISC,
+            AMDVI_MAX_PH_ADDR | AMDVI_MAX_GVA_ADDR | AMDVI_MAX_VA_ADDR);
+}
+
+static void amdvi_reset(DeviceState *dev)
+{
+    AMDVIState *s = AMD_IOMMU_DEVICE(dev);
+
+    msi_reset(&s->pci.dev);
+    amdvi_init(s);
+}
+
+static void amdvi_realize(DeviceState *dev, Error **err)
+{
+    AMDVIState *s = AMD_IOMMU_DEVICE(dev);
+    X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(dev);
+    PCIBus *bus = PC_MACHINE(qdev_get_machine())->bus;
+    s->iotlb = g_hash_table_new_full(amdvi_uint64_hash,
+                                     amdvi_uint64_equal, g_free, g_free);
+
+    /* This device should take care of IOMMU PCI properties */
+    x86_iommu->type = TYPE_AMD;
+    qdev_set_parent_bus(DEVICE(&s->pci), &bus->qbus);
+    object_property_set_bool(OBJECT(&s->pci), true, "realized", err);
+    s->capab_offset = pci_add_capability(&s->pci.dev, AMDVI_CAPAB_ID_SEC, 0,
+                                         AMDVI_CAPAB_SIZE);
+    pci_add_capability(&s->pci.dev, PCI_CAP_ID_MSI, 0, AMDVI_CAPAB_REG_SIZE);
+    pci_add_capability(&s->pci.dev, PCI_CAP_ID_HT, 0, AMDVI_CAPAB_REG_SIZE);
+
+    /* set up MMIO */
+    memory_region_init_io(&s->mmio, OBJECT(s), &mmio_mem_ops, s, "amdvi-mmio",
+                          AMDVI_MMIO_SIZE);
+
+    sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->mmio);
+    sysbus_mmio_map(SYS_BUS_DEVICE(s), 0, AMDVI_BASE_ADDR);
+    pci_setup_iommu(bus, amdvi_host_dma_iommu, s);
+    s->devid = object_property_get_int(OBJECT(&s->pci), "addr", err);
+    msi_init(&s->pci.dev, 0, 1, true, false, err);
+    amdvi_init(s);
+}
+
+static const VMStateDescription vmstate_amdvi = {
+    .name = "amd-iommu",
+    .unmigratable = 1
+};
+
+static void amdvi_instance_init(Object *klass)
+{
+    AMDVIState *s = AMD_IOMMU_DEVICE(klass);
+
+    object_initialize(&s->pci, sizeof(s->pci), TYPE_AMD_IOMMU_PCI);
+}
+
+static void amdvi_class_init(ObjectClass *klass, void* data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    X86IOMMUClass *dc_class = X86_IOMMU_CLASS(klass);
+
+    dc->reset = amdvi_reset;
+    dc->vmsd = &vmstate_amdvi;
+    dc->hotpluggable = false;
+    dc_class->realize = amdvi_realize;
+}
+
+static const TypeInfo amdvi = {
+    .name = TYPE_AMD_IOMMU_DEVICE,
+    .parent = TYPE_X86_IOMMU_DEVICE,
+    .instance_size = sizeof(AMDVIState),
+    .instance_init = amdvi_instance_init,
+    .class_init = amdvi_class_init
+};
+
+static const TypeInfo amdviPCI = {
+    .name = "AMDVI-PCI",
+    .parent = TYPE_PCI_DEVICE,
+    .instance_size = sizeof(AMDVIPCIState),
+};
+
+static void amdviPCI_register_types(void)
+{
+    type_register_static(&amdviPCI);
+    type_register_static(&amdvi);
+}
+
+type_init(amdviPCI_register_types);
diff --git a/hw/i386/amd_iommu.h b/hw/i386/amd_iommu.h
new file mode 100644
index 0000000000..884926e9e7
--- /dev/null
+++ b/hw/i386/amd_iommu.h
@@ -0,0 +1,289 @@
+/*
+ * QEMU emulation of an AMD IOMMU (AMD-Vi)
+ *
+ * Copyright (C) 2011 Eduard - Gabriel Munteanu
+ * Copyright (C) 2015 David Kiarie, <davidkiarie4@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef AMD_IOMMU_H_
+#define AMD_IOMMU_H_
+
+#include "hw/hw.h"
+#include "hw/pci/pci.h"
+#include "hw/pci/msi.h"
+#include "hw/sysbus.h"
+#include "sysemu/dma.h"
+#include "hw/i386/pc.h"
+#include "hw/pci/pci_bus.h"
+#include "hw/i386/x86-iommu.h"
+
+/* Capability registers */
+#define AMDVI_CAPAB_BAR_LOW           0x04
+#define AMDVI_CAPAB_BAR_HIGH          0x08
+#define AMDVI_CAPAB_RANGE             0x0C
+#define AMDVI_CAPAB_MISC              0x10
+
+#define AMDVI_CAPAB_SIZE              0x18
+#define AMDVI_CAPAB_REG_SIZE          0x04
+
+/* Capability header data */
+#define AMDVI_CAPAB_ID_SEC            0xf
+#define AMDVI_CAPAB_FLAT_EXT          (1 << 28)
+#define AMDVI_CAPAB_EFR_SUP           (1 << 27)
+#define AMDVI_CAPAB_FLAG_NPCACHE      (1 << 26)
+#define AMDVI_CAPAB_FLAG_HTTUNNEL     (1 << 25)
+#define AMDVI_CAPAB_FLAG_IOTLBSUP     (1 << 24)
+#define AMDVI_CAPAB_INIT_TYPE         (3 << 16)
+
+/* No. of used MMIO registers */
+#define AMDVI_MMIO_REGS_HIGH  8
+#define AMDVI_MMIO_REGS_LOW   7
+
+/* MMIO registers */
+#define AMDVI_MMIO_DEVICE_TABLE       0x0000
+#define AMDVI_MMIO_COMMAND_BASE       0x0008
+#define AMDVI_MMIO_EVENT_BASE         0x0010
+#define AMDVI_MMIO_CONTROL            0x0018
+#define AMDVI_MMIO_EXCL_BASE          0x0020
+#define AMDVI_MMIO_EXCL_LIMIT         0x0028
+#define AMDVI_MMIO_EXT_FEATURES       0x0030
+#define AMDVI_MMIO_COMMAND_HEAD       0x2000
+#define AMDVI_MMIO_COMMAND_TAIL       0x2008
+#define AMDVI_MMIO_EVENT_HEAD         0x2010
+#define AMDVI_MMIO_EVENT_TAIL         0x2018
+#define AMDVI_MMIO_STATUS             0x2020
+#define AMDVI_MMIO_PPR_BASE           0x0038
+#define AMDVI_MMIO_PPR_HEAD           0x2030
+#define AMDVI_MMIO_PPR_TAIL           0x2038
+
+#define AMDVI_MMIO_SIZE               0x4000
+
+#define AMDVI_MMIO_DEVTAB_SIZE_MASK   ((1ULL << 12) - 1)
+#define AMDVI_MMIO_DEVTAB_BASE_MASK   (((1ULL << 52) - 1) & ~ \
+                                       AMDVI_MMIO_DEVTAB_SIZE_MASK)
+#define AMDVI_MMIO_DEVTAB_ENTRY_SIZE  32
+#define AMDVI_MMIO_DEVTAB_SIZE_UNIT   4096
+
+/* some of this are similar but just for readability */
+#define AMDVI_MMIO_CMDBUF_SIZE_BYTE       (AMDVI_MMIO_COMMAND_BASE + 7)
+#define AMDVI_MMIO_CMDBUF_SIZE_MASK       0x0f
+#define AMDVI_MMIO_CMDBUF_BASE_MASK       AMDVI_MMIO_DEVTAB_BASE_MASK
+#define AMDVI_MMIO_CMDBUF_HEAD_MASK       (((1ULL << 19) - 1) & ~0x0f)
+#define AMDVI_MMIO_CMDBUF_TAIL_MASK       AMDVI_MMIO_EVTLOG_HEAD_MASK
+
+#define AMDVI_MMIO_EVTLOG_SIZE_BYTE       (AMDVI_MMIO_EVENT_BASE + 7)
+#define AMDVI_MMIO_EVTLOG_SIZE_MASK       AMDVI_MMIO_CMDBUF_SIZE_MASK
+#define AMDVI_MMIO_EVTLOG_BASE_MASK       AMDVI_MMIO_CMDBUF_BASE_MASK
+#define AMDVI_MMIO_EVTLOG_HEAD_MASK       (((1ULL << 19) - 1) & ~0x0f)
+#define AMDVI_MMIO_EVTLOG_TAIL_MASK       AMDVI_MMIO_EVTLOG_HEAD_MASK
+
+#define AMDVI_MMIO_PPRLOG_SIZE_BYTE       (AMDVI_MMIO_EVENT_BASE + 7)
+#define AMDVI_MMIO_PPRLOG_HEAD_MASK       AMDVI_MMIO_EVTLOG_HEAD_MASK
+#define AMDVI_MMIO_PPRLOG_TAIL_MASK       AMDVI_MMIO_EVTLOG_HEAD_MASK
+#define AMDVI_MMIO_PPRLOG_BASE_MASK       AMDVI_MMIO_EVTLOG_BASE_MASK
+#define AMDVI_MMIO_PPRLOG_SIZE_MASK       AMDVI_MMIO_EVTLOG_SIZE_MASK
+
+#define AMDVI_MMIO_EXCL_ENABLED_MASK      (1ULL << 0)
+#define AMDVI_MMIO_EXCL_ALLOW_MASK        (1ULL << 1)
+#define AMDVI_MMIO_EXCL_LIMIT_MASK        AMDVI_MMIO_DEVTAB_BASE_MASK
+#define AMDVI_MMIO_EXCL_LIMIT_LOW         0xfff
+
+/* mmio control register flags */
+#define AMDVI_MMIO_CONTROL_AMDVIEN        (1ULL << 0)
+#define AMDVI_MMIO_CONTROL_HTTUNEN        (1ULL << 1)
+#define AMDVI_MMIO_CONTROL_EVENTLOGEN     (1ULL << 2)
+#define AMDVI_MMIO_CONTROL_EVENTINTEN     (1ULL << 3)
+#define AMDVI_MMIO_CONTROL_COMWAITINTEN   (1ULL << 4)
+#define AMDVI_MMIO_CONTROL_CMDBUFLEN      (1ULL << 12)
+
+/* MMIO status register bits */
+#define AMDVI_MMIO_STATUS_CMDBUF_RUN  (1 << 4)
+#define AMDVI_MMIO_STATUS_EVT_RUN     (1 << 3)
+#define AMDVI_MMIO_STATUS_COMP_INT    (1 << 2)
+#define AMDVI_MMIO_STATUS_EVT_OVF     (1 << 0)
+
+#define AMDVI_CMDBUF_ID_BYTE              0x07
+#define AMDVI_CMDBUF_ID_RSHIFT            4
+
+#define AMDVI_CMD_COMPLETION_WAIT         0x01
+#define AMDVI_CMD_INVAL_DEVTAB_ENTRY      0x02
+#define AMDVI_CMD_INVAL_AMDVI_PAGES       0x03
+#define AMDVI_CMD_INVAL_IOTLB_PAGES       0x04
+#define AMDVI_CMD_INVAL_INTR_TABLE        0x05
+#define AMDVI_CMD_PREFETCH_AMDVI_PAGES    0x06
+#define AMDVI_CMD_COMPLETE_PPR_REQUEST    0x07
+#define AMDVI_CMD_INVAL_AMDVI_ALL         0x08
+
+#define AMDVI_DEVTAB_ENTRY_SIZE           32
+
+/* Device table entry bits 0:63 */
+#define AMDVI_DEV_VALID                   (1ULL << 0)
+#define AMDVI_DEV_TRANSLATION_VALID       (1ULL << 1)
+#define AMDVI_DEV_MODE_MASK               0x7
+#define AMDVI_DEV_MODE_RSHIFT             9
+#define AMDVI_DEV_PT_ROOT_MASK            0xffffffffff000
+#define AMDVI_DEV_PT_ROOT_RSHIFT          12
+#define AMDVI_DEV_PERM_SHIFT              61
+#define AMDVI_DEV_PERM_READ               (1ULL << 61)
+#define AMDVI_DEV_PERM_WRITE              (1ULL << 62)
+
+/* Device table entry bits 64:127 */
+#define AMDVI_DEV_DOMID_ID_MASK          ((1ULL << 16) - 1)
+
+/* Event codes and flags, as stored in the info field */
+#define AMDVI_EVENT_ILLEGAL_DEVTAB_ENTRY  (0x1U << 12)
+#define AMDVI_EVENT_IOPF                  (0x2U << 12)
+#define   AMDVI_EVENT_IOPF_I              (1U << 3)
+#define AMDVI_EVENT_DEV_TAB_HW_ERROR      (0x3U << 12)
+#define AMDVI_EVENT_PAGE_TAB_HW_ERROR     (0x4U << 12)
+#define AMDVI_EVENT_ILLEGAL_COMMAND_ERROR (0x5U << 12)
+#define AMDVI_EVENT_COMMAND_HW_ERROR      (0x6U << 12)
+
+#define AMDVI_EVENT_LEN                  16
+#define AMDVI_PERM_READ             (1 << 0)
+#define AMDVI_PERM_WRITE            (1 << 1)
+
+#define AMDVI_FEATURE_PREFETCH            (1ULL << 0) /* page prefetch       */
+#define AMDVI_FEATURE_PPR                 (1ULL << 1) /* PPR Support         */
+#define AMDVI_FEATURE_GT                  (1ULL << 4) /* Guest Translation   */
+#define AMDVI_FEATURE_IA                  (1ULL << 6) /* inval all support   */
+#define AMDVI_FEATURE_GA                  (1ULL << 7) /* guest VAPIC support */
+#define AMDVI_FEATURE_HE                  (1ULL << 8) /* hardware error regs */
+#define AMDVI_FEATURE_PC                  (1ULL << 9) /* Perf counters       */
+
+/* reserved DTE bits */
+#define AMDVI_DTE_LOWER_QUAD_RESERVED  0x80300000000000fc
+#define AMDVI_DTE_MIDDLE_QUAD_RESERVED 0x0000000000000100
+#define AMDVI_DTE_UPPER_QUAD_RESERVED  0x08f0000000000000
+
+/* AMDVI paging mode */
+#define AMDVI_GATS_MODE                 (6ULL <<  12)
+#define AMDVI_HATS_MODE                 (6ULL <<  10)
+
+/* IOTLB */
+#define AMDVI_IOTLB_MAX_SIZE 1024
+#define AMDVI_DEVID_SHIFT    36
+
+/* extended feature support */
+#define AMDVI_EXT_FEATURES (AMDVI_FEATURE_PREFETCH | AMDVI_FEATURE_PPR | \
+        AMDVI_FEATURE_IA | AMDVI_FEATURE_GT | AMDVI_FEATURE_HE | \
+        AMDVI_GATS_MODE | AMDVI_HATS_MODE)
+
+/* capabilities header */
+#define AMDVI_CAPAB_FEATURES (AMDVI_CAPAB_FLAT_EXT | \
+        AMDVI_CAPAB_FLAG_NPCACHE | AMDVI_CAPAB_FLAG_IOTLBSUP \
+        | AMDVI_CAPAB_ID_SEC | AMDVI_CAPAB_INIT_TYPE | \
+        AMDVI_CAPAB_FLAG_HTTUNNEL |  AMDVI_CAPAB_EFR_SUP)
+
+/* AMDVI default address */
+#define AMDVI_BASE_ADDR 0xfed80000
+
+/* page management constants */
+#define AMDVI_PAGE_SHIFT 12
+#define AMDVI_PAGE_SIZE  (1ULL << AMDVI_PAGE_SHIFT)
+
+#define AMDVI_PAGE_SHIFT_4K 12
+#define AMDVI_PAGE_MASK_4K  (~((1ULL << AMDVI_PAGE_SHIFT_4K) - 1))
+
+#define AMDVI_MAX_VA_ADDR          (48UL << 5)
+#define AMDVI_MAX_PH_ADDR          (40UL << 8)
+#define AMDVI_MAX_GVA_ADDR         (48UL << 15)
+
+/* Completion Wait data size */
+#define AMDVI_COMPLETION_DATA_SIZE    8
+
+#define AMDVI_COMMAND_SIZE   16
+/* Completion Wait data size */
+#define AMDVI_COMPLETION_DATA_SIZE    8
+
+#define AMDVI_COMMAND_SIZE   16
+
+#define AMDVI_INT_ADDR_FIRST 0xfee00000
+#define AMDVI_INT_ADDR_LAST  0xfeefffff
+
+#define TYPE_AMD_IOMMU_DEVICE "amd-iommu"
+#define AMD_IOMMU_DEVICE(obj)\
+    OBJECT_CHECK(AMDVIState, (obj), TYPE_AMD_IOMMU_DEVICE)
+
+#define TYPE_AMD_IOMMU_PCI "AMDVI-PCI"
+
+typedef struct AMDVIAddressSpace AMDVIAddressSpace;
+
+/* functions to steal PCI config space */
+typedef struct AMDVIPCIState {
+    PCIDevice dev;               /* The PCI device itself        */
+} AMDVIPCIState;
+
+typedef struct AMDVIState {
+    X86IOMMUState iommu;        /* IOMMU bus device             */
+    AMDVIPCIState pci;          /* IOMMU PCI device             */
+
+    uint32_t version;
+    uint32_t capab_offset;       /* capability offset pointer    */
+
+    uint64_t mmio_addr;
+
+    uint32_t devid;              /* auto-assigned devid          */
+
+    bool enabled;                /* IOMMU enabled                */
+    bool ats_enabled;            /* address translation enabled  */
+    bool cmdbuf_enabled;         /* command buffer enabled       */
+    bool evtlog_enabled;         /* event log enabled            */
+    bool excl_enabled;
+
+    hwaddr devtab;               /* base address device table    */
+    size_t devtab_len;           /* device table length          */
+
+    hwaddr cmdbuf;               /* command buffer base address  */
+    uint64_t cmdbuf_len;         /* command buffer length        */
+    uint32_t cmdbuf_head;        /* current IOMMU read position  */
+    uint32_t cmdbuf_tail;        /* next Software write position */
+    bool completion_wait_intr;
+
+    hwaddr evtlog;               /* base address event log       */
+    bool evtlog_intr;
+    uint32_t evtlog_len;         /* event log length             */
+    uint32_t evtlog_head;        /* current IOMMU write position */
+    uint32_t evtlog_tail;        /* current Software read position */
+
+    /* unused for now */
+    hwaddr excl_base;            /* base DVA - IOMMU exclusion range */
+    hwaddr excl_limit;           /* limit of IOMMU exclusion range   */
+    bool excl_allow;             /* translate accesses to the exclusion range */
+    bool excl_enable;            /* exclusion range enabled          */
+
+    hwaddr ppr_log;              /* base address ppr log */
+    uint32_t pprlog_len;         /* ppr log len  */
+    uint32_t pprlog_head;        /* ppr log head */
+    uint32_t pprlog_tail;        /* ppr log tail */
+
+    MemoryRegion mmio;                 /* MMIO region                  */
+    uint8_t mmior[AMDVI_MMIO_SIZE];    /* read/write MMIO              */
+    uint8_t w1cmask[AMDVI_MMIO_SIZE];  /* read/write 1 clear mask      */
+    uint8_t romask[AMDVI_MMIO_SIZE];   /* MMIO read/only mask          */
+    bool mmio_enabled;
+
+    /* IOMMU function */
+    MemoryRegionIOMMUOps iommu_ops;
+
+    /* for each served device */
+    AMDVIAddressSpace **address_spaces[PCI_BUS_MAX];
+
+    /* IOTLB */
+    GHashTable *iotlb;
+} AMDVIState;
+
+#endif
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 28c31a2cdf..d6e02c821a 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -2453,6 +2453,7 @@ static void vtd_realize(DeviceState *dev, Error **errp)
     X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(dev);
 
     VTD_DPRINTF(GENERAL, "");
+    x86_iommu->type = TYPE_INTEL;
     memset(s->vtd_as_by_bus_num, 0, sizeof(s->vtd_as_by_bus_num));
     memory_region_init_io(&s->csrmem, OBJECT(s), &vtd_mem_ops, s,
                           "intel_iommu", DMAR_REG_SIZE);
diff --git a/hw/i386/trace-events b/hw/i386/trace-events
index 5b99eba7b9..1938b988d9 100644
--- a/hw/i386/trace-events
+++ b/hw/i386/trace-events
@@ -13,3 +13,32 @@ mhp_pc_dimm_assigned_address(uint64_t addr) "0x%"PRIx64
 
 # hw/i386/x86-iommu.c
 x86_iommu_iec_notify(bool global, uint32_t index, uint32_t mask) "Notify IEC invalidation: global=%d index=%" PRIu32 " mask=%" PRIu32
+
+# hw/i386/amd_iommu.c
+amdvi_evntlog_fail(uint64_t addr, uint32_t head) "error: fail to write at addr 0x%"PRIx64" +  offset 0x%"PRIx32
+amdvi_cache_update(uint16_t domid, uint8_t bus, uint8_t slot, uint8_t func, uint64_t gpa, uint64_t txaddr) " update iotlb domid 0x%"PRIx16" devid: %02x:%02x.%x gpa 0x%"PRIx64" hpa 0x%"PRIx64
+amdvi_completion_wait_fail(uint64_t addr) "error: fail to write at address 0x%"PRIx64
+amdvi_mmio_write(const char *reg, uint64_t addr, unsigned size, uint64_t val, uint64_t offset) "%s write addr 0x%"PRIx64", size %u, val 0x%"PRIx64", offset 0x%"PRIx64
+amdvi_mmio_read(const char *reg, uint64_t addr, unsigned size, uint64_t offset) "%s read addr 0x%"PRIx64", size %u offset 0x%"PRIx64
+amdvi_command_error(uint64_t status) "error: Executing commands with command buffer disabled 0x%"PRIx64
+amdvi_command_read_fail(uint64_t addr, uint32_t head) "error: fail to access memory at 0x%"PRIx64" + 0x%"PRIx32
+amdvi_command_exec(uint32_t head, uint32_t tail, uint64_t buf) "command buffer head at 0x%"PRIx32" command buffer tail at 0x%"PRIx32" command buffer base at 0x%"PRIx64
+amdvi_unhandled_command(uint8_t type) "unhandled command 0x%"PRIx8
+amdvi_intr_inval(void) "Interrupt table invalidated"
+amdvi_iotlb_inval(void) "IOTLB pages invalidated"
+amdvi_prefetch_pages(void) "Pre-fetch of AMD-Vi pages requested"
+amdvi_pages_inval(uint16_t domid) "AMD-Vi pages for domain 0x%"PRIx16 " invalidated"
+amdvi_all_inval(void) "Invalidation of all AMD-Vi cache requested "
+amdvi_ppr_exec(void) "Execution of PPR queue requested "
+amdvi_devtab_inval(uint8_t bus, uint8_t slot, uint8_t func) "device table entry for devid: %02x:%02x.%x invalidated"
+amdvi_completion_wait(uint64_t addr, uint64_t data) "completion wait requested with store address 0x%"PRIx64" and store data 0x%"PRIx64
+amdvi_control_status(uint64_t val) "MMIO_STATUS state 0x%"PRIx64
+amdvi_iotlb_reset(void) "IOTLB exceed size limit - reset "
+amdvi_completion_wait_exec(uint64_t addr, uint64_t data) "completion wait requested with store address 0x%"PRIx64" and store data 0x%"PRIx64
+amdvi_dte_get_fail(uint64_t addr, uint32_t offset) "error: failed to access Device Entry devtab 0x%"PRIx64" offset 0x%"PRIx32
+amdvi_invalid_dte(uint64_t addr) "PTE entry at 0x%"PRIx64" is invalid "
+amdvi_get_pte_hwerror(uint64_t addr) "hardware error eccessing PTE at addr 0x%"PRIx64
+amdvi_mode_invalid(uint8_t level, uint64_t addr)"error: translation level 0x%"PRIx8" translating addr 0x%"PRIx64
+amdvi_page_fault(uint64_t addr) "error: page fault accessing guest physical address 0x%"PRIx64
+amdvi_iotlb_hit(uint8_t bus, uint8_t slot, uint8_t func, uint64_t addr, uint64_t txaddr) "hit iotlb devid %02x:%02x.%x gpa 0x%"PRIx64" hpa 0x%"PRIx64
+amdvi_translation_result(uint8_t bus, uint8_t slot, uint8_t func, uint64_t addr, uint64_t txaddr) "devid: %02x:%02x.%x gpa 0x%"PRIx64" hpa 0x%"PRIx64
diff --git a/hw/i386/x86-iommu.c b/hw/i386/x86-iommu.c
index ce26b2a71d..2278af7c32 100644
--- a/hw/i386/x86-iommu.c
+++ b/hw/i386/x86-iommu.c
@@ -71,6 +71,11 @@ X86IOMMUState *x86_iommu_get_default(void)
     return x86_iommu_default;
 }
 
+IommuType x86_iommu_get_type(void)
+{
+    return x86_iommu_default->type;
+}
+
 static void x86_iommu_realize(DeviceState *dev, Error **errp)
 {
     X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(dev);
@@ -79,6 +84,7 @@ static void x86_iommu_realize(DeviceState *dev, Error **errp)
     if (x86_class->realize) {
         x86_class->realize(dev, errp);
     }
+
     x86_iommu_set_default(X86_IOMMU_DEVICE(dev));
 }
 
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index fcf3358d6c..18ce333457 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -303,6 +303,10 @@ void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
 
     virtqueue_unmap_sg(vq, elem, len);
 
+    if (unlikely(vq->vdev->broken)) {
+        return;
+    }
+
     idx = (idx + vq->used_idx) % vq->vring.num;
 
     uelem.id = elem->index;
@@ -313,6 +317,12 @@ void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
 void virtqueue_flush(VirtQueue *vq, unsigned int count)
 {
     uint16_t old, new;
+
+    if (unlikely(vq->vdev->broken)) {
+        vq->inuse -= count;
+        return;
+    }
+
     /* Make sure buffer is written before we update index. */
     smp_wmb();
     trace_virtqueue_flush(vq, count);
@@ -337,9 +347,9 @@ static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
 
     /* Check it isn't doing very strange things with descriptor numbers. */
     if (num_heads > vq->vring.num) {
-        error_report("Guest moved used index from %u to %u",
+        virtio_error(vq->vdev, "Guest moved used index from %u to %u",
                      idx, vq->shadow_avail_idx);
-        exit(1);
+        return -EINVAL;
     }
     /* On success, callers read a descriptor at vq->last_avail_idx.
      * Make sure descriptor read does not bypass avail index read. */
@@ -350,45 +360,49 @@ static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
     return num_heads;
 }
 
-static unsigned int virtqueue_get_head(VirtQueue *vq, unsigned int idx)
+static bool virtqueue_get_head(VirtQueue *vq, unsigned int idx,
+                               unsigned int *head)
 {
-    unsigned int head;
-
     /* Grab the next descriptor number they're advertising, and increment
      * the index we've seen. */
-    head = vring_avail_ring(vq, idx % vq->vring.num);
+    *head = vring_avail_ring(vq, idx % vq->vring.num);
 
     /* If their number is silly, that's a fatal mistake. */
-    if (head >= vq->vring.num) {
-        error_report("Guest says index %u is available", head);
-        exit(1);
+    if (*head >= vq->vring.num) {
+        virtio_error(vq->vdev, "Guest says index %u is available", *head);
+        return false;
     }
 
-    return head;
+    return true;
 }
 
-static unsigned virtqueue_read_next_desc(VirtIODevice *vdev, VRingDesc *desc,
-                                         hwaddr desc_pa, unsigned int max)
-{
-    unsigned int next;
+enum {
+    VIRTQUEUE_READ_DESC_ERROR = -1,
+    VIRTQUEUE_READ_DESC_DONE = 0,   /* end of chain */
+    VIRTQUEUE_READ_DESC_MORE = 1,   /* more buffers in chain */
+};
 
+static int virtqueue_read_next_desc(VirtIODevice *vdev, VRingDesc *desc,
+                                    hwaddr desc_pa, unsigned int max,
+                                    unsigned int *next)
+{
     /* If this descriptor says it doesn't chain, we're done. */
     if (!(desc->flags & VRING_DESC_F_NEXT)) {
-        return max;
+        return VIRTQUEUE_READ_DESC_DONE;
     }
 
     /* Check they're not leading us off end of descriptors. */
-    next = desc->next;
+    *next = desc->next;
     /* Make sure compiler knows to grab that: we don't want it changing! */
     smp_wmb();
 
-    if (next >= max) {
-        error_report("Desc next is %u", next);
-        exit(1);
+    if (*next >= max) {
+        virtio_error(vdev, "Desc next is %u", *next);
+        return VIRTQUEUE_READ_DESC_ERROR;
     }
 
-    vring_desc_read(vdev, desc, desc_pa, next);
-    return next;
+    vring_desc_read(vdev, desc, desc_pa, *next);
+    return VIRTQUEUE_READ_DESC_MORE;
 }
 
 void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
@@ -397,33 +411,38 @@ void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
 {
     unsigned int idx;
     unsigned int total_bufs, in_total, out_total;
+    int rc;
 
     idx = vq->last_avail_idx;
 
     total_bufs = in_total = out_total = 0;
-    while (virtqueue_num_heads(vq, idx)) {
+    while ((rc = virtqueue_num_heads(vq, idx)) > 0) {
         VirtIODevice *vdev = vq->vdev;
         unsigned int max, num_bufs, indirect = 0;
         VRingDesc desc;
         hwaddr desc_pa;
-        int i;
+        unsigned int i;
 
         max = vq->vring.num;
         num_bufs = total_bufs;
-        i = virtqueue_get_head(vq, idx++);
+
+        if (!virtqueue_get_head(vq, idx++, &i)) {
+            goto err;
+        }
+
         desc_pa = vq->vring.desc;
         vring_desc_read(vdev, &desc, desc_pa, i);
 
         if (desc.flags & VRING_DESC_F_INDIRECT) {
             if (desc.len % sizeof(VRingDesc)) {
-                error_report("Invalid size for indirect buffer table");
-                exit(1);
+                virtio_error(vdev, "Invalid size for indirect buffer table");
+                goto err;
             }
 
             /* If we've got too many, that implies a descriptor loop. */
             if (num_bufs >= max) {
-                error_report("Looped descriptor");
-                exit(1);
+                virtio_error(vdev, "Looped descriptor");
+                goto err;
             }
 
             /* loop over the indirect descriptor table */
@@ -437,8 +456,8 @@ void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
         do {
             /* If we've got too many, that implies a descriptor loop. */
             if (++num_bufs > max) {
-                error_report("Looped descriptor");
-                exit(1);
+                virtio_error(vdev, "Looped descriptor");
+                goto err;
             }
 
             if (desc.flags & VRING_DESC_F_WRITE) {
@@ -449,13 +468,24 @@ void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
             if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
                 goto done;
             }
-        } while ((i = virtqueue_read_next_desc(vdev, &desc, desc_pa, max)) != max);
+
+            rc = virtqueue_read_next_desc(vdev, &desc, desc_pa, max, &i);
+        } while (rc == VIRTQUEUE_READ_DESC_MORE);
+
+        if (rc == VIRTQUEUE_READ_DESC_ERROR) {
+            goto err;
+        }
 
         if (!indirect)
             total_bufs = num_bufs;
         else
             total_bufs++;
     }
+
+    if (rc < 0) {
+        goto err;
+    }
+
 done:
     if (in_bytes) {
         *in_bytes = in_total;
@@ -463,6 +493,11 @@ done:
     if (out_bytes) {
         *out_bytes = out_total;
     }
+    return;
+
+err:
+    in_total = out_total = 0;
+    goto done;
 }
 
 int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
@@ -474,27 +509,35 @@ int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
     return in_bytes <= in_total && out_bytes <= out_total;
 }
 
-static void virtqueue_map_desc(unsigned int *p_num_sg, hwaddr *addr, struct iovec *iov,
+static bool virtqueue_map_desc(VirtIODevice *vdev, unsigned int *p_num_sg,
+                               hwaddr *addr, struct iovec *iov,
                                unsigned int max_num_sg, bool is_write,
                                hwaddr pa, size_t sz)
 {
+    bool ok = false;
     unsigned num_sg = *p_num_sg;
     assert(num_sg <= max_num_sg);
 
     if (!sz) {
-        error_report("virtio: zero sized buffers are not allowed");
-        exit(1);
+        virtio_error(vdev, "virtio: zero sized buffers are not allowed");
+        goto out;
     }
 
     while (sz) {
         hwaddr len = sz;
 
         if (num_sg == max_num_sg) {
-            error_report("virtio: too many write descriptors in indirect table");
-            exit(1);
+            virtio_error(vdev, "virtio: too many write descriptors in "
+                               "indirect table");
+            goto out;
         }
 
         iov[num_sg].iov_base = cpu_physical_memory_map(pa, &len, is_write);
+        if (!iov[num_sg].iov_base) {
+            virtio_error(vdev, "virtio: bogus descriptor or out of resources");
+            goto out;
+        }
+
         iov[num_sg].iov_len = len;
         addr[num_sg] = pa;
 
@@ -502,7 +545,28 @@ static void virtqueue_map_desc(unsigned int *p_num_sg, hwaddr *addr, struct iove
         pa += len;
         num_sg++;
     }
+    ok = true;
+
+out:
     *p_num_sg = num_sg;
+    return ok;
+}
+
+/* Only used by error code paths before we have a VirtQueueElement (therefore
+ * virtqueue_unmap_sg() can't be used).  Assumes buffers weren't written to
+ * yet.
+ */
+static void virtqueue_undo_map_desc(unsigned int out_num, unsigned int in_num,
+                                    struct iovec *iov)
+{
+    unsigned int i;
+
+    for (i = 0; i < out_num + in_num; i++) {
+        int is_write = i >= out_num;
+
+        cpu_physical_memory_unmap(iov->iov_base, iov->iov_len, is_write, 0);
+        iov++;
+    }
 }
 
 static void virtqueue_map_iovec(struct iovec *sg, hwaddr *addr,
@@ -577,7 +641,11 @@ void *virtqueue_pop(VirtQueue *vq, size_t sz)
     hwaddr addr[VIRTQUEUE_MAX_SIZE];
     struct iovec iov[VIRTQUEUE_MAX_SIZE];
     VRingDesc desc;
+    int rc;
 
+    if (unlikely(vdev->broken)) {
+        return NULL;
+    }
     if (virtio_queue_empty(vq)) {
         return NULL;
     }
@@ -591,20 +659,24 @@ void *virtqueue_pop(VirtQueue *vq, size_t sz)
     max = vq->vring.num;
 
     if (vq->inuse >= vq->vring.num) {
-        error_report("Virtqueue size exceeded");
-        exit(1);
+        virtio_error(vdev, "Virtqueue size exceeded");
+        return NULL;
+    }
+
+    if (!virtqueue_get_head(vq, vq->last_avail_idx++, &head)) {
+        return NULL;
     }
 
-    i = head = virtqueue_get_head(vq, vq->last_avail_idx++);
     if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
         vring_set_avail_event(vq, vq->last_avail_idx);
     }
 
+    i = head;
     vring_desc_read(vdev, &desc, desc_pa, i);
     if (desc.flags & VRING_DESC_F_INDIRECT) {
         if (desc.len % sizeof(VRingDesc)) {
-            error_report("Invalid size for indirect buffer table");
-            exit(1);
+            virtio_error(vdev, "Invalid size for indirect buffer table");
+            return NULL;
         }
 
         /* loop over the indirect descriptor table */
@@ -616,24 +688,38 @@ void *virtqueue_pop(VirtQueue *vq, size_t sz)
 
     /* Collect all the descriptors */
     do {
+        bool map_ok;
+
         if (desc.flags & VRING_DESC_F_WRITE) {
-            virtqueue_map_desc(&in_num, addr + out_num, iov + out_num,
-                               VIRTQUEUE_MAX_SIZE - out_num, true, desc.addr, desc.len);
+            map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num,
+                                        iov + out_num,
+                                        VIRTQUEUE_MAX_SIZE - out_num, true,
+                                        desc.addr, desc.len);
         } else {
             if (in_num) {
-                error_report("Incorrect order for descriptors");
-                exit(1);
+                virtio_error(vdev, "Incorrect order for descriptors");
+                goto err_undo_map;
             }
-            virtqueue_map_desc(&out_num, addr, iov,
-                               VIRTQUEUE_MAX_SIZE, false, desc.addr, desc.len);
+            map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov,
+                                        VIRTQUEUE_MAX_SIZE, false,
+                                        desc.addr, desc.len);
+        }
+        if (!map_ok) {
+            goto err_undo_map;
         }
 
         /* If we've got too many, that implies a descriptor loop. */
         if ((in_num + out_num) > max) {
-            error_report("Looped descriptor");
-            exit(1);
+            virtio_error(vdev, "Looped descriptor");
+            goto err_undo_map;
         }
-    } while ((i = virtqueue_read_next_desc(vdev, &desc, desc_pa, max)) != max);
+
+        rc = virtqueue_read_next_desc(vdev, &desc, desc_pa, max, &i);
+    } while (rc == VIRTQUEUE_READ_DESC_MORE);
+
+    if (rc == VIRTQUEUE_READ_DESC_ERROR) {
+        goto err_undo_map;
+    }
 
     /* Now copy what we have collected and mapped */
     elem = virtqueue_alloc_element(sz, out_num, in_num);
@@ -651,6 +737,10 @@ void *virtqueue_pop(VirtQueue *vq, size_t sz)
 
     trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
     return elem;
+
+err_undo_map:
+    virtqueue_undo_map_desc(out_num, in_num, iov);
+    return NULL;
 }
 
 /* Reading and writing a structure directly to QEMUFile is *awful*, but
@@ -742,6 +832,10 @@ static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
 
+    if (unlikely(vdev->broken)) {
+        return;
+    }
+
     if (k->notify) {
         k->notify(qbus->parent, vector);
     }
@@ -825,6 +919,7 @@ void virtio_reset(void *opaque)
         k->reset(vdev);
     }
 
+    vdev->broken = false;
     vdev->guest_features = 0;
     vdev->queue_sel = 0;
     vdev->status = 0;
@@ -1132,6 +1227,10 @@ static void virtio_queue_notify_vq(VirtQueue *vq)
     if (vq->vring.desc && vq->handle_output) {
         VirtIODevice *vdev = vq->vdev;
 
+        if (unlikely(vdev->broken)) {
+            return;
+        }
+
         trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
         vq->handle_output(vdev, vq);
     }
@@ -1316,6 +1415,13 @@ static bool virtio_extra_state_needed(void *opaque)
         k->has_extra_state(qbus->parent);
 }
 
+static bool virtio_broken_needed(void *opaque)
+{
+    VirtIODevice *vdev = opaque;
+
+    return vdev->broken;
+}
+
 static const VMStateDescription vmstate_virtqueue = {
     .name = "virtqueue_state",
     .version_id = 1,
@@ -1430,6 +1536,17 @@ static const VMStateDescription vmstate_virtio_64bit_features = {
     }
 };
 
+static const VMStateDescription vmstate_virtio_broken = {
+    .name = "virtio/broken",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = &virtio_broken_needed,
+    .fields = (VMStateField[]) {
+        VMSTATE_BOOL(broken, VirtIODevice),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
 static const VMStateDescription vmstate_virtio = {
     .name = "virtio",
     .version_id = 1,
@@ -1443,6 +1560,7 @@ static const VMStateDescription vmstate_virtio = {
         &vmstate_virtio_64bit_features,
         &vmstate_virtio_virtqueues,
         &vmstate_virtio_ringsize,
+        &vmstate_virtio_broken,
         &vmstate_virtio_extra_state,
         NULL
     }
@@ -1608,7 +1726,7 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
                          "inconsistent with Host index 0x%x",
                          i, vdev->vq[i].last_avail_idx);
                 return -1;
-	}
+        }
         if (k->load_queue) {
             ret = k->load_queue(qbus->parent, i, f);
             if (ret)
@@ -1753,6 +1871,7 @@ void virtio_init(VirtIODevice *vdev, const char *name,
     vdev->config_vector = VIRTIO_NO_VECTOR;
     vdev->vq = g_malloc0(sizeof(VirtQueue) * VIRTIO_QUEUE_MAX);
     vdev->vm_running = runstate_is_running();
+    vdev->broken = false;
     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
         vdev->vq[i].vector = VIRTIO_NO_VECTOR;
         vdev->vq[i].vdev = vdev;
@@ -1939,6 +2058,22 @@ void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name)
     vdev->bus_name = g_strdup(bus_name);
 }
 
+void GCC_FMT_ATTR(2, 3) virtio_error(VirtIODevice *vdev, const char *fmt, ...)
+{
+    va_list ap;
+
+    va_start(ap, fmt);
+    error_vreport(fmt, ap);
+    va_end(ap);
+
+    vdev->broken = true;
+
+    if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
+        virtio_set_status(vdev, vdev->status | VIRTIO_CONFIG_S_NEEDS_RESET);
+        virtio_notify_config(vdev);
+    }
+}
+
 static void virtio_device_realize(DeviceState *dev, Error **errp)
 {
     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
diff --git a/include/hw/acpi/aml-build.h b/include/hw/acpi/aml-build.h
index e5f087803f..559326cbd5 100644
--- a/include/hw/acpi/aml-build.h
+++ b/include/hw/acpi/aml-build.h
@@ -367,6 +367,7 @@ Aml *aml_sizeof(Aml *arg);
 Aml *aml_concatenate(Aml *source1, Aml *source2, Aml *target);
 Aml *aml_object_type(Aml *object);
 
+void build_append_int_noprefix(GArray *table, uint64_t value, int size);
 void
 build_header(BIOSLinker *linker, GArray *table_data,
              AcpiTableHeader *h, const char *sig, int len, uint8_t rev,
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index ab8e319505..29a6c9b60b 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -367,17 +367,15 @@ int e820_get_num_entries(void);
 bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *);
 
 #define PC_COMPAT_2_8 \
+
+#define PC_COMPAT_2_7 \
+    HW_COMPAT_2_7 \
     {\
         .driver   = TYPE_X86_CPU,\
         .property = "l3-cache",\
         .value    = "off",\
     },
 
-
-#define PC_COMPAT_2_7 \
-    PC_COMPAT_2_8 \
-    HW_COMPAT_2_7
-
 #define PC_COMPAT_2_6 \
     HW_COMPAT_2_6 \
     {\
@@ -405,7 +403,6 @@ bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *);
     },
 
 #define PC_COMPAT_2_5 \
-    PC_COMPAT_2_6 \
     HW_COMPAT_2_5
 
 /* Helper for setting model-id for CPU models that changed model-id
diff --git a/include/hw/i386/x86-iommu.h b/include/hw/i386/x86-iommu.h
index c48e8dd597..0c89d9835b 100644
--- a/include/hw/i386/x86-iommu.h
+++ b/include/hw/i386/x86-iommu.h
@@ -37,6 +37,12 @@
 typedef struct X86IOMMUState X86IOMMUState;
 typedef struct X86IOMMUClass X86IOMMUClass;
 
+typedef enum IommuType {
+    TYPE_INTEL,
+    TYPE_AMD,
+    TYPE_NONE
+} IommuType;
+
 struct X86IOMMUClass {
     SysBusDeviceClass parent;
     /* Intel/AMD specific realize() hook */
@@ -67,6 +73,7 @@ typedef struct IEC_Notifier IEC_Notifier;
 struct X86IOMMUState {
     SysBusDevice busdev;
     bool intr_supported;        /* Whether vIOMMU supports IR */
+    IommuType type;             /* IOMMU type - AMD/Intel     */
     QLIST_HEAD(, IEC_Notifier) iec_notifiers; /* IEC notify list */
 };
 
@@ -76,6 +83,11 @@ struct X86IOMMUState {
  */
 X86IOMMUState *x86_iommu_get_default(void);
 
+/*
+ * x86_iommu_get_type - get IOMMU type
+ */
+IommuType x86_iommu_get_type(void);
+
 /**
  * x86_iommu_iec_register_notifier - register IEC (Interrupt Entry
  *                                   Cache) notifiers
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index e8b83bbb1e..772692f1b2 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -13,9 +13,12 @@
 /* PCI bus */
 
 #define PCI_DEVFN(slot, func)   ((((slot) & 0x1f) << 3) | ((func) & 0x07))
+#define PCI_BUS_NUM(x)          (((x) >> 8) & 0xff)
 #define PCI_SLOT(devfn)         (((devfn) >> 3) & 0x1f)
 #define PCI_FUNC(devfn)         ((devfn) & 0x07)
 #define PCI_BUILD_BDF(bus, devfn)     ((bus << 8) | (devfn))
+#define PCI_BUS_MAX             256
+#define PCI_DEVFN_MAX           256
 #define PCI_SLOT_MAX            32
 #define PCI_FUNC_MAX            8
 
diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
index f05559d569..888c8debe6 100644
--- a/include/hw/virtio/virtio.h
+++ b/include/hw/virtio/virtio.h
@@ -87,6 +87,7 @@ struct VirtIODevice
     VirtQueue *vq;
     uint16_t device_id;
     bool vm_running;
+    bool broken; /* device in invalid state, needs reset */
     VMChangeStateEntry *vmstate;
     char *bus_name;
     uint8_t device_endian;
@@ -135,6 +136,8 @@ void virtio_init(VirtIODevice *vdev, const char *name,
                          uint16_t device_id, size_t config_size);
 void virtio_cleanup(VirtIODevice *vdev);
 
+void virtio_error(VirtIODevice *vdev, const char *fmt, ...) GCC_FMT_ATTR(2, 3);
+
 /* Set the child bus name. */
 void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name);
 
diff --git a/tests/Makefile.include b/tests/Makefile.include
index d8101b3543..2aa78c1820 100644
--- a/tests/Makefile.include
+++ b/tests/Makefile.include
@@ -656,7 +656,7 @@ tests/usb-hcd-ehci-test$(EXESUF): tests/usb-hcd-ehci-test.o $(libqos-usb-obj-y)
 tests/usb-hcd-xhci-test$(EXESUF): tests/usb-hcd-xhci-test.o $(libqos-usb-obj-y)
 tests/pc-cpu-test$(EXESUF): tests/pc-cpu-test.o
 tests/postcopy-test$(EXESUF): tests/postcopy-test.o
-tests/vhost-user-test$(EXESUF): tests/vhost-user-test.o qemu-char.o qemu-timer.o $(qtest-obj-y) $(test-io-obj-y) $(libqos-virtio-obj-y)
+tests/vhost-user-test$(EXESUF): tests/vhost-user-test.o qemu-char.o qemu-timer.o $(qtest-obj-y) $(test-io-obj-y) $(libqos-virtio-obj-y) $(libqos-pc-obj-y)
 tests/qemu-iotests/socket_scm_helper$(EXESUF): tests/qemu-iotests/socket_scm_helper.o
 tests/test-qemu-opts$(EXESUF): tests/test-qemu-opts.o $(test-util-obj-y)
 tests/test-write-threshold$(EXESUF): tests/test-write-threshold.o $(test-block-obj-y)
diff --git a/tests/vhost-user-test.c b/tests/vhost-user-test.c
index b89a551126..a39846e6fd 100644
--- a/tests/vhost-user-test.c
+++ b/tests/vhost-user-test.c
@@ -20,6 +20,11 @@
 #include "libqos/pci-pc.h"
 #include "libqos/virtio-pci.h"
 
+#include "libqos/pci-pc.h"
+#include "libqos/virtio-pci.h"
+#include "libqos/malloc-pc.h"
+#include "hw/virtio/virtio-net.h"
+
 #include <linux/vhost.h>
 #include <linux/virtio_ids.h>
 #include <linux/virtio_net.h>
@@ -50,6 +55,7 @@
 #define VHOST_MEMORY_MAX_NREGIONS    8
 
 #define VHOST_USER_F_PROTOCOL_FEATURES 30
+#define VHOST_USER_PROTOCOL_F_MQ 0
 #define VHOST_USER_PROTOCOL_F_LOG_SHMFD 1
 
 #define VHOST_LOG_PAGE 0x1000
@@ -72,6 +78,7 @@ typedef enum VhostUserRequest {
     VHOST_USER_SET_VRING_ERR = 14,
     VHOST_USER_GET_PROTOCOL_FEATURES = 15,
     VHOST_USER_SET_PROTOCOL_FEATURES = 16,
+    VHOST_USER_GET_QUEUE_NUM = 17,
     VHOST_USER_SET_VRING_ENABLE = 18,
     VHOST_USER_MAX
 } VhostUserRequest;
@@ -123,6 +130,13 @@ static VhostUserMsg m __attribute__ ((unused));
 #define VHOST_USER_VERSION    (0x1)
 /*****************************************************************************/
 
+enum {
+    TEST_FLAGS_OK,
+    TEST_FLAGS_DISCONNECT,
+    TEST_FLAGS_BAD,
+    TEST_FLAGS_END,
+};
+
 typedef struct TestServer {
     gchar *socket_path;
     gchar *mig_path;
@@ -135,6 +149,9 @@ typedef struct TestServer {
     CompatGCond data_cond;
     int log_fd;
     uint64_t rings;
+    bool test_fail;
+    int test_flags;
+    int queues;
 } TestServer;
 
 static const char *tmpfs;
@@ -249,6 +266,12 @@ static void chr_read(void *opaque, const uint8_t *buf, int size)
     uint8_t *p = (uint8_t *) &msg;
     int fd;
 
+    if (s->test_fail) {
+        qemu_chr_disconnect(chr);
+        /* now switch to non-failure */
+        s->test_fail = false;
+    }
+
     if (size != VHOST_USER_HDR_SIZE) {
         g_test_message("Wrong message size received %d\n", size);
         return;
@@ -274,6 +297,13 @@ static void chr_read(void *opaque, const uint8_t *buf, int size)
         msg.size = sizeof(m.payload.u64);
         msg.payload.u64 = 0x1ULL << VHOST_F_LOG_ALL |
             0x1ULL << VHOST_USER_F_PROTOCOL_FEATURES;
+        if (s->queues > 1) {
+            msg.payload.u64 |= 0x1ULL << VIRTIO_NET_F_MQ;
+        }
+        if (s->test_flags >= TEST_FLAGS_BAD) {
+            msg.payload.u64 = 0;
+            s->test_flags = TEST_FLAGS_END;
+        }
         p = (uint8_t *) &msg;
         qemu_chr_fe_write_all(chr, p, VHOST_USER_HDR_SIZE + msg.size);
         break;
@@ -281,6 +311,10 @@ static void chr_read(void *opaque, const uint8_t *buf, int size)
     case VHOST_USER_SET_FEATURES:
 	g_assert_cmpint(msg.payload.u64 & (0x1ULL << VHOST_USER_F_PROTOCOL_FEATURES),
 			!=, 0ULL);
+        if (s->test_flags == TEST_FLAGS_DISCONNECT) {
+            qemu_chr_disconnect(chr);
+            s->test_flags = TEST_FLAGS_BAD;
+        }
         break;
 
     case VHOST_USER_GET_PROTOCOL_FEATURES:
@@ -288,6 +322,9 @@ static void chr_read(void *opaque, const uint8_t *buf, int size)
         msg.flags |= VHOST_USER_REPLY_MASK;
         msg.size = sizeof(m.payload.u64);
         msg.payload.u64 = 1 << VHOST_USER_PROTOCOL_F_LOG_SHMFD;
+        if (s->queues > 1) {
+            msg.payload.u64 |= 1 << VHOST_USER_PROTOCOL_F_MQ;
+        }
         p = (uint8_t *) &msg;
         qemu_chr_fe_write_all(chr, p, VHOST_USER_HDR_SIZE + msg.size);
         break;
@@ -300,7 +337,7 @@ static void chr_read(void *opaque, const uint8_t *buf, int size)
         p = (uint8_t *) &msg;
         qemu_chr_fe_write_all(chr, p, VHOST_USER_HDR_SIZE + msg.size);
 
-        assert(msg.payload.state.index < 2);
+        assert(msg.payload.state.index < s->queues * 2);
         s->rings &= ~(0x1ULL << msg.payload.state.index);
         break;
 
@@ -340,10 +377,18 @@ static void chr_read(void *opaque, const uint8_t *buf, int size)
         break;
 
     case VHOST_USER_SET_VRING_BASE:
-        assert(msg.payload.state.index < 2);
+        assert(msg.payload.state.index < s->queues * 2);
         s->rings |= 0x1ULL << msg.payload.state.index;
         break;
 
+    case VHOST_USER_GET_QUEUE_NUM:
+        msg.flags |= VHOST_USER_REPLY_MASK;
+        msg.size = sizeof(m.payload.u64);
+        msg.payload.u64 = s->queues;
+        p = (uint8_t *) &msg;
+        qemu_chr_fe_write_all(chr, p, VHOST_USER_HDR_SIZE + msg.size);
+        break;
+
     default:
         break;
     }
@@ -390,10 +435,21 @@ static TestServer *test_server_new(const gchar *name)
     g_cond_init(&server->data_cond);
 
     server->log_fd = -1;
+    server->queues = 1;
 
     return server;
 }
 
+static void chr_event(void *opaque, int event)
+{
+    TestServer *s = opaque;
+
+    if (s->test_flags == TEST_FLAGS_END &&
+        event == CHR_EVENT_CLOSED) {
+        s->test_flags = TEST_FLAGS_OK;
+    }
+}
+
 static void test_server_create_chr(TestServer *server, const gchar *opt)
 {
     gchar *chr_path;
@@ -402,7 +458,8 @@ static void test_server_create_chr(TestServer *server, const gchar *opt)
     server->chr = qemu_chr_new(server->chr_name, chr_path, NULL);
     g_free(chr_path);
 
-    qemu_chr_add_handlers(server->chr, chr_can_read, chr_read, NULL, server);
+    qemu_chr_add_handlers(server->chr, chr_can_read, chr_read,
+                          chr_event, server);
 }
 
 static void test_server_listen(TestServer *server)
@@ -641,7 +698,6 @@ static void test_migrate(void)
     global_qtest = global;
 }
 
-#ifdef CONFIG_HAS_GLIB_SUBPROCESS_TESTS
 static void wait_for_rings_started(TestServer *s, size_t count)
 {
     gint64 end_time;
@@ -659,6 +715,7 @@ static void wait_for_rings_started(TestServer *s, size_t count)
     g_mutex_unlock(&s->data_mutex);
 }
 
+#ifdef CONFIG_HAS_GLIB_SUBPROCESS_TESTS
 static gboolean
 reconnect_cb(gpointer user_data)
 {
@@ -715,8 +772,144 @@ static void test_reconnect(void)
     g_test_trap_assert_passed();
     g_free(path);
 }
+
+static void test_connect_fail_subprocess(void)
+{
+    TestServer *s = test_server_new("connect-fail");
+    char *cmd;
+
+    s->test_fail = true;
+    g_thread_new("connect", connect_thread, s);
+    cmd = GET_QEMU_CMDE(s, 2, ",server", "");
+    qtest_start(cmd);
+    g_free(cmd);
+
+    init_virtio_dev(s);
+    wait_for_fds(s);
+    wait_for_rings_started(s, 2);
+
+    qtest_end();
+    test_server_free(s);
+}
+
+static void test_connect_fail(void)
+{
+    gchar *path = g_strdup_printf("/%s/vhost-user/connect-fail/subprocess",
+                                  qtest_get_arch());
+    g_test_trap_subprocess(path, 0, 0);
+    g_test_trap_assert_passed();
+    g_free(path);
+}
+
+static void test_flags_mismatch_subprocess(void)
+{
+    TestServer *s = test_server_new("flags-mismatch");
+    char *cmd;
+
+    s->test_flags = TEST_FLAGS_DISCONNECT;
+    g_thread_new("connect", connect_thread, s);
+    cmd = GET_QEMU_CMDE(s, 2, ",server", "");
+    qtest_start(cmd);
+    g_free(cmd);
+
+    init_virtio_dev(s);
+    wait_for_fds(s);
+    wait_for_rings_started(s, 2);
+
+    qtest_end();
+    test_server_free(s);
+}
+
+static void test_flags_mismatch(void)
+{
+    gchar *path = g_strdup_printf("/%s/vhost-user/flags-mismatch/subprocess",
+                                  qtest_get_arch());
+    g_test_trap_subprocess(path, 0, 0);
+    g_test_trap_assert_passed();
+    g_free(path);
+}
+
 #endif
 
+static QVirtioPCIDevice *virtio_net_pci_init(QPCIBus *bus, int slot)
+{
+    QVirtioPCIDevice *dev;
+
+    dev = qvirtio_pci_device_find(bus, VIRTIO_ID_NET);
+    g_assert(dev != NULL);
+    g_assert_cmphex(dev->vdev.device_type, ==, VIRTIO_ID_NET);
+
+    qvirtio_pci_device_enable(dev);
+    qvirtio_reset(&qvirtio_pci, &dev->vdev);
+    qvirtio_set_acknowledge(&qvirtio_pci, &dev->vdev);
+    qvirtio_set_driver(&qvirtio_pci, &dev->vdev);
+
+    return dev;
+}
+
+static void driver_init(const QVirtioBus *bus, QVirtioDevice *dev)
+{
+    uint32_t features;
+
+    features = qvirtio_get_features(bus, dev);
+    features = features & ~(QVIRTIO_F_BAD_FEATURE |
+                            (1u << VIRTIO_RING_F_INDIRECT_DESC) |
+                            (1u << VIRTIO_RING_F_EVENT_IDX));
+    qvirtio_set_features(bus, dev, features);
+
+    qvirtio_set_driver_ok(bus, dev);
+}
+
+#define PCI_SLOT                0x04
+
+static void test_multiqueue(void)
+{
+    const int queues = 2;
+    TestServer *s = test_server_new("mq");
+    QVirtioPCIDevice *dev;
+    QPCIBus *bus;
+    QVirtQueuePCI *vq[queues * 2];
+    QGuestAllocator *alloc;
+    char *cmd;
+    int i;
+
+    s->queues = queues;
+    test_server_listen(s);
+
+    cmd = g_strdup_printf(QEMU_CMD_MEM QEMU_CMD_CHR QEMU_CMD_NETDEV ",queues=%d "
+                          "-device virtio-net-pci,netdev=net0,mq=on,vectors=%d",
+                          512, 512, root, s->chr_name,
+                          s->socket_path, "", s->chr_name,
+                          queues, queues * 2 + 2);
+    qtest_start(cmd);
+    g_free(cmd);
+
+    bus = qpci_init_pc();
+    dev = virtio_net_pci_init(bus, PCI_SLOT);
+
+    alloc = pc_alloc_init();
+    for (i = 0; i < queues * 2; i++) {
+        vq[i] = (QVirtQueuePCI *)qvirtqueue_setup(&qvirtio_pci, &dev->vdev,
+                                              alloc, i);
+    }
+
+    driver_init(&qvirtio_pci, &dev->vdev);
+    wait_for_rings_started(s, queues * 2);
+
+    /* End test */
+    for (i = 0; i < queues * 2; i++) {
+        qvirtqueue_cleanup(&qvirtio_pci, &vq[i]->vq, alloc);
+    }
+    pc_alloc_uninit(alloc);
+    qvirtio_pci_device_disable(dev);
+    g_free(dev->pdev);
+    g_free(dev);
+    qpci_free_pc(bus);
+    qtest_end();
+
+    test_server_free(s);
+}
+
 int main(int argc, char **argv)
 {
     QTestState *s = NULL;
@@ -762,10 +955,17 @@ int main(int argc, char **argv)
 
     qtest_add_data_func("/vhost-user/read-guest-mem", server, read_guest_mem);
     qtest_add_func("/vhost-user/migrate", test_migrate);
+    qtest_add_func("/vhost-user/multiqueue", test_multiqueue);
 #ifdef CONFIG_HAS_GLIB_SUBPROCESS_TESTS
     qtest_add_func("/vhost-user/reconnect/subprocess",
                    test_reconnect_subprocess);
     qtest_add_func("/vhost-user/reconnect", test_reconnect);
+    qtest_add_func("/vhost-user/connect-fail/subprocess",
+                   test_connect_fail_subprocess);
+    qtest_add_func("/vhost-user/connect-fail", test_connect_fail);
+    qtest_add_func("/vhost-user/flags-mismatch/subprocess",
+                   test_flags_mismatch_subprocess);
+    qtest_add_func("/vhost-user/flags-mismatch", test_flags_mismatch);
 #endif
 
     ret = g_test_run();