Diffstat (limited to 'hw/misc/vfio.c')
 hw/misc/vfio.c | 139 ++++++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 134 insertions(+), 5 deletions(-)
diff --git a/hw/misc/vfio.c b/hw/misc/vfio.c
index b170fd245d..7437c2e3c3 100644
--- a/hw/misc/vfio.c
+++ b/hw/misc/vfio.c
@@ -160,10 +160,18 @@ typedef struct VFIOContainer {
         };
         void (*release)(struct VFIOContainer *);
     } iommu_data;
+    QLIST_HEAD(, VFIOGuestIOMMU) giommu_list;
     QLIST_HEAD(, VFIOGroup) group_list;
     QLIST_ENTRY(VFIOContainer) next;
 } VFIOContainer;
 
+typedef struct VFIOGuestIOMMU {
+    VFIOContainer *container;
+    MemoryRegion *iommu;
+    Notifier n;
+    QLIST_ENTRY(VFIOGuestIOMMU) giommu_next;
+} VFIOGuestIOMMU;
+
 /* Cache of MSI-X setup plus extra mmap and memory region for split BAR map */
 typedef struct VFIOMSIXInfo {
     uint8_t table_bar;
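
Aside: the giommu_list / giommu_next pair added above is QEMU's usual intrusive-list idiom. A rough stand-alone sketch of the same ownership pattern follows; QEMU's QLIST_* macros mirror the BSD <sys/queue.h> LIST_* macros used here, and the types are illustrative stand-ins, not QEMU's own:

    #include <stdio.h>
    #include <stdlib.h>
    #include <sys/queue.h>

    /* Illustrative stand-ins for VFIOGuestIOMMU and VFIOContainer */
    struct guest_iommu {
        int id;                        /* placeholder for MemoryRegion/Notifier state */
        LIST_ENTRY(guest_iommu) next;  /* plays the role of giommu_next */
    };

    struct container {
        LIST_HEAD(, guest_iommu) giommu_list;
    };

    int main(void)
    {
        struct container c;
        struct guest_iommu *g;

        LIST_INIT(&c.giommu_list);

        /* region_add path: one tracking struct per guest IOMMU region */
        for (int i = 0; i < 3; i++) {
            g = calloc(1, sizeof(*g));
            g->id = i;
            LIST_INSERT_HEAD(&c.giommu_list, g, next);
        }

        /* region_del path: find the matching entry, unlink and free it,
         * mirroring the QLIST_FOREACH + QLIST_REMOVE + g_free sequence */
        LIST_FOREACH(g, &c.giommu_list, next) {
            if (g->id == 1) {
                LIST_REMOVE(g, next);
                free(g);
                break;
            }
        }

        /* container teardown: drain whatever is left */
        while (!LIST_EMPTY(&c.giommu_list)) {
            g = LIST_FIRST(&c.giommu_list);
            printf("tearing down guest IOMMU %d\n", g->id);
            LIST_REMOVE(g, next);
            free(g);
        }
        return 0;
    }
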
@@ -2383,7 +2391,8 @@ static int vfio_dma_map(VFIOContainer *container, hwaddr iova,
 
 static bool vfio_listener_skipped_section(MemoryRegionSection *section)
 {
-    return !memory_region_is_ram(section->mr) ||
+    return (!memory_region_is_ram(section->mr) &&
+            !memory_region_is_iommu(section->mr)) ||
            /*
             * Sizing an enabled 64-bit BAR can cause spurious mappings to
             * addresses in the upper part of the 64-bit address space.  These
@@ -2393,6 +2402,65 @@ static bool vfio_listener_skipped_section(MemoryRegionSection *section)
            section->offset_within_address_space & (1ULL << 63);
 }
 
+static void vfio_iommu_map_notify(Notifier *n, void *data)
+{
+    VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n);
+    VFIOContainer *container = giommu->container;
+    IOMMUTLBEntry *iotlb = data;
+    MemoryRegion *mr;
+    hwaddr xlat;
+    hwaddr len = iotlb->addr_mask + 1;
+    void *vaddr;
+    int ret;
+
+    DPRINTF("iommu map @ %"HWADDR_PRIx" - %"HWADDR_PRIx"\n",
+            iotlb->iova, iotlb->iova + iotlb->addr_mask);
+
+    /*
+     * The IOMMU TLB entry we have just covers translation through
+     * this IOMMU to its immediate target.  We need to translate
+     * it the rest of the way through to memory.
+     */
+    mr = address_space_translate(&address_space_memory,
+                                 iotlb->translated_addr,
+                                 &xlat, &len, iotlb->perm & IOMMU_WO);
+    if (!memory_region_is_ram(mr)) {
+        DPRINTF("iommu map to non memory area %"HWADDR_PRIx"\n",
+                xlat);
+        return;
+    }
+    /*
+     * The translation may truncate the length to the IOMMU page
+     * size; check that it did not truncate too much.
+     */
+    if (len & iotlb->addr_mask) {
+        DPRINTF("iommu has granularity incompatible with target AS\n");
+        return;
+    }
+
+    if (iotlb->perm != IOMMU_NONE) {
+        vaddr = memory_region_get_ram_ptr(mr) + xlat;
+
+        ret = vfio_dma_map(container, iotlb->iova,
+                           iotlb->addr_mask + 1, vaddr,
+                           !(iotlb->perm & IOMMU_WO) || mr->readonly);
+        if (ret) {
+            error_report("vfio_dma_map(%p, 0x%"HWADDR_PRIx", "
+                         "0x%"HWADDR_PRIx", %p) = %d (%m)",
+                         container, iotlb->iova,
+                         iotlb->addr_mask + 1, vaddr, ret);
+        }
+    } else {
+        ret = vfio_dma_unmap(container, iotlb->iova, iotlb->addr_mask + 1);
+        if (ret) {
+            error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", "
+                         "0x%"HWADDR_PRIx") = %d (%m)",
+                         container, iotlb->iova,
+                         iotlb->addr_mask + 1, ret);
+        }
+    }
+}
+
 static void vfio_listener_region_add(MemoryListener *listener,
                                      MemoryRegionSection *section)
 {
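
Aside: vfio_iommu_map_notify() above recovers its VFIOGuestIOMMU from the embedded Notifier via container_of() and dispatches on iotlb->perm, with IOMMU_NONE selecting the unmap path. A minimal stand-alone sketch of that callback pattern, with stand-in types in place of QEMU's Notifier and IOMMUTLBEntry:

    #include <stddef.h>
    #include <stdio.h>

    typedef struct Notifier Notifier;   /* stand-in for QEMU's Notifier */
    struct Notifier {
        void (*notify)(Notifier *n, void *data);
    };

    #define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

    typedef struct GuestIOMMU {         /* stand-in for VFIOGuestIOMMU */
        const char *name;               /* placeholder for container state */
        Notifier n;                     /* embedded, recovered via container_of */
    } GuestIOMMU;

    typedef struct TLBEvent {           /* stand-in for IOMMUTLBEntry */
        unsigned long iova;
        unsigned long addr_mask;
        int perm;                       /* 0 plays the role of IOMMU_NONE */
    } TLBEvent;

    static void map_notify(Notifier *n, void *data)
    {
        GuestIOMMU *g = container_of(n, GuestIOMMU, n);
        TLBEvent *e = data;

        /* perm selects between the map and unmap paths, as in the patch */
        printf("%s: %s iova 0x%lx size 0x%lx\n", g->name,
               e->perm ? "map" : "unmap", e->iova, e->addr_mask + 1);
    }

    int main(void)
    {
        GuestIOMMU g = { .name = "giommu0", .n.notify = map_notify };
        TLBEvent map_ev = { .iova = 0x1000, .addr_mask = 0xfff, .perm = 3 };
        TLBEvent unmap_ev = { .iova = 0x1000, .addr_mask = 0xfff, .perm = 0 };

        /* What a guest IOMMU model's memory_region_notify_iommu() call
         * would ultimately do: invoke each registered notifier. */
        g.n.notify(&g.n, &map_ev);
        g.n.notify(&g.n, &unmap_ev);
        return 0;
    }
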
@@ -2403,8 +2471,6 @@ static void vfio_listener_region_add(MemoryListener *listener,
     void *vaddr;
     int ret;
 
-    assert(!memory_region_is_iommu(section->mr));
-
     if (vfio_listener_skipped_section(section)) {
         DPRINTF("SKIPPING region_add %"HWADDR_PRIx" - %"PRIx64"\n",
                 section->offset_within_address_space,
@@ -2428,15 +2494,57 @@ static void vfio_listener_region_add(MemoryListener *listener,
         return;
     }
 
+    memory_region_ref(section->mr);
+
+    if (memory_region_is_iommu(section->mr)) {
+        VFIOGuestIOMMU *giommu;
+
+        DPRINTF("region_add [iommu] %"HWADDR_PRIx" - %"HWADDR_PRIx"\n",
+                iova, int128_get64(int128_sub(llend, int128_one())));
+        /*
+         * FIXME: We should do some checking to see if the
+         * capabilities of the host VFIO IOMMU are adequate to model
+         * the guest IOMMU
+         *
+         * FIXME: For VFIO iommu types which have KVM acceleration,
+         * this would be the right place to wire that up (tell the
+         * KVM device emulation which VFIO iommu handles to use) and
+         * avoid bouncing every map/unmap through qemu as we do here.
+         */
+        /*
+         * This assumes that the guest IOMMU is empty of
+         * mappings at this point.
+         *
+         * One way of ensuring this is:
+         * 1. Avoid sharing IOMMUs between emulated devices or different
+         * IOMMU groups.
+         * 2. Implement VFIO_IOMMU_ENABLE in the host kernel to fail if
+         * there are already mappings in the IOMMU.
+         *
+         * VFIO on SPAPR does that. Other IOMMU models may handle this
+         * differently: they must either ensure there are no existing
+         * mappings, or walk the existing mappings and replay them into VFIO.
+         */
+        giommu = g_malloc0(sizeof(*giommu));
+        giommu->iommu = section->mr;
+        giommu->container = container;
+        giommu->n.notify = vfio_iommu_map_notify;
+        QLIST_INSERT_HEAD(&container->giommu_list, giommu, giommu_next);
+        memory_region_register_iommu_notifier(giommu->iommu, &giommu->n);
+
+        return;
+    }
+
+    /* Here we assume that memory_region_is_ram(section->mr) == true */
+
     end = int128_get64(llend);
     vaddr = memory_region_get_ram_ptr(section->mr) +
             section->offset_within_region +
             (iova - section->offset_within_address_space);
 
-    DPRINTF("region_add %"HWADDR_PRIx" - %"HWADDR_PRIx" [%p]\n",
+    DPRINTF("region_add [ram] %"HWADDR_PRIx" - %"HWADDR_PRIx" [%p]\n",
             iova, end - 1, vaddr);
 
-    memory_region_ref(section->mr);
     ret = vfio_dma_map(container, iova, end - iova, vaddr, section->readonly);
     if (ret) {
         error_report("vfio_dma_map(%p, 0x%"HWADDR_PRIx", "
@@ -2480,6 +2588,27 @@ static void vfio_listener_region_del(MemoryListener *listener,
         return;
     }
 
+    if (memory_region_is_iommu(section->mr)) {
+        VFIOGuestIOMMU *giommu;
+
+        QLIST_FOREACH(giommu, &container->giommu_list, giommu_next) {
+            if (giommu->iommu == section->mr) {
+                memory_region_unregister_iommu_notifier(&giommu->n);
+                QLIST_REMOVE(giommu, giommu_next);
+                g_free(giommu);
+                break;
+            }
+        }
+
+        /*
+         * FIXME: We assume the one big unmap below is adequate to
+         * remove any individual page mappings in the IOMMU which
+         * might have been copied into VFIO. This works for a page table
+         * based IOMMU where a big unmap flattens a large range of IO-PTEs.
+         * That may not be true for all IOMMU types.
+         */
+    }
+
     iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
     end = (section->offset_within_address_space + int128_get64(section->size)) &
           TARGET_PAGE_MASK;
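
Aside: both listener callbacks clip the section with TARGET_PAGE_ALIGN / TARGET_PAGE_MASK, rounding the start up and the end down so only whole target pages are (un)mapped. A stand-alone sketch of that clipping, assuming a 4 KiB page size:

    #include <inttypes.h>
    #include <stdio.h>

    #define PAGE_SIZE  4096ULL                    /* stand-in for TARGET_PAGE_SIZE */
    #define PAGE_MASK  (~(PAGE_SIZE - 1))         /* stand-in for TARGET_PAGE_MASK */
    #define PAGE_ALIGN(a) (((a) + PAGE_SIZE - 1) & PAGE_MASK)  /* round up */

    int main(void)
    {
        uint64_t start = 0x12345, size = 0x3000;  /* deliberately unaligned section */
        uint64_t iova = PAGE_ALIGN(start);        /* round the start up */
        uint64_t end  = (start + size) & PAGE_MASK; /* round the end down */

        if (iova >= end) {
            puts("section smaller than a page after clipping: nothing to do");
        } else {
            printf("(un)mapping [0x%" PRIx64 ", 0x%" PRIx64 ")\n", iova, end);
        }
        return 0;
    }
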