| author | Alejandro Jimenez <alejandro.j.jimenez@oracle.com> | 2025-09-19 21:35:00 +0000 |
|---|---|---|
| committer | Michael S. Tsirkin <mst@redhat.com> | 2025-10-05 16:13:00 -0400 |
| commit | a74bb3110a5b7b5da51ada160732cfc2c83d80d7 (patch) | |
| tree | c77c68026824eaa2ff1802a6eb419f66e7420615 /hw/i386/amd_iommu.c | |
| parent | 0f13cc5642ea81fc952ef20515891d12c5193d24 (diff) | |
amd_iommu: Add helpers to walk AMD v1 Page Table format
The current amdvi_page_walk() is designed to be called by the replay() method. Rather than drastically altering it, introduce helpers to fetch guest PTEs that will be used by a page walker implementation.

Signed-off-by: Alejandro Jimenez <alejandro.j.jimenez@oracle.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Message-ID: <20250919213515.917111-8-alejandro.j.jimenez@oracle.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
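A minimal standalone sketch of the calling pattern the commit message describes: a page walker consuming a fetch_pte()-style helper and advancing through an IOVA range by the span each lookup covers. The helper below is a toy model, and the names `toy_fetch_pte` and `walk_range` are hypothetical; only the out-parameter contract (a PTE plus the size of the region it answers for) mirrors the patch.

```c
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define TOY_MAPPED_LIMIT   (2ULL << 20)   /* toy model: first 2MB is mapped */
#define TOY_LARGE_PAGE     (2ULL << 20)   /* ...by one 2MB large page */

/*
 * Toy stand-in for the patch's fetch_pte(): report whether 'iova' is mapped
 * and how large a region the answer covers.  In the real helper the span is
 * the page size of the level where the walk stopped, so the caller can skip
 * an entire unmapped (or large-page) region in one step.
 */
static int toy_fetch_pte(uint64_t iova, uint64_t *pte, uint64_t *page_size)
{
    if (iova < TOY_MAPPED_LIMIT) {
        *pte = 1;                        /* "present" in this toy model */
        *page_size = TOY_LARGE_PAGE;
    } else {
        *pte = 0;                        /* not present */
        *page_size = TOY_LARGE_PAGE;     /* hole spans the same 2MB granule */
    }
    return 0;                            /* 0 on success, negative on error */
}

/* Walk an IOVA range, advancing by the span reported for each lookup. */
static void walk_range(uint64_t start, uint64_t end)
{
    for (uint64_t iova = start; iova < end; ) {
        uint64_t pte, span;

        if (toy_fetch_pte(iova, &pte, &span) < 0) {
            break;                       /* unrecoverable walk error */
        }
        printf("iova 0x%09" PRIx64 ": %s, span 0x%" PRIx64 "\n",
               iova, pte ? "mapped" : "unmapped", span);
        iova += span;
    }
}

int main(void)
{
    walk_range(0, 8ULL << 20);           /* scan the first 8MB of IOVA space */
    return 0;
}
```

The real helper additionally distinguishes a root-level failure from a lower-level read failure via the two new fault reasons that the diff below adds to AMDVIFaultReason.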
Diffstat (limited to 'hw/i386/amd_iommu.c')
| -rw-r--r-- | hw/i386/amd_iommu.c | 123 |
1 file changed, 123 insertions, 0 deletions
diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c
index 29ed3f0ef2..c25981ff93 100644
--- a/hw/i386/amd_iommu.c
+++ b/hw/i386/amd_iommu.c
@@ -87,6 +87,8 @@ typedef enum AMDVIFaultReason {
     AMDVI_FR_DTE_RTR_ERR = 1,       /* Failure to retrieve DTE */
     AMDVI_FR_DTE_V,                 /* DTE[V] = 0 */
     AMDVI_FR_DTE_TV,                /* DTE[TV] = 0 */
+    AMDVI_FR_PT_ROOT_INV,           /* Page Table Root ptr invalid */
+    AMDVI_FR_PT_ENTRY_INV,          /* Failure to read PTE from guest memory */
 } AMDVIFaultReason;
 
 uint64_t amdvi_extended_feature_register(AMDVIState *s)
@@ -558,6 +560,127 @@ static int amdvi_as_to_dte(AMDVIAddressSpace *as, uint64_t *dte)
     return 0;
 }
 
+/*
+ * For a PTE encoding a large page, return the page size it encodes as described
+ * by the AMD IOMMU Specification Table 14: Example Page Size Encodings.
+ * No need to adjust the value of the PTE to point to the first PTE in the large
+ * page since the encoding guarantees all "base" PTEs in the large page are the
+ * same.
+ */
+static uint64_t large_pte_page_size(uint64_t pte)
+{
+    assert(PTE_NEXT_LEVEL(pte) == 7);
+
+    /* Determine size of the large/contiguous page encoded in the PTE */
+    return PTE_LARGE_PAGE_SIZE(pte);
+}
+
+/*
+ * Helper function to fetch a PTE using AMD v1 pgtable format.
+ * On successful page walk, returns 0 and pte parameter points to a valid PTE.
+ * On failure, returns:
+ * -AMDVI_FR_PT_ROOT_INV: A page walk is not possible due to conditions like DTE
+ *  with invalid permissions, Page Table Root can not be read from DTE, or a
+ *  larger IOVA than supported by page table level encoded in DTE[Mode].
+ * -AMDVI_FR_PT_ENTRY_INV: A PTE could not be read from guest memory during a
+ *  page table walk. This means that the DTE has valid data, but one of the
+ *  lower level entries in the Page Table could not be read.
+ */
+static int __attribute__((unused))
+fetch_pte(AMDVIAddressSpace *as, hwaddr address, uint64_t dte, uint64_t *pte,
+          hwaddr *page_size)
+{
+    IOMMUAccessFlags perms = amdvi_get_perms(dte);
+
+    uint8_t level, mode;
+    uint64_t pte_addr;
+
+    *pte = dte;
+    *page_size = 0;
+
+    if (perms == IOMMU_NONE) {
+        return -AMDVI_FR_PT_ROOT_INV;
+    }
+
+    /*
+     * The Linux kernel driver initializes the default mode to 3, corresponding
+     * to a 39-bit GPA space, where each entry in the pagetable translates to a
+     * 1GB (2^30) page size.
+     */
+    level = mode = get_pte_translation_mode(dte);
+    assert(mode > 0 && mode < 7);
+
+    /*
+     * If IOVA is larger than the max supported by the current pgtable level,
+     * there is nothing to do.
+     */
+    if (address > PT_LEVEL_MAX_ADDR(mode - 1)) {
+        /* IOVA too large for the current DTE */
+        return -AMDVI_FR_PT_ROOT_INV;
+    }
+
+    do {
+        level -= 1;
+
+        /* Update the page_size */
+        *page_size = PTE_LEVEL_PAGE_SIZE(level);
+
+        /* Permission bits are ANDed at every level, including the DTE */
+        perms &= amdvi_get_perms(*pte);
+        if (perms == IOMMU_NONE) {
+            return 0;
+        }
+
+        /* Not Present */
+        if (!IOMMU_PTE_PRESENT(*pte)) {
+            return 0;
+        }
+
+        /* Large or Leaf PTE found */
+        if (PTE_NEXT_LEVEL(*pte) == 7 || PTE_NEXT_LEVEL(*pte) == 0) {
+            /* Leaf PTE found */
+            break;
+        }
+
+        /*
+         * Index the pgtable using the IOVA bits corresponding to current level
+         * and walk down to the lower level.
+         */
+        pte_addr = NEXT_PTE_ADDR(*pte, level, address);
+        *pte = amdvi_get_pte_entry(as->iommu_state, pte_addr, as->devfn);
+
+        if (*pte == (uint64_t)-1) {
+            /*
+             * A returned PTE of -1 indicates a failure to read the page table
+             * entry from guest memory.
+             */
+            if (level == mode - 1) {
+                /* Failure to retrieve the Page Table from Root Pointer */
+                *page_size = 0;
+                return -AMDVI_FR_PT_ROOT_INV;
+            } else {
+                /* Failure to read PTE. Page walk skips a page_size chunk */
+                return -AMDVI_FR_PT_ENTRY_INV;
+            }
+        }
+    } while (level > 0);
+
+    assert(PTE_NEXT_LEVEL(*pte) == 0 || PTE_NEXT_LEVEL(*pte) == 7 ||
+           level == 0);
+    /*
+     * Page walk ends when Next Level field on PTE shows that either a leaf PTE
+     * or a series of large PTEs have been reached. In the latter case, even if
+     * the range starts in the middle of a contiguous page, the returned PTE
+     * must be the first PTE of the series.
+     */
+    if (PTE_NEXT_LEVEL(*pte) == 7) {
+        /* Update page_size with the large PTE page size */
+        *page_size = large_pte_page_size(*pte);
+    }
+
+    return 0;
+}
+
 /* log error without aborting since linux seems to be using reserved bits */
 static void amdvi_inval_devtab_entry(AMDVIState *s, uint64_t *cmd)
 {
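For reference on what PTE_LARGE_PAGE_SIZE() (defined elsewhere in this series, not shown in this diff) has to compute: with Next Level = 7h, the page size follows the spec's example encodings, where the position of the lowest clear address bit above bit 11 selects a power-of-two size. The standalone sketch below illustrates that arithmetic under the assumption that QEMU follows the same convention as the Linux amd_iommu driver; `example_large_page_size` is an illustration, not a copy of the QEMU macro.

```c
#include <stdint.h>
#include <stdio.h>

/*
 * Page size encoded by a PTE whose Next Level field is 7: treat bits [11:0]
 * as ones, find the lowest clear bit above them, and the size is 2^(bit + 1).
 * Example: address bits 12..19 set, bit 20 clear -> 2^21 = 2MB.
 */
static uint64_t example_large_page_size(uint64_t pte)
{
    uint64_t v = pte | 0xfffULL;     /* low 12 bits never terminate the scan */
    unsigned bit = 0;

    while (v & (1ULL << bit)) {      /* locate the first zero bit */
        bit++;
    }
    return 1ULL << (bit + 1);
}

int main(void)
{
    /* Bit 12 clear -> 8KB, the smallest large-page encoding */
    printf("%llu KB\n", (unsigned long long)(example_large_page_size(0x000) >> 10));
    /* Bits 12..19 set, bit 20 clear -> 2048KB (2MB) */
    printf("%llu KB\n", (unsigned long long)(example_large_page_size(0xff000) >> 10));
    return 0;
}
```

This encoding is also why the comment above large_pte_page_size() notes that no adjustment to the first PTE of the region is needed: every 4KB-granular slot inside the large page carries the same ones-pattern in its low address bits, so any of them yields the same size.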