From 03f4995781a64e106e6f73864a1e9c4163dac53b Mon Sep 17 00:00:00 2001
From: Paolo Bonzini
Date: Thu, 7 Nov 2013 17:14:36 +0100
Subject: split definitions for exec.c and translate-all.c radix trees

The exec.c and translate-all.c radix trees are quite different, and
the exec.c one in particular is not limited to the CPU: it can also
be used by devices that do DMA, and in that case the address space is
not limited to TARGET_PHYS_ADDR_SPACE_BITS bits.

We want to make exec.c's radix trees 64-bit wide. As a first step,
stop sharing the constants between exec.c and translate-all.c:
exec.c gets the P_L2_* constants, translate-all.c gets V_L2_*, for
consistency with the existing V_L1_* symbols. (Though in the softmmu
case translate-all.c is actually also indexed by physical
addresses...)

This patch has no semantic change.

Signed-off-by: Paolo Bonzini
Signed-off-by: Michael S. Tsirkin
---
 exec.c | 29 +++++++++++++++++++++--------
 1 file changed, 21 insertions(+), 8 deletions(-)

diff --git a/exec.c b/exec.c
index f4b9ef25f5..060f3f3430 100644
--- a/exec.c
+++ b/exec.c
@@ -88,7 +88,15 @@ struct PhysPageEntry {
     uint16_t ptr : 15;
 };

-typedef PhysPageEntry Node[L2_SIZE];
+/* Size of the L2 (and L3, etc) page tables. */
+#define ADDR_SPACE_BITS TARGET_PHYS_ADDR_SPACE_BITS
+
+#define P_L2_BITS 10
+#define P_L2_SIZE (1 << P_L2_BITS)
+
+#define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
+
+typedef PhysPageEntry Node[P_L2_SIZE];

 struct AddressSpaceDispatch {
     /* This is a multi-level map on the physical address space.
@@ -155,7 +163,7 @@ static uint16_t phys_map_node_alloc(void)
     ret = next_map.nodes_nb++;
     assert(ret != PHYS_MAP_NODE_NIL);
     assert(ret != next_map.nodes_nb_alloc);
-    for (i = 0; i < L2_SIZE; ++i) {
+    for (i = 0; i < P_L2_SIZE; ++i) {
         next_map.nodes[ret][i].is_leaf = 0;
         next_map.nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
     }
@@ -168,13 +176,13 @@ static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
 {
     PhysPageEntry *p;
     int i;
-    hwaddr step = (hwaddr)1 << (level * L2_BITS);
+    hwaddr step = (hwaddr)1 << (level * P_L2_BITS);

     if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
         lp->ptr = phys_map_node_alloc();
         p = next_map.nodes[lp->ptr];
         if (level == 0) {
-            for (i = 0; i < L2_SIZE; i++) {
+            for (i = 0; i < P_L2_SIZE; i++) {
                 p[i].is_leaf = 1;
                 p[i].ptr = PHYS_SECTION_UNASSIGNED;
             }
@@ -182,9 +190,9 @@ static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
     } else {
         p = next_map.nodes[lp->ptr];
     }
-    lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];
+    lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];

-    while (*nb && lp < &p[L2_SIZE]) {
+    while (*nb && lp < &p[P_L2_SIZE]) {
         if ((*index & (step - 1)) == 0 && *nb >= step) {
             lp->is_leaf = true;
             lp->ptr = leaf;
@@ -218,7 +226,7 @@ static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr index,
             return &sections[PHYS_SECTION_UNASSIGNED];
         }
         p = nodes[lp.ptr];
-        lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
+        lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
     }
     return &sections[lp.ptr];
 }
@@ -1778,7 +1786,12 @@ void address_space_destroy_dispatch(AddressSpace *as)
 static void memory_map_init(void)
 {
     system_memory = g_malloc(sizeof(*system_memory));
-    memory_region_init(system_memory, NULL, "system", INT64_MAX);
+
+    assert(ADDR_SPACE_BITS <= 64);
+
+    memory_region_init(system_memory, NULL, "system",
+                       ADDR_SPACE_BITS == 64 ?
+                       UINT64_MAX : (0x1ULL << ADDR_SPACE_BITS));

     address_space_init(&address_space_memory, system_memory, "memory");

     system_io = g_malloc(sizeof(*system_io));
-- cgit 1.4.1
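It is worth seeing what the P_L2_LEVELS formula introduced above actually
yields. Here is a small standalone sketch (plain C, not QEMU code; the
bit widths passed in main are illustrative assumptions, not taken from any
particular target):

    /* levels.c - evaluate the P_L2_LEVELS formula from the patch above
     * for a few illustrative configurations.  Build: cc -o levels levels.c
     */
    #include <stdio.h>

    /* One level resolves l2_bits bits of the page index, so this is a
     * ceiling division of (address bits - page bits) by l2_bits. */
    static int p_l2_levels(int addr_space_bits, int target_page_bits,
                           int l2_bits)
    {
        return ((addr_space_bits - target_page_bits - 1) / l2_bits) + 1;
    }

    int main(void)
    {
        /* 4K pages with a 40-bit physical address space: 3 levels. */
        printf("40-bit space, 12-bit pages: %d levels\n",
               p_l2_levels(40, 12, 10));
        /* 4K pages with the full 64-bit space used later in the series. */
        printf("64-bit space, 12-bit pages: %d levels\n",
               p_l2_levels(64, 12, 10));
        return 0;
    }

The "- 1 ... + 1" idiom is just a ceiling division, so a partially used
top level still gets allocated.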
From 9736e55b78dc49b7f3a265932ab32ed360f633e4 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin"
Date: Mon, 11 Nov 2013 14:42:43 +0200
Subject: exec: replace leaf with skip

In preparation for dynamic radix tree depth support, rename the
is_leaf field to skip, telling us how many bits to skip to the next
level. It is set to 0 for a leaf.

Signed-off-by: Michael S. Tsirkin
---
 exec.c | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/exec.c b/exec.c
index 060f3f3430..e3e5bc0ca2 100644
--- a/exec.c
+++ b/exec.c
@@ -83,8 +83,9 @@ int use_icount;
 typedef struct PhysPageEntry PhysPageEntry;

 struct PhysPageEntry {
-    uint16_t is_leaf : 1;
-    /* index into phys_sections (is_leaf) or phys_map_nodes (!is_leaf) */
+    /* How many bits skip to next level (in units of L2_SIZE). 0 for a leaf. */
+    uint16_t skip : 1;
+    /* index into phys_sections (!skip) or phys_map_nodes (skip) */
     uint16_t ptr : 15;
 };

@@ -164,7 +165,7 @@ static uint16_t phys_map_node_alloc(void)
     assert(ret != PHYS_MAP_NODE_NIL);
     assert(ret != next_map.nodes_nb_alloc);
     for (i = 0; i < P_L2_SIZE; ++i) {
-        next_map.nodes[ret][i].is_leaf = 0;
+        next_map.nodes[ret][i].skip = 1;
         next_map.nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
     }
     return ret;
@@ -178,12 +179,12 @@ static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
     int i;
     hwaddr step = (hwaddr)1 << (level * P_L2_BITS);

-    if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
+    if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
         lp->ptr = phys_map_node_alloc();
         p = next_map.nodes[lp->ptr];
         if (level == 0) {
             for (i = 0; i < P_L2_SIZE; i++) {
-                p[i].is_leaf = 1;
+                p[i].skip = 0;
                 p[i].ptr = PHYS_SECTION_UNASSIGNED;
             }
         }
@@ -194,7 +195,7 @@ static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,

     while (*nb && lp < &p[P_L2_SIZE]) {
         if ((*index & (step - 1)) == 0 && *nb >= step) {
-            lp->is_leaf = true;
+            lp->skip = 0;
             lp->ptr = leaf;
             *index += step;
             *nb -= step;
@@ -221,7 +222,7 @@ static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr index,
     PhysPageEntry *p;
     int i;

-    for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) {
+    for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
         if (lp.ptr == PHYS_MAP_NODE_NIL) {
             return &sections[PHYS_SECTION_UNASSIGNED];
         }
@@ -1681,7 +1682,7 @@ static void mem_begin(MemoryListener *listener)
     AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
     AddressSpaceDispatch *d = g_new(AddressSpaceDispatch, 1);

-    d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
+    d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
    d->as = as;
     as->next_dispatch = d;
-- cgit 1.4.1

From 8b795765db36544da6193fb64e4e0f1dc55aaa36 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin"
Date: Mon, 11 Nov 2013 14:51:56 +0200
Subject: exec: extend skip field to 6 bit, page entry to 32 bit

Extend skip to 6 bits. As the page entry no longer fits in 16 bits
anyway, extend it to 32 bits. This doubles the node map's memory
requirements, but follow-up patches will recover that memory.

Signed-off-by: Michael S. Tsirkin
---
 exec.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/exec.c b/exec.c
index e3e5bc0ca2..154ae9793d 100644
--- a/exec.c
+++ b/exec.c
@@ -84,11 +84,13 @@
 typedef struct PhysPageEntry PhysPageEntry;

 struct PhysPageEntry {
     /* How many bits skip to next level (in units of L2_SIZE). 0 for a leaf. */
-    uint16_t skip : 1;
+    uint32_t skip : 6;
     /* index into phys_sections (!skip) or phys_map_nodes (skip) */
-    uint16_t ptr : 15;
+    uint32_t ptr : 26;
 };

+#define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
+
 /* Size of the L2 (and L3, etc) page tables. */
 #define ADDR_SPACE_BITS TARGET_PHYS_ADDR_SPACE_BITS
@@ -134,8 +136,6 @@ typedef struct PhysPageMap {
 static PhysPageMap *prev_map;
 static PhysPageMap next_map;

-#define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)
-
 static void io_mem_init(void);
 static void memory_map_init(void);
@@ -156,10 +156,10 @@
     }
 }

-static uint16_t phys_map_node_alloc(void)
+static uint32_t phys_map_node_alloc(void)
 {
     unsigned i;
-    uint16_t ret;
+    uint32_t ret;

     ret = next_map.nodes_nb++;
     assert(ret != PHYS_MAP_NODE_NIL);
-- cgit 1.4.1
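The new 6/26 split can be sanity-checked in isolation. A minimal sketch
(illustrative, not QEMU code; the struct simply mirrors the bit widths
from the patch above):

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Mirror of the patched PhysPageEntry: 6 skip bits, 26 ptr bits. */
    struct entry {
        uint32_t skip : 6;
        uint32_t ptr  : 26;
    };

    /* Same expression as the patch: all ones, shifted down by the skip
     * width, i.e. the largest value a 26-bit field can hold. */
    #define NODE_NIL (((uint32_t)~0) >> 6)

    int main(void)
    {
        struct entry e = { .skip = 1, .ptr = NODE_NIL };

        /* Both fields pack into one 32-bit word (on the usual ABIs). */
        assert(sizeof(e) == sizeof(uint32_t));
        /* NIL is 0x03ffffff, so no real node index can collide with it. */
        assert(e.ptr == NODE_NIL);
        /* A 6-bit skip can describe a jump of up to 63 levels. */
        printf("NIL = 0x%08x, max skip = %u\n",
               (unsigned)NODE_NIL, (1u << 6) - 1);
        return 0;
    }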
From 97115a8d4500abeb090b968f01605e0bdafcdfd3 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin"
Date: Wed, 13 Nov 2013 20:08:19 +0200
Subject: exec: pass hw address to phys_page_find

Callers always shift by target page bits, so let's just do this
internally.

Signed-off-by: Michael S. Tsirkin
---
 exec.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/exec.c b/exec.c
index 154ae9793d..b528dad76a 100644
--- a/exec.c
+++ b/exec.c
@@ -216,10 +216,11 @@ static void phys_page_set(AddressSpaceDispatch *d,
     phys_page_set_level(&d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
 }

-static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr index,
+static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
                                            Node *nodes, MemoryRegionSection *sections)
 {
     PhysPageEntry *p;
+    hwaddr index = addr >> TARGET_PAGE_BITS;
     int i;

     for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
@@ -245,8 +246,7 @@
     MemoryRegionSection *section;
     subpage_t *subpage;

-    section = phys_page_find(d->phys_map, addr >> TARGET_PAGE_BITS,
-                             d->nodes, d->sections);
+    section = phys_page_find(d->phys_map, addr, d->nodes, d->sections);
     if (resolve_subpage && section->mr->subpage) {
         subpage = container_of(section->mr, subpage_t, iomem);
         section = &d->sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
@@ -802,7 +802,7 @@ static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *secti
     subpage_t *subpage;
     hwaddr base = section->offset_within_address_space
        & TARGET_PAGE_MASK;
-    MemoryRegionSection *existing = phys_page_find(d->phys_map, base >> TARGET_PAGE_BITS,
+    MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
                                                    next_map.nodes, next_map.sections);
     MemoryRegionSection subsection = {
         .offset_within_address_space = base,
-- cgit 1.4.1
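Putting the last two patches together, the lookup now walks the tree with
the page shift applied internally and the skip field driving the level
counter. A simplified standalone model (illustrative names, fixed page
size, and a one-level toy tree; not the QEMU implementation):

    #include <stdint.h>

    #define L2_BITS   9
    #define L2_SIZE   (1 << L2_BITS)
    #define PAGE_BITS 12

    struct entry { uint32_t skip : 6; uint32_t ptr : 26; };
    typedef struct entry node_t[L2_SIZE];

    /* Walk from the root.  An interior entry's skip says how many levels
     * the step consumes; a leaf has skip == 0 and ptr names a section. */
    static uint32_t lookup(struct entry lp, uint64_t addr,
                           node_t *nodes, int levels)
    {
        uint64_t index = addr >> PAGE_BITS;   /* the shift is now internal */

        for (int i = levels; lp.skip && (i -= lp.skip) >= 0; ) {
            lp = nodes[lp.ptr][(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
        }
        return lp.ptr;   /* section index */
    }

    int main(void)
    {
        static node_t nodes[1];
        struct entry root = { .skip = 1, .ptr = 0 };

        /* One-level tree: every slot is a leaf naming section 7. */
        for (int i = 0; i < L2_SIZE; i++) {
            nodes[0][i] = (struct entry){ .skip = 0, .ptr = 7 };
        }
        return lookup(root, 0x1234000, nodes, 1) == 7 ? 0 : 1;
    }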
From b35ba30f8fa235c779d876ee299b80a2d501d204 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin"
Date: Mon, 11 Nov 2013 17:52:07 +0200
Subject: exec: memory radix tree page level compression

At the moment, the memory radix tree is already variable width, but it
can only skip the low bits of the address. This is efficient if we have
huge memory regions, but inefficient if we are only using a tiny
portion of the address space.

After we have built up the map, detect configurations where a single L2
entry is valid. We then speed up the lookup by skipping one or more
levels. In case any levels were skipped, we might end up in a valid
section instead of erroring out. We handle this by checking that the
address falls within the range of the resulting section.

Signed-off-by: Michael S. Tsirkin
---
 exec.c | 75 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 74 insertions(+), 1 deletion(-)

diff --git a/exec.c b/exec.c
index b528dad76a..7e5ce9394c 100644
--- a/exec.c
+++ b/exec.c
@@ -51,6 +51,8 @@

 #include "exec/memory-internal.h"

+#include "qemu/range.h"
+
 //#define DEBUG_SUBPAGE

 #if !defined(CONFIG_USER_ONLY)
@@ -216,6 +218,68 @@ static void phys_page_set(AddressSpaceDispatch *d,
     phys_page_set_level(&d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
 }

+/* Compact a non leaf page entry. Simply detect that the entry has a single child,
+ * and update our entry so we can skip it and go directly to the destination.
+ */
+static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
+{
+    unsigned valid_ptr = P_L2_SIZE;
+    int valid = 0;
+    PhysPageEntry *p;
+    int i;
+
+    if (lp->ptr == PHYS_MAP_NODE_NIL) {
+        return;
+    }
+
+    p = nodes[lp->ptr];
+    for (i = 0; i < P_L2_SIZE; i++) {
+        if (p[i].ptr == PHYS_MAP_NODE_NIL) {
+            continue;
+        }
+
+        valid_ptr = i;
+        valid++;
+        if (p[i].skip) {
+            phys_page_compact(&p[i], nodes, compacted);
+        }
+    }
+
+    /* We can only compress if there's only one child. */
+    if (valid != 1) {
+        return;
+    }
+
+    assert(valid_ptr < P_L2_SIZE);
+
+    /* Don't compress if it won't fit in the # of bits we have. */
+    if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
+        return;
+    }
+
+    lp->ptr = p[valid_ptr].ptr;
+    if (!p[valid_ptr].skip) {
+        /* If our only child is a leaf, make this a leaf. */
+        /* By design, we should have made this node a leaf to begin with so we
+         * should never reach here.
+         * But since it's so simple to handle this, let's do it just in case we
+         * change this rule.
+         */
+        lp->skip = 0;
+    } else {
+        lp->skip += p[valid_ptr].skip;
+    }
+}
+
+static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
+{
+    DECLARE_BITMAP(compacted, nodes_nb);
+
+    if (d->phys_map.skip) {
+        phys_page_compact(&d->phys_map, d->nodes, compacted);
+    }
+}
+
 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
                                            Node *nodes, MemoryRegionSection *sections)
 {
@@ -230,7 +294,14 @@ static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
         p = nodes[lp.ptr];
         lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
     }
-    return &sections[lp.ptr];
+
+    if (sections[lp.ptr].size.hi ||
+        range_covers_byte(sections[lp.ptr].offset_within_address_space,
+                          sections[lp.ptr].size.lo, addr)) {
+        return &sections[lp.ptr];
+    } else {
+        return &sections[PHYS_SECTION_UNASSIGNED];
+    }
 }

 bool memory_region_is_unassigned(MemoryRegion *mr)
@@ -1696,6 +1767,8 @@ static void mem_commit(MemoryListener *listener)
     next->nodes = next_map.nodes;
     next->sections = next_map.sections;

+    phys_page_compact_all(next, next_map.nodes_nb);
+
     as->dispatch = next;
     g_free(cur);
 }
-- cgit 1.4.1
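To see the compaction at work, here is a toy model of the single-child
collapse (a sketch under simplified assumptions: tiny fan-out, plain
8-bit fields, and no bitmap of visited nodes, unlike the real patch):

    #include <stdint.h>
    #include <stdio.h>

    #define FANOUT   4      /* toy fan-out standing in for P_L2_SIZE */
    #define NIL      0xff
    #define MAX_SKIP 63     /* what a 6-bit skip field could hold */

    struct entry { uint8_t skip; uint8_t ptr; };
    typedef struct entry node_t[FANOUT];

    /* If an interior entry's node has exactly one valid child, absorb it:
     * point directly at the grandchild and accumulate the skip. */
    static void compact(struct entry *lp, node_t *nodes)
    {
        int valid = 0, valid_ptr = -1;

        if (lp->ptr == NIL) {
            return;
        }
        for (int i = 0; i < FANOUT; i++) {
            if (nodes[lp->ptr][i].ptr == NIL) {
                continue;
            }
            valid_ptr = i;
            valid++;
            if (nodes[lp->ptr][i].skip) {      /* interior child: recurse */
                compact(&nodes[lp->ptr][i], nodes);
            }
        }
        if (valid != 1) {
            return;                            /* nothing to collapse */
        }
        struct entry *child = &nodes[lp->ptr][valid_ptr];
        if (lp->skip + child->skip > MAX_SKIP) {
            return;                            /* would overflow the field */
        }
        lp->ptr = child->ptr;
        /* A leaf child turns this entry into a leaf too. */
        lp->skip = child->skip ? (uint8_t)(lp->skip + child->skip) : 0;
    }

    int main(void)
    {
        /* A chain of three nodes, each with one valid slot, ending in a
         * leaf (skip == 0) that names section 5. */
        static node_t nodes[3];
        for (int n = 0; n < 3; n++) {
            for (int i = 0; i < FANOUT; i++) {
                nodes[n][i] = (struct entry){ .skip = 0, .ptr = NIL };
            }
        }
        nodes[0][2] = (struct entry){ .skip = 1, .ptr = 1 };
        nodes[1][0] = (struct entry){ .skip = 1, .ptr = 2 };
        nodes[2][3] = (struct entry){ .skip = 0, .ptr = 5 };

        struct entry root = { .skip = 1, .ptr = 0 };
        compact(&root, nodes);
        printf("root: skip=%u ptr=%u\n", root.skip, root.ptr); /* 0 and 5 */
        return 0;
    }

Collapsing levels drops the intermediate index checks, which is exactly
why phys_page_find gains the range_covers_byte() test above: a lookup
that lands in a section after skipping levels must still verify that the
address really falls inside that section.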
From 57271d63c4d93352406704d540453c43a4a241a7 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini
Date: Thu, 7 Nov 2013 17:14:37 +0100
Subject: exec: make address spaces 64-bit wide

As an alternative to commit 818f86b (exec: limit system memory size,
2013-11-04), let's just make all address spaces 64-bit wide. This
eliminates problems with phys_page_find ignoring bits above
TARGET_PHYS_ADDR_SPACE_BITS and address_space_translate_internal
consequently messing up the computations.

In Luiz's reported crash, at startup gdb attempts to read from address
0xffffffffffffffe6 to 0xffffffffffffffff inclusive. The region it gets
is the newly introduced master abort region, which is as big as the PCI
address space (see pci_bus_init). Due to a typo that region is only
2^63-1 bytes big, not 2^64, but we get it anyway because phys_page_find
ignores the upper bits of the physical address. Then, in
address_space_translate_internal,

    diff = int128_sub(section->mr->size, int128_make64(addr));
    *plen = int128_get64(int128_min(diff, int128_make64(*plen)));

diff becomes negative, and int128_get64 booms.

The size of the PCI address space region should be fixed anyway.

Reported-by: Luiz Capitulino
Signed-off-by: Paolo Bonzini
Signed-off-by: Michael S. Tsirkin
---
 exec.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/exec.c b/exec.c
index 7e5ce9394c..f907f5f4f5 100644
--- a/exec.c
+++ b/exec.c
@@ -94,7 +94,7 @@ struct PhysPageEntry {
 #define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)

 /* Size of the L2 (and L3, etc) page tables. */
-#define ADDR_SPACE_BITS TARGET_PHYS_ADDR_SPACE_BITS
+#define ADDR_SPACE_BITS 64

 #define P_L2_BITS 10
 #define P_L2_SIZE (1 << P_L2_BITS)
@@ -1861,11 +1861,7 @@ static void memory_map_init(void)
 {
     system_memory = g_malloc(sizeof(*system_memory));

-    assert(ADDR_SPACE_BITS <= 64);
-
-    memory_region_init(system_memory, NULL, "system",
-                       ADDR_SPACE_BITS == 64 ?
-                       UINT64_MAX : (0x1ULL << ADDR_SPACE_BITS));
+    memory_region_init(system_memory, NULL, "system", UINT64_MAX);

     address_space_init(&address_space_memory, system_memory, "memory");

     system_io = g_malloc(sizeof(*system_io));
-- cgit 1.4.1

From 026736cebfe0e4a96f0761a2bae62cca92ce2a4e Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin"
Date: Wed, 13 Nov 2013 20:13:03 +0200
Subject: exec: reduce L2_PAGE_SIZE

With the single exception of ppc with 16M pages, we get the same
number of levels with P_L2_BITS = 9 as with P_L2_BITS = 10. By doing
this we reduce the memory footprint of a single level in the node
memory map by 2x, without runtime overhead.

Signed-off-by: Michael S. Tsirkin
---
 exec.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/exec.c b/exec.c
index f907f5f4f5..67a073c540 100644
--- a/exec.c
+++ b/exec.c
@@ -96,7 +96,7 @@ struct PhysPageEntry {
 /* Size of the L2 (and L3, etc) page tables. */
 #define ADDR_SPACE_BITS 64

-#define P_L2_BITS 10
+#define P_L2_BITS 9
 #define P_L2_SIZE (1 << P_L2_BITS)

 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
-- cgit 1.4.1
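The level-count claim is easy to check numerically. A quick illustrative
calculation (the page-size values are typical examples, e.g. 12 bits for
4K pages and 24 bits for ppc's 16M pages; they are assumptions, not an
exhaustive list of targets):

    #include <stdio.h>

    /* P_L2_LEVELS from the patches above, for a 64-bit address space. */
    static int levels(int l2_bits, int page_bits)
    {
        return ((64 - page_bits - 1) / l2_bits) + 1;
    }

    int main(void)
    {
        /* e.g. 10-bit (1K), 12-bit (4K), 13-bit (8K), and 24-bit (16M)
         * page sizes. */
        int page_bits[] = { 10, 12, 13, 24 };

        for (int i = 0; i < 4; i++) {
            printf("page bits %2d: P_L2_BITS=10 -> %d levels, "
                   "P_L2_BITS=9 -> %d levels\n",
                   page_bits[i],
                   levels(10, page_bits[i]), levels(9, page_bits[i]));
        }
        return 0;
    }

Only the 16M-page case changes (4 levels versus 5); the others stay at 6
levels, while each node shrinks from 1024 to 512 entries.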
From 53cb28cbfea038f8ad50132dc8a684e638c7d48b Mon Sep 17 00:00:00 2001
From: Marcel Apfelbaum
Date: Sun, 1 Dec 2013 14:02:23 +0200
Subject: exec: separate sections and nodes per address space

Every address space has its own nodes and sections, but it uses the
same global arrays of nodes/sections.

This limits the number of devices that can be attached to the guest
to 20-30. It happens because:
 - The sections array is limited to 2^12 entries.
 - The main memory has at least 100 sections.
 - Each device address space is actually an alias to main memory,
   multiplying its number of nodes/sections.

Remove the limitation by using separate arrays of nodes and sections
for each address space.

Signed-off-by: Marcel Apfelbaum
Reviewed-by: Michael S. Tsirkin
Reviewed-by: Paolo Bonzini
Signed-off-by: Michael S. Tsirkin
---
 exec.c | 155 ++++++++++++++++++++++++++++------------------------------
 1 file changed, 66 insertions(+), 89 deletions(-)

diff --git a/exec.c b/exec.c
index 67a073c540..00526d18c0 100644
--- a/exec.c
+++ b/exec.c
@@ -103,13 +103,21 @@ struct PhysPageEntry {

 typedef PhysPageEntry Node[P_L2_SIZE];

+typedef struct PhysPageMap {
+    unsigned sections_nb;
+    unsigned sections_nb_alloc;
+    unsigned nodes_nb;
+    unsigned nodes_nb_alloc;
+    Node *nodes;
+    MemoryRegionSection *sections;
+} PhysPageMap;
+
 struct AddressSpaceDispatch {
     /* This is a multi-level map on the physical address space.
      * The bottom level has pointers to MemoryRegionSections.
      */
     PhysPageEntry phys_map;
-    Node *nodes;
-    MemoryRegionSection *sections;
+    PhysPageMap map;
     AddressSpace *as;
 };
@@ -126,18 +134,6 @@ typedef struct subpage_t {
 #define PHYS_SECTION_ROM 2
 #define PHYS_SECTION_WATCH 3

-typedef struct PhysPageMap {
-    unsigned sections_nb;
-    unsigned sections_nb_alloc;
-    unsigned nodes_nb;
-    unsigned nodes_nb_alloc;
-    Node *nodes;
-    MemoryRegionSection *sections;
-} PhysPageMap;
-
-static PhysPageMap *prev_map;
-static PhysPageMap next_map;
-
 static void io_mem_init(void);
 static void memory_map_init(void);
@@ -146,35 +142,32 @@

 #if !defined(CONFIG_USER_ONLY)

-static void phys_map_node_reserve(unsigned nodes)
+static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
 {
-    if (next_map.nodes_nb + nodes > next_map.nodes_nb_alloc) {
-        next_map.nodes_nb_alloc = MAX(next_map.nodes_nb_alloc * 2,
-                                      16);
-        next_map.nodes_nb_alloc = MAX(next_map.nodes_nb_alloc,
-                                      next_map.nodes_nb + nodes);
-        next_map.nodes = g_renew(Node, next_map.nodes,
-                                 next_map.nodes_nb_alloc);
+    if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
+        map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
+        map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
+        map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
     }
 }

-static uint32_t phys_map_node_alloc(void)
+static uint32_t phys_map_node_alloc(PhysPageMap *map)
 {
     unsigned i;
     uint32_t ret;

-    ret = next_map.nodes_nb++;
+    ret = map->nodes_nb++;
     assert(ret != PHYS_MAP_NODE_NIL);
-    assert(ret != next_map.nodes_nb_alloc);
+    assert(ret != map->nodes_nb_alloc);
     for (i = 0; i < P_L2_SIZE; ++i) {
-        next_map.nodes[ret][i].skip = 1;
-        next_map.nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
+        map->nodes[ret][i].skip = 1;
+        map->nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
     }
     return ret;
 }

-static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
-                                hwaddr *nb, uint16_t leaf,
+static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
+                                hwaddr *index, hwaddr *nb, uint16_t leaf,
                                 int level)
 {
     PhysPageEntry *p;
@@ -182,8 +175,8 @@ static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
     hwaddr step = (hwaddr)1 << (level * P_L2_BITS);

     if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
-        lp->ptr = phys_map_node_alloc();
-        p = next_map.nodes[lp->ptr];
+        lp->ptr = phys_map_node_alloc(map);
+        p = map->nodes[lp->ptr];
         if (level == 0) {
             for (i = 0; i < P_L2_SIZE; i++) {
                 p[i].skip = 0;
@@ -191,7 +184,7 @@ static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
             }
         }
     } else {
-        p = next_map.nodes[lp->ptr];
+        p = map->nodes[lp->ptr];
     }
     lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
@@ -202,7 +195,7 @@ static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
             *index += step;
             *nb -= step;
         } else {
-            phys_page_set_level(lp, index, nb, leaf, level - 1);
+            phys_page_set_level(map, lp, index, nb, leaf, level - 1);
         }
         ++lp;
     }
@@ -213,9 +206,9 @@ static void phys_page_set(AddressSpaceDispatch *d,
                           uint16_t leaf)
 {
     /* Wildly overreserve - it doesn't matter much. */
-    phys_map_node_reserve(3 * P_L2_LEVELS);
+    phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);

-    phys_page_set_level(&d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
+    phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
 }
@@ -276,7 +269,7 @@ static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
     DECLARE_BITMAP(compacted, nodes_nb);

     if (d->phys_map.skip) {
-        phys_page_compact(&d->phys_map, d->nodes, compacted);
+        phys_page_compact(&d->phys_map, d->map.nodes, compacted);
     }
 }
@@ -317,10 +310,10 @@
     MemoryRegionSection *section;
     subpage_t *subpage;

-    section = phys_page_find(d->phys_map, addr, d->nodes, d->sections);
+    section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
     if (resolve_subpage && section->mr->subpage) {
         subpage = container_of(section->mr, subpage_t, iomem);
-        section = &d->sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
+        section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
     }
     return section;
 }
@@ -788,7 +781,7 @@ hwaddr memory_region_section_get_iotlb(CPUArchState *env,
             iotlb |= PHYS_SECTION_ROM;
         }
     } else {
-        iotlb = section - address_space_memory.dispatch->sections;
+        iotlb = section - address_space_memory.dispatch->map.sections;
         iotlb += xlat;
     }
@@ -827,23 +820,23 @@ void phys_mem_set_alloc(void *(*alloc)(size_t))
     phys_mem_alloc = alloc;
 }

-static uint16_t phys_section_add(MemoryRegionSection *section)
+static uint16_t phys_section_add(PhysPageMap *map,
+                                 MemoryRegionSection *section)
 {
     /* The physical section number is ORed with a page-aligned
      * pointer to produce the iotlb entries.  Thus it should
      * never overflow into the page-aligned value.
      */
-    assert(next_map.sections_nb < TARGET_PAGE_SIZE);
+    assert(map->sections_nb < TARGET_PAGE_SIZE);

-    if (next_map.sections_nb == next_map.sections_nb_alloc) {
-        next_map.sections_nb_alloc = MAX(next_map.sections_nb_alloc * 2,
-                                         16);
-        next_map.sections = g_renew(MemoryRegionSection, next_map.sections,
-                                    next_map.sections_nb_alloc);
+    if (map->sections_nb == map->sections_nb_alloc) {
+        map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
+        map->sections = g_renew(MemoryRegionSection, map->sections,
+                                map->sections_nb_alloc);
     }
-    next_map.sections[next_map.sections_nb] = *section;
+    map->sections[map->sections_nb] = *section;
     memory_region_ref(section->mr);
-    return next_map.sections_nb++;
+    return map->sections_nb++;
 }

 static void phys_section_destroy(MemoryRegion *mr)
@@ -865,7 +858,6 @@ static void phys_sections_free(PhysPageMap *map)
     }
     g_free(map->sections);
     g_free(map->nodes);
-    g_free(map);
 }

 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *secti
@@ -874,7 +866,7 @@
     hwaddr base = section->offset_within_address_space
        & TARGET_PAGE_MASK;
     MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
-                                                   next_map.nodes, next_map.sections);
+                                                   d->map.nodes, d->map.sections);
     MemoryRegionSection subsection = {
         .offset_within_address_space = base,
         .size = int128_make64(TARGET_PAGE_SIZE),
@@ -887,13 +879,14 @@
         subpage = subpage_init(d->as, base);
         subsection.mr = &subpage->iomem;
         phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
-                      phys_section_add(&subsection));
+                      phys_section_add(&d->map, &subsection));
     } else {
         subpage = container_of(existing->mr, subpage_t, iomem);
     }
     start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
     end = start + int128_get64(section->size) - 1;
-    subpage_register(subpage, start, end, phys_section_add(section));
+    subpage_register(subpage, start, end,
+                     phys_section_add(&d->map, section));
 }
@@ -901,7 +894,7 @@ static void register_multipage(AddressSpaceDispatch *d,
                                MemoryRegionSection *section)
 {
     hwaddr start_addr = section->offset_within_address_space;
-    uint16_t section_index = phys_section_add(section);
+    uint16_t section_index = phys_section_add(&d->map, section);
     uint64_t num_pages = int128_get64(int128_rshift(section->size,
                                                     TARGET_PAGE_BITS));
@@ -1720,7 +1713,7 @@ static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
     return mmio;
 }

-static uint16_t dummy_section(MemoryRegion *mr)
+static uint16_t dummy_section(PhysPageMap *map, MemoryRegion *mr)
 {
     MemoryRegionSection section = {
         .mr = mr,
@@ -1729,12 +1722,13 @@
         .size = int128_2_64(),
     };

-    return phys_section_add(&section);
+    return phys_section_add(map, &section);
 }

 MemoryRegion *iotlb_to_region(hwaddr index)
 {
-    return address_space_memory.dispatch->sections[index & ~TARGET_PAGE_MASK].mr;
+    return address_space_memory.dispatch->map.sections[
+        index & ~TARGET_PAGE_MASK].mr;
 }

 static void io_mem_init(void)
@@ -1751,7 +1745,17 @@ static void io_mem_init(void)
 static void mem_begin(MemoryListener *listener)
 {
     AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
-    AddressSpaceDispatch *d = g_new(AddressSpaceDispatch, 1);
+    AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
+    uint16_t n;
+
+    n = dummy_section(&d->map, &io_mem_unassigned);
+    assert(n == PHYS_SECTION_UNASSIGNED);
+    n = dummy_section(&d->map, &io_mem_notdirty);
+    assert(n == PHYS_SECTION_NOTDIRTY);
+    n = dummy_section(&d->map, &io_mem_rom);
+    assert(n == PHYS_SECTION_ROM);
+    n = dummy_section(&d->map, &io_mem_watch);
+    assert(n == PHYS_SECTION_WATCH);

     d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
     d->as = as;
@@ -1764,39 +1768,14 @@ static void mem_commit(MemoryListener *listener)
     AddressSpaceDispatch *cur = as->dispatch;
     AddressSpaceDispatch *next = as->next_dispatch;

-    next->nodes = next_map.nodes;
-    next->sections = next_map.sections;
-
-    phys_page_compact_all(next, next_map.nodes_nb);
+    phys_page_compact_all(next, next->map.nodes_nb);

     as->dispatch = next;
-    g_free(cur);
-}
-
-static void core_begin(MemoryListener *listener)
-{
-    uint16_t n;
-
-    prev_map = g_new(PhysPageMap, 1);
-    *prev_map = next_map;
-
-    memset(&next_map, 0, sizeof(next_map));
-    n = dummy_section(&io_mem_unassigned);
-    assert(n == PHYS_SECTION_UNASSIGNED);
-    n = dummy_section(&io_mem_notdirty);
-    assert(n == PHYS_SECTION_NOTDIRTY);
-    n = dummy_section(&io_mem_rom);
-    assert(n == PHYS_SECTION_ROM);
-    n = dummy_section(&io_mem_watch);
-    assert(n == PHYS_SECTION_WATCH);
-}
-
-/* This listener's commit run after the other AddressSpaceDispatch listeners'.
- * All AddressSpaceDispatch instances have switched to the next map.
- */
-static void core_commit(MemoryListener *listener)
-{
-    phys_sections_free(prev_map);
+
+    if (cur) {
+        phys_sections_free(&cur->map);
+        g_free(cur);
+    }
 }

 static void tcg_commit(MemoryListener *listener)
@@ -1824,8 +1803,6 @@ static void core_log_global_stop(MemoryListener *listener)
 }

 static MemoryListener core_memory_listener = {
-    .begin = core_begin,
-    .commit = core_commit,
     .log_global_start = core_log_global_start,
     .log_global_stop = core_log_global_stop,
     .priority = 1,
-- cgit 1.4.1
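The ownership model that emerges from this last patch: each
AddressSpaceDispatch carries its own PhysPageMap, built privately in
mem_begin and swapped in at mem_commit, which also retires the previous
generation. A simplified model of that lifecycle (illustrative structs
and names, not the QEMU API):

    #include <stdlib.h>

    /* Each dispatch owns its map outright, so freeing a dispatch frees
     * exactly the arrays it allocated - no shared prev_map/next_map. */
    struct map {
        void *nodes;
        void *sections;
    };

    struct dispatch {
        struct map map;          /* embedded, owned by this dispatch */
    };

    struct address_space {
        struct dispatch *cur;    /* what lookups currently use */
        struct dispatch *next;   /* generation under construction */
    };

    static void begin(struct address_space *as)
    {
        /* Start each rebuild from an empty, private map. */
        as->next = calloc(1, sizeof(*as->next));
    }

    static void commit(struct address_space *as)
    {
        struct dispatch *old = as->cur;

        as->cur = as->next;      /* switch readers to the new generation */
        as->next = NULL;
        if (old) {               /* then retire the previous one */
            free(old->map.nodes);
            free(old->map.sections);
            free(old);
        }
    }

    int main(void)
    {
        struct address_space as = { 0, 0 };

        begin(&as); commit(&as); /* first generation: nothing to retire */
        begin(&as); commit(&as); /* rebuild: the old dispatch freed here */
        free(as.cur);
        return 0;
    }

With ownership per address space, the global core_begin/core_commit
juggling of prev_map/next_map disappears, and the 2^12-section ceiling
applies per address space instead of to the machine as a whole.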