From 6e050d415662ca97b71410b8c4d91f789872b407 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 18 Sep 2019 03:39:56 +0000 Subject: exec: Adjust notdirty tracing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The memory_region_tb_read tracepoint is unreachable, since notdirty is supposed to apply only to writes. The memory_region_tb_write tracepoint is mis-named, because notdirty is not only used for TB invalidation. It is also used for e.g. VGA RAM updates and migration. Replace memory_region_tb_write with memory_notdirty_write_access, and place it in memory_notdirty_write_prepare where it can catch all of the instances. Add memory_notdirty_set_dirty to log when we no longer intercept writes to a page. Reviewed-by: Alex Bennée Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: David Hildenbrand Signed-off-by: Richard Henderson --- exec.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'exec.c') diff --git a/exec.c b/exec.c index 8b998974f8..5f2587b621 100644 --- a/exec.c +++ b/exec.c @@ -2755,6 +2755,8 @@ void memory_notdirty_write_prepare(NotDirtyInfo *ndi, ndi->size = size; ndi->pages = NULL; + trace_memory_notdirty_write_access(mem_vaddr, ram_addr, size); + assert(tcg_enabled()); if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) { ndi->pages = page_collection_lock(ram_addr, ram_addr + size); @@ -2779,6 +2781,7 @@ void memory_notdirty_write_complete(NotDirtyInfo *ndi) /* we remove the notdirty callback only if the code has been flushed */ if (!cpu_physical_memory_is_clean(ndi->ram_addr)) { + trace_memory_notdirty_set_dirty(ndi->mem_vaddr); tlb_set_dirty(ndi->cpu, ndi->mem_vaddr); } } -- cgit 1.4.1 From 7b0d792ce13c0e894ee32a94b321b329724c9a25 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 19 Sep 2019 17:54:10 -0700 Subject: cputlb: Move ROM handling from I/O path to TLB path It does not require going through the whole I/O path in order to discard a write. Reviewed-by: David Hildenbrand Signed-off-by: Richard Henderson --- accel/tcg/cputlb.c | 36 +++++++++++++++++++++--------------- exec.c | 41 +---------------------------------------- include/exec/cpu-all.h | 5 ++++- include/exec/cpu-common.h | 1 - 4 files changed, 26 insertions(+), 57 deletions(-) (limited to 'exec.c') diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c index 028eebcb44..404ec57a4e 100644 --- a/accel/tcg/cputlb.c +++ b/accel/tcg/cputlb.c @@ -577,7 +577,8 @@ static void tlb_reset_dirty_range_locked(CPUTLBEntry *tlb_entry, { uintptr_t addr = tlb_entry->addr_write; - if ((addr & (TLB_INVALID_MASK | TLB_MMIO | TLB_NOTDIRTY)) == 0) { + if ((addr & (TLB_INVALID_MASK | TLB_MMIO | + TLB_DISCARD_WRITE | TLB_NOTDIRTY)) == 0) { addr &= TARGET_PAGE_MASK; addr += tlb_entry->addend; if ((addr - start) < length) { @@ -745,7 +746,6 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr, address |= TLB_MMIO; addend = 0; } else { - /* TLB_MMIO for rom/romd handled below */ addend = (uintptr_t)memory_region_get_ram_ptr(section->mr) + xlat; } @@ -822,16 +822,17 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr, tn.addr_write = -1; if (prot & PAGE_WRITE) { - if ((memory_region_is_ram(section->mr) && section->readonly) - || memory_region_is_romd(section->mr)) { - /* Write access calls the I/O callback. */ - tn.addr_write = address | TLB_MMIO; - } else if (memory_region_is_ram(section->mr) - && cpu_physical_memory_is_clean( - memory_region_get_ram_addr(section->mr) + xlat)) { - tn.addr_write = address | TLB_NOTDIRTY; - } else { - tn.addr_write = address; + tn.addr_write = address; + if (memory_region_is_romd(section->mr)) { + /* Use the MMIO path so that the device can switch states. */ + tn.addr_write |= TLB_MMIO; + } else if (memory_region_is_ram(section->mr)) { + if (section->readonly) { + tn.addr_write |= TLB_DISCARD_WRITE; + } else if (cpu_physical_memory_is_clean( + memory_region_get_ram_addr(section->mr) + xlat)) { + tn.addr_write |= TLB_NOTDIRTY; + } } if (prot & PAGE_WRITE_INV) { tn.addr_write |= TLB_INVALID_MASK; @@ -904,7 +905,7 @@ static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry, mr = section->mr; mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr; cpu->mem_io_pc = retaddr; - if (mr != &io_mem_rom && mr != &io_mem_notdirty && !cpu->can_do_io) { + if (mr != &io_mem_notdirty && !cpu->can_do_io) { cpu_io_recompile(cpu, retaddr); } @@ -945,7 +946,7 @@ static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry, section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs); mr = section->mr; mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr; - if (mr != &io_mem_rom && mr != &io_mem_notdirty && !cpu->can_do_io) { + if (mr != &io_mem_notdirty && !cpu->can_do_io) { cpu_io_recompile(cpu, retaddr); } cpu->mem_io_vaddr = addr; @@ -1125,7 +1126,7 @@ void *probe_access(CPUArchState *env, target_ulong addr, int size, } /* Reject I/O access, or other required slow-path. */ - if (tlb_addr & (TLB_NOTDIRTY | TLB_MMIO | TLB_BSWAP)) { + if (tlb_addr & (TLB_NOTDIRTY | TLB_MMIO | TLB_BSWAP | TLB_DISCARD_WRITE)) { return NULL; } @@ -1617,6 +1618,11 @@ store_helper(CPUArchState *env, target_ulong addr, uint64_t val, return; } + /* Ignore writes to ROM. */ + if (unlikely(tlb_addr & TLB_DISCARD_WRITE)) { + return; + } + haddr = (void *)((uintptr_t)addr + entry->addend); /* diff --git a/exec.c b/exec.c index 5f2587b621..ea8c0b18ac 100644 --- a/exec.c +++ b/exec.c @@ -88,7 +88,7 @@ static MemoryRegion *system_io; AddressSpace address_space_io; AddressSpace address_space_memory; -MemoryRegion io_mem_rom, io_mem_notdirty; +MemoryRegion io_mem_notdirty; static MemoryRegion io_mem_unassigned; #endif @@ -192,7 +192,6 @@ typedef struct subpage_t { #define PHYS_SECTION_UNASSIGNED 0 #define PHYS_SECTION_NOTDIRTY 1 -#define PHYS_SECTION_ROM 2 static void io_mem_init(void); static void memory_map_init(void); @@ -1475,8 +1474,6 @@ hwaddr memory_region_section_get_iotlb(CPUState *cpu, iotlb = memory_region_get_ram_addr(section->mr) + xlat; if (!section->readonly) { iotlb |= PHYS_SECTION_NOTDIRTY; - } else { - iotlb |= PHYS_SECTION_ROM; } } else { AddressSpaceDispatch *d; @@ -3002,38 +2999,6 @@ static uint16_t dummy_section(PhysPageMap *map, FlatView *fv, MemoryRegion *mr) return phys_section_add(map, §ion); } -static void readonly_mem_write(void *opaque, hwaddr addr, - uint64_t val, unsigned size) -{ - /* Ignore any write to ROM. */ -} - -static bool readonly_mem_accepts(void *opaque, hwaddr addr, - unsigned size, bool is_write, - MemTxAttrs attrs) -{ - return is_write; -} - -/* This will only be used for writes, because reads are special cased - * to directly access the underlying host ram. - */ -static const MemoryRegionOps readonly_mem_ops = { - .write = readonly_mem_write, - .valid.accepts = readonly_mem_accepts, - .endianness = DEVICE_NATIVE_ENDIAN, - .valid = { - .min_access_size = 1, - .max_access_size = 8, - .unaligned = false, - }, - .impl = { - .min_access_size = 1, - .max_access_size = 8, - .unaligned = false, - }, -}; - MemoryRegionSection *iotlb_to_section(CPUState *cpu, hwaddr index, MemTxAttrs attrs) { @@ -3047,8 +3012,6 @@ MemoryRegionSection *iotlb_to_section(CPUState *cpu, static void io_mem_init(void) { - memory_region_init_io(&io_mem_rom, NULL, &readonly_mem_ops, - NULL, NULL, UINT64_MAX); memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX); @@ -3069,8 +3032,6 @@ AddressSpaceDispatch *address_space_dispatch_new(FlatView *fv) assert(n == PHYS_SECTION_UNASSIGNED); n = dummy_section(&d->map, fv, &io_mem_notdirty); assert(n == PHYS_SECTION_NOTDIRTY); - n = dummy_section(&d->map, fv, &io_mem_rom); - assert(n == PHYS_SECTION_ROM); d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 }; diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h index d148bded35..ad9ab85eb3 100644 --- a/include/exec/cpu-all.h +++ b/include/exec/cpu-all.h @@ -337,12 +337,15 @@ CPUArchState *cpu_copy(CPUArchState *env); #define TLB_WATCHPOINT (1 << (TARGET_PAGE_BITS_MIN - 4)) /* Set if TLB entry requires byte swap. */ #define TLB_BSWAP (1 << (TARGET_PAGE_BITS_MIN - 5)) +/* Set if TLB entry writes ignored. */ +#define TLB_DISCARD_WRITE (1 << (TARGET_PAGE_BITS_MIN - 6)) /* Use this mask to check interception with an alignment mask * in a TCG backend. */ #define TLB_FLAGS_MASK \ - (TLB_INVALID_MASK | TLB_NOTDIRTY | TLB_MMIO | TLB_WATCHPOINT | TLB_BSWAP) + (TLB_INVALID_MASK | TLB_NOTDIRTY | TLB_MMIO \ + | TLB_WATCHPOINT | TLB_BSWAP | TLB_DISCARD_WRITE) /** * tlb_hit_page: return true if page aligned @addr is a hit against the diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h index f7dbe75fbc..1c0e03ddc2 100644 --- a/include/exec/cpu-common.h +++ b/include/exec/cpu-common.h @@ -100,7 +100,6 @@ void qemu_flush_coalesced_mmio_buffer(void); void cpu_flush_icache_range(hwaddr start, hwaddr len); -extern struct MemoryRegion io_mem_rom; extern struct MemoryRegion io_mem_notdirty; typedef int (RAMBlockIterFunc)(RAMBlock *rb, void *opaque); -- cgit 1.4.1 From 08565552f70ca37da13f24928727f851073cd13e Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 18 Sep 2019 09:15:44 -0700 Subject: cputlb: Move NOTDIRTY handling from I/O path to TLB path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pages that we want to track for NOTDIRTY are RAM. We do not really need to go through the I/O path to handle them. Acked-by: David Hildenbrand Reviewed-by: Alex Bennée Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- accel/tcg/cputlb.c | 26 +++++++++++++++++++++--- exec.c | 50 ----------------------------------------------- include/exec/cpu-common.h | 2 -- memory.c | 16 --------------- 4 files changed, 23 insertions(+), 71 deletions(-) (limited to 'exec.c') diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c index 404ec57a4e..7e9a0f7ac8 100644 --- a/accel/tcg/cputlb.c +++ b/accel/tcg/cputlb.c @@ -905,7 +905,7 @@ static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry, mr = section->mr; mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr; cpu->mem_io_pc = retaddr; - if (mr != &io_mem_notdirty && !cpu->can_do_io) { + if (!cpu->can_do_io) { cpu_io_recompile(cpu, retaddr); } @@ -946,7 +946,7 @@ static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry, section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs); mr = section->mr; mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr; - if (mr != &io_mem_notdirty && !cpu->can_do_io) { + if (!cpu->can_do_io) { cpu_io_recompile(cpu, retaddr); } cpu->mem_io_vaddr = addr; @@ -1612,7 +1612,7 @@ store_helper(CPUArchState *env, target_ulong addr, uint64_t val, need_swap = size > 1 && (tlb_addr & TLB_BSWAP); /* Handle I/O access. */ - if (likely(tlb_addr & (TLB_MMIO | TLB_NOTDIRTY))) { + if (tlb_addr & TLB_MMIO) { io_writex(env, iotlbentry, mmu_idx, val, addr, retaddr, op ^ (need_swap * MO_BSWAP)); return; @@ -1625,6 +1625,26 @@ store_helper(CPUArchState *env, target_ulong addr, uint64_t val, haddr = (void *)((uintptr_t)addr + entry->addend); + /* Handle clean RAM pages. */ + if (tlb_addr & TLB_NOTDIRTY) { + NotDirtyInfo ndi; + + /* We require mem_io_pc in tb_invalidate_phys_page_range. */ + env_cpu(env)->mem_io_pc = retaddr; + + memory_notdirty_write_prepare(&ndi, env_cpu(env), addr, + addr + iotlbentry->addr, size); + + if (unlikely(need_swap)) { + store_memop(haddr, val, op ^ MO_BSWAP); + } else { + store_memop(haddr, val, op); + } + + memory_notdirty_write_complete(&ndi); + return; + } + /* * Keep these two store_memop separate to ensure that the compiler * is able to fold the entire function to a single instruction. diff --git a/exec.c b/exec.c index ea8c0b18ac..dc7001f115 100644 --- a/exec.c +++ b/exec.c @@ -88,7 +88,6 @@ static MemoryRegion *system_io; AddressSpace address_space_io; AddressSpace address_space_memory; -MemoryRegion io_mem_notdirty; static MemoryRegion io_mem_unassigned; #endif @@ -191,7 +190,6 @@ typedef struct subpage_t { } subpage_t; #define PHYS_SECTION_UNASSIGNED 0 -#define PHYS_SECTION_NOTDIRTY 1 static void io_mem_init(void); static void memory_map_init(void); @@ -1472,9 +1470,6 @@ hwaddr memory_region_section_get_iotlb(CPUState *cpu, if (memory_region_is_ram(section->mr)) { /* Normal RAM. */ iotlb = memory_region_get_ram_addr(section->mr) + xlat; - if (!section->readonly) { - iotlb |= PHYS_SECTION_NOTDIRTY; - } } else { AddressSpaceDispatch *d; @@ -2783,42 +2778,6 @@ void memory_notdirty_write_complete(NotDirtyInfo *ndi) } } -/* Called within RCU critical section. */ -static void notdirty_mem_write(void *opaque, hwaddr ram_addr, - uint64_t val, unsigned size) -{ - NotDirtyInfo ndi; - - memory_notdirty_write_prepare(&ndi, current_cpu, current_cpu->mem_io_vaddr, - ram_addr, size); - - stn_p(qemu_map_ram_ptr(NULL, ram_addr), size, val); - memory_notdirty_write_complete(&ndi); -} - -static bool notdirty_mem_accepts(void *opaque, hwaddr addr, - unsigned size, bool is_write, - MemTxAttrs attrs) -{ - return is_write; -} - -static const MemoryRegionOps notdirty_mem_ops = { - .write = notdirty_mem_write, - .valid.accepts = notdirty_mem_accepts, - .endianness = DEVICE_NATIVE_ENDIAN, - .valid = { - .min_access_size = 1, - .max_access_size = 8, - .unaligned = false, - }, - .impl = { - .min_access_size = 1, - .max_access_size = 8, - .unaligned = false, - }, -}; - /* Generate a debug exception if a watchpoint has been hit. */ void cpu_check_watchpoint(CPUState *cpu, vaddr addr, vaddr len, MemTxAttrs attrs, int flags, uintptr_t ra) @@ -3014,13 +2973,6 @@ static void io_mem_init(void) { memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX); - - /* io_mem_notdirty calls tb_invalidate_phys_page_fast, - * which can be called without the iothread mutex. - */ - memory_region_init_io(&io_mem_notdirty, NULL, ¬dirty_mem_ops, NULL, - NULL, UINT64_MAX); - memory_region_clear_global_locking(&io_mem_notdirty); } AddressSpaceDispatch *address_space_dispatch_new(FlatView *fv) @@ -3030,8 +2982,6 @@ AddressSpaceDispatch *address_space_dispatch_new(FlatView *fv) n = dummy_section(&d->map, fv, &io_mem_unassigned); assert(n == PHYS_SECTION_UNASSIGNED); - n = dummy_section(&d->map, fv, &io_mem_notdirty); - assert(n == PHYS_SECTION_NOTDIRTY); d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 }; diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h index 1c0e03ddc2..81753bbb34 100644 --- a/include/exec/cpu-common.h +++ b/include/exec/cpu-common.h @@ -100,8 +100,6 @@ void qemu_flush_coalesced_mmio_buffer(void); void cpu_flush_icache_range(hwaddr start, hwaddr len); -extern struct MemoryRegion io_mem_notdirty; - typedef int (RAMBlockIterFunc)(RAMBlock *rb, void *opaque); int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque); diff --git a/memory.c b/memory.c index 57c44c97db..a99b8c0767 100644 --- a/memory.c +++ b/memory.c @@ -434,10 +434,6 @@ static MemTxResult memory_region_read_accessor(MemoryRegion *mr, tmp = mr->ops->read(mr->opaque, addr, size); if (mr->subpage) { trace_memory_region_subpage_read(get_cpu_index(), mr, addr, tmp, size); - } else if (mr == &io_mem_notdirty) { - /* Accesses to code which has previously been translated into a TB show - * up in the MMIO path, as accesses to the io_mem_notdirty - * MemoryRegion. */ } else if (TRACE_MEMORY_REGION_OPS_READ_ENABLED) { hwaddr abs_addr = memory_region_to_absolute_addr(mr, addr); trace_memory_region_ops_read(get_cpu_index(), mr, abs_addr, tmp, size); @@ -460,10 +456,6 @@ static MemTxResult memory_region_read_with_attrs_accessor(MemoryRegion *mr, r = mr->ops->read_with_attrs(mr->opaque, addr, &tmp, size, attrs); if (mr->subpage) { trace_memory_region_subpage_read(get_cpu_index(), mr, addr, tmp, size); - } else if (mr == &io_mem_notdirty) { - /* Accesses to code which has previously been translated into a TB show - * up in the MMIO path, as accesses to the io_mem_notdirty - * MemoryRegion. */ } else if (TRACE_MEMORY_REGION_OPS_READ_ENABLED) { hwaddr abs_addr = memory_region_to_absolute_addr(mr, addr); trace_memory_region_ops_read(get_cpu_index(), mr, abs_addr, tmp, size); @@ -484,10 +476,6 @@ static MemTxResult memory_region_write_accessor(MemoryRegion *mr, if (mr->subpage) { trace_memory_region_subpage_write(get_cpu_index(), mr, addr, tmp, size); - } else if (mr == &io_mem_notdirty) { - /* Accesses to code which has previously been translated into a TB show - * up in the MMIO path, as accesses to the io_mem_notdirty - * MemoryRegion. */ } else if (TRACE_MEMORY_REGION_OPS_WRITE_ENABLED) { hwaddr abs_addr = memory_region_to_absolute_addr(mr, addr); trace_memory_region_ops_write(get_cpu_index(), mr, abs_addr, tmp, size); @@ -508,10 +496,6 @@ static MemTxResult memory_region_write_with_attrs_accessor(MemoryRegion *mr, if (mr->subpage) { trace_memory_region_subpage_write(get_cpu_index(), mr, addr, tmp, size); - } else if (mr == &io_mem_notdirty) { - /* Accesses to code which has previously been translated into a TB show - * up in the MMIO path, as accesses to the io_mem_notdirty - * MemoryRegion. */ } else if (TRACE_MEMORY_REGION_OPS_WRITE_ENABLED) { hwaddr abs_addr = memory_region_to_absolute_addr(mr, addr); trace_memory_region_ops_write(get_cpu_index(), mr, abs_addr, tmp, size); -- cgit 1.4.1 From 8f5db6415363740d4eac070bb381202c80a7fc34 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 19 Sep 2019 21:09:58 -0700 Subject: cputlb: Partially inline memory_region_section_get_iotlb MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is only one caller, tlb_set_page_with_attrs. We cannot inline the entire function because the AddressSpaceDispatch structure is private to exec.c, and cannot easily be moved to include/exec/memory-internal.h. Compute is_ram and is_romd once within tlb_set_page_with_attrs. Fold the number of tests against these predicates. Compute cpu_physical_memory_is_clean outside of the tlb lock region. Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson --- accel/tcg/cputlb.c | 66 +++++++++++++++++++++++++++++++------------------ exec.c | 22 +++-------------- include/exec/exec-all.h | 6 +---- 3 files changed, 46 insertions(+), 48 deletions(-) (limited to 'exec.c') diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c index 7e9a0f7ac8..4f118d2cc9 100644 --- a/accel/tcg/cputlb.c +++ b/accel/tcg/cputlb.c @@ -705,13 +705,14 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr, MemoryRegionSection *section; unsigned int index; target_ulong address; - target_ulong code_address; + target_ulong write_address; uintptr_t addend; CPUTLBEntry *te, tn; hwaddr iotlb, xlat, sz, paddr_page; target_ulong vaddr_page; int asidx = cpu_asidx_from_attrs(cpu, attrs); int wp_flags; + bool is_ram, is_romd; assert_cpu_is_self(cpu); @@ -740,18 +741,46 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr, if (attrs.byte_swap) { address |= TLB_BSWAP; } - if (!memory_region_is_ram(section->mr) && - !memory_region_is_romd(section->mr)) { - /* IO memory case */ - address |= TLB_MMIO; + + is_ram = memory_region_is_ram(section->mr); + is_romd = memory_region_is_romd(section->mr); + + if (is_ram || is_romd) { + /* RAM and ROMD both have associated host memory. */ + addend = (uintptr_t)memory_region_get_ram_ptr(section->mr) + xlat; + } else { + /* I/O does not; force the host address to NULL. */ addend = 0; + } + + write_address = address; + if (is_ram) { + iotlb = memory_region_get_ram_addr(section->mr) + xlat; + /* + * Computing is_clean is expensive; avoid all that unless + * the page is actually writable. + */ + if (prot & PAGE_WRITE) { + if (section->readonly) { + write_address |= TLB_DISCARD_WRITE; + } else if (cpu_physical_memory_is_clean(iotlb)) { + write_address |= TLB_NOTDIRTY; + } + } } else { - addend = (uintptr_t)memory_region_get_ram_ptr(section->mr) + xlat; + /* I/O or ROMD */ + iotlb = memory_region_section_get_iotlb(cpu, section) + xlat; + /* + * Writes to romd devices must go through MMIO to enable write. + * Reads to romd devices go through the ram_ptr found above, + * but of course reads to I/O must go through MMIO. + */ + write_address |= TLB_MMIO; + if (!is_romd) { + address = write_address; + } } - code_address = address; - iotlb = memory_region_section_get_iotlb(cpu, section, vaddr_page, - paddr_page, xlat, prot, &address); wp_flags = cpu_watchpoint_address_matches(cpu, vaddr_page, TARGET_PAGE_SIZE); @@ -791,8 +820,8 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr, /* * At this point iotlb contains a physical section number in the lower * TARGET_PAGE_BITS, and either - * + the ram_addr_t of the page base of the target RAM (if NOTDIRTY or ROM) - * + the offset within section->mr of the page base (otherwise) + * + the ram_addr_t of the page base of the target RAM (RAM) + * + the offset within section->mr of the page base (I/O, ROMD) * We subtract the vaddr_page (which is page aligned and thus won't * disturb the low bits) to give an offset which can be added to the * (non-page-aligned) vaddr of the eventual memory access to get @@ -815,25 +844,14 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr, } if (prot & PAGE_EXEC) { - tn.addr_code = code_address; + tn.addr_code = address; } else { tn.addr_code = -1; } tn.addr_write = -1; if (prot & PAGE_WRITE) { - tn.addr_write = address; - if (memory_region_is_romd(section->mr)) { - /* Use the MMIO path so that the device can switch states. */ - tn.addr_write |= TLB_MMIO; - } else if (memory_region_is_ram(section->mr)) { - if (section->readonly) { - tn.addr_write |= TLB_DISCARD_WRITE; - } else if (cpu_physical_memory_is_clean( - memory_region_get_ram_addr(section->mr) + xlat)) { - tn.addr_write |= TLB_NOTDIRTY; - } - } + tn.addr_write = write_address; if (prot & PAGE_WRITE_INV) { tn.addr_write |= TLB_INVALID_MASK; } diff --git a/exec.c b/exec.c index dc7001f115..961d7d6497 100644 --- a/exec.c +++ b/exec.c @@ -1459,26 +1459,10 @@ bool cpu_physical_memory_snapshot_get_dirty(DirtyBitmapSnapshot *snap, /* Called from RCU critical section */ hwaddr memory_region_section_get_iotlb(CPUState *cpu, - MemoryRegionSection *section, - target_ulong vaddr, - hwaddr paddr, hwaddr xlat, - int prot, - target_ulong *address) + MemoryRegionSection *section) { - hwaddr iotlb; - - if (memory_region_is_ram(section->mr)) { - /* Normal RAM. */ - iotlb = memory_region_get_ram_addr(section->mr) + xlat; - } else { - AddressSpaceDispatch *d; - - d = flatview_to_dispatch(section->fv); - iotlb = section - d->map.sections; - iotlb += xlat; - } - - return iotlb; + AddressSpaceDispatch *d = flatview_to_dispatch(section->fv); + return section - d->map.sections; } #endif /* defined(CONFIG_USER_ONLY) */ diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index 81b02eb2fe..49db07ba0b 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -509,11 +509,7 @@ address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr, hwaddr *xlat, hwaddr *plen, MemTxAttrs attrs, int *prot); hwaddr memory_region_section_get_iotlb(CPUState *cpu, - MemoryRegionSection *section, - target_ulong vaddr, - hwaddr paddr, hwaddr xlat, - int prot, - target_ulong *address); + MemoryRegionSection *section); #endif /* vl.c */ -- cgit 1.4.1 From 707526ad865dc4064c3984bcc061596a21bf9d3b Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 21 Sep 2019 18:47:59 -0700 Subject: cputlb: Merge and move memory_notdirty_write_{prepare,complete} MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since 9458a9a1df1a, all readers of the dirty bitmaps wait for the rcu lock, which means that they wait until the end of any executing TranslationBlock. As a consequence, there is no need for the actual access to happen in between the _prepare and _complete. Therefore, we can improve things by merging the two functions into notdirty_write and dropping the NotDirtyInfo structure. In addition, the only users of notdirty_write are in cputlb.c, so move the merged function there. Pass in the CPUIOTLBEntry from which the ram_addr_t may be computed. Reviewed-by: David Hildenbrand Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson --- accel/tcg/cputlb.c | 76 +++++++++++++++++++++++------------------- exec.c | 44 ------------------------ include/exec/memory-internal.h | 65 ------------------------------------ 3 files changed, 42 insertions(+), 143 deletions(-) (limited to 'exec.c') diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c index 4f118d2cc9..3e91838519 100644 --- a/accel/tcg/cputlb.c +++ b/accel/tcg/cputlb.c @@ -33,6 +33,7 @@ #include "exec/helper-proto.h" #include "qemu/atomic.h" #include "qemu/atomic128.h" +#include "translate-all.h" /* DEBUG defines, enable DEBUG_TLB_LOG to log to the CPU_LOG_MMU target */ /* #define DEBUG_TLB */ @@ -1085,6 +1086,37 @@ tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr) return qemu_ram_addr_from_host_nofail(p); } +static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size, + CPUIOTLBEntry *iotlbentry, uintptr_t retaddr) +{ + ram_addr_t ram_addr = mem_vaddr + iotlbentry->addr; + + trace_memory_notdirty_write_access(mem_vaddr, ram_addr, size); + + if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) { + struct page_collection *pages + = page_collection_lock(ram_addr, ram_addr + size); + + /* We require mem_io_pc in tb_invalidate_phys_page_range. */ + cpu->mem_io_pc = retaddr; + + tb_invalidate_phys_page_fast(pages, ram_addr, size); + page_collection_unlock(pages); + } + + /* + * Set both VGA and migration bits for simplicity and to remove + * the notdirty callback faster. + */ + cpu_physical_memory_set_dirty_range(ram_addr, size, DIRTY_CLIENTS_NOCODE); + + /* We remove the notdirty callback only if the code has been flushed. */ + if (!cpu_physical_memory_is_clean(ram_addr)) { + trace_memory_notdirty_set_dirty(mem_vaddr); + tlb_set_dirty(cpu, mem_vaddr); + } +} + /* * Probe for whether the specified guest access is permitted. If it is not * permitted then an exception will be taken in the same way as if this @@ -1204,8 +1236,7 @@ void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr, /* Probe for a read-modify-write atomic operation. Do not allow unaligned * operations, or io operations to proceed. Return the host address. */ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr, - NotDirtyInfo *ndi) + TCGMemOpIdx oi, uintptr_t retaddr) { size_t mmu_idx = get_mmuidx(oi); uintptr_t index = tlb_index(env, mmu_idx, addr); @@ -1265,12 +1296,9 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, hostaddr = (void *)((uintptr_t)addr + tlbe->addend); - ndi->active = false; if (unlikely(tlb_addr & TLB_NOTDIRTY)) { - ndi->active = true; - memory_notdirty_write_prepare(ndi, env_cpu(env), addr, - qemu_ram_addr_from_host_nofail(hostaddr), - 1 << s_bits); + notdirty_write(env_cpu(env), addr, 1 << s_bits, + &env_tlb(env)->d[mmu_idx].iotlb[index], retaddr); } return hostaddr; @@ -1641,28 +1669,13 @@ store_helper(CPUArchState *env, target_ulong addr, uint64_t val, return; } - haddr = (void *)((uintptr_t)addr + entry->addend); - /* Handle clean RAM pages. */ if (tlb_addr & TLB_NOTDIRTY) { - NotDirtyInfo ndi; - - /* We require mem_io_pc in tb_invalidate_phys_page_range. */ - env_cpu(env)->mem_io_pc = retaddr; - - memory_notdirty_write_prepare(&ndi, env_cpu(env), addr, - addr + iotlbentry->addr, size); - - if (unlikely(need_swap)) { - store_memop(haddr, val, op ^ MO_BSWAP); - } else { - store_memop(haddr, val, op); - } - - memory_notdirty_write_complete(&ndi); - return; + notdirty_write(env_cpu(env), addr, size, iotlbentry, retaddr); } + haddr = (void *)((uintptr_t)addr + entry->addend); + /* * Keep these two store_memop separate to ensure that the compiler * is able to fold the entire function to a single instruction. @@ -1793,14 +1806,9 @@ void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val, #define EXTRA_ARGS , TCGMemOpIdx oi, uintptr_t retaddr #define ATOMIC_NAME(X) \ HELPER(glue(glue(glue(atomic_ ## X, SUFFIX), END), _mmu)) -#define ATOMIC_MMU_DECLS NotDirtyInfo ndi -#define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, retaddr, &ndi) -#define ATOMIC_MMU_CLEANUP \ - do { \ - if (unlikely(ndi.active)) { \ - memory_notdirty_write_complete(&ndi); \ - } \ - } while (0) +#define ATOMIC_MMU_DECLS +#define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, retaddr) +#define ATOMIC_MMU_CLEANUP #define DATA_SIZE 1 #include "atomic_template.h" @@ -1828,7 +1836,7 @@ void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val, #undef ATOMIC_MMU_LOOKUP #define EXTRA_ARGS , TCGMemOpIdx oi #define ATOMIC_NAME(X) HELPER(glue(glue(atomic_ ## X, SUFFIX), END)) -#define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, GETPC(), &ndi) +#define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, GETPC()) #define DATA_SIZE 1 #include "atomic_template.h" diff --git a/exec.c b/exec.c index 961d7d6497..7d835b1a2b 100644 --- a/exec.c +++ b/exec.c @@ -2718,50 +2718,6 @@ ram_addr_t qemu_ram_addr_from_host(void *ptr) return block->offset + offset; } -/* Called within RCU critical section. */ -void memory_notdirty_write_prepare(NotDirtyInfo *ndi, - CPUState *cpu, - vaddr mem_vaddr, - ram_addr_t ram_addr, - unsigned size) -{ - ndi->cpu = cpu; - ndi->ram_addr = ram_addr; - ndi->mem_vaddr = mem_vaddr; - ndi->size = size; - ndi->pages = NULL; - - trace_memory_notdirty_write_access(mem_vaddr, ram_addr, size); - - assert(tcg_enabled()); - if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) { - ndi->pages = page_collection_lock(ram_addr, ram_addr + size); - tb_invalidate_phys_page_fast(ndi->pages, ram_addr, size); - } -} - -/* Called within RCU critical section. */ -void memory_notdirty_write_complete(NotDirtyInfo *ndi) -{ - if (ndi->pages) { - assert(tcg_enabled()); - page_collection_unlock(ndi->pages); - ndi->pages = NULL; - } - - /* Set both VGA and migration bits for simplicity and to remove - * the notdirty callback faster. - */ - cpu_physical_memory_set_dirty_range(ndi->ram_addr, ndi->size, - DIRTY_CLIENTS_NOCODE); - /* we remove the notdirty callback only if the code has been - flushed */ - if (!cpu_physical_memory_is_clean(ndi->ram_addr)) { - trace_memory_notdirty_set_dirty(ndi->mem_vaddr); - tlb_set_dirty(ndi->cpu, ndi->mem_vaddr); - } -} - /* Generate a debug exception if a watchpoint has been hit. */ void cpu_check_watchpoint(CPUState *cpu, vaddr addr, vaddr len, MemTxAttrs attrs, int flags, uintptr_t ra) diff --git a/include/exec/memory-internal.h b/include/exec/memory-internal.h index ef4fb92371..9fcc2af25c 100644 --- a/include/exec/memory-internal.h +++ b/include/exec/memory-internal.h @@ -49,70 +49,5 @@ void address_space_dispatch_free(AddressSpaceDispatch *d); void mtree_print_dispatch(struct AddressSpaceDispatch *d, MemoryRegion *root); - -struct page_collection; - -/* Opaque struct for passing info from memory_notdirty_write_prepare() - * to memory_notdirty_write_complete(). Callers should treat all fields - * as private, with the exception of @active. - * - * @active is a field which is not touched by either the prepare or - * complete functions, but which the caller can use if it wishes to - * track whether it has called prepare for this struct and so needs - * to later call the complete function. - */ -typedef struct { - CPUState *cpu; - struct page_collection *pages; - ram_addr_t ram_addr; - vaddr mem_vaddr; - unsigned size; - bool active; -} NotDirtyInfo; - -/** - * memory_notdirty_write_prepare: call before writing to non-dirty memory - * @ndi: pointer to opaque NotDirtyInfo struct - * @cpu: CPU doing the write - * @mem_vaddr: virtual address of write - * @ram_addr: the ram address of the write - * @size: size of write in bytes - * - * Any code which writes to the host memory corresponding to - * guest RAM which has been marked as NOTDIRTY must wrap those - * writes in calls to memory_notdirty_write_prepare() and - * memory_notdirty_write_complete(): - * - * NotDirtyInfo ndi; - * memory_notdirty_write_prepare(&ndi, ....); - * ... perform write here ... - * memory_notdirty_write_complete(&ndi); - * - * These calls will ensure that we flush any TCG translated code for - * the memory being written, update the dirty bits and (if possible) - * remove the slowpath callback for writing to the memory. - * - * This must only be called if we are using TCG; it will assert otherwise. - * - * We may take locks in the prepare call, so callers must ensure that - * they don't exit (via longjump or otherwise) without calling complete. - * - * This call must only be made inside an RCU critical section. - * (Note that while we're executing a TCG TB we're always in an - * RCU critical section, which is likely to be the case for callers - * of these functions.) - */ -void memory_notdirty_write_prepare(NotDirtyInfo *ndi, - CPUState *cpu, - vaddr mem_vaddr, - ram_addr_t ram_addr, - unsigned size); -/** - * memory_notdirty_write_complete: finish write to non-dirty memory - * @ndi: pointer to the opaque NotDirtyInfo struct which was initialized - * by memory_not_dirty_write_prepare(). - */ -void memory_notdirty_write_complete(NotDirtyInfo *ndi); - #endif #endif -- cgit 1.4.1 From ce9f5e279223cf422eae8e228fe03d6f60e251a4 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 21 Sep 2019 20:03:36 -0700 Subject: cputlb: Remove tb_invalidate_phys_page_range is_cpu_write_access MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All callers pass false to this argument. Remove it and pass the constant on to tb_invalidate_phys_page_range__locked. Reviewed-by: Alex Bennée Reviewed-by: David Hildenbrand Signed-off-by: Richard Henderson --- accel/tcg/translate-all.c | 6 ++---- accel/tcg/translate-all.h | 3 +-- exec.c | 4 ++-- 3 files changed, 5 insertions(+), 8 deletions(-) (limited to 'exec.c') diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c index 5d1e08b169..de4b697163 100644 --- a/accel/tcg/translate-all.c +++ b/accel/tcg/translate-all.c @@ -1983,8 +1983,7 @@ tb_invalidate_phys_page_range__locked(struct page_collection *pages, * * Called with mmap_lock held for user-mode emulation */ -void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end, - int is_cpu_write_access) +void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end) { struct page_collection *pages; PageDesc *p; @@ -1996,8 +1995,7 @@ void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end, return; } pages = page_collection_lock(start, end); - tb_invalidate_phys_page_range__locked(pages, p, start, end, - is_cpu_write_access); + tb_invalidate_phys_page_range__locked(pages, p, start, end, 0); page_collection_unlock(pages); } diff --git a/accel/tcg/translate-all.h b/accel/tcg/translate-all.h index 64f5fd9a05..31f2117188 100644 --- a/accel/tcg/translate-all.h +++ b/accel/tcg/translate-all.h @@ -28,8 +28,7 @@ struct page_collection *page_collection_lock(tb_page_addr_t start, void page_collection_unlock(struct page_collection *set); void tb_invalidate_phys_page_fast(struct page_collection *pages, tb_page_addr_t start, int len); -void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end, - int is_cpu_write_access); +void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end); void tb_check_watchpoint(CPUState *cpu); #ifdef CONFIG_USER_ONLY diff --git a/exec.c b/exec.c index 7d835b1a2b..b3df826039 100644 --- a/exec.c +++ b/exec.c @@ -1012,7 +1012,7 @@ const char *parse_cpu_option(const char *cpu_option) void tb_invalidate_phys_addr(target_ulong addr) { mmap_lock(); - tb_invalidate_phys_page_range(addr, addr + 1, 0); + tb_invalidate_phys_page_range(addr, addr + 1); mmap_unlock(); } @@ -1039,7 +1039,7 @@ void tb_invalidate_phys_addr(AddressSpace *as, hwaddr addr, MemTxAttrs attrs) return; } ram_addr = memory_region_get_ram_addr(mr) + addr; - tb_invalidate_phys_page_range(ram_addr, ram_addr + 1, 0); + tb_invalidate_phys_page_range(ram_addr, ram_addr + 1); rcu_read_unlock(); } -- cgit 1.4.1 From ae57db63acf5a0399232f852acc5c1d83ef63400 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 21 Sep 2019 20:24:12 -0700 Subject: cputlb: Pass retaddr to tb_check_watchpoint MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes the previous TLB_WATCHPOINT patches because we are currently failing to set cpu->mem_io_pc with the call to cpu_check_watchpoint. Pass down the retaddr directly because it's readily available. Fixes: 50b107c5d61 Reviewed-by: Alex Bennée Reviewed-by: David Hildenbrand Signed-off-by: Richard Henderson --- accel/tcg/translate-all.c | 6 +++--- accel/tcg/translate-all.h | 2 +- exec.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'exec.c') diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c index db77fb221b..66d4bc4341 100644 --- a/accel/tcg/translate-all.c +++ b/accel/tcg/translate-all.c @@ -2142,16 +2142,16 @@ static bool tb_invalidate_phys_page(tb_page_addr_t addr, uintptr_t pc) #endif /* user-mode: call with mmap_lock held */ -void tb_check_watchpoint(CPUState *cpu) +void tb_check_watchpoint(CPUState *cpu, uintptr_t retaddr) { TranslationBlock *tb; assert_memory_lock(); - tb = tcg_tb_lookup(cpu->mem_io_pc); + tb = tcg_tb_lookup(retaddr); if (tb) { /* We can use retranslation to find the PC. */ - cpu_restore_state_from_tb(cpu, tb, cpu->mem_io_pc, true); + cpu_restore_state_from_tb(cpu, tb, retaddr, true); tb_phys_invalidate(tb, -1); } else { /* The exception probably happened in a helper. The CPU state should diff --git a/accel/tcg/translate-all.h b/accel/tcg/translate-all.h index 135c1ea96a..a557b4e2bb 100644 --- a/accel/tcg/translate-all.h +++ b/accel/tcg/translate-all.h @@ -30,7 +30,7 @@ void tb_invalidate_phys_page_fast(struct page_collection *pages, tb_page_addr_t start, int len, uintptr_t retaddr); void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end); -void tb_check_watchpoint(CPUState *cpu); +void tb_check_watchpoint(CPUState *cpu, uintptr_t retaddr); #ifdef CONFIG_USER_ONLY int page_unprotect(target_ulong address, uintptr_t pc); diff --git a/exec.c b/exec.c index b3df826039..8a0a6613b1 100644 --- a/exec.c +++ b/exec.c @@ -2758,7 +2758,7 @@ void cpu_check_watchpoint(CPUState *cpu, vaddr addr, vaddr len, cpu->watchpoint_hit = wp; mmap_lock(); - tb_check_watchpoint(cpu); + tb_check_watchpoint(cpu, ra); if (wp->flags & BP_STOP_BEFORE_ACCESS) { cpu->exception_index = EXCP_DEBUG; mmap_unlock(); -- cgit 1.4.1