From 1f6f2b34ad2b9522e3db898550c90dfc4b4c44ed Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 13 Sep 2019 11:29:35 -0400 Subject: exec: Use TARGET_PAGE_BITS_MIN for TLB flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These bits do not need to vary with the actual page size used by the guest. Reviewed-by: Alex Bennée Reviewed-by: David Hildenbrand Reviewed-by: Paolo Bonzini Signed-off-by: Richard Henderson --- include/exec/cpu-all.h | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'include/exec') diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h index d2d443c4f9..e0c8dc540c 100644 --- a/include/exec/cpu-all.h +++ b/include/exec/cpu-all.h @@ -317,20 +317,24 @@ CPUArchState *cpu_copy(CPUArchState *env); #if !defined(CONFIG_USER_ONLY) -/* Flags stored in the low bits of the TLB virtual address. These are - * defined so that fast path ram access is all zeros. +/* + * Flags stored in the low bits of the TLB virtual address. + * These are defined so that fast path ram access is all zeros. * The flags all must be between TARGET_PAGE_BITS and * maximum address alignment bit. + * + * Use TARGET_PAGE_BITS_MIN so that these bits are constant + * when TARGET_PAGE_BITS_VARY is in effect. */ /* Zero if TLB entry is valid. */ -#define TLB_INVALID_MASK (1 << (TARGET_PAGE_BITS - 1)) +#define TLB_INVALID_MASK (1 << (TARGET_PAGE_BITS_MIN - 1)) /* Set if TLB entry references a clean RAM page. The iotlb entry will contain the page physical address. */ -#define TLB_NOTDIRTY (1 << (TARGET_PAGE_BITS - 2)) +#define TLB_NOTDIRTY (1 << (TARGET_PAGE_BITS_MIN - 2)) /* Set if TLB entry is an IO callback. */ -#define TLB_MMIO (1 << (TARGET_PAGE_BITS - 3)) +#define TLB_MMIO (1 << (TARGET_PAGE_BITS_MIN - 3)) /* Set if TLB entry contains a watchpoint. */ -#define TLB_WATCHPOINT (1 << (TARGET_PAGE_BITS - 4)) +#define TLB_WATCHPOINT (1 << (TARGET_PAGE_BITS_MIN - 4)) /* Use this mask to check interception with an alignment mask * in a TCG backend. -- cgit 1.4.1 From 5b87b3e671c00fd054bcbf4668d99d56b33c598c Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 10 Sep 2019 15:47:39 -0400 Subject: cputlb: Introduce TLB_BSWAP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Handle bswap on ram directly in load/store_helper. This fixes a bug with the previous implementation in that one cannot use the I/O path for RAM. Fixes: a26fc6f5152b47f1 Reviewed-by: Alex Bennée Reviewed-by: David Hildenbrand Signed-off-by: Richard Henderson --- accel/tcg/cputlb.c | 72 ++++++++++++++++++++++++++++++-------------------- include/exec/cpu-all.h | 4 ++- 2 files changed, 46 insertions(+), 30 deletions(-) (limited to 'include/exec') diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c index eeba8c9847..028eebcb44 100644 --- a/accel/tcg/cputlb.c +++ b/accel/tcg/cputlb.c @@ -737,8 +737,7 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr, address |= TLB_INVALID_MASK; } if (attrs.byte_swap) { - /* Force the access through the I/O slow path. 
*/ - address |= TLB_MMIO; + address |= TLB_BSWAP; } if (!memory_region_is_ram(section->mr) && !memory_region_is_romd(section->mr)) { @@ -901,10 +900,6 @@ static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry, bool locked = false; MemTxResult r; - if (iotlbentry->attrs.byte_swap) { - op ^= MO_BSWAP; - } - section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs); mr = section->mr; mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr; @@ -947,10 +942,6 @@ static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry, bool locked = false; MemTxResult r; - if (iotlbentry->attrs.byte_swap) { - op ^= MO_BSWAP; - } - section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs); mr = section->mr; mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr; @@ -1133,8 +1124,8 @@ void *probe_access(CPUArchState *env, target_ulong addr, int size, wp_access, retaddr); } - if (tlb_addr & (TLB_NOTDIRTY | TLB_MMIO)) { - /* I/O access */ + /* Reject I/O access, or other required slow-path. */ + if (tlb_addr & (TLB_NOTDIRTY | TLB_MMIO | TLB_BSWAP)) { return NULL; } @@ -1344,6 +1335,7 @@ load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi, /* Handle anything that isn't just a straight memory access. */ if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) { CPUIOTLBEntry *iotlbentry; + bool need_swap; /* For anything that is unaligned, recurse through full_load. */ if ((addr & (size - 1)) != 0) { @@ -1357,17 +1349,27 @@ load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi, /* On watchpoint hit, this will longjmp out. */ cpu_check_watchpoint(env_cpu(env), addr, size, iotlbentry->attrs, BP_MEM_READ, retaddr); - - /* The backing page may or may not require I/O. */ - tlb_addr &= ~TLB_WATCHPOINT; - if ((tlb_addr & ~TARGET_PAGE_MASK) == 0) { - goto do_aligned_access; - } } + need_swap = size > 1 && (tlb_addr & TLB_BSWAP); + /* Handle I/O access. */ - return io_readx(env, iotlbentry, mmu_idx, addr, - retaddr, access_type, op); + if (likely(tlb_addr & TLB_MMIO)) { + return io_readx(env, iotlbentry, mmu_idx, addr, retaddr, + access_type, op ^ (need_swap * MO_BSWAP)); + } + + haddr = (void *)((uintptr_t)addr + entry->addend); + + /* + * Keep these two load_memop separate to ensure that the compiler + * is able to fold the entire function to a single instruction. + * There is a build-time assert inside to remind you of this. ;-) + */ + if (unlikely(need_swap)) { + return load_memop(haddr, op ^ MO_BSWAP); + } + return load_memop(haddr, op); } /* Handle slow unaligned access (it spans two pages or IO). */ @@ -1394,7 +1396,6 @@ load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi, return res & MAKE_64BIT_MASK(0, size * 8); } - do_aligned_access: haddr = (void *)((uintptr_t)addr + entry->addend); return load_memop(haddr, op); } @@ -1591,6 +1592,7 @@ store_helper(CPUArchState *env, target_ulong addr, uint64_t val, /* Handle anything that isn't just a straight memory access. */ if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) { CPUIOTLBEntry *iotlbentry; + bool need_swap; /* For anything that is unaligned, recurse through byte stores. */ if ((addr & (size - 1)) != 0) { @@ -1604,16 +1606,29 @@ store_helper(CPUArchState *env, target_ulong addr, uint64_t val, /* On watchpoint hit, this will longjmp out. */ cpu_check_watchpoint(env_cpu(env), addr, size, iotlbentry->attrs, BP_MEM_WRITE, retaddr); - - /* The backing page may or may not require I/O. 
*/ - tlb_addr &= ~TLB_WATCHPOINT; - if ((tlb_addr & ~TARGET_PAGE_MASK) == 0) { - goto do_aligned_access; - } } + need_swap = size > 1 && (tlb_addr & TLB_BSWAP); + /* Handle I/O access. */ - io_writex(env, iotlbentry, mmu_idx, val, addr, retaddr, op); + if (likely(tlb_addr & (TLB_MMIO | TLB_NOTDIRTY))) { + io_writex(env, iotlbentry, mmu_idx, val, addr, retaddr, + op ^ (need_swap * MO_BSWAP)); + return; + } + + haddr = (void *)((uintptr_t)addr + entry->addend); + + /* + * Keep these two store_memop separate to ensure that the compiler + * is able to fold the entire function to a single instruction. + * There is a build-time assert inside to remind you of this. ;-) + */ + if (unlikely(need_swap)) { + store_memop(haddr, val, op ^ MO_BSWAP); + } else { + store_memop(haddr, val, op); + } return; } @@ -1682,7 +1697,6 @@ store_helper(CPUArchState *env, target_ulong addr, uint64_t val, return; } - do_aligned_access: haddr = (void *)((uintptr_t)addr + entry->addend); store_memop(haddr, val, op); } diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h index e0c8dc540c..d148bded35 100644 --- a/include/exec/cpu-all.h +++ b/include/exec/cpu-all.h @@ -335,12 +335,14 @@ CPUArchState *cpu_copy(CPUArchState *env); #define TLB_MMIO (1 << (TARGET_PAGE_BITS_MIN - 3)) /* Set if TLB entry contains a watchpoint. */ #define TLB_WATCHPOINT (1 << (TARGET_PAGE_BITS_MIN - 4)) +/* Set if TLB entry requires byte swap. */ +#define TLB_BSWAP (1 << (TARGET_PAGE_BITS_MIN - 5)) /* Use this mask to check interception with an alignment mask * in a TCG backend. */ #define TLB_FLAGS_MASK \ - (TLB_INVALID_MASK | TLB_NOTDIRTY | TLB_MMIO | TLB_WATCHPOINT) + (TLB_INVALID_MASK | TLB_NOTDIRTY | TLB_MMIO | TLB_WATCHPOINT | TLB_BSWAP) /** * tlb_hit_page: return true if page aligned @addr is a hit against the -- cgit 1.4.1 From 7b0d792ce13c0e894ee32a94b321b329724c9a25 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 19 Sep 2019 17:54:10 -0700 Subject: cputlb: Move ROM handling from I/O path to TLB path It does not require going through the whole I/O path in order to discard a write. Reviewed-by: David Hildenbrand Signed-off-by: Richard Henderson --- accel/tcg/cputlb.c | 36 +++++++++++++++++++++--------------- exec.c | 41 +---------------------------------------- include/exec/cpu-all.h | 5 ++++- include/exec/cpu-common.h | 1 - 4 files changed, 26 insertions(+), 57 deletions(-) (limited to 'include/exec') diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c index 028eebcb44..404ec57a4e 100644 --- a/accel/tcg/cputlb.c +++ b/accel/tcg/cputlb.c @@ -577,7 +577,8 @@ static void tlb_reset_dirty_range_locked(CPUTLBEntry *tlb_entry, { uintptr_t addr = tlb_entry->addr_write; - if ((addr & (TLB_INVALID_MASK | TLB_MMIO | TLB_NOTDIRTY)) == 0) { + if ((addr & (TLB_INVALID_MASK | TLB_MMIO | + TLB_DISCARD_WRITE | TLB_NOTDIRTY)) == 0) { addr &= TARGET_PAGE_MASK; addr += tlb_entry->addend; if ((addr - start) < length) { @@ -745,7 +746,6 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr, address |= TLB_MMIO; addend = 0; } else { - /* TLB_MMIO for rom/romd handled below */ addend = (uintptr_t)memory_region_get_ram_ptr(section->mr) + xlat; } @@ -822,16 +822,17 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr, tn.addr_write = -1; if (prot & PAGE_WRITE) { - if ((memory_region_is_ram(section->mr) && section->readonly) - || memory_region_is_romd(section->mr)) { - /* Write access calls the I/O callback. 
*/ - tn.addr_write = address | TLB_MMIO; - } else if (memory_region_is_ram(section->mr) - && cpu_physical_memory_is_clean( - memory_region_get_ram_addr(section->mr) + xlat)) { - tn.addr_write = address | TLB_NOTDIRTY; - } else { - tn.addr_write = address; + tn.addr_write = address; + if (memory_region_is_romd(section->mr)) { + /* Use the MMIO path so that the device can switch states. */ + tn.addr_write |= TLB_MMIO; + } else if (memory_region_is_ram(section->mr)) { + if (section->readonly) { + tn.addr_write |= TLB_DISCARD_WRITE; + } else if (cpu_physical_memory_is_clean( + memory_region_get_ram_addr(section->mr) + xlat)) { + tn.addr_write |= TLB_NOTDIRTY; + } } if (prot & PAGE_WRITE_INV) { tn.addr_write |= TLB_INVALID_MASK; @@ -904,7 +905,7 @@ static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry, mr = section->mr; mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr; cpu->mem_io_pc = retaddr; - if (mr != &io_mem_rom && mr != &io_mem_notdirty && !cpu->can_do_io) { + if (mr != &io_mem_notdirty && !cpu->can_do_io) { cpu_io_recompile(cpu, retaddr); } @@ -945,7 +946,7 @@ static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry, section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs); mr = section->mr; mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr; - if (mr != &io_mem_rom && mr != &io_mem_notdirty && !cpu->can_do_io) { + if (mr != &io_mem_notdirty && !cpu->can_do_io) { cpu_io_recompile(cpu, retaddr); } cpu->mem_io_vaddr = addr; @@ -1125,7 +1126,7 @@ void *probe_access(CPUArchState *env, target_ulong addr, int size, } /* Reject I/O access, or other required slow-path. */ - if (tlb_addr & (TLB_NOTDIRTY | TLB_MMIO | TLB_BSWAP)) { + if (tlb_addr & (TLB_NOTDIRTY | TLB_MMIO | TLB_BSWAP | TLB_DISCARD_WRITE)) { return NULL; } @@ -1617,6 +1618,11 @@ store_helper(CPUArchState *env, target_ulong addr, uint64_t val, return; } + /* Ignore writes to ROM. */ + if (unlikely(tlb_addr & TLB_DISCARD_WRITE)) { + return; + } + haddr = (void *)((uintptr_t)addr + entry->addend); /* diff --git a/exec.c b/exec.c index 5f2587b621..ea8c0b18ac 100644 --- a/exec.c +++ b/exec.c @@ -88,7 +88,7 @@ static MemoryRegion *system_io; AddressSpace address_space_io; AddressSpace address_space_memory; -MemoryRegion io_mem_rom, io_mem_notdirty; +MemoryRegion io_mem_notdirty; static MemoryRegion io_mem_unassigned; #endif @@ -192,7 +192,6 @@ typedef struct subpage_t { #define PHYS_SECTION_UNASSIGNED 0 #define PHYS_SECTION_NOTDIRTY 1 -#define PHYS_SECTION_ROM 2 static void io_mem_init(void); static void memory_map_init(void); @@ -1475,8 +1474,6 @@ hwaddr memory_region_section_get_iotlb(CPUState *cpu, iotlb = memory_region_get_ram_addr(section->mr) + xlat; if (!section->readonly) { iotlb |= PHYS_SECTION_NOTDIRTY; - } else { - iotlb |= PHYS_SECTION_ROM; } } else { AddressSpaceDispatch *d; @@ -3002,38 +2999,6 @@ static uint16_t dummy_section(PhysPageMap *map, FlatView *fv, MemoryRegion *mr) return phys_section_add(map, §ion); } -static void readonly_mem_write(void *opaque, hwaddr addr, - uint64_t val, unsigned size) -{ - /* Ignore any write to ROM. */ -} - -static bool readonly_mem_accepts(void *opaque, hwaddr addr, - unsigned size, bool is_write, - MemTxAttrs attrs) -{ - return is_write; -} - -/* This will only be used for writes, because reads are special cased - * to directly access the underlying host ram. 
- */ -static const MemoryRegionOps readonly_mem_ops = { - .write = readonly_mem_write, - .valid.accepts = readonly_mem_accepts, - .endianness = DEVICE_NATIVE_ENDIAN, - .valid = { - .min_access_size = 1, - .max_access_size = 8, - .unaligned = false, - }, - .impl = { - .min_access_size = 1, - .max_access_size = 8, - .unaligned = false, - }, -}; - MemoryRegionSection *iotlb_to_section(CPUState *cpu, hwaddr index, MemTxAttrs attrs) { @@ -3047,8 +3012,6 @@ MemoryRegionSection *iotlb_to_section(CPUState *cpu, static void io_mem_init(void) { - memory_region_init_io(&io_mem_rom, NULL, &readonly_mem_ops, - NULL, NULL, UINT64_MAX); memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX); @@ -3069,8 +3032,6 @@ AddressSpaceDispatch *address_space_dispatch_new(FlatView *fv) assert(n == PHYS_SECTION_UNASSIGNED); n = dummy_section(&d->map, fv, &io_mem_notdirty); assert(n == PHYS_SECTION_NOTDIRTY); - n = dummy_section(&d->map, fv, &io_mem_rom); - assert(n == PHYS_SECTION_ROM); d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 }; diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h index d148bded35..ad9ab85eb3 100644 --- a/include/exec/cpu-all.h +++ b/include/exec/cpu-all.h @@ -337,12 +337,15 @@ CPUArchState *cpu_copy(CPUArchState *env); #define TLB_WATCHPOINT (1 << (TARGET_PAGE_BITS_MIN - 4)) /* Set if TLB entry requires byte swap. */ #define TLB_BSWAP (1 << (TARGET_PAGE_BITS_MIN - 5)) +/* Set if TLB entry writes ignored. */ +#define TLB_DISCARD_WRITE (1 << (TARGET_PAGE_BITS_MIN - 6)) /* Use this mask to check interception with an alignment mask * in a TCG backend. */ #define TLB_FLAGS_MASK \ - (TLB_INVALID_MASK | TLB_NOTDIRTY | TLB_MMIO | TLB_WATCHPOINT | TLB_BSWAP) + (TLB_INVALID_MASK | TLB_NOTDIRTY | TLB_MMIO \ + | TLB_WATCHPOINT | TLB_BSWAP | TLB_DISCARD_WRITE) /** * tlb_hit_page: return true if page aligned @addr is a hit against the diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h index f7dbe75fbc..1c0e03ddc2 100644 --- a/include/exec/cpu-common.h +++ b/include/exec/cpu-common.h @@ -100,7 +100,6 @@ void qemu_flush_coalesced_mmio_buffer(void); void cpu_flush_icache_range(hwaddr start, hwaddr len); -extern struct MemoryRegion io_mem_rom; extern struct MemoryRegion io_mem_notdirty; typedef int (RAMBlockIterFunc)(RAMBlock *rb, void *opaque); -- cgit 1.4.1 From 08565552f70ca37da13f24928727f851073cd13e Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 18 Sep 2019 09:15:44 -0700 Subject: cputlb: Move NOTDIRTY handling from I/O path to TLB path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pages that we want to track for NOTDIRTY are RAM. We do not really need to go through the I/O path to handle them. 
Acked-by: David Hildenbrand Reviewed-by: Alex Bennée Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- accel/tcg/cputlb.c | 26 +++++++++++++++++++++--- exec.c | 50 ----------------------------------------------- include/exec/cpu-common.h | 2 -- memory.c | 16 --------------- 4 files changed, 23 insertions(+), 71 deletions(-) (limited to 'include/exec') diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c index 404ec57a4e..7e9a0f7ac8 100644 --- a/accel/tcg/cputlb.c +++ b/accel/tcg/cputlb.c @@ -905,7 +905,7 @@ static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry, mr = section->mr; mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr; cpu->mem_io_pc = retaddr; - if (mr != &io_mem_notdirty && !cpu->can_do_io) { + if (!cpu->can_do_io) { cpu_io_recompile(cpu, retaddr); } @@ -946,7 +946,7 @@ static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry, section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs); mr = section->mr; mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr; - if (mr != &io_mem_notdirty && !cpu->can_do_io) { + if (!cpu->can_do_io) { cpu_io_recompile(cpu, retaddr); } cpu->mem_io_vaddr = addr; @@ -1612,7 +1612,7 @@ store_helper(CPUArchState *env, target_ulong addr, uint64_t val, need_swap = size > 1 && (tlb_addr & TLB_BSWAP); /* Handle I/O access. */ - if (likely(tlb_addr & (TLB_MMIO | TLB_NOTDIRTY))) { + if (tlb_addr & TLB_MMIO) { io_writex(env, iotlbentry, mmu_idx, val, addr, retaddr, op ^ (need_swap * MO_BSWAP)); return; @@ -1625,6 +1625,26 @@ store_helper(CPUArchState *env, target_ulong addr, uint64_t val, haddr = (void *)((uintptr_t)addr + entry->addend); + /* Handle clean RAM pages. */ + if (tlb_addr & TLB_NOTDIRTY) { + NotDirtyInfo ndi; + + /* We require mem_io_pc in tb_invalidate_phys_page_range. */ + env_cpu(env)->mem_io_pc = retaddr; + + memory_notdirty_write_prepare(&ndi, env_cpu(env), addr, + addr + iotlbentry->addr, size); + + if (unlikely(need_swap)) { + store_memop(haddr, val, op ^ MO_BSWAP); + } else { + store_memop(haddr, val, op); + } + + memory_notdirty_write_complete(&ndi); + return; + } + /* * Keep these two store_memop separate to ensure that the compiler * is able to fold the entire function to a single instruction. diff --git a/exec.c b/exec.c index ea8c0b18ac..dc7001f115 100644 --- a/exec.c +++ b/exec.c @@ -88,7 +88,6 @@ static MemoryRegion *system_io; AddressSpace address_space_io; AddressSpace address_space_memory; -MemoryRegion io_mem_notdirty; static MemoryRegion io_mem_unassigned; #endif @@ -191,7 +190,6 @@ typedef struct subpage_t { } subpage_t; #define PHYS_SECTION_UNASSIGNED 0 -#define PHYS_SECTION_NOTDIRTY 1 static void io_mem_init(void); static void memory_map_init(void); @@ -1472,9 +1470,6 @@ hwaddr memory_region_section_get_iotlb(CPUState *cpu, if (memory_region_is_ram(section->mr)) { /* Normal RAM. */ iotlb = memory_region_get_ram_addr(section->mr) + xlat; - if (!section->readonly) { - iotlb |= PHYS_SECTION_NOTDIRTY; - } } else { AddressSpaceDispatch *d; @@ -2783,42 +2778,6 @@ void memory_notdirty_write_complete(NotDirtyInfo *ndi) } } -/* Called within RCU critical section. 
*/ -static void notdirty_mem_write(void *opaque, hwaddr ram_addr, - uint64_t val, unsigned size) -{ - NotDirtyInfo ndi; - - memory_notdirty_write_prepare(&ndi, current_cpu, current_cpu->mem_io_vaddr, - ram_addr, size); - - stn_p(qemu_map_ram_ptr(NULL, ram_addr), size, val); - memory_notdirty_write_complete(&ndi); -} - -static bool notdirty_mem_accepts(void *opaque, hwaddr addr, - unsigned size, bool is_write, - MemTxAttrs attrs) -{ - return is_write; -} - -static const MemoryRegionOps notdirty_mem_ops = { - .write = notdirty_mem_write, - .valid.accepts = notdirty_mem_accepts, - .endianness = DEVICE_NATIVE_ENDIAN, - .valid = { - .min_access_size = 1, - .max_access_size = 8, - .unaligned = false, - }, - .impl = { - .min_access_size = 1, - .max_access_size = 8, - .unaligned = false, - }, -}; - /* Generate a debug exception if a watchpoint has been hit. */ void cpu_check_watchpoint(CPUState *cpu, vaddr addr, vaddr len, MemTxAttrs attrs, int flags, uintptr_t ra) @@ -3014,13 +2973,6 @@ static void io_mem_init(void) { memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX); - - /* io_mem_notdirty calls tb_invalidate_phys_page_fast, - * which can be called without the iothread mutex. - */ - memory_region_init_io(&io_mem_notdirty, NULL, ¬dirty_mem_ops, NULL, - NULL, UINT64_MAX); - memory_region_clear_global_locking(&io_mem_notdirty); } AddressSpaceDispatch *address_space_dispatch_new(FlatView *fv) @@ -3030,8 +2982,6 @@ AddressSpaceDispatch *address_space_dispatch_new(FlatView *fv) n = dummy_section(&d->map, fv, &io_mem_unassigned); assert(n == PHYS_SECTION_UNASSIGNED); - n = dummy_section(&d->map, fv, &io_mem_notdirty); - assert(n == PHYS_SECTION_NOTDIRTY); d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 }; diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h index 1c0e03ddc2..81753bbb34 100644 --- a/include/exec/cpu-common.h +++ b/include/exec/cpu-common.h @@ -100,8 +100,6 @@ void qemu_flush_coalesced_mmio_buffer(void); void cpu_flush_icache_range(hwaddr start, hwaddr len); -extern struct MemoryRegion io_mem_notdirty; - typedef int (RAMBlockIterFunc)(RAMBlock *rb, void *opaque); int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque); diff --git a/memory.c b/memory.c index 57c44c97db..a99b8c0767 100644 --- a/memory.c +++ b/memory.c @@ -434,10 +434,6 @@ static MemTxResult memory_region_read_accessor(MemoryRegion *mr, tmp = mr->ops->read(mr->opaque, addr, size); if (mr->subpage) { trace_memory_region_subpage_read(get_cpu_index(), mr, addr, tmp, size); - } else if (mr == &io_mem_notdirty) { - /* Accesses to code which has previously been translated into a TB show - * up in the MMIO path, as accesses to the io_mem_notdirty - * MemoryRegion. */ } else if (TRACE_MEMORY_REGION_OPS_READ_ENABLED) { hwaddr abs_addr = memory_region_to_absolute_addr(mr, addr); trace_memory_region_ops_read(get_cpu_index(), mr, abs_addr, tmp, size); @@ -460,10 +456,6 @@ static MemTxResult memory_region_read_with_attrs_accessor(MemoryRegion *mr, r = mr->ops->read_with_attrs(mr->opaque, addr, &tmp, size, attrs); if (mr->subpage) { trace_memory_region_subpage_read(get_cpu_index(), mr, addr, tmp, size); - } else if (mr == &io_mem_notdirty) { - /* Accesses to code which has previously been translated into a TB show - * up in the MMIO path, as accesses to the io_mem_notdirty - * MemoryRegion. 
*/ } else if (TRACE_MEMORY_REGION_OPS_READ_ENABLED) { hwaddr abs_addr = memory_region_to_absolute_addr(mr, addr); trace_memory_region_ops_read(get_cpu_index(), mr, abs_addr, tmp, size); @@ -484,10 +476,6 @@ static MemTxResult memory_region_write_accessor(MemoryRegion *mr, if (mr->subpage) { trace_memory_region_subpage_write(get_cpu_index(), mr, addr, tmp, size); - } else if (mr == &io_mem_notdirty) { - /* Accesses to code which has previously been translated into a TB show - * up in the MMIO path, as accesses to the io_mem_notdirty - * MemoryRegion. */ } else if (TRACE_MEMORY_REGION_OPS_WRITE_ENABLED) { hwaddr abs_addr = memory_region_to_absolute_addr(mr, addr); trace_memory_region_ops_write(get_cpu_index(), mr, abs_addr, tmp, size); @@ -508,10 +496,6 @@ static MemTxResult memory_region_write_with_attrs_accessor(MemoryRegion *mr, if (mr->subpage) { trace_memory_region_subpage_write(get_cpu_index(), mr, addr, tmp, size); - } else if (mr == &io_mem_notdirty) { - /* Accesses to code which has previously been translated into a TB show - * up in the MMIO path, as accesses to the io_mem_notdirty - * MemoryRegion. */ } else if (TRACE_MEMORY_REGION_OPS_WRITE_ENABLED) { hwaddr abs_addr = memory_region_to_absolute_addr(mr, addr); trace_memory_region_ops_write(get_cpu_index(), mr, abs_addr, tmp, size); -- cgit 1.4.1 From 8f5db6415363740d4eac070bb381202c80a7fc34 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 19 Sep 2019 21:09:58 -0700 Subject: cputlb: Partially inline memory_region_section_get_iotlb MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is only one caller, tlb_set_page_with_attrs. We cannot inline the entire function because the AddressSpaceDispatch structure is private to exec.c, and cannot easily be moved to include/exec/memory-internal.h. Compute is_ram and is_romd once within tlb_set_page_with_attrs. Fold the number of tests against these predicates. Compute cpu_physical_memory_is_clean outside of the tlb lock region. Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson --- accel/tcg/cputlb.c | 66 +++++++++++++++++++++++++++++++------------------ exec.c | 22 +++-------------- include/exec/exec-all.h | 6 +---- 3 files changed, 46 insertions(+), 48 deletions(-) (limited to 'include/exec') diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c index 7e9a0f7ac8..4f118d2cc9 100644 --- a/accel/tcg/cputlb.c +++ b/accel/tcg/cputlb.c @@ -705,13 +705,14 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr, MemoryRegionSection *section; unsigned int index; target_ulong address; - target_ulong code_address; + target_ulong write_address; uintptr_t addend; CPUTLBEntry *te, tn; hwaddr iotlb, xlat, sz, paddr_page; target_ulong vaddr_page; int asidx = cpu_asidx_from_attrs(cpu, attrs); int wp_flags; + bool is_ram, is_romd; assert_cpu_is_self(cpu); @@ -740,18 +741,46 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr, if (attrs.byte_swap) { address |= TLB_BSWAP; } - if (!memory_region_is_ram(section->mr) && - !memory_region_is_romd(section->mr)) { - /* IO memory case */ - address |= TLB_MMIO; + + is_ram = memory_region_is_ram(section->mr); + is_romd = memory_region_is_romd(section->mr); + + if (is_ram || is_romd) { + /* RAM and ROMD both have associated host memory. */ + addend = (uintptr_t)memory_region_get_ram_ptr(section->mr) + xlat; + } else { + /* I/O does not; force the host address to NULL. 
*/ addend = 0; + } + + write_address = address; + if (is_ram) { + iotlb = memory_region_get_ram_addr(section->mr) + xlat; + /* + * Computing is_clean is expensive; avoid all that unless + * the page is actually writable. + */ + if (prot & PAGE_WRITE) { + if (section->readonly) { + write_address |= TLB_DISCARD_WRITE; + } else if (cpu_physical_memory_is_clean(iotlb)) { + write_address |= TLB_NOTDIRTY; + } + } } else { - addend = (uintptr_t)memory_region_get_ram_ptr(section->mr) + xlat; + /* I/O or ROMD */ + iotlb = memory_region_section_get_iotlb(cpu, section) + xlat; + /* + * Writes to romd devices must go through MMIO to enable write. + * Reads to romd devices go through the ram_ptr found above, + * but of course reads to I/O must go through MMIO. + */ + write_address |= TLB_MMIO; + if (!is_romd) { + address = write_address; + } } - code_address = address; - iotlb = memory_region_section_get_iotlb(cpu, section, vaddr_page, - paddr_page, xlat, prot, &address); wp_flags = cpu_watchpoint_address_matches(cpu, vaddr_page, TARGET_PAGE_SIZE); @@ -791,8 +820,8 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr, /* * At this point iotlb contains a physical section number in the lower * TARGET_PAGE_BITS, and either - * + the ram_addr_t of the page base of the target RAM (if NOTDIRTY or ROM) - * + the offset within section->mr of the page base (otherwise) + * + the ram_addr_t of the page base of the target RAM (RAM) + * + the offset within section->mr of the page base (I/O, ROMD) * We subtract the vaddr_page (which is page aligned and thus won't * disturb the low bits) to give an offset which can be added to the * (non-page-aligned) vaddr of the eventual memory access to get @@ -815,25 +844,14 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr, } if (prot & PAGE_EXEC) { - tn.addr_code = code_address; + tn.addr_code = address; } else { tn.addr_code = -1; } tn.addr_write = -1; if (prot & PAGE_WRITE) { - tn.addr_write = address; - if (memory_region_is_romd(section->mr)) { - /* Use the MMIO path so that the device can switch states. */ - tn.addr_write |= TLB_MMIO; - } else if (memory_region_is_ram(section->mr)) { - if (section->readonly) { - tn.addr_write |= TLB_DISCARD_WRITE; - } else if (cpu_physical_memory_is_clean( - memory_region_get_ram_addr(section->mr) + xlat)) { - tn.addr_write |= TLB_NOTDIRTY; - } - } + tn.addr_write = write_address; if (prot & PAGE_WRITE_INV) { tn.addr_write |= TLB_INVALID_MASK; } diff --git a/exec.c b/exec.c index dc7001f115..961d7d6497 100644 --- a/exec.c +++ b/exec.c @@ -1459,26 +1459,10 @@ bool cpu_physical_memory_snapshot_get_dirty(DirtyBitmapSnapshot *snap, /* Called from RCU critical section */ hwaddr memory_region_section_get_iotlb(CPUState *cpu, - MemoryRegionSection *section, - target_ulong vaddr, - hwaddr paddr, hwaddr xlat, - int prot, - target_ulong *address) + MemoryRegionSection *section) { - hwaddr iotlb; - - if (memory_region_is_ram(section->mr)) { - /* Normal RAM. 
*/ - iotlb = memory_region_get_ram_addr(section->mr) + xlat; - } else { - AddressSpaceDispatch *d; - - d = flatview_to_dispatch(section->fv); - iotlb = section - d->map.sections; - iotlb += xlat; - } - - return iotlb; + AddressSpaceDispatch *d = flatview_to_dispatch(section->fv); + return section - d->map.sections; } #endif /* defined(CONFIG_USER_ONLY) */ diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index 81b02eb2fe..49db07ba0b 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -509,11 +509,7 @@ address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr, hwaddr *xlat, hwaddr *plen, MemTxAttrs attrs, int *prot); hwaddr memory_region_section_get_iotlb(CPUState *cpu, - MemoryRegionSection *section, - target_ulong vaddr, - hwaddr paddr, hwaddr xlat, - int prot, - target_ulong *address); + MemoryRegionSection *section); #endif /* vl.c */ -- cgit 1.4.1 From 707526ad865dc4064c3984bcc061596a21bf9d3b Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 21 Sep 2019 18:47:59 -0700 Subject: cputlb: Merge and move memory_notdirty_write_{prepare,complete} MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since 9458a9a1df1a, all readers of the dirty bitmaps wait for the rcu lock, which means that they wait until the end of any executing TranslationBlock. As a consequence, there is no need for the actual access to happen in between the _prepare and _complete. Therefore, we can improve things by merging the two functions into notdirty_write and dropping the NotDirtyInfo structure. In addition, the only users of notdirty_write are in cputlb.c, so move the merged function there. Pass in the CPUIOTLBEntry from which the ram_addr_t may be computed. Reviewed-by: David Hildenbrand Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson --- accel/tcg/cputlb.c | 76 +++++++++++++++++++++++------------------- exec.c | 44 ------------------------ include/exec/memory-internal.h | 65 ------------------------------------ 3 files changed, 42 insertions(+), 143 deletions(-) (limited to 'include/exec') diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c index 4f118d2cc9..3e91838519 100644 --- a/accel/tcg/cputlb.c +++ b/accel/tcg/cputlb.c @@ -33,6 +33,7 @@ #include "exec/helper-proto.h" #include "qemu/atomic.h" #include "qemu/atomic128.h" +#include "translate-all.h" /* DEBUG defines, enable DEBUG_TLB_LOG to log to the CPU_LOG_MMU target */ /* #define DEBUG_TLB */ @@ -1085,6 +1086,37 @@ tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr) return qemu_ram_addr_from_host_nofail(p); } +static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size, + CPUIOTLBEntry *iotlbentry, uintptr_t retaddr) +{ + ram_addr_t ram_addr = mem_vaddr + iotlbentry->addr; + + trace_memory_notdirty_write_access(mem_vaddr, ram_addr, size); + + if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) { + struct page_collection *pages + = page_collection_lock(ram_addr, ram_addr + size); + + /* We require mem_io_pc in tb_invalidate_phys_page_range. */ + cpu->mem_io_pc = retaddr; + + tb_invalidate_phys_page_fast(pages, ram_addr, size); + page_collection_unlock(pages); + } + + /* + * Set both VGA and migration bits for simplicity and to remove + * the notdirty callback faster. + */ + cpu_physical_memory_set_dirty_range(ram_addr, size, DIRTY_CLIENTS_NOCODE); + + /* We remove the notdirty callback only if the code has been flushed. 
*/ + if (!cpu_physical_memory_is_clean(ram_addr)) { + trace_memory_notdirty_set_dirty(mem_vaddr); + tlb_set_dirty(cpu, mem_vaddr); + } +} + /* * Probe for whether the specified guest access is permitted. If it is not * permitted then an exception will be taken in the same way as if this @@ -1204,8 +1236,7 @@ void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr, /* Probe for a read-modify-write atomic operation. Do not allow unaligned * operations, or io operations to proceed. Return the host address. */ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr, - NotDirtyInfo *ndi) + TCGMemOpIdx oi, uintptr_t retaddr) { size_t mmu_idx = get_mmuidx(oi); uintptr_t index = tlb_index(env, mmu_idx, addr); @@ -1265,12 +1296,9 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, hostaddr = (void *)((uintptr_t)addr + tlbe->addend); - ndi->active = false; if (unlikely(tlb_addr & TLB_NOTDIRTY)) { - ndi->active = true; - memory_notdirty_write_prepare(ndi, env_cpu(env), addr, - qemu_ram_addr_from_host_nofail(hostaddr), - 1 << s_bits); + notdirty_write(env_cpu(env), addr, 1 << s_bits, + &env_tlb(env)->d[mmu_idx].iotlb[index], retaddr); } return hostaddr; @@ -1641,28 +1669,13 @@ store_helper(CPUArchState *env, target_ulong addr, uint64_t val, return; } - haddr = (void *)((uintptr_t)addr + entry->addend); - /* Handle clean RAM pages. */ if (tlb_addr & TLB_NOTDIRTY) { - NotDirtyInfo ndi; - - /* We require mem_io_pc in tb_invalidate_phys_page_range. */ - env_cpu(env)->mem_io_pc = retaddr; - - memory_notdirty_write_prepare(&ndi, env_cpu(env), addr, - addr + iotlbentry->addr, size); - - if (unlikely(need_swap)) { - store_memop(haddr, val, op ^ MO_BSWAP); - } else { - store_memop(haddr, val, op); - } - - memory_notdirty_write_complete(&ndi); - return; + notdirty_write(env_cpu(env), addr, size, iotlbentry, retaddr); } + haddr = (void *)((uintptr_t)addr + entry->addend); + /* * Keep these two store_memop separate to ensure that the compiler * is able to fold the entire function to a single instruction. @@ -1793,14 +1806,9 @@ void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val, #define EXTRA_ARGS , TCGMemOpIdx oi, uintptr_t retaddr #define ATOMIC_NAME(X) \ HELPER(glue(glue(glue(atomic_ ## X, SUFFIX), END), _mmu)) -#define ATOMIC_MMU_DECLS NotDirtyInfo ndi -#define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, retaddr, &ndi) -#define ATOMIC_MMU_CLEANUP \ - do { \ - if (unlikely(ndi.active)) { \ - memory_notdirty_write_complete(&ndi); \ - } \ - } while (0) +#define ATOMIC_MMU_DECLS +#define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, retaddr) +#define ATOMIC_MMU_CLEANUP #define DATA_SIZE 1 #include "atomic_template.h" @@ -1828,7 +1836,7 @@ void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val, #undef ATOMIC_MMU_LOOKUP #define EXTRA_ARGS , TCGMemOpIdx oi #define ATOMIC_NAME(X) HELPER(glue(glue(atomic_ ## X, SUFFIX), END)) -#define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, GETPC(), &ndi) +#define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, GETPC()) #define DATA_SIZE 1 #include "atomic_template.h" diff --git a/exec.c b/exec.c index 961d7d6497..7d835b1a2b 100644 --- a/exec.c +++ b/exec.c @@ -2718,50 +2718,6 @@ ram_addr_t qemu_ram_addr_from_host(void *ptr) return block->offset + offset; } -/* Called within RCU critical section. 
*/ -void memory_notdirty_write_prepare(NotDirtyInfo *ndi, - CPUState *cpu, - vaddr mem_vaddr, - ram_addr_t ram_addr, - unsigned size) -{ - ndi->cpu = cpu; - ndi->ram_addr = ram_addr; - ndi->mem_vaddr = mem_vaddr; - ndi->size = size; - ndi->pages = NULL; - - trace_memory_notdirty_write_access(mem_vaddr, ram_addr, size); - - assert(tcg_enabled()); - if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) { - ndi->pages = page_collection_lock(ram_addr, ram_addr + size); - tb_invalidate_phys_page_fast(ndi->pages, ram_addr, size); - } -} - -/* Called within RCU critical section. */ -void memory_notdirty_write_complete(NotDirtyInfo *ndi) -{ - if (ndi->pages) { - assert(tcg_enabled()); - page_collection_unlock(ndi->pages); - ndi->pages = NULL; - } - - /* Set both VGA and migration bits for simplicity and to remove - * the notdirty callback faster. - */ - cpu_physical_memory_set_dirty_range(ndi->ram_addr, ndi->size, - DIRTY_CLIENTS_NOCODE); - /* we remove the notdirty callback only if the code has been - flushed */ - if (!cpu_physical_memory_is_clean(ndi->ram_addr)) { - trace_memory_notdirty_set_dirty(ndi->mem_vaddr); - tlb_set_dirty(ndi->cpu, ndi->mem_vaddr); - } -} - /* Generate a debug exception if a watchpoint has been hit. */ void cpu_check_watchpoint(CPUState *cpu, vaddr addr, vaddr len, MemTxAttrs attrs, int flags, uintptr_t ra) diff --git a/include/exec/memory-internal.h b/include/exec/memory-internal.h index ef4fb92371..9fcc2af25c 100644 --- a/include/exec/memory-internal.h +++ b/include/exec/memory-internal.h @@ -49,70 +49,5 @@ void address_space_dispatch_free(AddressSpaceDispatch *d); void mtree_print_dispatch(struct AddressSpaceDispatch *d, MemoryRegion *root); - -struct page_collection; - -/* Opaque struct for passing info from memory_notdirty_write_prepare() - * to memory_notdirty_write_complete(). Callers should treat all fields - * as private, with the exception of @active. - * - * @active is a field which is not touched by either the prepare or - * complete functions, but which the caller can use if it wishes to - * track whether it has called prepare for this struct and so needs - * to later call the complete function. - */ -typedef struct { - CPUState *cpu; - struct page_collection *pages; - ram_addr_t ram_addr; - vaddr mem_vaddr; - unsigned size; - bool active; -} NotDirtyInfo; - -/** - * memory_notdirty_write_prepare: call before writing to non-dirty memory - * @ndi: pointer to opaque NotDirtyInfo struct - * @cpu: CPU doing the write - * @mem_vaddr: virtual address of write - * @ram_addr: the ram address of the write - * @size: size of write in bytes - * - * Any code which writes to the host memory corresponding to - * guest RAM which has been marked as NOTDIRTY must wrap those - * writes in calls to memory_notdirty_write_prepare() and - * memory_notdirty_write_complete(): - * - * NotDirtyInfo ndi; - * memory_notdirty_write_prepare(&ndi, ....); - * ... perform write here ... - * memory_notdirty_write_complete(&ndi); - * - * These calls will ensure that we flush any TCG translated code for - * the memory being written, update the dirty bits and (if possible) - * remove the slowpath callback for writing to the memory. - * - * This must only be called if we are using TCG; it will assert otherwise. - * - * We may take locks in the prepare call, so callers must ensure that - * they don't exit (via longjump or otherwise) without calling complete. - * - * This call must only be made inside an RCU critical section. 
- * (Note that while we're executing a TCG TB we're always in an - * RCU critical section, which is likely to be the case for callers - * of these functions.) - */ -void memory_notdirty_write_prepare(NotDirtyInfo *ndi, - CPUState *cpu, - vaddr mem_vaddr, - ram_addr_t ram_addr, - unsigned size); -/** - * memory_notdirty_write_complete: finish write to non-dirty memory - * @ndi: pointer to the opaque NotDirtyInfo struct which was initialized - * by memory_not_dirty_write_prepare(). - */ -void memory_notdirty_write_complete(NotDirtyInfo *ndi); - #endif #endif -- cgit 1.4.1
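
As a quick reference, the sketch below collects the final TLB flag layout once the whole series is applied. It is not itself a patch; the definitions are copied from the include/exec/cpu-all.h hunks above. All flags sit below TARGET_PAGE_BITS_MIN, so they remain constant even when TARGET_PAGE_BITS_VARY is in effect, and a TLB entry with any of these bits set takes the slow path in load_helper/store_helper:

/* Zero if TLB entry is valid. */
#define TLB_INVALID_MASK    (1 << (TARGET_PAGE_BITS_MIN - 1))
/* Set if TLB entry references a clean RAM page. */
#define TLB_NOTDIRTY        (1 << (TARGET_PAGE_BITS_MIN - 2))
/* Set if TLB entry is an IO callback. */
#define TLB_MMIO            (1 << (TARGET_PAGE_BITS_MIN - 3))
/* Set if TLB entry contains a watchpoint. */
#define TLB_WATCHPOINT      (1 << (TARGET_PAGE_BITS_MIN - 4))
/* Set if TLB entry requires byte swap. */
#define TLB_BSWAP           (1 << (TARGET_PAGE_BITS_MIN - 5))
/* Set if TLB entry writes are ignored (ROM, readonly RAM). */
#define TLB_DISCARD_WRITE   (1 << (TARGET_PAGE_BITS_MIN - 6))

#define TLB_FLAGS_MASK \
    (TLB_INVALID_MASK | TLB_NOTDIRTY | TLB_MMIO \
     | TLB_WATCHPOINT | TLB_BSWAP | TLB_DISCARD_WRITE)

With byte swap, discarded writes, and notdirty tracking handled directly in cputlb.c via these flags, the io_mem_rom and io_mem_notdirty memory regions (and the PHYS_SECTION_ROM / PHYS_SECTION_NOTDIRTY dummy sections) have no remaining users and are removed by the later patches in the series.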