76 files changed, 1670 insertions, 1022 deletions
diff --git a/.gitlab-ci.d/cirrus/kvm-build.yml b/.gitlab-ci.d/cirrus/kvm-build.yml index 4334fabf39..a93881aa8b 100644 --- a/.gitlab-ci.d/cirrus/kvm-build.yml +++ b/.gitlab-ci.d/cirrus/kvm-build.yml @@ -15,7 +15,7 @@ env: folder: $HOME/.cache/qemu-vm install_script: - dnf update -y - - dnf install -y git make openssh-clients qemu-img qemu-system-x86 wget + - dnf install -y git make openssh-clients qemu-img qemu-system-x86 wget meson clone_script: - git clone --depth 100 "$CI_REPOSITORY_URL" . - git fetch origin "$CI_COMMIT_REF_NAME" diff --git a/VERSION b/VERSION index 66ab90161b..eb3e782582 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -8.0.92 +8.0.93 diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c index ba44501a7c..d68fa6867c 100644 --- a/accel/tcg/cputlb.c +++ b/accel/tcg/cputlb.c @@ -1363,6 +1363,21 @@ static inline void cpu_transaction_failed(CPUState *cpu, hwaddr physaddr, } } +/* + * Save a potentially trashed CPUTLBEntryFull for later lookup by plugin. + * This is read by tlb_plugin_lookup if the fulltlb entry doesn't match + * because of the side effect of io_writex changing memory layout. + */ +static void save_iotlb_data(CPUState *cs, MemoryRegionSection *section, + hwaddr mr_offset) +{ +#ifdef CONFIG_PLUGIN + SavedIOTLB *saved = &cs->saved_iotlb; + saved->section = section; + saved->mr_offset = mr_offset; +#endif +} + static uint64_t io_readx(CPUArchState *env, CPUTLBEntryFull *full, int mmu_idx, vaddr addr, uintptr_t retaddr, MMUAccessType access_type, MemOp op) @@ -1382,6 +1397,12 @@ static uint64_t io_readx(CPUArchState *env, CPUTLBEntryFull *full, cpu_io_recompile(cpu, retaddr); } + /* + * The memory_region_dispatch may trigger a flush/resize + * so for plugins we save the iotlb_data just in case. + */ + save_iotlb_data(cpu, section, mr_offset); + { QEMU_IOTHREAD_LOCK_GUARD(); r = memory_region_dispatch_read(mr, mr_offset, &val, op, full->attrs); @@ -1398,21 +1419,6 @@ static uint64_t io_readx(CPUArchState *env, CPUTLBEntryFull *full, return val; } -/* - * Save a potentially trashed CPUTLBEntryFull for later lookup by plugin. - * This is read by tlb_plugin_lookup if the fulltlb entry doesn't match - * because of the side effect of io_writex changing memory layout. - */ -static void save_iotlb_data(CPUState *cs, MemoryRegionSection *section, - hwaddr mr_offset) -{ -#ifdef CONFIG_PLUGIN - SavedIOTLB *saved = &cs->saved_iotlb; - saved->section = section; - saved->mr_offset = mr_offset; -#endif -} - static void io_writex(CPUArchState *env, CPUTLBEntryFull *full, int mmu_idx, uint64_t val, vaddr addr, uintptr_t retaddr, MemOp op) @@ -2066,27 +2072,55 @@ static void *atomic_mmu_lookup(CPUArchState *env, vaddr addr, MemOpIdx oi, /** * do_ld_mmio_beN: * @env: cpu context - * @p: translation parameters + * @full: page parameters * @ret_be: accumulated data + * @addr: virtual address + * @size: number of bytes * @mmu_idx: virtual address context * @ra: return address into tcg generated code, or 0 + * Context: iothread lock held * - * Load @p->size bytes from @p->addr, which is memory-mapped i/o. + * Load @size bytes from @addr, which is memory-mapped i/o. * The bytes are concatenated in big-endian order with @ret_be. 
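/*
 * [Editor's sketch, not part of the commit: a plain host-memory model of
 * the (size | addr) & 7 dispatch introduced in do_ld_mmio_beN above.
 * ORing the remaining size into the low address bits selects the widest
 * naturally aligned piece that both the alignment and the residual size
 * permit, so an arbitrary 1..8 byte read needs at most three accesses.]
 */
#include <stdint.h>

static uint64_t ld_be_pieces_model(const uint8_t *buf, uintptr_t addr, int size)
{
    uint64_t ret = 0;

    while (size) {
        int step;

        switch ((size | addr) & 7) {
        case 1: case 3: case 5: case 7:
            step = 1;           /* odd size or odd address: one byte */
            break;
        case 2: case 6:
            step = 2;           /* 2-aligned with at least 2 bytes left */
            break;
        case 4:
            step = 4;           /* 4-aligned with at least 4 bytes left */
            break;
        default:
            step = 8;           /* 8-aligned and exactly 8 bytes left */
            break;
        }
        for (int i = 0; i < step; i++) {
            ret = (ret << 8) | buf[addr + i];   /* big-endian accumulate */
        }
        addr += step;
        size -= step;
    }
    return ret;
}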
*/ -static uint64_t do_ld_mmio_beN(CPUArchState *env, MMULookupPageData *p, - uint64_t ret_be, int mmu_idx, - MMUAccessType type, uintptr_t ra) +static uint64_t do_ld_mmio_beN(CPUArchState *env, CPUTLBEntryFull *full, + uint64_t ret_be, vaddr addr, int size, + int mmu_idx, MMUAccessType type, uintptr_t ra) { - CPUTLBEntryFull *full = p->full; - vaddr addr = p->addr; - int i, size = p->size; + uint64_t t; - QEMU_IOTHREAD_LOCK_GUARD(); - for (i = 0; i < size; i++) { - uint8_t x = io_readx(env, full, mmu_idx, addr + i, ra, type, MO_UB); - ret_be = (ret_be << 8) | x; - } + tcg_debug_assert(size > 0 && size <= 8); + do { + /* Read aligned pieces up to 8 bytes. */ + switch ((size | (int)addr) & 7) { + case 1: + case 3: + case 5: + case 7: + t = io_readx(env, full, mmu_idx, addr, ra, type, MO_UB); + ret_be = (ret_be << 8) | t; + size -= 1; + addr += 1; + break; + case 2: + case 6: + t = io_readx(env, full, mmu_idx, addr, ra, type, MO_BEUW); + ret_be = (ret_be << 16) | t; + size -= 2; + addr += 2; + break; + case 4: + t = io_readx(env, full, mmu_idx, addr, ra, type, MO_BEUL); + ret_be = (ret_be << 32) | t; + size -= 4; + addr += 4; + break; + case 0: + return io_readx(env, full, mmu_idx, addr, ra, type, MO_BEUQ); + default: + qemu_build_not_reached(); + } + } while (size); return ret_be; } @@ -2232,7 +2266,9 @@ static uint64_t do_ld_beN(CPUArchState *env, MMULookupPageData *p, unsigned tmp, half_size; if (unlikely(p->flags & TLB_MMIO)) { - return do_ld_mmio_beN(env, p, ret_be, mmu_idx, type, ra); + QEMU_IOTHREAD_LOCK_GUARD(); + return do_ld_mmio_beN(env, p->full, ret_be, p->addr, p->size, + mmu_idx, type, ra); } /* @@ -2281,11 +2317,11 @@ static Int128 do_ld16_beN(CPUArchState *env, MMULookupPageData *p, MemOp atom; if (unlikely(p->flags & TLB_MMIO)) { - p->size = size - 8; - a = do_ld_mmio_beN(env, p, a, mmu_idx, MMU_DATA_LOAD, ra); - p->addr += p->size; - p->size = 8; - b = do_ld_mmio_beN(env, p, 0, mmu_idx, MMU_DATA_LOAD, ra); + QEMU_IOTHREAD_LOCK_GUARD(); + a = do_ld_mmio_beN(env, p->full, a, p->addr, size - 8, + mmu_idx, MMU_DATA_LOAD, ra); + b = do_ld_mmio_beN(env, p->full, 0, p->addr + 8, 8, + mmu_idx, MMU_DATA_LOAD, ra); return int128_make128(b, a); } @@ -2340,16 +2376,20 @@ static uint8_t do_ld_1(CPUArchState *env, MMULookupPageData *p, int mmu_idx, static uint16_t do_ld_2(CPUArchState *env, MMULookupPageData *p, int mmu_idx, MMUAccessType type, MemOp memop, uintptr_t ra) { - uint64_t ret; + uint16_t ret; if (unlikely(p->flags & TLB_MMIO)) { - return io_readx(env, p->full, mmu_idx, p->addr, ra, type, memop); - } - - /* Perform the load host endian, then swap if necessary. */ - ret = load_atom_2(env, ra, p->haddr, memop); - if (memop & MO_BSWAP) { - ret = bswap16(ret); + QEMU_IOTHREAD_LOCK_GUARD(); + ret = do_ld_mmio_beN(env, p->full, 0, p->addr, 2, mmu_idx, type, ra); + if ((memop & MO_BSWAP) == MO_LE) { + ret = bswap16(ret); + } + } else { + /* Perform the load host endian, then swap if necessary. */ + ret = load_atom_2(env, ra, p->haddr, memop); + if (memop & MO_BSWAP) { + ret = bswap16(ret); + } } return ret; } @@ -2360,13 +2400,17 @@ static uint32_t do_ld_4(CPUArchState *env, MMULookupPageData *p, int mmu_idx, uint32_t ret; if (unlikely(p->flags & TLB_MMIO)) { - return io_readx(env, p->full, mmu_idx, p->addr, ra, type, memop); - } - - /* Perform the load host endian. 
*/ - ret = load_atom_4(env, ra, p->haddr, memop); - if (memop & MO_BSWAP) { - ret = bswap32(ret); + QEMU_IOTHREAD_LOCK_GUARD(); + ret = do_ld_mmio_beN(env, p->full, 0, p->addr, 4, mmu_idx, type, ra); + if ((memop & MO_BSWAP) == MO_LE) { + ret = bswap32(ret); + } + } else { + /* Perform the load host endian. */ + ret = load_atom_4(env, ra, p->haddr, memop); + if (memop & MO_BSWAP) { + ret = bswap32(ret); + } } return ret; } @@ -2377,13 +2421,17 @@ static uint64_t do_ld_8(CPUArchState *env, MMULookupPageData *p, int mmu_idx, uint64_t ret; if (unlikely(p->flags & TLB_MMIO)) { - return io_readx(env, p->full, mmu_idx, p->addr, ra, type, memop); - } - - /* Perform the load host endian. */ - ret = load_atom_8(env, ra, p->haddr, memop); - if (memop & MO_BSWAP) { - ret = bswap64(ret); + QEMU_IOTHREAD_LOCK_GUARD(); + ret = do_ld_mmio_beN(env, p->full, 0, p->addr, 8, mmu_idx, type, ra); + if ((memop & MO_BSWAP) == MO_LE) { + ret = bswap64(ret); + } + } else { + /* Perform the load host endian. */ + ret = load_atom_8(env, ra, p->haddr, memop); + if (memop & MO_BSWAP) { + ret = bswap64(ret); + } } return ret; } @@ -2531,20 +2579,22 @@ static Int128 do_ld16_mmu(CPUArchState *env, vaddr addr, cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD); crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD, &l); if (likely(!crosspage)) { - /* Perform the load host endian. */ if (unlikely(l.page[0].flags & TLB_MMIO)) { QEMU_IOTHREAD_LOCK_GUARD(); - a = io_readx(env, l.page[0].full, l.mmu_idx, addr, - ra, MMU_DATA_LOAD, MO_64); - b = io_readx(env, l.page[0].full, l.mmu_idx, addr + 8, - ra, MMU_DATA_LOAD, MO_64); - ret = int128_make128(HOST_BIG_ENDIAN ? b : a, - HOST_BIG_ENDIAN ? a : b); + a = do_ld_mmio_beN(env, l.page[0].full, 0, addr, 8, + l.mmu_idx, MMU_DATA_LOAD, ra); + b = do_ld_mmio_beN(env, l.page[0].full, 0, addr + 8, 8, + l.mmu_idx, MMU_DATA_LOAD, ra); + ret = int128_make128(b, a); + if ((l.memop & MO_BSWAP) == MO_LE) { + ret = bswap128(ret); + } } else { + /* Perform the load host endian. */ ret = load_atom_16(env, ra, l.page[0].haddr, l.memop); - } - if (l.memop & MO_BSWAP) { - ret = bswap128(ret); + if (l.memop & MO_BSWAP) { + ret = bswap128(ret); + } } return ret; } @@ -2664,26 +2714,57 @@ Int128 cpu_ld16_mmu(CPUArchState *env, abi_ptr addr, /** * do_st_mmio_leN: * @env: cpu context - * @p: translation parameters + * @full: page parameters * @val_le: data to store + * @addr: virtual address + * @size: number of bytes * @mmu_idx: virtual address context * @ra: return address into tcg generated code, or 0 + * Context: iothread lock held * - * Store @p->size bytes at @p->addr, which is memory-mapped i/o. + * Store @size bytes at @addr, which is memory-mapped i/o. * The bytes to store are extracted in little-endian order from @val_le; * return the bytes of @val_le beyond @p->size that have not been stored. */ -static uint64_t do_st_mmio_leN(CPUArchState *env, MMULookupPageData *p, - uint64_t val_le, int mmu_idx, uintptr_t ra) +static uint64_t do_st_mmio_leN(CPUArchState *env, CPUTLBEntryFull *full, + uint64_t val_le, vaddr addr, int size, + int mmu_idx, uintptr_t ra) { - CPUTLBEntryFull *full = p->full; - vaddr addr = p->addr; - int i, size = p->size; + tcg_debug_assert(size > 0 && size <= 8); + + do { + /* Store aligned pieces up to 8 bytes. 
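/*
 * [Editor's sketch, not part of the commit: the store-side mirror of the
 * load model shown earlier. Bytes are consumed little-endian from val_le,
 * and the still-unstored high bytes are returned -- the property that
 * do_st_leN and do_st16_leN rely on to hand the remainder of a
 * cross-page store to the second page.]
 */
static uint64_t st_le_pieces_model(uint8_t *buf, uintptr_t addr, int size,
                                   uint64_t val_le)
{
    while (size) {
        int step;

        switch ((size | addr) & 7) {
        case 1: case 3: case 5: case 7: step = 1; break;
        case 2: case 6:                 step = 2; break;
        case 4:                         step = 4; break;
        default:                        step = 8; break;
        }
        for (int i = 0; i < step; i++) {
            buf[addr + i] = (uint8_t)val_le;    /* low byte first */
            val_le >>= 8;
        }
        addr += step;
        size -= step;
    }
    return val_le;              /* bytes beyond the stored size, if any */
}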
*/ + switch ((size | (int)addr) & 7) { + case 1: + case 3: + case 5: + case 7: + io_writex(env, full, mmu_idx, val_le, addr, ra, MO_UB); + val_le >>= 8; + size -= 1; + addr += 1; + break; + case 2: + case 6: + io_writex(env, full, mmu_idx, val_le, addr, ra, MO_LEUW); + val_le >>= 16; + size -= 2; + addr += 2; + break; + case 4: + io_writex(env, full, mmu_idx, val_le, addr, ra, MO_LEUL); + val_le >>= 32; + size -= 4; + addr += 4; + break; + case 0: + io_writex(env, full, mmu_idx, val_le, addr, ra, MO_LEUQ); + return 0; + default: + qemu_build_not_reached(); + } + } while (size); - QEMU_IOTHREAD_LOCK_GUARD(); - for (i = 0; i < size; i++, val_le >>= 8) { - io_writex(env, full, mmu_idx, val_le, addr + i, ra, MO_UB); - } return val_le; } @@ -2698,7 +2779,9 @@ static uint64_t do_st_leN(CPUArchState *env, MMULookupPageData *p, unsigned tmp, half_size; if (unlikely(p->flags & TLB_MMIO)) { - return do_st_mmio_leN(env, p, val_le, mmu_idx, ra); + QEMU_IOTHREAD_LOCK_GUARD(); + return do_st_mmio_leN(env, p->full, val_le, p->addr, + p->size, mmu_idx, ra); } else if (unlikely(p->flags & TLB_DISCARD_WRITE)) { return val_le >> (p->size * 8); } @@ -2751,11 +2834,11 @@ static uint64_t do_st16_leN(CPUArchState *env, MMULookupPageData *p, MemOp atom; if (unlikely(p->flags & TLB_MMIO)) { - p->size = 8; - do_st_mmio_leN(env, p, int128_getlo(val_le), mmu_idx, ra); - p->size = size - 8; - p->addr += 8; - return do_st_mmio_leN(env, p, int128_gethi(val_le), mmu_idx, ra); + QEMU_IOTHREAD_LOCK_GUARD(); + do_st_mmio_leN(env, p->full, int128_getlo(val_le), + p->addr, 8, mmu_idx, ra); + return do_st_mmio_leN(env, p->full, int128_gethi(val_le), + p->addr + 8, size - 8, mmu_idx, ra); } else if (unlikely(p->flags & TLB_DISCARD_WRITE)) { return int128_gethi(val_le) >> ((size - 8) * 8); } @@ -2811,7 +2894,11 @@ static void do_st_2(CPUArchState *env, MMULookupPageData *p, uint16_t val, int mmu_idx, MemOp memop, uintptr_t ra) { if (unlikely(p->flags & TLB_MMIO)) { - io_writex(env, p->full, mmu_idx, val, p->addr, ra, memop); + if ((memop & MO_BSWAP) != MO_LE) { + val = bswap16(val); + } + QEMU_IOTHREAD_LOCK_GUARD(); + do_st_mmio_leN(env, p->full, val, p->addr, 2, mmu_idx, ra); } else if (unlikely(p->flags & TLB_DISCARD_WRITE)) { /* nothing */ } else { @@ -2827,7 +2914,11 @@ static void do_st_4(CPUArchState *env, MMULookupPageData *p, uint32_t val, int mmu_idx, MemOp memop, uintptr_t ra) { if (unlikely(p->flags & TLB_MMIO)) { - io_writex(env, p->full, mmu_idx, val, p->addr, ra, memop); + if ((memop & MO_BSWAP) != MO_LE) { + val = bswap32(val); + } + QEMU_IOTHREAD_LOCK_GUARD(); + do_st_mmio_leN(env, p->full, val, p->addr, 4, mmu_idx, ra); } else if (unlikely(p->flags & TLB_DISCARD_WRITE)) { /* nothing */ } else { @@ -2843,7 +2934,11 @@ static void do_st_8(CPUArchState *env, MMULookupPageData *p, uint64_t val, int mmu_idx, MemOp memop, uintptr_t ra) { if (unlikely(p->flags & TLB_MMIO)) { - io_writex(env, p->full, mmu_idx, val, p->addr, ra, memop); + if ((memop & MO_BSWAP) != MO_LE) { + val = bswap64(val); + } + QEMU_IOTHREAD_LOCK_GUARD(); + do_st_mmio_leN(env, p->full, val, p->addr, 8, mmu_idx, ra); } else if (unlikely(p->flags & TLB_DISCARD_WRITE)) { /* nothing */ } else { @@ -2966,22 +3061,22 @@ static void do_st16_mmu(CPUArchState *env, vaddr addr, Int128 val, cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST); crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE, &l); if (likely(!crosspage)) { - /* Swap to host endian if necessary, then store. 
*/ - if (l.memop & MO_BSWAP) { - val = bswap128(val); - } if (unlikely(l.page[0].flags & TLB_MMIO)) { - QEMU_IOTHREAD_LOCK_GUARD(); - if (HOST_BIG_ENDIAN) { - b = int128_getlo(val), a = int128_gethi(val); - } else { - a = int128_getlo(val), b = int128_gethi(val); + if ((l.memop & MO_BSWAP) != MO_LE) { + val = bswap128(val); } - io_writex(env, l.page[0].full, l.mmu_idx, a, addr, ra, MO_64); - io_writex(env, l.page[0].full, l.mmu_idx, b, addr + 8, ra, MO_64); + a = int128_getlo(val); + b = int128_gethi(val); + QEMU_IOTHREAD_LOCK_GUARD(); + do_st_mmio_leN(env, l.page[0].full, a, addr, 8, l.mmu_idx, ra); + do_st_mmio_leN(env, l.page[0].full, b, addr + 8, 8, l.mmu_idx, ra); } else if (unlikely(l.page[0].flags & TLB_DISCARD_WRITE)) { /* nothing */ } else { + /* Swap to host endian if necessary, then store. */ + if (l.memop & MO_BSWAP) { + val = bswap128(val); + } store_atom_16(env, ra, l.page[0].haddr, l.memop, val); } return; diff --git a/accel/tcg/ldst_atomicity.c.inc b/accel/tcg/ldst_atomicity.c.inc index e5c590a499..1b793e6935 100644 --- a/accel/tcg/ldst_atomicity.c.inc +++ b/accel/tcg/ldst_atomicity.c.inc @@ -404,7 +404,10 @@ static uint16_t load_atom_2(CPUArchState *env, uintptr_t ra, return load_atomic2(pv); } if (HAVE_ATOMIC128_RO) { - return load_atom_extract_al16_or_al8(pv, 2); + intptr_t left_in_page = -(pi | TARGET_PAGE_MASK); + if (likely(left_in_page > 8)) { + return load_atom_extract_al16_or_al8(pv, 2); + } } atmax = required_atomicity(env, pi, memop); @@ -443,7 +446,10 @@ static uint32_t load_atom_4(CPUArchState *env, uintptr_t ra, return load_atomic4(pv); } if (HAVE_ATOMIC128_RO) { - return load_atom_extract_al16_or_al8(pv, 4); + intptr_t left_in_page = -(pi | TARGET_PAGE_MASK); + if (likely(left_in_page > 8)) { + return load_atom_extract_al16_or_al8(pv, 4); + } } atmax = required_atomicity(env, pi, memop); diff --git a/block/blkio.c b/block/blkio.c index 8e7ce42c79..1dd495617c 100644 --- a/block/blkio.c +++ b/block/blkio.c @@ -678,7 +678,7 @@ static int blkio_virtio_blk_connect(BlockDriverState *bs, QDict *options, const char *path = qdict_get_try_str(options, "path"); BDRVBlkioState *s = bs->opaque; bool fd_supported = false; - int fd, ret; + int fd = -1, ret; if (!path) { error_setg(errp, "missing 'path' option"); @@ -713,12 +713,19 @@ static int blkio_virtio_blk_connect(BlockDriverState *bs, QDict *options, */ fd = qemu_open(path, O_RDWR, NULL); if (fd < 0) { + /* + * qemu_open() can fail if the user specifies a path that is not + * a file or device, for example in the case of Unix Domain Socket + * for the virtio-blk-vhost-user driver. In such cases let's have + * libblkio open the path directly. + */ fd_supported = false; } else { ret = blkio_set_int(s->blkio, "fd", fd); if (ret < 0) { fd_supported = false; qemu_close(fd); + fd = -1; } } } @@ -733,14 +740,21 @@ static int blkio_virtio_blk_connect(BlockDriverState *bs, QDict *options, } ret = blkio_connect(s->blkio); + if (ret < 0 && fd >= 0) { + /* Failed to give the FD to libblkio, close it */ + qemu_close(fd); + fd = -1; + } + /* - * If the libblkio driver doesn't support the `fd` property, blkio_connect() - * will fail with -EINVAL. So let's try calling blkio_connect() again by - * directly setting `path`. + * Before https://gitlab.com/libblkio/libblkio/-/merge_requests/208 + * (libblkio <= v1.3.0), setting the `fd` property is not enough to check + * whether the driver supports the `fd` property or not. In that case, + * blkio_connect() will fail with -EINVAL. 
+ * So let's try calling blkio_connect() again by directly setting `path` + * to cover this scenario. */ if (fd_supported && ret == -EINVAL) { - qemu_close(fd); - /* * We need to clear the `fd` property we set previously by setting * it to -1. diff --git a/bsd-user/mmap.c b/bsd-user/mmap.c index b62a69bd07..8e148a2ea3 100644 --- a/bsd-user/mmap.c +++ b/bsd-user/mmap.c @@ -214,8 +214,6 @@ static int mmap_frag(abi_ulong real_start, #endif abi_ulong mmap_next_start = TASK_UNMAPPED_BASE; -unsigned long last_brk; - /* * Subroutine of mmap_find_vma, used when we have pre-allocated a chunk of guest * address space. diff --git a/bsd-user/qemu.h b/bsd-user/qemu.h index edf9602f9b..8f2d6a3c78 100644 --- a/bsd-user/qemu.h +++ b/bsd-user/qemu.h @@ -232,7 +232,6 @@ abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size, abi_ulong new_size, unsigned long flags, abi_ulong new_addr); int target_msync(abi_ulong start, abi_ulong len, int flags); -extern unsigned long last_brk; extern abi_ulong mmap_next_start; abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size); void TSA_NO_TSA mmap_fork_start(void); diff --git a/bsd-user/syscall_defs.h b/bsd-user/syscall_defs.h index b6d113d24a..aedfbf2d7d 100644 --- a/bsd-user/syscall_defs.h +++ b/bsd-user/syscall_defs.h @@ -227,7 +227,9 @@ type safe_##name(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ } /* So far all target and host bitmasks are the same */ +#undef target_to_host_bitmask #define target_to_host_bitmask(x, tbl) (x) +#undef host_to_target_bitmask #define host_to_target_bitmask(x, tbl) (x) #endif /* SYSCALL_DEFS_H */ diff --git a/chardev/char-socket.c b/chardev/char-socket.c index 8c58532171..e8e3a743d5 100644 --- a/chardev/char-socket.c +++ b/chardev/char-socket.c @@ -742,8 +742,12 @@ static void tcp_chr_websock_handshake(QIOTask *task, gpointer user_data) { Chardev *chr = user_data; SocketChardev *s = user_data; + Error *err = NULL; - if (qio_task_propagate_error(task, NULL)) { + if (qio_task_propagate_error(task, &err)) { + error_reportf_err(err, + "websock handshake of character device %s failed: ", + chr->label); tcp_chr_disconnect(chr); } else { if (s->do_telnetopt) { @@ -778,8 +782,12 @@ static void tcp_chr_tls_handshake(QIOTask *task, { Chardev *chr = user_data; SocketChardev *s = user_data; + Error *err = NULL; - if (qio_task_propagate_error(task, NULL)) { + if (qio_task_propagate_error(task, &err)) { + error_reportf_err(err, + "TLS handshake of character device %s failed: ", + chr->label); tcp_chr_disconnect(chr); } else { if (s->is_websock) { diff --git a/configure b/configure index afb25fd558..133f4e3235 100755 --- a/configure +++ b/configure @@ -469,50 +469,119 @@ else echo "WARNING: unrecognized host CPU, proceeding with 'uname -m' output '$cpu'" fi -# Normalise host CPU name and set multilib cflags. The canonicalization -# isn't really necessary, because the architectures that we check for -# should not hit the 'uname -m' case, but better safe than sorry. +# Normalise host CPU name to the values used by Meson cross files and in source +# directories, and set multilib cflags. The canonicalization isn't really +# necessary, because the architectures that we check for should not hit the +# 'uname -m' case, but better safe than sorry in case --cpu= is used. +# # Note that this case should only have supported host CPUs, not guests. +# Please keep it sorted and synchronized with meson.build's host_arch. 
+host_arch= +linux_arch= case "$cpu" in + aarch64) + host_arch=aarch64 + linux_arch=arm64 + ;; + armv*b|armv*l|arm) - cpu="arm" ;; + cpu=arm + host_arch=arm + linux_arch=arm + ;; i386|i486|i586|i686) cpu="i386" - CPU_CFLAGS="-m32" ;; - x32) - cpu="x86_64" - CPU_CFLAGS="-mx32" ;; - x86_64|amd64) - cpu="x86_64" - # ??? Only extremely old AMD cpus do not have cmpxchg16b. - # If we truly care, we should simply detect this case at - # runtime and generate the fallback to serial emulation. - CPU_CFLAGS="-m64 -mcx16" ;; + host_arch=i386 + linux_arch=x86 + CPU_CFLAGS="-m32" + ;; + loongarch*) + cpu=loongarch64 + host_arch=loongarch64 + ;; + + mips64*) + cpu=mips64 + host_arch=mips + linux_arch=mips + ;; mips*) - cpu="mips" ;; + cpu=mips + host_arch=mips + linux_arch=mips + ;; ppc) - CPU_CFLAGS="-m32" ;; + host_arch=ppc + linux_arch=powerpc + CPU_CFLAGS="-m32" + ;; ppc64) - CPU_CFLAGS="-m64 -mbig-endian" ;; + host_arch=ppc64 + linux_arch=powerpc + CPU_CFLAGS="-m64 -mbig-endian" + ;; ppc64le) - cpu="ppc64" - CPU_CFLAGS="-m64 -mlittle-endian" ;; + cpu=ppc64 + host_arch=ppc64 + linux_arch=powerpc + CPU_CFLAGS="-m64 -mlittle-endian" + ;; + + riscv32 | riscv64) + host_arch=riscv + linux_arch=riscv + ;; s390) - CPU_CFLAGS="-m31" ;; + linux_arch=s390 + CPU_CFLAGS="-m31" + ;; s390x) - CPU_CFLAGS="-m64" ;; + host_arch=s390x + linux_arch=s390 + CPU_CFLAGS="-m64" + ;; sparc|sun4[cdmuv]) - cpu="sparc" - CPU_CFLAGS="-m32 -mv8plus -mcpu=ultrasparc" ;; + cpu=sparc + CPU_CFLAGS="-m32 -mv8plus -mcpu=ultrasparc" + ;; sparc64) - CPU_CFLAGS="-m64 -mcpu=ultrasparc" ;; + host_arch=sparc64 + CPU_CFLAGS="-m64 -mcpu=ultrasparc" + ;; + + x32) + cpu="x86_64" + host_arch=x86_64 + linux_arch=x86 + CPU_CFLAGS="-mx32" + ;; + x86_64|amd64) + cpu="x86_64" + host_arch=x86_64 + linux_arch=x86 + # ??? Only extremely old AMD cpus do not have cmpxchg16b. + # If we truly care, we should simply detect this case at + # runtime and generate the fallback to serial emulation. + CPU_CFLAGS="-m64 -mcx16" + ;; esac +if test -n "$host_arch" && { + ! test -d "$source_path/linux-user/include/host/$host_arch" || + ! test -d "$source_path/common-user/host/$host_arch"; }; then + error_exit "linux-user/include/host/$host_arch does not exist." \ + "This is a bug in the configure script, please report it." +fi +if test -n "$linux_arch" && ! test -d "$source_path/linux-headers/asm-$linux_arch"; then + error_exit "linux-headers/asm-$linux_arch does not exist." \ + "This is a bug in the configure script, please report it." +fi + check_py_version() { # We require python >= 3.7. 
# NB: a True python conditional creates a non-zero return code (Failure) @@ -803,7 +872,7 @@ default_target_list="" mak_wilds="" if [ "$linux_user" != no ]; then - if [ "$targetos" = linux ] && [ -d "$source_path/linux-user/include/host/$cpu" ]; then + if [ "$targetos" = linux ] && [ -n "$host_arch" ]; then linux_user=yes elif [ "$linux_user" = yes ]; then error_exit "linux-user not supported on this architecture" @@ -1708,40 +1777,9 @@ echo "PKG_CONFIG=${pkg_config}" >> $config_host_mak echo "CC=$cc" >> $config_host_mak echo "EXESUF=$EXESUF" >> $config_host_mak -# use included Linux headers -if test "$linux" = "yes" ; then - mkdir -p linux-headers - case "$cpu" in - i386|x86_64) - linux_arch=x86 - ;; - ppc|ppc64) - linux_arch=powerpc - ;; - s390x) - linux_arch=s390 - ;; - aarch64) - linux_arch=arm64 - ;; - loongarch*) - linux_arch=loongarch - ;; - mips64) - linux_arch=mips - ;; - riscv32|riscv64) - linux_arch=riscv - ;; - *) - # For most CPUs the kernel architecture name and QEMU CPU name match. - linux_arch="$cpu" - ;; - esac - # For non-KVM architectures we will not have asm headers - if [ -e "$source_path/linux-headers/asm-$linux_arch" ]; then - symlink "$source_path/linux-headers/asm-$linux_arch" linux-headers/asm - fi +# use included Linux headers for KVM architectures +if test "$linux" = "yes" && test -n "$linux_arch"; then + symlink "$source_path/linux-headers/asm-$linux_arch" linux-headers/asm fi for target in $target_list; do diff --git a/docs/system/devices/nvme.rst b/docs/system/devices/nvme.rst index 2a3af268f7..4ea957baed 100644 --- a/docs/system/devices/nvme.rst +++ b/docs/system/devices/nvme.rst @@ -271,9 +271,15 @@ The virtual namespace device supports DIF- and DIX-based protection information ``pil=UINT8`` (default: ``0``) Controls the location of the protection information within the metadata. Set - to ``1`` to transfer protection information as the first eight bytes of - metadata. Otherwise, the protection information is transferred as the last - eight bytes. + to ``1`` to transfer protection information as the first bytes of metadata. + Otherwise, the protection information is transferred as the last bytes of + metadata. + +``pif=UINT8`` (default: ``0``) + By default, the namespace device uses 16 bit guard protection information + format (``pif=0``). Set to ``2`` to enable 64 bit guard protection + information format. This requires at least 16 bytes of metadata. Note that + ``pif=1`` (32 bit guards) is currently not supported. Virtualization Enhancements and SR-IOV (Experimental Support) ------------------------------------------------------------- diff --git a/dump/dump.c b/dump/dump.c index 1f1a6edcab..d4ef713cd0 100644 --- a/dump/dump.c +++ b/dump/dump.c @@ -1293,8 +1293,8 @@ static bool get_next_page(GuestPhysBlock **blockptr, uint64_t *pfnptr, memcpy(buf + addr % page_size, hbuf, n); addr += n; - if (addr % page_size == 0) { - /* we filled up the page */ + if (addr % page_size == 0 || addr >= block->target_end) { + /* we filled up the page or the current block is finished */ break; } } else { diff --git a/gdbstub/gdbstub.c b/gdbstub/gdbstub.c index ce8b42eb15..5f28d5cf57 100644 --- a/gdbstub/gdbstub.c +++ b/gdbstub/gdbstub.c @@ -537,7 +537,7 @@ static GDBThreadIdKind read_thread_id(const char *buf, const char **end_buf, /* Skip '.' */ buf++; } else { - p = 1; + p = 0; } ret = qemu_strtoul(buf, &buf, 16, &t); @@ -2059,9 +2059,10 @@ void gdb_read_byte(uint8_t ch) * here, but it does expect a stop reply. 
*/ if (ch != 0x03) { - warn_report("gdbstub: client sent packet while target running\n"); + trace_gdbstub_err_unexpected_runpkt(ch); + } else { + gdbserver_state.allow_stop_reply = true; } - gdbserver_state.allow_stop_reply = true; vm_stop(RUN_STATE_PAUSED); } else #endif @@ -2073,6 +2074,11 @@ void gdb_read_byte(uint8_t ch) gdbserver_state.line_buf_index = 0; gdbserver_state.line_sum = 0; gdbserver_state.state = RS_GETLINE; + } else if (ch == '+') { + /* + * do nothing, gdb may preemptively send out ACKs on + * initial connection + */ } else { trace_gdbstub_err_garbage(ch); } diff --git a/gdbstub/trace-events b/gdbstub/trace-events index 0c18a4d70a..7bc79a73c4 100644 --- a/gdbstub/trace-events +++ b/gdbstub/trace-events @@ -26,6 +26,7 @@ gdbstub_err_invalid_repeat(uint8_t ch) "got invalid RLE count: 0x%02x" gdbstub_err_invalid_rle(void) "got invalid RLE sequence" gdbstub_err_checksum_invalid(uint8_t ch) "got invalid command checksum digit: 0x%02x" gdbstub_err_checksum_incorrect(uint8_t expected, uint8_t got) "got command packet with incorrect checksum, expected=0x%02x, received=0x%02x" +gdbstub_err_unexpected_runpkt(uint8_t ch) "unexpected packet (0x%02x) while target running" # softmmu.c gdbstub_hit_watchpoint(const char *type, int cpu_gdb_index, uint64_t vaddr) "Watchpoint hit, type=\"%s\" cpu=%d, vaddr=0x%" PRIx64 "" diff --git a/hw/display/virtio-gpu-base.c b/hw/display/virtio-gpu-base.c index 7ab7d08d0a..ca1fb7b16f 100644 --- a/hw/display/virtio-gpu-base.c +++ b/hw/display/virtio-gpu-base.c @@ -244,7 +244,7 @@ virtio_gpu_base_set_features(VirtIODevice *vdev, uint64_t features) trace_virtio_gpu_features(((features & virgl) == virgl)); } -static void +void virtio_gpu_base_device_unrealize(DeviceState *qdev) { VirtIOGPUBase *g = VIRTIO_GPU_BASE(qdev); diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c index e8603d78ca..bbd5c6561a 100644 --- a/hw/display/virtio-gpu.c +++ b/hw/display/virtio-gpu.c @@ -14,6 +14,7 @@ #include "qemu/osdep.h" #include "qemu/units.h" #include "qemu/iov.h" +#include "sysemu/cpus.h" #include "ui/console.h" #include "trace.h" #include "sysemu/dma.h" @@ -41,6 +42,7 @@ virtio_gpu_find_check_resource(VirtIOGPU *g, uint32_t resource_id, static void virtio_gpu_cleanup_mapping(VirtIOGPU *g, struct virtio_gpu_simple_resource *res); +static void virtio_gpu_reset_bh(void *opaque); void virtio_gpu_update_cursor_data(VirtIOGPU *g, struct virtio_gpu_scanout *s, @@ -1387,22 +1389,57 @@ void virtio_gpu_device_realize(DeviceState *qdev, Error **errp) &qdev->mem_reentrancy_guard); g->cursor_bh = qemu_bh_new_guarded(virtio_gpu_cursor_bh, g, &qdev->mem_reentrancy_guard); + g->reset_bh = qemu_bh_new(virtio_gpu_reset_bh, g); + qemu_cond_init(&g->reset_cond); QTAILQ_INIT(&g->reslist); QTAILQ_INIT(&g->cmdq); QTAILQ_INIT(&g->fenceq); } -void virtio_gpu_reset(VirtIODevice *vdev) +static void virtio_gpu_device_unrealize(DeviceState *qdev) { - VirtIOGPU *g = VIRTIO_GPU(vdev); + VirtIOGPU *g = VIRTIO_GPU(qdev); + + g_clear_pointer(&g->ctrl_bh, qemu_bh_delete); + g_clear_pointer(&g->cursor_bh, qemu_bh_delete); + g_clear_pointer(&g->reset_bh, qemu_bh_delete); + qemu_cond_destroy(&g->reset_cond); + virtio_gpu_base_device_unrealize(qdev); +} + +static void virtio_gpu_reset_bh(void *opaque) +{ + VirtIOGPU *g = VIRTIO_GPU(opaque); struct virtio_gpu_simple_resource *res, *tmp; - struct virtio_gpu_ctrl_command *cmd; int i = 0; QTAILQ_FOREACH_SAFE(res, &g->reslist, next, tmp) { virtio_gpu_resource_destroy(g, res); } + for (i = 0; i < g->parent_obj.conf.max_outputs; i++) { + 
dpy_gfx_replace_surface(g->parent_obj.scanout[i].con, NULL); + } + + g->reset_finished = true; + qemu_cond_signal(&g->reset_cond); +} + +void virtio_gpu_reset(VirtIODevice *vdev) +{ + VirtIOGPU *g = VIRTIO_GPU(vdev); + struct virtio_gpu_ctrl_command *cmd; + + if (qemu_in_vcpu_thread()) { + g->reset_finished = false; + qemu_bh_schedule(g->reset_bh); + while (!g->reset_finished) { + qemu_cond_wait_iothread(&g->reset_cond); + } + } else { + virtio_gpu_reset_bh(g); + } + while (!QTAILQ_EMPTY(&g->cmdq)) { cmd = QTAILQ_FIRST(&g->cmdq); QTAILQ_REMOVE(&g->cmdq, cmd, next); @@ -1416,10 +1453,6 @@ void virtio_gpu_reset(VirtIODevice *vdev) g_free(cmd); } - for (i = 0; i < g->parent_obj.conf.max_outputs; i++) { - dpy_gfx_replace_surface(g->parent_obj.scanout[i].con, NULL); - } - virtio_gpu_base_reset(VIRTIO_GPU_BASE(vdev)); } @@ -1492,6 +1525,7 @@ static void virtio_gpu_class_init(ObjectClass *klass, void *data) vgbc->gl_flushed = virtio_gpu_handle_gl_flushed; vdc->realize = virtio_gpu_device_realize; + vdc->unrealize = virtio_gpu_device_unrealize; vdc->reset = virtio_gpu_reset; vdc->get_config = virtio_gpu_get_config; vdc->set_config = virtio_gpu_set_config; diff --git a/hw/i2c/bitbang_i2c.c b/hw/i2c/bitbang_i2c.c index bb18954765..de5f5aacf5 100644 --- a/hw/i2c/bitbang_i2c.c +++ b/hw/i2c/bitbang_i2c.c @@ -70,7 +70,7 @@ static int bitbang_i2c_ret(bitbang_i2c_interface *i2c, int level) return level & i2c->last_data; } -/* Leave device data pin unodified. */ +/* Leave device data pin unmodified. */ static int bitbang_i2c_nop(bitbang_i2c_interface *i2c) { return bitbang_i2c_ret(i2c, i2c->device_out); diff --git a/hw/i2c/trace-events b/hw/i2c/trace-events index 8e88aa24c1..d7b1e25858 100644 --- a/hw/i2c/trace-events +++ b/hw/i2c/trace-events @@ -5,7 +5,7 @@ bitbang_i2c_state(const char *old_state, const char *new_state) "state %s -> %s" bitbang_i2c_addr(uint8_t addr) "Address 0x%02x" bitbang_i2c_send(uint8_t byte) "TX byte 0x%02x" bitbang_i2c_recv(uint8_t byte) "RX byte 0x%02x" -bitbang_i2c_data(unsigned dat, unsigned clk, unsigned old_out, unsigned new_out) "dat %u clk %u out %u -> %u" +bitbang_i2c_data(unsigned clk, unsigned dat, unsigned old_out, unsigned new_out) "clk %u dat %u out %u -> %u" # core.c diff --git a/hw/i386/vmmouse.c b/hw/i386/vmmouse.c index a56c185f15..6cd624bd09 100644 --- a/hw/i386/vmmouse.c +++ b/hw/i386/vmmouse.c @@ -44,6 +44,12 @@ #define VMMOUSE_VERSION 0x3442554a +#define VMMOUSE_RELATIVE_PACKET 0x00010000 + +#define VMMOUSE_LEFT_BUTTON 0x20 +#define VMMOUSE_RIGHT_BUTTON 0x10 +#define VMMOUSE_MIDDLE_BUTTON 0x08 + #ifdef DEBUG_VMMOUSE #define DPRINTF(fmt, ...) printf(fmt, ## __VA_ARGS__) #else @@ -103,15 +109,18 @@ static void vmmouse_mouse_event(void *opaque, int x, int y, int dz, int buttons_ x, y, dz, buttons_state); if ((buttons_state & MOUSE_EVENT_LBUTTON)) - buttons |= 0x20; + buttons |= VMMOUSE_LEFT_BUTTON; if ((buttons_state & MOUSE_EVENT_RBUTTON)) - buttons |= 0x10; + buttons |= VMMOUSE_RIGHT_BUTTON; if ((buttons_state & MOUSE_EVENT_MBUTTON)) - buttons |= 0x08; + buttons |= VMMOUSE_MIDDLE_BUTTON; if (s->absolute) { x <<= 1; y <<= 1; + } else{ + /* add for guest vmmouse driver to judge this is a relative packet. 
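/*
 * [Editor's sketch, not part of the commit: how the status word sent to
 * the guest is assembled from the constants defined above -- button bits
 * in the low byte, and bit 16 (VMMOUSE_RELATIVE_PACKET) flagging a
 * relative-motion packet so the guest driver can tell the two apart.]
 */
static uint32_t vmmouse_buttons_model(int left, int right, int middle,
                                      int relative)
{
    uint32_t buttons = 0;

    if (left)     buttons |= 0x20;        /* VMMOUSE_LEFT_BUTTON */
    if (right)    buttons |= 0x10;        /* VMMOUSE_RIGHT_BUTTON */
    if (middle)   buttons |= 0x08;        /* VMMOUSE_MIDDLE_BUTTON */
    if (relative) buttons |= 0x00010000;  /* VMMOUSE_RELATIVE_PACKET */
    return buttons;
}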
*/ + buttons |= VMMOUSE_RELATIVE_PACKET; } s->queue[s->nb_queue++] = buttons; diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index f2e5a2fa73..539d273553 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -1507,7 +1507,7 @@ static void nvme_post_cqes(void *opaque) req->cqe.status = cpu_to_le16((req->status << 1) | cq->phase); req->cqe.sq_id = cpu_to_le16(sq->sqid); req->cqe.sq_head = cpu_to_le16(sq->head); - addr = cq->dma_addr + cq->tail * n->cqe_size; + addr = cq->dma_addr + (cq->tail << NVME_CQES); ret = pci_dma_write(PCI_DEVICE(n), addr, (void *)&req->cqe, sizeof(req->cqe)); if (ret) { @@ -4361,7 +4361,13 @@ static uint16_t nvme_io_mgmt_send_ruh_update(NvmeCtrl *n, NvmeRequest *req) uint32_t npid = (cdw10 >> 1) + 1; unsigned int i = 0; g_autofree uint16_t *pids = NULL; - uint32_t maxnpid = n->subsys->endgrp.fdp.nrg * n->subsys->endgrp.fdp.nruh; + uint32_t maxnpid; + + if (!ns->endgrp || !ns->endgrp->fdp.enabled) { + return NVME_FDP_DISABLED | NVME_DNR; + } + + maxnpid = n->subsys->endgrp.fdp.nrg * n->subsys->endgrp.fdp.nruh; if (unlikely(npid >= MIN(NVME_FDP_MAXPIDS, maxnpid))) { return NVME_INVALID_FIELD | NVME_DNR; @@ -5120,6 +5126,11 @@ static uint16_t nvme_fdp_events(NvmeCtrl *n, uint32_t endgrpid, } log_size = sizeof(NvmeFdpEventsLog) + ebuf->nelems * sizeof(NvmeFdpEvent); + + if (off >= log_size) { + return NVME_INVALID_FIELD | NVME_DNR; + } + trans_len = MIN(log_size - off, buf_len); elog = g_malloc0(log_size); elog->num_events = cpu_to_le32(ebuf->nelems); @@ -5295,10 +5306,18 @@ static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeRequest *req) uint16_t qsize = le16_to_cpu(c->qsize); uint16_t qflags = le16_to_cpu(c->cq_flags); uint64_t prp1 = le64_to_cpu(c->prp1); + uint32_t cc = ldq_le_p(&n->bar.cc); + uint8_t iocqes = NVME_CC_IOCQES(cc); + uint8_t iosqes = NVME_CC_IOSQES(cc); trace_pci_nvme_create_cq(prp1, cqid, vector, qsize, qflags, NVME_CQ_FLAGS_IEN(qflags) != 0); + if (iosqes != NVME_SQES || iocqes != NVME_CQES) { + trace_pci_nvme_err_invalid_create_cq_entry_size(iosqes, iocqes); + return NVME_MAX_QSIZE_EXCEEDED | NVME_DNR; + } + if (unlikely(!cqid || cqid > n->conf_ioqpairs || n->cq[cqid] != NULL)) { trace_pci_nvme_err_invalid_create_cq_cqid(cqid); return NVME_INVALID_QID | NVME_DNR; @@ -6887,7 +6906,7 @@ static uint16_t nvme_directive_receive(NvmeCtrl *n, NvmeRequest *req) case NVME_DIRECTIVE_IDENTIFY: switch (doper) { case NVME_DIRECTIVE_RETURN_PARAMS: - if (ns->endgrp->fdp.enabled) { + if (ns->endgrp && ns->endgrp->fdp.enabled) { id.supported |= 1 << NVME_DIRECTIVE_DATA_PLACEMENT; id.enabled |= 1 << NVME_DIRECTIVE_DATA_PLACEMENT; id.persistent |= 1 << NVME_DIRECTIVE_DATA_PLACEMENT; @@ -6995,7 +7014,7 @@ static void nvme_process_sq(void *opaque) } while (!(nvme_sq_empty(sq) || QTAILQ_EMPTY(&sq->req_list))) { - addr = sq->dma_addr + sq->head * n->sqe_size; + addr = sq->dma_addr + (sq->head << NVME_SQES); if (nvme_addr_read(n, addr, (void *)&cmd, sizeof(cmd))) { trace_pci_nvme_err_addr_read(addr); trace_pci_nvme_err_cfs(); @@ -7220,34 +7239,6 @@ static int nvme_start_ctrl(NvmeCtrl *n) NVME_CAP_MPSMAX(cap)); return -1; } - if (unlikely(NVME_CC_IOCQES(cc) < - NVME_CTRL_CQES_MIN(n->id_ctrl.cqes))) { - trace_pci_nvme_err_startfail_cqent_too_small( - NVME_CC_IOCQES(cc), - NVME_CTRL_CQES_MIN(cap)); - return -1; - } - if (unlikely(NVME_CC_IOCQES(cc) > - NVME_CTRL_CQES_MAX(n->id_ctrl.cqes))) { - trace_pci_nvme_err_startfail_cqent_too_large( - NVME_CC_IOCQES(cc), - NVME_CTRL_CQES_MAX(cap)); - return -1; - } - if (unlikely(NVME_CC_IOSQES(cc) < - NVME_CTRL_SQES_MIN(n->id_ctrl.sqes))) { - 
trace_pci_nvme_err_startfail_sqent_too_small( - NVME_CC_IOSQES(cc), - NVME_CTRL_SQES_MIN(cap)); - return -1; - } - if (unlikely(NVME_CC_IOSQES(cc) > - NVME_CTRL_SQES_MAX(n->id_ctrl.sqes))) { - trace_pci_nvme_err_startfail_sqent_too_large( - NVME_CC_IOSQES(cc), - NVME_CTRL_SQES_MAX(cap)); - return -1; - } if (unlikely(!NVME_AQA_ASQS(aqa))) { trace_pci_nvme_err_startfail_asqent_sz_zero(); return -1; @@ -7260,8 +7251,6 @@ static int nvme_start_ctrl(NvmeCtrl *n) n->page_bits = page_bits; n->page_size = page_size; n->max_prp_ents = n->page_size / sizeof(uint64_t); - n->cqe_size = 1 << NVME_CC_IOCQES(cc); - n->sqe_size = 1 << NVME_CC_IOSQES(cc); nvme_init_cq(&n->admin_cq, n, acq, 0, 0, NVME_AQA_ACQS(aqa) + 1, 1); nvme_init_sq(&n->admin_sq, n, asq, 0, 0, NVME_AQA_ASQS(aqa) + 1); @@ -8230,8 +8219,8 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev) id->wctemp = cpu_to_le16(NVME_TEMPERATURE_WARNING); id->cctemp = cpu_to_le16(NVME_TEMPERATURE_CRITICAL); - id->sqes = (0x6 << 4) | 0x6; - id->cqes = (0x4 << 4) | 0x4; + id->sqes = (NVME_SQES << 4) | NVME_SQES; + id->cqes = (NVME_CQES << 4) | NVME_CQES; id->nn = cpu_to_le32(NVME_MAX_NAMESPACES); id->oncs = cpu_to_le16(NVME_ONCS_WRITE_ZEROES | NVME_ONCS_TIMESTAMP | NVME_ONCS_FEATURES | NVME_ONCS_DSM | diff --git a/hw/nvme/dif.c b/hw/nvme/dif.c index 63c44c86ab..01b19c3373 100644 --- a/hw/nvme/dif.c +++ b/hw/nvme/dif.c @@ -115,7 +115,7 @@ static void nvme_dif_pract_generate_dif_crc64(NvmeNamespace *ns, uint8_t *buf, uint64_t crc = crc64_nvme(~0ULL, buf, ns->lbasz); if (pil) { - crc = crc64_nvme(crc, mbuf, pil); + crc = crc64_nvme(~crc, mbuf, pil); } dif->g64.guard = cpu_to_be64(crc); @@ -246,7 +246,7 @@ static uint16_t nvme_dif_prchk_crc64(NvmeNamespace *ns, NvmeDifTuple *dif, uint64_t crc = crc64_nvme(~0ULL, buf, ns->lbasz); if (pil) { - crc = crc64_nvme(crc, mbuf, pil); + crc = crc64_nvme(~crc, mbuf, pil); } trace_pci_nvme_dif_prchk_guard_crc64(be64_to_cpu(dif->g64.guard), crc); diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h index 209e8f5b4c..5f2ae7b28b 100644 --- a/hw/nvme/nvme.h +++ b/hw/nvme/nvme.h @@ -30,6 +30,13 @@ #define NVME_FDP_MAX_EVENTS 63 #define NVME_FDP_MAXPIDS 128 +/* + * The controller only supports Submission and Completion Queue Entry Sizes of + * 64 and 16 bytes respectively. 
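/*
 * [Editor's sketch, not part of the commit: what fixing the entry sizes
 * buys. Queue entry addresses reduce to shifts by the log2 constants
 * (replacing the removed per-controller sqe_size/cqe_size fields), and
 * the identify sqes/cqes fields advertise the same value as both the
 * maximum (high nibble) and minimum (low nibble) supported size.]
 */
#include <stdint.h>

enum { MODEL_SQES = 6, MODEL_CQES = 4 };    /* log2(64), log2(16) */

static inline uint64_t model_cqe_addr(uint64_t cq_dma_addr, uint32_t tail)
{
    return cq_dma_addr + ((uint64_t)tail << MODEL_CQES);  /* tail * 16 */
}

static inline uint8_t model_qes_field(uint8_t log2_size)
{
    return (uint8_t)((log2_size << 4) | log2_size);       /* max == min */
}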
+ */ +#define NVME_SQES 6 +#define NVME_CQES 4 + QEMU_BUILD_BUG_ON(NVME_MAX_NAMESPACES > NVME_NSID_BROADCAST - 1); typedef struct NvmeCtrl NvmeCtrl; @@ -530,8 +537,6 @@ typedef struct NvmeCtrl { uint32_t page_size; uint16_t page_bits; uint16_t max_prp_ents; - uint16_t cqe_size; - uint16_t sqe_size; uint32_t max_q_ents; uint8_t outstanding_aers; uint32_t irq_status; diff --git a/hw/nvme/trace-events b/hw/nvme/trace-events index 9afddf3b95..3a67680c6a 100644 --- a/hw/nvme/trace-events +++ b/hw/nvme/trace-events @@ -168,6 +168,7 @@ pci_nvme_err_invalid_create_cq_size(uint16_t size) "failed creating completion q pci_nvme_err_invalid_create_cq_addr(uint64_t addr) "failed creating completion queue, addr=0x%"PRIx64"" pci_nvme_err_invalid_create_cq_vector(uint16_t vector) "failed creating completion queue, vector=%"PRIu16"" pci_nvme_err_invalid_create_cq_qflags(uint16_t qflags) "failed creating completion queue, qflags=%"PRIu16"" +pci_nvme_err_invalid_create_cq_entry_size(uint8_t iosqes, uint8_t iocqes) "iosqes %"PRIu8" iocqes %"PRIu8"" pci_nvme_err_invalid_identify_cns(uint16_t cns) "identify, invalid cns=0x%"PRIx16"" pci_nvme_err_invalid_getfeat(int dw10) "invalid get features, dw10=0x%"PRIx32"" pci_nvme_err_invalid_setfeat(uint32_t dw10) "invalid set features, dw10=0x%"PRIx32"" diff --git a/hw/ppc/pegasos2.c b/hw/ppc/pegasos2.c index 4447bbe8ec..075367d94d 100644 --- a/hw/ppc/pegasos2.c +++ b/hw/ppc/pegasos2.c @@ -324,9 +324,13 @@ static void pegasos2_machine_reset(MachineState *machine, ShutdownCause reason) pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 2) << 8) | PCI_INTERRUPT_LINE, 2, 0x409); + pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 2) << 8) | + PCI_COMMAND, 2, 0x7); pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 3) << 8) | PCI_INTERRUPT_LINE, 2, 0x409); + pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 3) << 8) | + PCI_COMMAND, 2, 0x7); pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 4) << 8) | PCI_INTERRUPT_LINE, 2, 0x9); @@ -735,6 +739,13 @@ static void add_pci_device(PCIBus *bus, PCIDevice *d, void *opaque) pci_get_word(&d->config[PCI_VENDOR_ID]), pci_get_word(&d->config[PCI_DEVICE_ID])); + if (pci_get_word(&d->config[PCI_CLASS_DEVICE]) == + PCI_CLASS_NETWORK_ETHERNET) { + name = "ethernet"; + } else if (pci_get_word(&d->config[PCI_CLASS_DEVICE]) >> 8 == + PCI_BASE_CLASS_DISPLAY) { + name = "display"; + } for (i = 0; device_map[i].id; i++) { if (!strcmp(pn, device_map[i].id)) { name = device_map[i].name; @@ -762,11 +773,19 @@ static void add_pci_device(PCIBus *bus, PCIDevice *d, void *opaque) if (!d->io_regions[i].size) { continue; } - cells[j] = cpu_to_be32(d->devfn << 8 | (PCI_BASE_ADDRESS_0 + i * 4)); + cells[j] = PCI_BASE_ADDRESS_0 + i * 4; + if (cells[j] == 0x28) { + cells[j] = 0x30; + } + cells[j] = cpu_to_be32(d->devfn << 8 | cells[j]); if (d->io_regions[i].type & PCI_BASE_ADDRESS_SPACE_IO) { cells[j] |= cpu_to_be32(1 << 24); } else { - cells[j] |= cpu_to_be32(2 << 24); + if (d->io_regions[i].type & PCI_BASE_ADDRESS_MEM_TYPE_64) { + cells[j] |= cpu_to_be32(3 << 24); + } else { + cells[j] |= cpu_to_be32(2 << 24); + } if (d->io_regions[i].type & PCI_BASE_ADDRESS_MEM_PREFETCH) { cells[j] |= cpu_to_be32(4 << 28); } @@ -921,6 +940,7 @@ static void *build_fdt(MachineState *machine, int *fdt_size) qemu_fdt_setprop_cell(fdt, "/rtas", "rtas-display-device", 0); qemu_fdt_setprop_cell(fdt, "/rtas", "rtas-size", 20); qemu_fdt_setprop_cell(fdt, "/rtas", "rtas-version", 1); + qemu_fdt_setprop_string(fdt, "/rtas", "name", "rtas"); /* cpus */ qemu_fdt_add_subnode(fdt, "/cpus"); diff 
--git a/include/exec/user/abitypes.h b/include/exec/user/abitypes.h index 6191ce9f74..6178453d94 100644 --- a/include/exec/user/abitypes.h +++ b/include/exec/user/abitypes.h @@ -15,8 +15,16 @@ #define ABI_LLONG_ALIGNMENT 2 #endif +#ifdef TARGET_CRIS +#define ABI_SHORT_ALIGNMENT 1 +#define ABI_INT_ALIGNMENT 1 +#define ABI_LONG_ALIGNMENT 1 +#define ABI_LLONG_ALIGNMENT 1 +#endif + #if (defined(TARGET_I386) && !defined(TARGET_X86_64)) \ || defined(TARGET_SH4) \ + || defined(TARGET_OPENRISC) \ || defined(TARGET_MICROBLAZE) \ || defined(TARGET_NIOS2) #define ABI_LLONG_ALIGNMENT 4 diff --git a/include/exec/user/thunk.h b/include/exec/user/thunk.h index 300a840d58..6eedef48d8 100644 --- a/include/exec/user/thunk.h +++ b/include/exec/user/thunk.h @@ -193,10 +193,17 @@ static inline int thunk_type_align(const argtype *type_ptr, int is_host) } } -unsigned int target_to_host_bitmask(unsigned int target_mask, - const bitmask_transtbl * trans_tbl); -unsigned int host_to_target_bitmask(unsigned int host_mask, - const bitmask_transtbl * trans_tbl); +unsigned int target_to_host_bitmask_len(unsigned int target_mask, + const bitmask_transtbl *trans_tbl, + size_t trans_len); +unsigned int host_to_target_bitmask_len(unsigned int host_mask, + const bitmask_transtbl * trans_tbl, + size_t trans_len); + +#define target_to_host_bitmask(M, T) \ + target_to_host_bitmask_len(M, T, ARRAY_SIZE(T)) +#define host_to_target_bitmask(M, T) \ + host_to_target_bitmask_len(M, T, ARRAY_SIZE(T)) void thunk_init(unsigned int max_structs); diff --git a/include/hw/virtio/virtio-gpu.h b/include/hw/virtio/virtio-gpu.h index 7ea8ae2bee..390c4642b8 100644 --- a/include/hw/virtio/virtio-gpu.h +++ b/include/hw/virtio/virtio-gpu.h @@ -169,6 +169,9 @@ struct VirtIOGPU { QEMUBH *ctrl_bh; QEMUBH *cursor_bh; + QEMUBH *reset_bh; + QemuCond reset_cond; + bool reset_finished; QTAILQ_HEAD(, virtio_gpu_simple_resource) reslist; QTAILQ_HEAD(, virtio_gpu_ctrl_command) cmdq; @@ -238,6 +241,7 @@ bool virtio_gpu_base_device_realize(DeviceState *qdev, VirtIOHandleOutput ctrl_cb, VirtIOHandleOutput cursor_cb, Error **errp); +void virtio_gpu_base_device_unrealize(DeviceState *qdev); void virtio_gpu_base_reset(VirtIOGPUBase *g); void virtio_gpu_base_fill_display_info(VirtIOGPUBase *g, struct virtio_gpu_resp_display_info *dpy_info); diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h index cc61b00ba9..21ef8f1699 100644 --- a/include/qemu/osdep.h +++ b/include/qemu/osdep.h @@ -289,9 +289,6 @@ void QEMU_ERROR("code path is reachable") #ifndef MAP_ANONYMOUS #define MAP_ANONYMOUS MAP_ANON #endif -#ifndef MAP_FIXED_NOREPLACE -#define MAP_FIXED_NOREPLACE 0 -#endif #ifndef MAP_NORESERVE #define MAP_NORESERVE 0 #endif diff --git a/include/qemu/selfmap.h b/include/qemu/selfmap.h index 3479a2a618..7d938945cb 100644 --- a/include/qemu/selfmap.h +++ b/include/qemu/selfmap.h @@ -9,9 +9,10 @@ #ifndef SELFMAP_H #define SELFMAP_H +#include "qemu/interval-tree.h" + typedef struct { - unsigned long start; - unsigned long end; + IntervalTreeNode itree; /* flags */ bool is_read; @@ -19,26 +20,25 @@ typedef struct { bool is_exec; bool is_priv; - unsigned long offset; - gchar *dev; + uint64_t offset; uint64_t inode; - gchar *path; + const char *path; + char dev[]; } MapInfo; - /** * read_self_maps: * - * Read /proc/self/maps and return a list of MapInfo structures. + * Read /proc/self/maps and return a tree of MapInfo structures. 
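/*
 * [Editor's hedged sketch, not part of the commit. It leans on QEMU's
 * internal interval-tree API as it appears elsewhere in this diff
 * (interval_tree_iter_first(), the embedded MapInfo.itree node), so it
 * compiles only inside the QEMU tree; start/last are assumed to be a
 * host address range. An overlap query replaces walking the old GSList:]
 */
static bool host_range_mapped_model(uint64_t start, uint64_t last)
{
    IntervalTreeRoot *root = read_self_maps();
    IntervalTreeNode *n = interval_tree_iter_first(root, start, last);
    bool mapped = (n != NULL);

    if (mapped) {
        MapInfo *mi = container_of(n, MapInfo, itree);
        /* [mi->itree.start, mi->itree.last] overlaps [start, last] */
        (void)mi;
    }
    free_self_maps(root);
    return mapped;
}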
*/ -GSList *read_self_maps(void); +IntervalTreeRoot *read_self_maps(void); /** * free_self_maps: - * @info: a GSlist + * @info: an interval tree * - * Free a list of MapInfo structures. + * Free a tree of MapInfo structures. */ -void free_self_maps(GSList *info); +void free_self_maps(IntervalTreeRoot *root); #endif /* SELFMAP_H */ diff --git a/linux-user/aarch64/target_mman.h b/linux-user/aarch64/target_mman.h index f721295fe1..69ec5d5739 100644 --- a/linux-user/aarch64/target_mman.h +++ b/linux-user/aarch64/target_mman.h @@ -4,6 +4,19 @@ #define TARGET_PROT_BTI 0x10 #define TARGET_PROT_MTE 0x20 +/* + * arch/arm64/include/asm/processor.h: + * + * TASK_UNMAPPED_BASE DEFAULT_MAP_WINDOW / 4 + * DEFAULT_MAP_WINDOW DEFAULT_MAP_WINDOW_64 + * DEFAULT_MAP_WINDOW_64 UL(1) << VA_BITS_MIN + * VA_BITS_MIN 48 (unless explicitly configured smaller) + */ +#define TASK_UNMAPPED_BASE (1ull << (48 - 2)) + +/* arch/arm64/include/asm/elf.h */ +#define ELF_ET_DYN_BASE TARGET_PAGE_ALIGN((1ull << 48) / 3 * 2) + #include "../generic/target_mman.h" #endif diff --git a/linux-user/alpha/target_mman.h b/linux-user/alpha/target_mman.h index 6bb03e7336..8edfe2b88c 100644 --- a/linux-user/alpha/target_mman.h +++ b/linux-user/alpha/target_mman.h @@ -20,6 +20,17 @@ #define TARGET_MS_SYNC 2 #define TARGET_MS_INVALIDATE 4 +/* + * arch/alpha/include/asm/processor.h: + * + * TASK_UNMAPPED_BASE TASK_SIZE / 2 + * TASK_SIZE 0x40000000000UL + */ +#define TASK_UNMAPPED_BASE 0x20000000000ull + +/* arch/alpha/include/asm/elf.h */ +#define ELF_ET_DYN_BASE (TASK_UNMAPPED_BASE + 0x1000000) + #include "../generic/target_mman.h" #endif diff --git a/linux-user/arm/target_mman.h b/linux-user/arm/target_mman.h index e7ba6070fe..51005da869 100644 --- a/linux-user/arm/target_mman.h +++ b/linux-user/arm/target_mman.h @@ -1 +1,12 @@ +/* + * arch/arm/include/asm/memory.h + * TASK_UNMAPPED_BASE ALIGN(TASK_SIZE / 3, SZ_16M) + * TASK_SIZE CONFIG_PAGE_OFFSET + * CONFIG_PAGE_OFFSET 0xC0000000 (default in Kconfig) + */ +#define TASK_UNMAPPED_BASE 0x40000000 + +/* arch/arm/include/asm/elf.h */ +#define ELF_ET_DYN_BASE 0x00400000 + #include "../generic/target_mman.h" diff --git a/linux-user/cris/target_mman.h b/linux-user/cris/target_mman.h index e7ba6070fe..9ace8ac292 100644 --- a/linux-user/cris/target_mman.h +++ b/linux-user/cris/target_mman.h @@ -1 +1,13 @@ +/* + * arch/cris/include/asm/processor.h: + * TASK_UNMAPPED_BASE (PAGE_ALIGN(TASK_SIZE / 3)) + * + * arch/cris/include/arch-v32/arch/processor.h + * TASK_SIZE 0xb0000000 + */ +#define TASK_UNMAPPED_BASE TARGET_PAGE_ALIGN(0xb0000000 / 3) + +/* arch/cris/include/uapi/asm/elf.h */ +#define ELF_ET_DYN_BASE (TASK_UNMAPPED_BASE * 2) + #include "../generic/target_mman.h" diff --git a/linux-user/elfload.c b/linux-user/elfload.c index 861ec07abc..ac03beb01b 100644 --- a/linux-user/elfload.c +++ b/linux-user/elfload.c @@ -1710,7 +1710,8 @@ static uint32_t get_elf_hwcap(void) #define MISA_BIT(EXT) (1 << (EXT - 'A')) RISCVCPU *cpu = RISCV_CPU(thread_cpu); uint32_t mask = MISA_BIT('I') | MISA_BIT('M') | MISA_BIT('A') - | MISA_BIT('F') | MISA_BIT('D') | MISA_BIT('C'); + | MISA_BIT('F') | MISA_BIT('D') | MISA_BIT('C') + | MISA_BIT('V'); return cpu->env.misa_ext & mask; #undef MISA_BIT @@ -1959,15 +1960,6 @@ struct exec #define ZMAGIC 0413 #define QMAGIC 0314 -/* Necessary parameters */ -#define TARGET_ELF_EXEC_PAGESIZE \ - (((eppnt->p_align & ~qemu_host_page_mask) != 0) ? 
\ - TARGET_PAGE_SIZE : MAX(qemu_host_page_size, TARGET_PAGE_SIZE)) -#define TARGET_ELF_PAGELENGTH(_v) ROUND_UP((_v), TARGET_ELF_EXEC_PAGESIZE) -#define TARGET_ELF_PAGESTART(_v) ((_v) & \ - ~(abi_ulong)(TARGET_ELF_EXEC_PAGESIZE-1)) -#define TARGET_ELF_PAGEOFFSET(_v) ((_v) & (TARGET_ELF_EXEC_PAGESIZE-1)) - #define DLINFO_ITEMS 16 static inline void memcpy_fromfs(void * to, const void * from, unsigned long n) @@ -2219,47 +2211,37 @@ static abi_ulong setup_arg_pages(struct linux_binprm *bprm, } } -/* Map and zero the bss. We need to explicitly zero any fractional pages - after the data section (i.e. bss). */ -static void zero_bss(abi_ulong elf_bss, abi_ulong last_bss, int prot) +/** + * zero_bss: + * + * Map and zero the bss. We need to explicitly zero any fractional pages + * after the data section (i.e. bss). Return false on mapping failure. + */ +static bool zero_bss(abi_ulong start_bss, abi_ulong end_bss, int prot) { - uintptr_t host_start, host_map_start, host_end; - - last_bss = TARGET_PAGE_ALIGN(last_bss); - - /* ??? There is confusion between qemu_real_host_page_size and - qemu_host_page_size here and elsewhere in target_mmap, which - may lead to the end of the data section mapping from the file - not being mapped. At least there was an explicit test and - comment for that here, suggesting that "the file size must - be known". The comment probably pre-dates the introduction - of the fstat system call in target_mmap which does in fact - find out the size. What isn't clear is if the workaround - here is still actually needed. For now, continue with it, - but merge it with the "normal" mmap that would allocate the bss. */ - - host_start = (uintptr_t) g2h_untagged(elf_bss); - host_end = (uintptr_t) g2h_untagged(last_bss); - host_map_start = REAL_HOST_PAGE_ALIGN(host_start); - - if (host_map_start < host_end) { - void *p = mmap((void *)host_map_start, host_end - host_map_start, - prot, MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - if (p == MAP_FAILED) { - perror("cannot mmap brk"); - exit(-1); - } - } + abi_ulong align_bss; - /* Ensure that the bss page(s) are valid */ - if ((page_get_flags(last_bss-1) & prot) != prot) { - page_set_flags(elf_bss & TARGET_PAGE_MASK, last_bss - 1, - prot | PAGE_VALID); - } + align_bss = TARGET_PAGE_ALIGN(start_bss); + end_bss = TARGET_PAGE_ALIGN(end_bss); - if (host_start < host_map_start) { - memset((void *)host_start, 0, host_map_start - host_start); + if (start_bss < align_bss) { + int flags = page_get_flags(start_bss); + + if (!(flags & PAGE_VALID)) { + /* Map the start of the bss. */ + align_bss -= TARGET_PAGE_SIZE; + } else if (flags & PAGE_WRITE) { + /* The page is already mapped writable. */ + memset(g2h_untagged(start_bss), 0, align_bss - start_bss); + } else { + /* Read-only zeros? */ + g_assert_not_reached(); + } } + + return align_bss >= end_bss || + target_mmap(align_bss, end_bss - align_bss, prot, + MAP_FIXED | MAP_PRIVATE | MAP_ANON, -1, 0) != -1; } #if defined(TARGET_ARM) @@ -2522,308 +2504,322 @@ static abi_ulong create_elf_tables(abi_ulong p, int argc, int envc, #endif #endif -static void pgb_fail_in_use(const char *image_name) +/** + * pgb_try_mmap: + * @addr: host start address + * @addr_last: host last address + * @keep: do not unmap the probe region + * + * Return 1 if [@addr, @addr_last] is not mapped in the host, + * return 0 if it is not available to map, and -1 on mmap error. + * If @keep, the region is left mapped on success, otherwise unmapped. 
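/*
 * [Editor's standalone sketch, not part of the commit: the probe idiom
 * pgb_try_mmap builds on. MAP_FIXED_NOREPLACE (Linux >= 4.17) makes
 * mmap fail with EEXIST rather than clobber an existing mapping; older
 * kernels ignore the unknown flag and may map somewhere else, which the
 * final pointer comparison catches.]
 */
#define _GNU_SOURCE
#include <errno.h>
#include <stddef.h>
#include <stdint.h>
#include <sys/mman.h>

/* 1: range is free, 0: range is occupied, -1: mmap error */
static int probe_host_range_model(uintptr_t addr, size_t size)
{
    void *p = mmap((void *)addr, size, PROT_NONE,
                   MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE |
                   MAP_FIXED_NOREPLACE, -1, 0);

    if (p == MAP_FAILED) {
        return errno == EEXIST ? 0 : -1;
    }
    munmap(p, size);
    return p == (void *)addr;
}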
+ */ +static int pgb_try_mmap(uintptr_t addr, uintptr_t addr_last, bool keep) { - error_report("%s: requires virtual address space that is in use " - "(omit the -B option or choose a different value)", - image_name); - exit(EXIT_FAILURE); + size_t size = addr_last - addr + 1; + void *p = mmap((void *)addr, size, PROT_NONE, + MAP_ANONYMOUS | MAP_PRIVATE | + MAP_NORESERVE | MAP_FIXED_NOREPLACE, -1, 0); + int ret; + + if (p == MAP_FAILED) { + return errno == EEXIST ? 0 : -1; + } + ret = p == (void *)addr; + if (!keep || !ret) { + munmap(p, size); + } + return ret; } -static void pgb_have_guest_base(const char *image_name, abi_ulong guest_loaddr, - abi_ulong guest_hiaddr, long align) +/** + * pgb_try_mmap_skip_brk(uintptr_t addr, uintptr_t size, uintptr_t brk) + * @addr: host address + * @addr_last: host last address + * @brk: host brk + * + * Like pgb_try_mmap, but additionally reserve some memory following brk. + */ +static int pgb_try_mmap_skip_brk(uintptr_t addr, uintptr_t addr_last, + uintptr_t brk, bool keep) { - const int flags = MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE; - void *addr, *test; + uintptr_t brk_last = brk + 16 * MiB - 1; - if (!QEMU_IS_ALIGNED(guest_base, align)) { - fprintf(stderr, "Requested guest base %p does not satisfy " - "host minimum alignment (0x%lx)\n", - (void *)guest_base, align); - exit(EXIT_FAILURE); + /* Do not map anything close to the host brk. */ + if (addr <= brk_last && brk <= addr_last) { + return 0; } + return pgb_try_mmap(addr, addr_last, keep); +} - /* Sanity check the guest binary. */ - if (reserved_va) { - if (guest_hiaddr > reserved_va) { - error_report("%s: requires more than reserved virtual " - "address space (0x%" PRIx64 " > 0x%lx)", - image_name, (uint64_t)guest_hiaddr, reserved_va); - exit(EXIT_FAILURE); +/** + * pgb_try_mmap_set: + * @ga: set of guest addrs + * @base: guest_base + * @brk: host brk + * + * Return true if all @ga can be mapped by the host at @base. + * On success, retain the mapping at index 0 for reserved_va. + */ + +typedef struct PGBAddrs { + uintptr_t bounds[3][2]; /* start/last pairs */ + int nbounds; +} PGBAddrs; + +static bool pgb_try_mmap_set(const PGBAddrs *ga, uintptr_t base, uintptr_t brk) +{ + for (int i = ga->nbounds - 1; i >= 0; --i) { + if (pgb_try_mmap_skip_brk(ga->bounds[i][0] + base, + ga->bounds[i][1] + base, + brk, i == 0 && reserved_va) <= 0) { + return false; + } + } + return true; +} + +/** + * pgb_addr_set: + * @ga: output set of guest addrs + * @guest_loaddr: guest image low address + * @guest_loaddr: guest image high address + * @identity: create for identity mapping + * + * Fill in @ga with the image, COMMPAGE and NULL page. + */ +static bool pgb_addr_set(PGBAddrs *ga, abi_ulong guest_loaddr, + abi_ulong guest_hiaddr, bool try_identity) +{ + int n; + + /* + * With a low commpage, or a guest mapped very low, + * we may not be able to use the identity map. + */ + if (try_identity) { + if (LO_COMMPAGE != -1 && LO_COMMPAGE < mmap_min_addr) { + return false; + } + if (guest_loaddr != 0 && guest_loaddr < mmap_min_addr) { + return false; } + } + + memset(ga, 0, sizeof(*ga)); + n = 0; + + if (reserved_va) { + ga->bounds[n][0] = try_identity ? mmap_min_addr : 0; + ga->bounds[n][1] = reserved_va; + n++; + /* LO_COMMPAGE and NULL handled by reserving from 0. 
*/ } else { -#if HOST_LONG_BITS < TARGET_ABI_BITS - if ((guest_hiaddr - guest_base) > ~(uintptr_t)0) { - error_report("%s: requires more virtual address space " - "than the host can provide (0x%" PRIx64 ")", - image_name, (uint64_t)guest_hiaddr + 1 - guest_base); - exit(EXIT_FAILURE); + /* Add any LO_COMMPAGE or NULL page. */ + if (LO_COMMPAGE != -1) { + ga->bounds[n][0] = 0; + ga->bounds[n][1] = LO_COMMPAGE + TARGET_PAGE_SIZE - 1; + n++; + } else if (!try_identity) { + ga->bounds[n][0] = 0; + ga->bounds[n][1] = TARGET_PAGE_SIZE - 1; + n++; + } + + /* Add the guest image for ET_EXEC. */ + if (guest_loaddr) { + ga->bounds[n][0] = guest_loaddr; + ga->bounds[n][1] = guest_hiaddr; + n++; } -#endif } /* - * Expand the allocation to the entire reserved_va. - * Exclude the mmap_min_addr hole. + * Temporarily disable + * "comparison is always false due to limited range of data type" + * due to comparison between unsigned and (possible) 0. */ - if (reserved_va) { - guest_loaddr = (guest_base >= mmap_min_addr ? 0 - : mmap_min_addr - guest_base); - guest_hiaddr = reserved_va; +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wtype-limits" + + /* Add any HI_COMMPAGE not covered by reserved_va. */ + if (reserved_va < HI_COMMPAGE) { + ga->bounds[n][0] = HI_COMMPAGE & qemu_host_page_mask; + ga->bounds[n][1] = HI_COMMPAGE + TARGET_PAGE_SIZE - 1; + n++; } - /* Reserve the address space for the binary, or reserved_va. */ - test = g2h_untagged(guest_loaddr); - addr = mmap(test, guest_hiaddr - guest_loaddr + 1, PROT_NONE, flags, -1, 0); - if (test != addr) { +#pragma GCC diagnostic pop + + ga->nbounds = n; + return true; +} + +static void pgb_fail_in_use(const char *image_name) +{ + error_report("%s: requires virtual address space that is in use " + "(omit the -B option or choose a different value)", + image_name); + exit(EXIT_FAILURE); +} + +static void pgb_fixed(const char *image_name, uintptr_t guest_loaddr, + uintptr_t guest_hiaddr, uintptr_t align) +{ + PGBAddrs ga; + uintptr_t brk = (uintptr_t)sbrk(0); + + if (!QEMU_IS_ALIGNED(guest_base, align)) { + fprintf(stderr, "Requested guest base %p does not satisfy " + "host minimum alignment (0x%" PRIxPTR ")\n", + (void *)guest_base, align); + exit(EXIT_FAILURE); + } + + if (!pgb_addr_set(&ga, guest_loaddr, guest_hiaddr, !guest_base) + || !pgb_try_mmap_set(&ga, guest_base, brk)) { pgb_fail_in_use(image_name); } - qemu_log_mask(CPU_LOG_PAGE, - "%s: base @ %p for %" PRIu64 " bytes\n", - __func__, addr, (uint64_t)guest_hiaddr - guest_loaddr + 1); } /** - * pgd_find_hole_fallback: potential mmap address - * @guest_size: size of available space - * @brk: location of break - * @align: memory alignment + * pgb_find_fallback: * - * This is a fallback method for finding a hole in the host address - * space if we don't have the benefit of being able to access - * /proc/self/map. It can potentially take a very long time as we can - * only dumbly iterate up the host address space seeing if the - * allocation would work. + * This is a fallback method for finding holes in the host address space + * if we don't have the benefit of being able to access /proc/self/map. + * It can potentially take a very long time as we can only dumbly iterate + * up the host address space seeing if the allocation would work. 
*/ -static uintptr_t pgd_find_hole_fallback(uintptr_t guest_size, uintptr_t brk, - long align, uintptr_t offset) +static uintptr_t pgb_find_fallback(const PGBAddrs *ga, uintptr_t align, + uintptr_t brk) { - uintptr_t base; - - /* Start (aligned) at the bottom and work our way up */ - base = ROUND_UP(mmap_min_addr, align); + /* TODO: come up with a better estimate of how much to skip. */ + uintptr_t skip = sizeof(uintptr_t) == 4 ? MiB : GiB; - while (true) { - uintptr_t align_start, end; - align_start = ROUND_UP(base, align); - end = align_start + guest_size + offset; - - /* if brk is anywhere in the range give ourselves some room to grow. */ - if (align_start <= brk && brk < end) { - base = brk + (16 * MiB); - continue; - } else if (align_start + guest_size < align_start) { - /* we have run out of space */ + for (uintptr_t base = skip; ; base += skip) { + base = ROUND_UP(base, align); + if (pgb_try_mmap_set(ga, base, brk)) { + return base; + } + if (base >= -skip) { return -1; - } else { - int flags = MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE | - MAP_FIXED_NOREPLACE; - void * mmap_start = mmap((void *) align_start, guest_size, - PROT_NONE, flags, -1, 0); - if (mmap_start != MAP_FAILED) { - munmap(mmap_start, guest_size); - if (mmap_start == (void *) align_start) { - qemu_log_mask(CPU_LOG_PAGE, - "%s: base @ %p for %" PRIdPTR" bytes\n", - __func__, mmap_start + offset, guest_size); - return (uintptr_t) mmap_start + offset; - } - } - base += qemu_host_page_size; } } } -/* Return value for guest_base, or -1 if no hole found. */ -static uintptr_t pgb_find_hole(uintptr_t guest_loaddr, uintptr_t guest_size, - long align, uintptr_t offset) +static uintptr_t pgb_try_itree(const PGBAddrs *ga, uintptr_t base, + IntervalTreeRoot *root) { - GSList *maps, *iter; - uintptr_t this_start, this_end, next_start, brk; - intptr_t ret = -1; - - assert(QEMU_IS_ALIGNED(guest_loaddr, align)); - - maps = read_self_maps(); - - /* Read brk after we've read the maps, which will malloc. */ - brk = (uintptr_t)sbrk(0); + for (int i = ga->nbounds - 1; i >= 0; --i) { + uintptr_t s = base + ga->bounds[i][0]; + uintptr_t l = base + ga->bounds[i][1]; + IntervalTreeNode *n; + + if (l < s) { + /* Wraparound. Skip to advance S to mmap_min_addr. */ + return mmap_min_addr - s; + } - if (!maps) { - return pgd_find_hole_fallback(guest_size, brk, align, offset); + n = interval_tree_iter_first(root, s, l); + if (n != NULL) { + /* Conflict. Skip to advance S to LAST + 1. */ + return n->last - s + 1; + } } + return 0; /* success */ +} - /* The first hole is before the first map entry. */ - this_start = mmap_min_addr; - - for (iter = maps; iter; - this_start = next_start, iter = g_slist_next(iter)) { - uintptr_t align_start, hole_size; - - this_end = ((MapInfo *)iter->data)->start; - next_start = ((MapInfo *)iter->data)->end; - align_start = ROUND_UP(this_start + offset, align); - - /* Skip holes that are too small. */ - if (align_start >= this_end) { - continue; - } - hole_size = this_end - align_start; - if (hole_size < guest_size) { - continue; - } +static uintptr_t pgb_find_itree(const PGBAddrs *ga, IntervalTreeRoot *root, + uintptr_t align, uintptr_t brk) +{ + uintptr_t last = mmap_min_addr; + uintptr_t base, skip; - /* If this hole contains brk, give ourselves some room to grow. 
*/ - if (this_start <= brk && brk < this_end) { - hole_size -= guest_size; - if (sizeof(uintptr_t) == 8 && hole_size >= 1 * GiB) { - align_start += 1 * GiB; - } else if (hole_size >= 16 * MiB) { - align_start += 16 * MiB; - } else { - align_start = (this_end - guest_size) & -align; - if (align_start < this_start) { - continue; - } - } + while (true) { + base = ROUND_UP(last, align); + if (base < last) { + return -1; } - /* Record the lowest successful match. */ - if (ret < 0) { - ret = align_start; - } - /* If this hole contains the identity map, select it. */ - if (align_start <= guest_loaddr && - guest_loaddr + guest_size <= this_end) { - ret = 0; - } - /* If this hole ends above the identity map, stop looking. */ - if (this_end >= guest_loaddr) { + skip = pgb_try_itree(ga, base, root); + if (skip == 0) { break; } - } - free_self_maps(maps); - if (ret != -1) { - qemu_log_mask(CPU_LOG_PAGE, "%s: base @ %" PRIxPTR - " for %" PRIuPTR " bytes\n", - __func__, ret, guest_size); + last = base + skip; + if (last < base) { + return -1; + } } - return ret; + /* + * We've chosen 'base' based on holes in the interval tree, + * but we don't yet know if it is a valid host address. + * Because it is the first matching hole, if the host addresses + * are invalid we know there are no further matches. + */ + return pgb_try_mmap_set(ga, base, brk) ? base : -1; } -static void pgb_static(const char *image_name, abi_ulong orig_loaddr, - abi_ulong orig_hiaddr, long align) +static void pgb_dynamic(const char *image_name, uintptr_t guest_loaddr, + uintptr_t guest_hiaddr, uintptr_t align) { - uintptr_t loaddr = orig_loaddr; - uintptr_t hiaddr = orig_hiaddr; - uintptr_t offset = 0; - uintptr_t addr; - - if (hiaddr != orig_hiaddr) { - error_report("%s: requires virtual address space that the " - "host cannot provide (0x%" PRIx64 ")", - image_name, (uint64_t)orig_hiaddr + 1); - exit(EXIT_FAILURE); - } + IntervalTreeRoot *root; + uintptr_t brk, ret; + PGBAddrs ga; - loaddr &= -align; - if (HI_COMMPAGE) { - /* - * Extend the allocation to include the commpage. - * For a 64-bit host, this is just 4GiB; for a 32-bit host we - * need to ensure there is space bellow the guest_base so we - * can map the commpage in the place needed when the address - * arithmetic wraps around. - */ - if (sizeof(uintptr_t) == 8 || loaddr >= 0x80000000u) { - hiaddr = UINT32_MAX; - } else { - offset = -(HI_COMMPAGE & -align); - } - } else if (LO_COMMPAGE != -1) { - loaddr = MIN(loaddr, LO_COMMPAGE & -align); - } + assert(QEMU_IS_ALIGNED(guest_loaddr, align)); - addr = pgb_find_hole(loaddr, hiaddr - loaddr + 1, align, offset); - if (addr == -1) { - /* - * If HI_COMMPAGE, there *might* be a non-consecutive allocation - * that can satisfy both. But as the normal arm32 link base address - * is ~32k, and we extend down to include the commpage, making the - * overhead only ~96k, this is unlikely. - */ - error_report("%s: Unable to allocate %#zx bytes of " - "virtual address space", image_name, - (size_t)(hiaddr - loaddr)); - exit(EXIT_FAILURE); + /* Try the identity map first. */ + if (pgb_addr_set(&ga, guest_loaddr, guest_hiaddr, true)) { + brk = (uintptr_t)sbrk(0); + if (pgb_try_mmap_set(&ga, 0, brk)) { + guest_base = 0; + return; + } } - guest_base = addr; - - qemu_log_mask(CPU_LOG_PAGE, "%s: base @ %"PRIxPTR" for %" PRIuPTR" bytes\n", - __func__, addr, hiaddr - loaddr); -} - -static void pgb_dynamic(const char *image_name, long align) -{ /* - * The executable is dynamic and does not require a fixed address. 
- * All we need is a commpage that satisfies align. - * If we do not need a commpage, leave guest_base == 0. + * Rebuild the address set for non-identity map. + * This differs in the mapping of the guest NULL page. */ - if (HI_COMMPAGE) { - uintptr_t addr, commpage; + pgb_addr_set(&ga, guest_loaddr, guest_hiaddr, false); - /* 64-bit hosts should have used reserved_va. */ - assert(sizeof(uintptr_t) == 4); + root = read_self_maps(); + + /* Read brk after we've read the maps, which will malloc. */ + brk = (uintptr_t)sbrk(0); + if (!root) { + ret = pgb_find_fallback(&ga, align, brk); + } else { /* - * By putting the commpage at the first hole, that puts guest_base - * just above that, and maximises the positive guest addresses. + * Reserve the area close to the host brk. + * This will be freed with the rest of the tree. */ - commpage = HI_COMMPAGE & -align; - addr = pgb_find_hole(commpage, -commpage, align, 0); - assert(addr != -1); - guest_base = addr; - } -} + IntervalTreeNode *b = g_new0(IntervalTreeNode, 1); + b->start = brk; + b->last = brk + 16 * MiB - 1; + interval_tree_insert(b, root); -static void pgb_reserved_va(const char *image_name, abi_ulong guest_loaddr, - abi_ulong guest_hiaddr, long align) -{ - int flags = MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE; - void *addr, *test; - - if (guest_hiaddr > reserved_va) { - error_report("%s: requires more than reserved virtual " - "address space (0x%" PRIx64 " > 0x%lx)", - image_name, (uint64_t)guest_hiaddr, reserved_va); - exit(EXIT_FAILURE); + ret = pgb_find_itree(&ga, root, align, brk); + free_self_maps(root); } - /* Widen the "image" to the entire reserved address space. */ - pgb_static(image_name, 0, reserved_va, align); + if (ret == -1) { + int w = TARGET_LONG_BITS / 4; - /* osdep.h defines this as 0 if it's missing */ - flags |= MAP_FIXED_NOREPLACE; + error_report("%s: Unable to find a guest_base to satisfy all " + "guest address mapping requirements", image_name); - /* Reserve the memory on the host. */ - assert(guest_base != 0); - test = g2h_untagged(0); - addr = mmap(test, reserved_va + 1, PROT_NONE, flags, -1, 0); - if (addr == MAP_FAILED || addr != test) { - error_report("Unable to reserve 0x%lx bytes of virtual address " - "space at %p (%s) for use as guest address space (check your " - "virtual memory ulimit setting, mmap_min_addr or reserve less " - "using qemu-user's -R option)", - reserved_va + 1, test, strerror(errno)); + for (int i = 0; i < ga.nbounds; ++i) { + error_printf(" %0*" PRIx64 "-%0*" PRIx64 "\n", + w, (uint64_t)ga.bounds[i][0], + w, (uint64_t)ga.bounds[i][1]); + } exit(EXIT_FAILURE); } - - qemu_log_mask(CPU_LOG_PAGE, "%s: base @ %p for %lu bytes\n", - __func__, addr, reserved_va + 1); + guest_base = ret; } void probe_guest_base(const char *image_name, abi_ulong guest_loaddr, @@ -2832,25 +2828,33 @@ void probe_guest_base(const char *image_name, abi_ulong guest_loaddr, /* In order to use host shmat, we must be able to honor SHMLBA. */ uintptr_t align = MAX(SHMLBA, qemu_host_page_size); + /* Sanity check the guest binary. 
*/ + if (reserved_va) { + if (guest_hiaddr > reserved_va) { + error_report("%s: requires more than reserved virtual " + "address space (0x%" PRIx64 " > 0x%lx)", + image_name, (uint64_t)guest_hiaddr, reserved_va); + exit(EXIT_FAILURE); + } + } else { + if (guest_hiaddr != (uintptr_t)guest_hiaddr) { + error_report("%s: requires more virtual address space " + "than the host can provide (0x%" PRIx64 ")", + image_name, (uint64_t)guest_hiaddr + 1); + exit(EXIT_FAILURE); + } + } + if (have_guest_base) { - pgb_have_guest_base(image_name, guest_loaddr, guest_hiaddr, align); - } else if (reserved_va) { - pgb_reserved_va(image_name, guest_loaddr, guest_hiaddr, align); - } else if (guest_loaddr) { - pgb_static(image_name, guest_loaddr, guest_hiaddr, align); + pgb_fixed(image_name, guest_loaddr, guest_hiaddr, align); } else { - pgb_dynamic(image_name, align); + pgb_dynamic(image_name, guest_loaddr, guest_hiaddr, align); } /* Reserve and initialize the commpage. */ if (!init_guest_commpage()) { - /* - * With have_guest_base, the user has selected the address and - * we are trying to work with that. Otherwise, we have selected - * free space and init_guest_commpage must succeeded. - */ - assert(have_guest_base); - pgb_fail_in_use(image_name); + /* We have already probed for the commpage being free. */ + g_assert_not_reached(); } assert(QEMU_IS_ALIGNED(guest_base, align)); @@ -3106,28 +3110,9 @@ static void load_elf_image(const char *image_name, int image_fd, } } - if (pinterp_name != NULL) { - /* - * This is the main executable. - * - * Reserve extra space for brk. - * We hold on to this space while placing the interpreter - * and the stack, lest they be placed immediately after - * the data segment and block allocation from the brk. - * - * 16MB is chosen as "large enough" without being so large as - * to allow the result to not fit with a 32-bit guest on a - * 32-bit host. However some 64 bit guests (e.g. s390x) - * attempt to place their heap further ahead and currently - * nothing stops them smashing into QEMUs address space. - */ -#if TARGET_LONG_BITS == 64 - info->reserve_brk = 32 * MiB; -#else - info->reserve_brk = 16 * MiB; -#endif - hiaddr += info->reserve_brk; + load_addr = loaddr; + if (pinterp_name != NULL) { if (ehdr->e_type == ET_EXEC) { /* * Make sure that the low address does not conflict with @@ -3135,31 +3120,55 @@ static void load_elf_image(const char *image_name, int image_fd, */ probe_guest_base(image_name, loaddr, hiaddr); } else { + abi_ulong align; + /* * The binary is dynamic, but we still need to * select guest_base. In this case we pass a size. */ probe_guest_base(image_name, 0, hiaddr - loaddr); + + /* + * Avoid collision with the loader by providing a different + * default load address. + */ + load_addr += elf_et_dyn_base; + + /* + * TODO: Better support for mmap alignment is desirable. + * Since we do not have complete control over the guest + * address space, we prefer the kernel to choose some address + * rather than force the use of LOAD_ADDR via MAP_FIXED. + * But without MAP_FIXED we cannot guarantee alignment, + * only suggest it. + */ + align = pow2ceil(info->alignment); + if (align) { + load_addr &= -align; + } } } /* * Reserve address space for all of this. * - * In the case of ET_EXEC, we supply MAP_FIXED so that we get - * exactly the address range that is required. + * In the case of ET_EXEC, we supply MAP_FIXED_NOREPLACE so that we get + * exactly the address range that is required. Without reserved_va, + * the guest address space is not isolated. 
We have attempted to avoid + * conflict with the host program itself via probe_guest_base, but using + * MAP_FIXED_NOREPLACE instead of MAP_FIXED provides an extra check. * * Otherwise this is ET_DYN, and we are searching for a location * that can hold the memory space required. If the image is - * pre-linked, LOADDR will be non-zero, and the kernel should + * pre-linked, LOAD_ADDR will be non-zero, and the kernel should * honor that address if it happens to be free. * * In both cases, we will overwrite pages in this range with mappings * from the executable. */ - load_addr = target_mmap(loaddr, (size_t)hiaddr - loaddr + 1, PROT_NONE, + load_addr = target_mmap(load_addr, (size_t)hiaddr - loaddr + 1, PROT_NONE, MAP_PRIVATE | MAP_ANON | MAP_NORESERVE | - (ehdr->e_type == ET_EXEC ? MAP_FIXED : 0), + (ehdr->e_type == ET_EXEC ? MAP_FIXED_NOREPLACE : 0), -1, 0); if (load_addr == -1) { goto exit_mmap; @@ -3194,7 +3203,8 @@ static void load_elf_image(const char *image_name, int image_fd, info->end_code = 0; info->start_data = -1; info->end_data = 0; - info->brk = 0; + /* Usual start for brk is after all sections of the main executable. */ + info->brk = TARGET_PAGE_ALIGN(hiaddr); info->elf_flags = ehdr->e_flags; prot_exec = PROT_EXEC; @@ -3220,7 +3230,7 @@ static void load_elf_image(const char *image_name, int image_fd, for (i = 0; i < ehdr->e_phnum; i++) { struct elf_phdr *eppnt = phdr + i; if (eppnt->p_type == PT_LOAD) { - abi_ulong vaddr, vaddr_po, vaddr_ps, vaddr_ef, vaddr_em, vaddr_len; + abi_ulong vaddr, vaddr_po, vaddr_ps, vaddr_ef, vaddr_em; int elf_prot = 0; if (eppnt->p_flags & PF_R) { @@ -3234,8 +3244,8 @@ static void load_elf_image(const char *image_name, int image_fd, } vaddr = load_bias + eppnt->p_vaddr; - vaddr_po = TARGET_ELF_PAGEOFFSET(vaddr); - vaddr_ps = TARGET_ELF_PAGESTART(vaddr); + vaddr_po = vaddr & ~TARGET_PAGE_MASK; + vaddr_ps = vaddr & TARGET_PAGE_MASK; vaddr_ef = vaddr + eppnt->p_filesz; vaddr_em = vaddr + eppnt->p_memsz; @@ -3245,30 +3255,18 @@ static void load_elf_image(const char *image_name, int image_fd, * but no backing file segment. */ if (eppnt->p_filesz != 0) { - vaddr_len = TARGET_ELF_PAGELENGTH(eppnt->p_filesz + vaddr_po); - error = target_mmap(vaddr_ps, vaddr_len, elf_prot, - MAP_PRIVATE | MAP_FIXED, + error = target_mmap(vaddr_ps, eppnt->p_filesz + vaddr_po, + elf_prot, MAP_PRIVATE | MAP_FIXED, image_fd, eppnt->p_offset - vaddr_po); - if (error == -1) { goto exit_mmap; } + } - /* - * If the load segment requests extra zeros (e.g. bss), map it. - */ - if (eppnt->p_filesz < eppnt->p_memsz) { - zero_bss(vaddr_ef, vaddr_em, elf_prot); - } - } else if (eppnt->p_memsz != 0) { - vaddr_len = TARGET_ELF_PAGELENGTH(eppnt->p_memsz + vaddr_po); - error = target_mmap(vaddr_ps, vaddr_len, elf_prot, - MAP_PRIVATE | MAP_FIXED | MAP_ANONYMOUS, - -1, 0); - - if (error == -1) { - goto exit_mmap; - } + /* If the load segment requests extra zeros (e.g. bss), map it. */ + if (vaddr_ef < vaddr_em && + !zero_bss(vaddr_ef, vaddr_em, elf_prot)) { + goto exit_mmap; } /* Find the full program boundaries. 
 */
@@ -3288,9 +3286,6 @@ static void load_elf_image(const char *image_name, int image_fd,
                 info->end_data = vaddr_ef;
             }
         }
-        if (vaddr_em > info->brk) {
-            info->brk = vaddr_em;
-        }
 #ifdef TARGET_MIPS
         } else if (eppnt->p_type == PT_MIPS_ABIFLAGS) {
             Mips_elf_abiflags_v0 abiflags;
@@ -3619,6 +3614,19 @@ int load_elf_binary(struct linux_binprm *bprm, struct image_info *info)
     if (elf_interpreter) {
         load_elf_interp(elf_interpreter, &interp_info, bprm->buf);
 
+        /*
+         * While unusual because of ELF_ET_DYN_BASE, if we are unlucky
+         * with the mappings the interpreter can be loaded above but
+         * near the main executable, which can leave very little room
+         * for the heap.
+         * If less than 16MiB is left between the current brk and the
+         * interpreter, use the end of the interpreter instead.
+         */
+        if (interp_info.brk > info->brk &&
+            interp_info.load_bias - info->brk < 16 * MiB) {
+            info->brk = interp_info.brk;
+        }
+
         /* If the program interpreter is one of these two, then assume
            an iBCS2 image. Otherwise assume a native linux image. */
 
@@ -3672,17 +3680,6 @@ int load_elf_binary(struct linux_binprm *bprm, struct image_info *info)
     bprm->core_dump = &elf_core_dump;
 #endif
 
-    /*
-     * If we reserved extra space for brk, release it now.
-     * The implementation of do_brk in syscalls.c expects to be able
-     * to mmap pages in this space.
-     */
-    if (info->reserve_brk) {
-        abi_ulong start_brk = HOST_PAGE_ALIGN(info->brk);
-        abi_ulong end_brk = HOST_PAGE_ALIGN(info->brk + info->reserve_brk);
-        target_munmap(start_brk, end_brk - start_brk);
-    }
-
     return 0;
 }
 
diff --git a/linux-user/flatload.c b/linux-user/flatload.c
index 5efec2630e..8f5e9f489b 100644
--- a/linux-user/flatload.c
+++ b/linux-user/flatload.c
@@ -811,7 +811,7 @@ int load_flt_binary(struct linux_binprm *bprm, struct image_info *info)
     info->end_code = libinfo[0].start_code + libinfo[0].text_len;
     info->start_data = libinfo[0].start_data;
     info->end_data = libinfo[0].end_data;
-    info->start_brk = libinfo[0].start_brk;
+    info->brk = libinfo[0].start_brk;
     info->start_stack = sp;
     info->stack_limit = libinfo[0].start_brk;
     info->entry = start_addr;
diff --git a/linux-user/hexagon/target_mman.h b/linux-user/hexagon/target_mman.h
index e7ba6070fe..e6b5e2ca36 100644
--- a/linux-user/hexagon/target_mman.h
+++ b/linux-user/hexagon/target_mman.h
@@ -1 +1,14 @@
+/*
+ * arch/hexagon/include/asm/processor.h
+ * TASK_UNMAPPED_BASE        PAGE_ALIGN(TASK_SIZE / 3)
+ *
+ * arch/hexagon/include/asm/mem-layout.h
+ * TASK_SIZE                 PAGE_OFFSET
+ * PAGE_OFFSET               0xc0000000
+ */
+#define TASK_UNMAPPED_BASE 0x40000000
+
+/* arch/hexagon/include/asm/elf.h */
+#define ELF_ET_DYN_BASE    0x08000000
+
 #include "../generic/target_mman.h"
diff --git a/linux-user/hppa/target_mman.h b/linux-user/hppa/target_mman.h
index 97f87d042a..ccda46e842 100644
--- a/linux-user/hppa/target_mman.h
+++ b/linux-user/hppa/target_mman.h
@@ -24,6 +24,12 @@
 #define TARGET_MS_ASYNC 2
 #define TARGET_MS_INVALIDATE 4
 
+/* arch/parisc/include/asm/processor.h: DEFAULT_MAP_BASE32 */
+#define TASK_UNMAPPED_BASE 0x40000000
+
+/* arch/parisc/include/asm/elf.h */
+#define ELF_ET_DYN_BASE    (TASK_UNMAPPED_BASE + 0x01000000)
+
 #include "../generic/target_mman.h"
 
 #endif
diff --git a/linux-user/i386/target_mman.h b/linux-user/i386/target_mman.h
index e7ba6070fe..e3b8e1eaa6 100644
--- a/linux-user/i386/target_mman.h
+++ b/linux-user/i386/target_mman.h
@@ -1 +1,17 @@
+/*
+ * arch/x86/include/asm/processor.h:
+ * TASK_UNMAPPED_BASE        __TASK_UNMAPPED_BASE(TASK_SIZE_LOW)
+ * __TASK_UNMAPPED_BASE(S)   PAGE_ALIGN(S / 3)
+ *
+ * arch/x86/include/asm/page_32_types.h:
+ * TASK_SIZE_LOW             TASK_SIZE
+ *
TASK_SIZE __PAGE_OFFSET + * __PAGE_OFFSET CONFIG_PAGE_OFFSET + * CONFIG_PAGE_OFFSET 0xc0000000 (default in Kconfig) + */ +#define TASK_UNMAPPED_BASE 0x40000000 + +/* arch/x86/include/asm/elf.h */ +#define ELF_ET_DYN_BASE 0x00400000 + #include "../generic/target_mman.h" diff --git a/linux-user/include/host/alpha/host-signal.h b/linux-user/include/host/alpha/host-signal.h deleted file mode 100644 index 4f9e2abc4b..0000000000 --- a/linux-user/include/host/alpha/host-signal.h +++ /dev/null @@ -1,55 +0,0 @@ -/* - * host-signal.h: signal info dependent on the host architecture - * - * Copyright (c) 2003-2005 Fabrice Bellard - * Copyright (c) 2021 Linaro Limited - * - * This work is licensed under the terms of the GNU LGPL, version 2.1 or later. - * See the COPYING file in the top-level directory. - */ - -#ifndef ALPHA_HOST_SIGNAL_H -#define ALPHA_HOST_SIGNAL_H - -/* The third argument to a SA_SIGINFO handler is ucontext_t. */ -typedef ucontext_t host_sigcontext; - -static inline uintptr_t host_signal_pc(host_sigcontext *uc) -{ - return uc->uc_mcontext.sc_pc; -} - -static inline void host_signal_set_pc(host_sigcontext *uc, uintptr_t pc) -{ - uc->uc_mcontext.sc_pc = pc; -} - -static inline void *host_signal_mask(host_sigcontext *uc) -{ - return &uc->uc_sigmask; -} - -static inline bool host_signal_write(siginfo_t *info, host_sigcontext *uc) -{ - uint32_t *pc = (uint32_t *)host_signal_pc(uc); - uint32_t insn = *pc; - - /* XXX: need kernel patch to get write flag faster */ - switch (insn >> 26) { - case 0x0d: /* stw */ - case 0x0e: /* stb */ - case 0x0f: /* stq_u */ - case 0x24: /* stf */ - case 0x25: /* stg */ - case 0x26: /* sts */ - case 0x27: /* stt */ - case 0x2c: /* stl */ - case 0x2d: /* stq */ - case 0x2e: /* stl_c */ - case 0x2f: /* stq_c */ - return true; - } - return false; -} - -#endif diff --git a/linux-user/include/host/s390/host-signal.h b/linux-user/include/host/s390/host-signal.h deleted file mode 100644 index e6d3ec26dc..0000000000 --- a/linux-user/include/host/s390/host-signal.h +++ /dev/null @@ -1,138 +0,0 @@ -/* - * host-signal.h: signal info dependent on the host architecture - * - * Copyright (c) 2003-2005 Fabrice Bellard - * Copyright (c) 2021 Linaro Limited - * - * This work is licensed under the terms of the GNU LGPL, version 2.1 or later. - * See the COPYING file in the top-level directory. - */ - -#ifndef S390_HOST_SIGNAL_H -#define S390_HOST_SIGNAL_H - -/* The third argument to a SA_SIGINFO handler is ucontext_t. */ -typedef ucontext_t host_sigcontext; - -static inline uintptr_t host_signal_pc(host_sigcontext *uc) -{ - return uc->uc_mcontext.psw.addr; -} - -static inline void host_signal_set_pc(host_sigcontext *uc, uintptr_t pc) -{ - uc->uc_mcontext.psw.addr = pc; -} - -static inline void *host_signal_mask(host_sigcontext *uc) -{ - return &uc->uc_sigmask; -} - -static inline bool host_signal_write(siginfo_t *info, host_sigcontext *uc) -{ - uint16_t *pinsn = (uint16_t *)host_signal_pc(uc); - - /* - * ??? On linux, the non-rt signal handler has 4 (!) arguments instead - * of the normal 2 arguments. The 4th argument contains the "Translation- - * Exception Identification for DAT Exceptions" from the hardware (aka - * "int_parm_long"), which does in fact contain the is_write value. - * The rt signal handler, as far as I can tell, does not give this value - * at all. Not that we could get to it from here even if it were. - * So fall back to parsing instructions. 
Treat read-modify-write ones as - * writes, which is not fully correct, but for tracking self-modifying code - * this is better than treating them as reads. Checking si_addr page flags - * might be a viable improvement, albeit a racy one. - */ - /* ??? This is not even close to complete. */ - switch (pinsn[0] >> 8) { - case 0x50: /* ST */ - case 0x42: /* STC */ - case 0x40: /* STH */ - case 0x44: /* EX */ - case 0xba: /* CS */ - case 0xbb: /* CDS */ - return true; - case 0xc4: /* RIL format insns */ - switch (pinsn[0] & 0xf) { - case 0xf: /* STRL */ - case 0xb: /* STGRL */ - case 0x7: /* STHRL */ - return true; - } - break; - case 0xc6: /* RIL-b format insns */ - switch (pinsn[0] & 0xf) { - case 0x0: /* EXRL */ - return true; - } - break; - case 0xc8: /* SSF format insns */ - switch (pinsn[0] & 0xf) { - case 0x2: /* CSST */ - return true; - } - break; - case 0xe3: /* RXY format insns */ - switch (pinsn[2] & 0xff) { - case 0x50: /* STY */ - case 0x24: /* STG */ - case 0x72: /* STCY */ - case 0x70: /* STHY */ - case 0x8e: /* STPQ */ - case 0x3f: /* STRVH */ - case 0x3e: /* STRV */ - case 0x2f: /* STRVG */ - return true; - } - break; - case 0xe6: - switch (pinsn[2] & 0xff) { - case 0x09: /* VSTEBRH */ - case 0x0a: /* VSTEBRG */ - case 0x0b: /* VSTEBRF */ - case 0x0e: /* VSTBR */ - case 0x0f: /* VSTER */ - case 0x3f: /* VSTRLR */ - return true; - } - break; - case 0xe7: - switch (pinsn[2] & 0xff) { - case 0x08: /* VSTEB */ - case 0x09: /* VSTEH */ - case 0x0a: /* VSTEG */ - case 0x0b: /* VSTEF */ - case 0x0e: /* VST */ - case 0x1a: /* VSCEG */ - case 0x1b: /* VSCEF */ - case 0x3e: /* VSTM */ - case 0x3f: /* VSTL */ - return true; - } - break; - case 0xeb: /* RSY format insns */ - switch (pinsn[2] & 0xff) { - case 0x14: /* CSY */ - case 0x30: /* CSG */ - case 0x31: /* CDSY */ - case 0x3e: /* CDSG */ - case 0xe4: /* LANG */ - case 0xe6: /* LAOG */ - case 0xe7: /* LAXG */ - case 0xe8: /* LAAG */ - case 0xea: /* LAALG */ - case 0xf4: /* LAN */ - case 0xf6: /* LAO */ - case 0xf7: /* LAX */ - case 0xfa: /* LAAL */ - case 0xf8: /* LAA */ - return true; - } - break; - } - return false; -} - -#endif diff --git a/linux-user/include/host/s390x/host-signal.h b/linux-user/include/host/s390x/host-signal.h index 0e83f9358d..e6d3ec26dc 100644 --- a/linux-user/include/host/s390x/host-signal.h +++ b/linux-user/include/host/s390x/host-signal.h @@ -1 +1,138 @@ -#include "../s390/host-signal.h" +/* + * host-signal.h: signal info dependent on the host architecture + * + * Copyright (c) 2003-2005 Fabrice Bellard + * Copyright (c) 2021 Linaro Limited + * + * This work is licensed under the terms of the GNU LGPL, version 2.1 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef S390_HOST_SIGNAL_H +#define S390_HOST_SIGNAL_H + +/* The third argument to a SA_SIGINFO handler is ucontext_t. */ +typedef ucontext_t host_sigcontext; + +static inline uintptr_t host_signal_pc(host_sigcontext *uc) +{ + return uc->uc_mcontext.psw.addr; +} + +static inline void host_signal_set_pc(host_sigcontext *uc, uintptr_t pc) +{ + uc->uc_mcontext.psw.addr = pc; +} + +static inline void *host_signal_mask(host_sigcontext *uc) +{ + return &uc->uc_sigmask; +} + +static inline bool host_signal_write(siginfo_t *info, host_sigcontext *uc) +{ + uint16_t *pinsn = (uint16_t *)host_signal_pc(uc); + + /* + * ??? On linux, the non-rt signal handler has 4 (!) arguments instead + * of the normal 2 arguments. 
The 4th argument contains the "Translation- + * Exception Identification for DAT Exceptions" from the hardware (aka + * "int_parm_long"), which does in fact contain the is_write value. + * The rt signal handler, as far as I can tell, does not give this value + * at all. Not that we could get to it from here even if it were. + * So fall back to parsing instructions. Treat read-modify-write ones as + * writes, which is not fully correct, but for tracking self-modifying code + * this is better than treating them as reads. Checking si_addr page flags + * might be a viable improvement, albeit a racy one. + */ + /* ??? This is not even close to complete. */ + switch (pinsn[0] >> 8) { + case 0x50: /* ST */ + case 0x42: /* STC */ + case 0x40: /* STH */ + case 0x44: /* EX */ + case 0xba: /* CS */ + case 0xbb: /* CDS */ + return true; + case 0xc4: /* RIL format insns */ + switch (pinsn[0] & 0xf) { + case 0xf: /* STRL */ + case 0xb: /* STGRL */ + case 0x7: /* STHRL */ + return true; + } + break; + case 0xc6: /* RIL-b format insns */ + switch (pinsn[0] & 0xf) { + case 0x0: /* EXRL */ + return true; + } + break; + case 0xc8: /* SSF format insns */ + switch (pinsn[0] & 0xf) { + case 0x2: /* CSST */ + return true; + } + break; + case 0xe3: /* RXY format insns */ + switch (pinsn[2] & 0xff) { + case 0x50: /* STY */ + case 0x24: /* STG */ + case 0x72: /* STCY */ + case 0x70: /* STHY */ + case 0x8e: /* STPQ */ + case 0x3f: /* STRVH */ + case 0x3e: /* STRV */ + case 0x2f: /* STRVG */ + return true; + } + break; + case 0xe6: + switch (pinsn[2] & 0xff) { + case 0x09: /* VSTEBRH */ + case 0x0a: /* VSTEBRG */ + case 0x0b: /* VSTEBRF */ + case 0x0e: /* VSTBR */ + case 0x0f: /* VSTER */ + case 0x3f: /* VSTRLR */ + return true; + } + break; + case 0xe7: + switch (pinsn[2] & 0xff) { + case 0x08: /* VSTEB */ + case 0x09: /* VSTEH */ + case 0x0a: /* VSTEG */ + case 0x0b: /* VSTEF */ + case 0x0e: /* VST */ + case 0x1a: /* VSCEG */ + case 0x1b: /* VSCEF */ + case 0x3e: /* VSTM */ + case 0x3f: /* VSTL */ + return true; + } + break; + case 0xeb: /* RSY format insns */ + switch (pinsn[2] & 0xff) { + case 0x14: /* CSY */ + case 0x30: /* CSG */ + case 0x31: /* CDSY */ + case 0x3e: /* CDSG */ + case 0xe4: /* LANG */ + case 0xe6: /* LAOG */ + case 0xe7: /* LAXG */ + case 0xe8: /* LAAG */ + case 0xea: /* LAALG */ + case 0xf4: /* LAN */ + case 0xf6: /* LAO */ + case 0xf7: /* LAX */ + case 0xfa: /* LAAL */ + case 0xf8: /* LAA */ + return true; + } + break; + } + return false; +} + +#endif diff --git a/linux-user/include/host/x32/host-signal.h b/linux-user/include/host/x32/host-signal.h deleted file mode 100644 index 26800591d3..0000000000 --- a/linux-user/include/host/x32/host-signal.h +++ /dev/null @@ -1 +0,0 @@ -#include "../x86_64/host-signal.h" diff --git a/linux-user/loongarch64/target_mman.h b/linux-user/loongarch64/target_mman.h index e7ba6070fe..8c2a3d5596 100644 --- a/linux-user/loongarch64/target_mman.h +++ b/linux-user/loongarch64/target_mman.h @@ -1 +1,12 @@ +/* + * arch/loongarch/include/asm/processor.h: + * TASK_UNMAPPED_BASE PAGE_ALIGN(TASK_SIZE / 3) + * TASK_SIZE64 0x1UL << (... ? VA_BITS : ...) 
+ */ +#define TASK_UNMAPPED_BASE \ + TARGET_PAGE_ALIGN((1ull << TARGET_VIRT_ADDR_SPACE_BITS) / 3) + +/* arch/loongarch/include/asm/elf.h */ +#define ELF_ET_DYN_BASE (TASK_UNMAPPED_BASE * 2) + #include "../generic/target_mman.h" diff --git a/linux-user/m68k/target_mman.h b/linux-user/m68k/target_mman.h index e7ba6070fe..20cfe750c5 100644 --- a/linux-user/m68k/target_mman.h +++ b/linux-user/m68k/target_mman.h @@ -1 +1,6 @@ +/* arch/m68k/include/asm/processor.h */ +#define TASK_UNMAPPED_BASE 0xC0000000 +/* arch/m68k/include/asm/elf.h */ +#define ELF_ET_DYN_BASE 0xD0000000 + #include "../generic/target_mman.h" diff --git a/linux-user/main.c b/linux-user/main.c index dba67ffa36..96be354897 100644 --- a/linux-user/main.c +++ b/linux-user/main.c @@ -821,6 +821,49 @@ int main(int argc, char **argv, char **envp) reserved_va = max_reserved_va; } + /* + * Temporarily disable + * "comparison is always false due to limited range of data type" + * due to comparison between (possible) uint64_t and uintptr_t. + */ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wtype-limits" + + /* + * Select an initial value for task_unmapped_base that is in range. + */ + if (reserved_va) { + if (TASK_UNMAPPED_BASE < reserved_va) { + task_unmapped_base = TASK_UNMAPPED_BASE; + } else { + /* The most common default formula is TASK_SIZE / 3. */ + task_unmapped_base = TARGET_PAGE_ALIGN(reserved_va / 3); + } + } else if (TASK_UNMAPPED_BASE < UINTPTR_MAX) { + task_unmapped_base = TASK_UNMAPPED_BASE; + } else { + /* 32-bit host: pick something medium size. */ + task_unmapped_base = 0x10000000; + } + mmap_next_start = task_unmapped_base; + + /* Similarly for elf_et_dyn_base. */ + if (reserved_va) { + if (ELF_ET_DYN_BASE < reserved_va) { + elf_et_dyn_base = ELF_ET_DYN_BASE; + } else { + /* The most common default formula is TASK_SIZE / 3 * 2. */ + elf_et_dyn_base = TARGET_PAGE_ALIGN(reserved_va / 3) * 2; + } + } else if (ELF_ET_DYN_BASE < UINTPTR_MAX) { + elf_et_dyn_base = ELF_ET_DYN_BASE; + } else { + /* 32-bit host: pick something medium size. 
*/ + elf_et_dyn_base = 0x18000000; + } + +#pragma GCC diagnostic pop + { Error *err = NULL; if (seed_optarg != NULL) { @@ -920,8 +963,6 @@ int main(int argc, char **argv, char **envp) fprintf(f, "page layout changed following binary load\n"); page_dump(f); - fprintf(f, "start_brk 0x" TARGET_ABI_FMT_lx "\n", - info->start_brk); fprintf(f, "end_code 0x" TARGET_ABI_FMT_lx "\n", info->end_code); fprintf(f, "start_code 0x" TARGET_ABI_FMT_lx "\n", diff --git a/linux-user/microblaze/target_mman.h b/linux-user/microblaze/target_mman.h index e7ba6070fe..6b3dd54f89 100644 --- a/linux-user/microblaze/target_mman.h +++ b/linux-user/microblaze/target_mman.h @@ -1 +1,12 @@ +/* + * arch/microblaze/include/asm/processor.h: + * TASK_UNMAPPED_BASE (TASK_SIZE / 8 * 3) + * TASK_SIZE CONFIG_KERNEL_START + * CONFIG_KERNEL_START 0xc0000000 (default in Kconfig) + */ +#define TASK_UNMAPPED_BASE 0x48000000 + +/* arch/microblaze/include/uapi/asm/elf.h */ +#define ELF_ET_DYN_BASE 0x08000000 + #include "../generic/target_mman.h" diff --git a/linux-user/mips/target_mman.h b/linux-user/mips/target_mman.h index e97694aa4e..b84fe1e8a8 100644 --- a/linux-user/mips/target_mman.h +++ b/linux-user/mips/target_mman.h @@ -14,6 +14,16 @@ #define TARGET_MAP_STACK 0x40000 #define TARGET_MAP_HUGETLB 0x80000 +/* + * arch/mips/include/asm/processor.h: + * TASK_UNMAPPED_BASE PAGE_ALIGN(TASK_SIZE / 3) + */ +#define TASK_UNMAPPED_BASE \ + TARGET_PAGE_ALIGN((1ull << TARGET_VIRT_ADDR_SPACE_BITS) / 3) + +/* arch/mips/include/asm/elf.h */ +#define ELF_ET_DYN_BASE (TASK_UNMAPPED_BASE * 2) + #include "../generic/target_mman.h" #endif diff --git a/linux-user/mmap.c b/linux-user/mmap.c index a5dfb56545..9aab48d4a3 100644 --- a/linux-user/mmap.c +++ b/linux-user/mmap.c @@ -263,7 +263,11 @@ static bool mmap_frag(abi_ulong real_start, abi_ulong start, abi_ulong last, void *p = mmap(host_start, qemu_host_page_size, target_to_host_prot(prot), flags | MAP_ANONYMOUS, -1, 0); - if (p == MAP_FAILED) { + if (p != host_start) { + if (p != MAP_FAILED) { + munmap(p, qemu_host_page_size); + errno = EEXIST; + } return false; } prot_old = prot; @@ -295,22 +299,9 @@ static bool mmap_frag(abi_ulong real_start, abi_ulong start, abi_ulong last, return true; } -#if HOST_LONG_BITS == 64 && TARGET_ABI_BITS == 64 -#ifdef TARGET_AARCH64 -# define TASK_UNMAPPED_BASE 0x5500000000 -#else -# define TASK_UNMAPPED_BASE (1ul << 38) -#endif -#else -#ifdef TARGET_HPPA -# define TASK_UNMAPPED_BASE 0xfa000000 -#else -# define TASK_UNMAPPED_BASE 0x40000000 -#endif -#endif -abi_ulong mmap_next_start = TASK_UNMAPPED_BASE; - -unsigned long last_brk; +abi_ulong task_unmapped_base; +abi_ulong elf_et_dyn_base; +abi_ulong mmap_next_start; /* * Subroutine of mmap_find_vma, used when we have pre-allocated @@ -389,7 +380,7 @@ abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size, abi_ulong align) if ((addr & (align - 1)) == 0) { /* Success. */ - if (start == mmap_next_start && addr >= TASK_UNMAPPED_BASE) { + if (start == mmap_next_start && addr >= task_unmapped_base) { mmap_next_start = addr + size; } return addr; @@ -603,11 +594,26 @@ abi_long target_mmap(abi_ulong start, abi_ulong len, int target_prot, goto fail; } - /* Validate that the chosen range is empty. */ - if ((flags & MAP_FIXED_NOREPLACE) - && !page_check_range_empty(start, last)) { - errno = EEXIST; - goto fail; + if (flags & MAP_FIXED_NOREPLACE) { + /* Validate that the chosen range is empty. 
 */
+        if (!page_check_range_empty(start, last)) {
+            errno = EEXIST;
+            goto fail;
+        }
+
+        /*
+         * With reserved_va, the entire address space is mmapped in the
+         * host to ensure it isn't accidentally used for something else.
+         * We have just checked that the guest address is not mapped
+         * within the guest, but need to replace the host reservation.
+         *
+         * Without reserved_va, despite the guest address check above,
+         * keep MAP_FIXED_NOREPLACE so that the guest does not overwrite
+         * any host address mappings.
+         */
+        if (reserved_va) {
+            flags = (flags & ~MAP_FIXED_NOREPLACE) | MAP_FIXED;
+        }
     }
 
     /*
@@ -672,17 +678,25 @@ abi_long target_mmap(abi_ulong start, abi_ulong len, int target_prot,
 
         /* map the middle (easier) */
         if (real_start < real_last) {
-            void *p;
+            void *p, *want_p;
             off_t offset1;
+            size_t len1;
 
             if (flags & MAP_ANONYMOUS) {
                 offset1 = 0;
             } else {
                 offset1 = offset + real_start - start;
             }
-            p = mmap(g2h_untagged(real_start), real_last - real_start + 1,
-                     target_to_host_prot(target_prot), flags, fd, offset1);
-            if (p == MAP_FAILED) {
+            len1 = real_last - real_start + 1;
+            want_p = g2h_untagged(real_start);
+
+            p = mmap(want_p, len1, target_to_host_prot(target_prot),
+                     flags, fd, offset1);
+            if (p != want_p) {
+                if (p != MAP_FAILED) {
+                    munmap(p, len1);
+                    errno = EEXIST;
+                }
                 goto fail;
             }
             passthrough_start = real_start;
diff --git a/linux-user/nios2/target_mman.h b/linux-user/nios2/target_mman.h
index e7ba6070fe..ab16ad4f03 100644
--- a/linux-user/nios2/target_mman.h
+++ b/linux-user/nios2/target_mman.h
@@ -1 +1,11 @@
+/*
+ * arch/nios2/include/asm/processor.h:
+ * TASK_UNMAPPED_BASE        PAGE_ALIGN(TASK_SIZE / 3)
+ * TASK_SIZE                 0x7FFF0000UL
+ */
+#define TASK_UNMAPPED_BASE TARGET_PAGE_ALIGN(0x7FFF0000 / 3)
+
+/* arch/nios2/include/asm/elf.h */
+#define ELF_ET_DYN_BASE    0xD0000000
+
 #include "../generic/target_mman.h"
diff --git a/linux-user/openrisc/target_mman.h b/linux-user/openrisc/target_mman.h
index e7ba6070fe..243c1d5f26 100644
--- a/linux-user/openrisc/target_mman.h
+++ b/linux-user/openrisc/target_mman.h
@@ -1 +1,11 @@
+/*
+ * arch/openrisc/include/asm/processor.h:
+ * TASK_UNMAPPED_BASE        (TASK_SIZE / 8 * 3)
+ * TASK_SIZE                 (0x80000000UL)
+ */
+#define TASK_UNMAPPED_BASE 0x30000000
+
+/* arch/openrisc/include/asm/elf.h */
+#define ELF_ET_DYN_BASE    0x08000000
+
 #include "../generic/target_mman.h"
diff --git a/linux-user/ppc/target_mman.h b/linux-user/ppc/target_mman.h
index 67cc218f2e..646d1ccae7 100644
--- a/linux-user/ppc/target_mman.h
+++ b/linux-user/ppc/target_mman.h
@@ -4,6 +4,26 @@
 #define TARGET_MAP_NORESERVE 0x40
 #define TARGET_MAP_LOCKED 0x80
 
+/*
+ * arch/powerpc/include/asm/task_size_64.h
+ * TASK_UNMAPPED_BASE_USER32 (PAGE_ALIGN(TASK_SIZE_USER32 / 4))
+ * TASK_UNMAPPED_BASE_USER64 (PAGE_ALIGN(DEFAULT_MAP_WINDOW_USER64 / 4))
+ * TASK_SIZE_USER32          (0x0000000100000000UL - (1 * PAGE_SIZE))
+ * DEFAULT_MAP_WINDOW_USER64 TASK_SIZE_64TB (with 4k pages)
+ */
+#ifdef TARGET_PPC64
+#define TASK_UNMAPPED_BASE 0x0000100000000000ull
+#else
+#define TASK_UNMAPPED_BASE 0x40000000
+#endif
+
+/* arch/powerpc/include/asm/elf.h */
+#ifdef TARGET_PPC64
+#define ELF_ET_DYN_BASE 0x100000000ull
+#else
+#define ELF_ET_DYN_BASE 0x000400000
+#endif
+
 #include "../generic/target_mman.h"
 
 #endif
diff --git a/linux-user/qemu.h b/linux-user/qemu.h
index 802794db63..4f8b55e2fb 100644
--- a/linux-user/qemu.h
+++ b/linux-user/qemu.h
@@ -29,9 +29,7 @@ struct image_info {
         abi_ulong       end_code;
         abi_ulong       start_data;
         abi_ulong       end_data;
-        abi_ulong       start_brk;
         abi_ulong       brk;
-        abi_ulong       reserve_brk;
         abi_ulong       start_mmap;
         abi_ulong       start_stack;
         abi_ulong       stack_limit;
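Both mmap.c hunks above apply the same defensive pattern: when mmap succeeds but not at the requested address, the host kernel evidently ignored MAP_FIXED_NOREPLACE (it predates the flag) and fell back to hint semantics, so the stray mapping must be undone and the call reported as EEXIST. A hedged sketch of that pattern in isolation, with an invented helper name:

#define _GNU_SOURCE
#include <errno.h>
#include <sys/mman.h>
#include <sys/types.h>

/* Map exactly at want, or fail; never leave a mapping elsewhere. */
static void *mmap_exactly(void *want, size_t len, int prot, int flags,
                          int fd, off_t off)
{
    void *p = mmap(want, len, prot, flags, fd, off);

    if (p != want) {
        if (p != MAP_FAILED) {
            /* Old kernel honored only the hint: roll back. */
            munmap(p, len);
            errno = EEXIST;
        }
        return MAP_FAILED;
    }
    return p;
}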
diff --git a/linux-user/riscv/target_mman.h b/linux-user/riscv/target_mman.h
index e7ba6070fe..3049bcc67d 100644
--- a/linux-user/riscv/target_mman.h
+++ b/linux-user/riscv/target_mman.h
@@ -1 +1,11 @@
+/*
+ * arch/riscv/include/asm/processor.h:
+ * TASK_UNMAPPED_BASE        PAGE_ALIGN(TASK_SIZE / 3)
+ */
+#define TASK_UNMAPPED_BASE \
+    TARGET_PAGE_ALIGN((1ull << (TARGET_VIRT_ADDR_SPACE_BITS - 1)) / 3)
+
+/* arch/riscv/include/asm/elf.h */
+#define ELF_ET_DYN_BASE (TASK_UNMAPPED_BASE * 2)
+
 #include "../generic/target_mman.h"
diff --git a/linux-user/s390x/target_mman.h b/linux-user/s390x/target_mman.h
index e7ba6070fe..c82435e381 100644
--- a/linux-user/s390x/target_mman.h
+++ b/linux-user/s390x/target_mman.h
@@ -1 +1,21 @@
+/*
+ * arch/s390/include/asm/processor.h:
+ * TASK_UNMAPPED_BASE        (... : (_REGION2_SIZE >> 1))
+ *
+ * arch/s390/include/asm/pgtable.h:
+ * _REGION2_SIZE             (1UL << _REGION2_SHIFT)
+ * _REGION2_SHIFT            42
+ */
+#define TASK_UNMAPPED_BASE (1ull << 41)
+
+/*
+ * arch/s390/include/asm/elf.h:
+ * ELF_ET_DYN_BASE           (STACK_TOP / 3 * 2) & ~((1UL << 32) - 1)
+ *
+ * arch/s390/include/asm/processor.h:
+ * STACK_TOP                 VDSO_LIMIT - VDSO_SIZE - PAGE_SIZE
+ * VDSO_LIMIT                _REGION2_SIZE
+ */
+#define ELF_ET_DYN_BASE (((1ull << 42) / 3 * 2) & ~0xffffffffull)
+
 #include "../generic/target_mman.h"
diff --git a/linux-user/sh4/target_mman.h b/linux-user/sh4/target_mman.h
index e7ba6070fe..dd9016081e 100644
--- a/linux-user/sh4/target_mman.h
+++ b/linux-user/sh4/target_mman.h
@@ -1 +1,8 @@
+/* arch/sh/include/asm/processor_32.h */
+#define TASK_UNMAPPED_BASE \
+    TARGET_PAGE_ALIGN((1u << TARGET_VIRT_ADDR_SPACE_BITS) / 3)
+
+/* arch/sh/include/asm/elf.h */
+#define ELF_ET_DYN_BASE (TASK_UNMAPPED_BASE * 2)
+
 #include "../generic/target_mman.h"
diff --git a/linux-user/sparc/target_mman.h b/linux-user/sparc/target_mman.h
index 9bad99c852..696ca73fe4 100644
--- a/linux-user/sparc/target_mman.h
+++ b/linux-user/sparc/target_mman.h
@@ -5,6 +5,31 @@
 #define TARGET_MAP_LOCKED 0x100
 #define TARGET_MAP_GROWSDOWN 0x0200
 
+/*
+ * arch/sparc/include/asm/page_64.h:
+ * TASK_UNMAPPED_BASE        (test_thread_flag(TIF_32BIT) ? \
+ *                            _AC(0x0000000070000000,UL) : \
+ *                            VA_EXCLUDE_END)
+ * But VA_EXCLUDE_END is > 0xffff800000000000UL which doesn't work
+ * in userland emulation.
+ */
+#ifdef TARGET_ABI32
+#define TASK_UNMAPPED_BASE 0x70000000
+#else
+#define TASK_UNMAPPED_BASE (1ull << (TARGET_VIRT_ADDR_SPACE_BITS - 2))
+#endif
+
+/*
+ * arch/sparc/include/asm/elf_64.h
+ * Except that COMPAT_ELF_ET_DYN_BASE exactly matches TASK_UNMAPPED_BASE,
+ * so move it up a bit.
+ */
+#ifdef TARGET_ABI32
+#define ELF_ET_DYN_BASE 0x78000000
+#else
+#define ELF_ET_DYN_BASE 0x0000010000000000ull
+#endif
+
 #include "../generic/target_mman.h"
 
 #endif
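The target_mman.h headers in this series all instantiate the same two kernel formulas: TASK_UNMAPPED_BASE is typically PAGE_ALIGN(TASK_SIZE / 3), and ELF_ET_DYN_BASE typically lands at twice that (TASK_SIZE / 3 * 2). A worked example under assumed values, for a hypothetical 32-bit target with TASK_SIZE = 3GiB and 4KiB pages:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE     4096u
#define PAGE_ALIGN(x) (((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

int main(void)
{
    uint32_t task_size = 0xc0000000u;                /* 3 GiB */
    uint32_t unmapped_base = PAGE_ALIGN(task_size / 3);
    uint32_t et_dyn_base = unmapped_base * 2;        /* TASK_SIZE / 3 * 2 */

    printf("TASK_UNMAPPED_BASE = 0x%08x\n", unmapped_base); /* 0x40000000 */
    printf("ELF_ET_DYN_BASE    = 0x%08x\n", et_dyn_base);   /* 0x80000000 */
    return 0;
}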
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index 95727a816a..9353268cc1 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -455,7 +455,6 @@ static const bitmask_transtbl fcntl_flags_tbl[] = {
 #if TARGET_O_LARGEFILE != 0 || O_LARGEFILE != 0
     { TARGET_O_LARGEFILE, TARGET_O_LARGEFILE, O_LARGEFILE, O_LARGEFILE, },
 #endif
-    { 0, 0, 0, 0 }
 };
 
 _syscall2(int, sys_getcwd1, char *, buf, size_t, size)
@@ -802,93 +801,52 @@ static inline int host_to_target_sock_type(int host_type)
 }
 
 static abi_ulong target_brk, initial_target_brk;
-static abi_ulong brk_page;
 
 void target_set_brk(abi_ulong new_brk)
 {
     target_brk = TARGET_PAGE_ALIGN(new_brk);
     initial_target_brk = target_brk;
-    brk_page = HOST_PAGE_ALIGN(target_brk);
 }
 
 /* do_brk() must return target values and target errnos. */
 abi_long do_brk(abi_ulong brk_val)
 {
     abi_long mapped_addr;
-    abi_ulong new_alloc_size;
-    abi_ulong new_brk, new_host_brk_page;
+    abi_ulong new_brk;
+    abi_ulong old_brk;
 
     /* brk pointers are always untagged */
 
-    /* return old brk value if brk_val unchanged or zero */
-    if (!brk_val || brk_val == target_brk) {
-        return target_brk;
-    }
-
     /* Do not allow shrinking below the initial brk value. */
     if (brk_val < initial_target_brk) {
-        brk_val = initial_target_brk;
+        return target_brk;
     }
 
     new_brk = TARGET_PAGE_ALIGN(brk_val);
-    new_host_brk_page = HOST_PAGE_ALIGN(brk_val);
+    old_brk = TARGET_PAGE_ALIGN(target_brk);
 
-    /* brk_val and old target_brk might be on the same page */
-    if (new_brk == TARGET_PAGE_ALIGN(target_brk)) {
-        /* empty remaining bytes in (possibly larger) host page */
-        memset(g2h_untagged(new_brk), 0, new_host_brk_page - new_brk);
+    /* new and old target_brk might be on the same page */
+    if (new_brk == old_brk) {
        target_brk = brk_val;
        return target_brk;
    }

     /* Release heap if necessary. */
-    if (new_brk < target_brk) {
-        /* empty remaining bytes in (possibly larger) host page */
-        memset(g2h_untagged(new_brk), 0, new_host_brk_page - new_brk);
-
-        /* free unused host pages and set new brk_page */
-        target_munmap(new_host_brk_page, brk_page - new_host_brk_page);
-        brk_page = new_host_brk_page;
+    if (new_brk < old_brk) {
+        target_munmap(new_brk, old_brk - new_brk);
 
         target_brk = brk_val;
         return target_brk;
     }
 
-    /* We need to allocate more memory after the brk... Note that
-     * we don't use MAP_FIXED because that will map over the top of
-     * any existing mapping (like the one with the host libc or qemu
-     * itself); instead we treat "mapped but at wrong address" as
-     * a failure and unmap again.
-     */
-    if (new_host_brk_page > brk_page) {
-        new_alloc_size = new_host_brk_page - brk_page;
-        mapped_addr = get_errno(target_mmap(brk_page, new_alloc_size,
-                                            PROT_READ|PROT_WRITE,
-                                            MAP_ANON|MAP_PRIVATE, 0, 0));
-    } else {
-        new_alloc_size = 0;
-        mapped_addr = brk_page;
-    }
-
-    if (mapped_addr == brk_page) {
-        /* Heap contents are initialized to zero, as for anonymous
-         * mapped pages.  Technically the new pages are already
-         * initialized to zero since they *are* anonymous mapped
-         * pages, however we have to take care with the contents that
-         * come from the remaining part of the previous page: it may
-         * contains garbage data due to a previous heap usage (grown
-         * then shrunken).
-         */
-        memset(g2h_untagged(brk_page), 0, HOST_PAGE_ALIGN(brk_page) - brk_page);
+    mapped_addr = target_mmap(old_brk, new_brk - old_brk,
+                              PROT_READ | PROT_WRITE,
+                              MAP_FIXED_NOREPLACE | MAP_ANON | MAP_PRIVATE,
+                              -1, 0);
 
+    if (mapped_addr == old_brk) {
         target_brk = brk_val;
-        brk_page = new_host_brk_page;
         return target_brk;
-    } else if (mapped_addr != -1) {
-        /* Mapped but at wrong address, meaning there wasn't actually
-         * enough space for this brk.
-         */
-        target_munmap(mapped_addr, new_alloc_size);
-        mapped_addr = -1;
     }
 
 #if defined(TARGET_ALPHA)
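With brk_page gone, the rewritten do_brk above reduces to arithmetic on page-aligned boundaries: the same page means bookkeeping only, shrinking unmaps, and growing maps fresh anonymous pages with MAP_FIXED_NOREPLACE so that an occupied range leaves the break unchanged. A condensed sketch of that scheme, with QEMU's target/host page-size distinction and errno handling deliberately omitted (not the actual implementation):

#define _GNU_SOURCE
#include <stdint.h>
#include <sys/mman.h>

#define PAGE_ALIGN(x) (((x) + 4095u) & ~(uintptr_t)4095u)

static uintptr_t cur_brk;    /* emulated program break */

static uintptr_t emu_brk(uintptr_t new_val)
{
    uintptr_t new_brk = PAGE_ALIGN(new_val);
    uintptr_t old_brk = PAGE_ALIGN(cur_brk);

    if (new_brk == old_brk) {
        cur_brk = new_val;                  /* same page: bookkeeping only */
    } else if (new_brk < old_brk) {
        munmap((void *)new_brk, old_brk - new_brk);     /* release heap */
        cur_brk = new_val;
    } else if (mmap((void *)old_brk, new_brk - old_brk,
                    PROT_READ | PROT_WRITE,
                    MAP_FIXED_NOREPLACE | MAP_ANONYMOUS | MAP_PRIVATE,
                    -1, 0) == (void *)old_brk) {
        cur_brk = new_val;                  /* grew into free pages */
    }
    return cur_brk;                         /* on failure: unchanged */
}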
@@ -5854,7 +5812,6 @@ static const bitmask_transtbl iflag_tbl[] = {
     { TARGET_IXOFF, TARGET_IXOFF, IXOFF, IXOFF },
     { TARGET_IMAXBEL, TARGET_IMAXBEL, IMAXBEL, IMAXBEL },
     { TARGET_IUTF8, TARGET_IUTF8, IUTF8, IUTF8},
-    { 0, 0, 0, 0 }
 };
 
 static const bitmask_transtbl oflag_tbl[] = {
@@ -5882,7 +5839,6 @@ static const bitmask_transtbl oflag_tbl[] = {
     { TARGET_VTDLY, TARGET_VT1, VTDLY, VT1 },
     { TARGET_FFDLY, TARGET_FF0, FFDLY, FF0 },
     { TARGET_FFDLY, TARGET_FF1, FFDLY, FF1 },
-    { 0, 0, 0, 0 }
 };
 
 static const bitmask_transtbl cflag_tbl[] = {
@@ -5917,7 +5873,6 @@ static const bitmask_transtbl cflag_tbl[] = {
     { TARGET_HUPCL, TARGET_HUPCL, HUPCL, HUPCL },
     { TARGET_CLOCAL, TARGET_CLOCAL, CLOCAL, CLOCAL },
     { TARGET_CRTSCTS, TARGET_CRTSCTS, CRTSCTS, CRTSCTS },
-    { 0, 0, 0, 0 }
 };
 
 static const bitmask_transtbl lflag_tbl[] = {
@@ -5937,7 +5892,6 @@ static const bitmask_transtbl lflag_tbl[] = {
     { TARGET_PENDIN, TARGET_PENDIN, PENDIN, PENDIN },
     { TARGET_IEXTEN, TARGET_IEXTEN, IEXTEN, IEXTEN },
     { TARGET_EXTPROC, TARGET_EXTPROC, EXTPROC, EXTPROC},
-    { 0, 0, 0, 0 }
 };
 
 static void target_to_host_termios (void *dst, const void *src)
@@ -6026,10 +5980,6 @@ static const StructEntry struct_termios_def = {
 #endif
 
 static const bitmask_transtbl mmap_flags_tbl[] = {
-    { TARGET_MAP_TYPE, TARGET_MAP_SHARED, MAP_TYPE, MAP_SHARED },
-    { TARGET_MAP_TYPE, TARGET_MAP_PRIVATE, MAP_TYPE, MAP_PRIVATE },
-    { TARGET_MAP_TYPE, TARGET_MAP_SHARED_VALIDATE,
-      MAP_TYPE, MAP_SHARED_VALIDATE },
     { TARGET_MAP_FIXED, TARGET_MAP_FIXED, MAP_FIXED, MAP_FIXED },
     { TARGET_MAP_ANONYMOUS, TARGET_MAP_ANONYMOUS,
       MAP_ANONYMOUS, MAP_ANONYMOUS },
@@ -6047,17 +5997,84 @@ static const bitmask_transtbl mmap_flags_tbl[] = {
        Recognize it for the target insofar as we do not want to pass
        it through to the host.  */
     { TARGET_MAP_STACK, TARGET_MAP_STACK, 0, 0 },
-    { TARGET_MAP_SYNC, TARGET_MAP_SYNC, MAP_SYNC, MAP_SYNC },
     { TARGET_MAP_NONBLOCK, TARGET_MAP_NONBLOCK, MAP_NONBLOCK, MAP_NONBLOCK },
     { TARGET_MAP_POPULATE, TARGET_MAP_POPULATE, MAP_POPULATE, MAP_POPULATE },
     { TARGET_MAP_FIXED_NOREPLACE, TARGET_MAP_FIXED_NOREPLACE,
       MAP_FIXED_NOREPLACE, MAP_FIXED_NOREPLACE },
     { TARGET_MAP_UNINITIALIZED, TARGET_MAP_UNINITIALIZED,
       MAP_UNINITIALIZED, MAP_UNINITIALIZED },
-    { 0, 0, 0, 0 }
 };
 
 /*
+ * Arrange for legacy / undefined architecture-specific flags to be
+ * ignored by mmap handling code.
+ */
+#ifndef TARGET_MAP_32BIT
+#define TARGET_MAP_32BIT 0
+#endif
+#ifndef TARGET_MAP_HUGE_2MB
+#define TARGET_MAP_HUGE_2MB 0
+#endif
+#ifndef TARGET_MAP_HUGE_1GB
+#define TARGET_MAP_HUGE_1GB 0
+#endif
+
+static abi_long do_mmap(abi_ulong addr, abi_ulong len, int prot,
+                        int target_flags, int fd, off_t offset)
+{
+    /*
+     * The historical set of flags that all mmap types implicitly support.
+     */
+    enum {
+        TARGET_LEGACY_MAP_MASK = TARGET_MAP_SHARED
+                               | TARGET_MAP_PRIVATE
+                               | TARGET_MAP_FIXED
+                               | TARGET_MAP_ANONYMOUS
+                               | TARGET_MAP_DENYWRITE
+                               | TARGET_MAP_EXECUTABLE
+                               | TARGET_MAP_UNINITIALIZED
+                               | TARGET_MAP_GROWSDOWN
+                               | TARGET_MAP_LOCKED
+                               | TARGET_MAP_NORESERVE
+                               | TARGET_MAP_POPULATE
+                               | TARGET_MAP_NONBLOCK
+                               | TARGET_MAP_STACK
+                               | TARGET_MAP_HUGETLB
+                               | TARGET_MAP_32BIT
+                               | TARGET_MAP_HUGE_2MB
+                               | TARGET_MAP_HUGE_1GB
+    };
+    int host_flags;
+
+    switch (target_flags & TARGET_MAP_TYPE) {
+    case TARGET_MAP_PRIVATE:
+        host_flags = MAP_PRIVATE;
+        break;
+    case TARGET_MAP_SHARED:
+        host_flags = MAP_SHARED;
+        break;
+    case TARGET_MAP_SHARED_VALIDATE:
+        /*
+         * MAP_SYNC is only supported for MAP_SHARED_VALIDATE, and is
+         * therefore omitted from mmap_flags_tbl and TARGET_LEGACY_MAP_MASK.
+         */
+        if (target_flags & ~(TARGET_LEGACY_MAP_MASK | TARGET_MAP_SYNC)) {
+            return -TARGET_EOPNOTSUPP;
+        }
+        host_flags = MAP_SHARED_VALIDATE;
+        if (target_flags & TARGET_MAP_SYNC) {
+            host_flags |= MAP_SYNC;
+        }
+        break;
+    default:
+        return -TARGET_EINVAL;
+    }
+    host_flags |= target_to_host_bitmask(target_flags, mmap_flags_tbl);
+
+    return get_errno(target_mmap(addr, len, prot, host_flags, fd, offset));
+}
+
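Dropping the { 0, 0, 0, 0 } sentinels is safe because the _len variants introduced in the linux-user/thunk.c hunk below iterate over an explicit table length instead of stopping at an all-zero entry. A toy demonstration of the table-driven translation that do_mmap feeds with mmap_flags_tbl, using invented flag values:

#include <stddef.h>
#include <stdio.h>

typedef struct {
    unsigned target_mask, target_bits, host_mask, host_bits;
} bitmask_transtbl;

static const bitmask_transtbl demo_tbl[] = {
    { 0x01, 0x01, 0x10, 0x10 },    /* hypothetical FIXED */
    { 0x02, 0x02, 0x40, 0x40 },    /* hypothetical ANONYMOUS */
};

static unsigned to_host(unsigned target)
{
    unsigned host = 0;

    for (size_t i = 0; i < sizeof(demo_tbl) / sizeof(demo_tbl[0]); i++) {
        /* Mask the source; on a match, OR in the destination bits. */
        if ((target & demo_tbl[i].target_mask) == demo_tbl[i].target_bits) {
            host |= demo_tbl[i].host_bits;
        }
    }
    return host;
}

int main(void)
{
    printf("0x03 -> 0x%x\n", to_host(0x03));    /* prints 0x03 -> 0x50 */
    return 0;
}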
+/*
  * NOTE: TARGET_ABI32 is defined for TARGET_I386 (but not for TARGET_X86_64)
  * TARGET_I386 is defined if TARGET_X86_64 is defined
  */
@@ -8111,16 +8128,17 @@ static int open_self_maps_1(CPUArchState *cpu_env, int fd, bool smaps)
 {
     CPUState *cpu = env_cpu(cpu_env);
     TaskState *ts = cpu->opaque;
-    GSList *map_info = read_self_maps();
-    GSList *s;
+    IntervalTreeRoot *map_info = read_self_maps();
+    IntervalTreeNode *s;
     int count;
 
-    for (s = map_info; s; s = g_slist_next(s)) {
-        MapInfo *e = (MapInfo *) s->data;
+    for (s = interval_tree_iter_first(map_info, 0, -1); s;
+         s = interval_tree_iter_next(s, 0, -1)) {
+        MapInfo *e = container_of(s, MapInfo, itree);
 
-        if (h2g_valid(e->start)) {
-            unsigned long min = e->start;
-            unsigned long max = e->end;
+        if (h2g_valid(e->itree.start)) {
+            unsigned long min = e->itree.start;
+            unsigned long max = e->itree.last + 1;
             int flags = page_get_flags(h2g(min));
             const char *path;
 
@@ -8539,9 +8557,12 @@ static int open_hardware(CPUArchState *cpu_env, int fd)
 }
 #endif
 
-int do_guest_openat(CPUArchState *cpu_env, int dirfd, const char *pathname,
+
+int do_guest_openat(CPUArchState *cpu_env, int dirfd, const char *fname,
                     int flags, mode_t mode, bool safe)
 {
+    g_autofree char *proc_name = NULL;
+    const char *pathname;
     struct fake_open {
         const char *filename;
         int (*fill)(CPUArchState *cpu_env, int fd);
@@ -8567,6 +8588,14 @@ int do_guest_openat(CPUArchState *cpu_env, int dirfd, const char *fname,
         { NULL, NULL, NULL }
     };
 
+    /* If this is a file from the /proc filesystem, expand to the full name. */
+    proc_name = realpath(fname, NULL);
+    if (proc_name && strncmp(proc_name, "/proc/", 6) == 0) {
+        pathname = proc_name;
+    } else {
+        pathname = fname;
+    }
+
     if (is_proc_myself(pathname, "exe")) {
         if (safe) {
             return safe_openat(dirfd, exec_path, flags, mode);
@@ -10576,28 +10605,20 @@ static abi_long do_syscall1(CPUArchState *cpu_env, int num, abi_long arg1,
             v5 = tswapal(v[4]);
             v6 = tswapal(v[5]);
             unlock_user(v, arg1, 0);
-            ret = get_errno(target_mmap(v1, v2, v3,
-                                        target_to_host_bitmask(v4, mmap_flags_tbl),
-                                        v5, v6));
+            return do_mmap(v1, v2, v3, v4, v5, v6);
         }
 #else
         /* mmap pointers are always untagged */
-        ret = get_errno(target_mmap(arg1, arg2, arg3,
-                                    target_to_host_bitmask(arg4, mmap_flags_tbl),
-                                    arg5,
-                                    arg6));
+        return do_mmap(arg1, arg2, arg3, arg4, arg5, arg6);
 #endif
-
-    return ret;
 #endif
 #ifdef TARGET_NR_mmap2
     case TARGET_NR_mmap2:
 #ifndef MMAP_SHIFT
 #define MMAP_SHIFT 12
 #endif
-        ret = target_mmap(arg1, arg2, arg3,
-                          target_to_host_bitmask(arg4, mmap_flags_tbl),
-                          arg5, (off_t)(abi_ulong)arg6 << MMAP_SHIFT);
-        return get_errno(ret);
+        return do_mmap(arg1, arg2, arg3, arg4, arg5,
+                       (off_t)(abi_ulong)arg6 << MMAP_SHIFT);
 #endif
     case TARGET_NR_munmap:
         arg1 = cpu_untagged_addr(cpu, arg1);
diff --git a/linux-user/thunk.c b/linux-user/thunk.c
index dac4bf11c6..071aad4b5f 100644
--- a/linux-user/thunk.c
+++ b/linux-user/thunk.c
@@ -436,29 +436,29 @@ const argtype *thunk_print(void *arg, const argtype *type_ptr)
 /* Utility function: Table-driven functions to translate bitmasks
  * between host and target formats
  */
-unsigned int target_to_host_bitmask(unsigned int target_mask,
-                                    const bitmask_transtbl * trans_tbl)
+unsigned int target_to_host_bitmask_len(unsigned int target_mask,
+                                        const bitmask_transtbl *tbl,
+                                        size_t len)
 {
-    const bitmask_transtbl *btp;
     unsigned int host_mask = 0;
 
-    for (btp = trans_tbl; btp->target_mask && btp->host_mask; btp++) {
-        if ((target_mask & btp->target_mask) == btp->target_bits) {
-            host_mask |= btp->host_bits;
+    for (size_t i = 0; i < len; ++i) {
+        if ((target_mask & tbl[i].target_mask) == tbl[i].target_bits) {
+            host_mask |= tbl[i].host_bits;
         }
     }
     return host_mask;
 }
 
-unsigned int host_to_target_bitmask(unsigned int host_mask,
-                                    const bitmask_transtbl * trans_tbl)
+unsigned int host_to_target_bitmask_len(unsigned int host_mask,
+                                        const bitmask_transtbl *tbl,
+                                        size_t len)
 {
-    const bitmask_transtbl *btp;
     unsigned int target_mask = 0;
 
-    for (btp = trans_tbl; btp->target_mask && btp->host_mask; btp++) {
-        if ((host_mask & btp->host_mask) == btp->host_bits) {
-            target_mask |= btp->target_bits;
+    for (size_t i = 0; i < len; ++i) {
+        if ((host_mask & tbl[i].host_mask) == tbl[i].host_bits) {
+            target_mask |= tbl[i].target_bits;
         }
     }
     return target_mask;
 }
diff --git a/linux-user/user-mmap.h b/linux-user/user-mmap.h
index 3fc986f92f..0f4883eb57 100644
--- a/linux-user/user-mmap.h
+++ b/linux-user/user-mmap.h
@@ -18,6 +18,34 @@
 #ifndef LINUX_USER_USER_MMAP_H
 #define LINUX_USER_USER_MMAP_H
 
+/*
+ * Guest parameters for the ADDR_COMPAT_LAYOUT personality
+ * (at present this is the only layout supported by QEMU).
+ *
+ * TASK_UNMAPPED_BASE: For mmap without hint (addr == 0), the search
+ * for unused virtual memory begins at TASK_UNMAPPED_BASE.
+ *
+ * ELF_ET_DYN_BASE: When the executable is ET_DYN (i.e. PIE), and requires
+ * an interpreter (i.e. not -static-pie), use ELF_ET_DYN_BASE instead of
+ * TASK_UNMAPPED_BASE for selecting the address of the executable.
+ * This provides some distance between the executable and the interpreter,
+ * which allows the initial brk to be placed immediately after the
+ * executable and also have room to grow.
+ *
+ * task_unmapped_base, elf_et_dyn_base: When the guest address space is
+ * limited via -R, the values of TASK_UNMAPPED_BASE and ELF_ET_DYN_BASE
+ * must be adjusted to fit.
+ */
+extern abi_ulong task_unmapped_base;
+extern abi_ulong elf_et_dyn_base;
+
+/*
+ * mmap_next_start: The base address for the next mmap without hint,
+ * increased after each successful map, starting at task_unmapped_base.
+ * This is an optimization within QEMU and not part of ADDR_COMPAT_LAYOUT.
+ */ +extern abi_ulong mmap_next_start; + int target_mprotect(abi_ulong start, abi_ulong len, int prot); abi_long target_mmap(abi_ulong start, abi_ulong len, int prot, int flags, int fd, off_t offset); @@ -26,8 +54,6 @@ abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size, abi_ulong new_size, unsigned long flags, abi_ulong new_addr); abi_long target_madvise(abi_ulong start, abi_ulong len_in, int advice); -extern unsigned long last_brk; -extern abi_ulong mmap_next_start; abi_ulong mmap_find_vma(abi_ulong, abi_ulong, abi_ulong); void mmap_fork_start(void); void mmap_fork_end(int child); diff --git a/linux-user/x86_64/target_mman.h b/linux-user/x86_64/target_mman.h index e7ba6070fe..48fbf20b42 100644 --- a/linux-user/x86_64/target_mman.h +++ b/linux-user/x86_64/target_mman.h @@ -1 +1,16 @@ +/* + * arch/x86/include/asm/processor.h: + * TASK_UNMAPPED_BASE __TASK_UNMAPPED_BASE(TASK_SIZE_LOW) + * __TASK_UNMAPPED_BASE(S) PAGE_ALIGN(S / 3) + * + * arch/x86/include/asm/page_64_types.h: + * TASK_SIZE_LOW DEFAULT_MAP_WINDOW + * DEFAULT_MAP_WINDOW ((1UL << 47) - PAGE_SIZE) + */ +#define TASK_UNMAPPED_BASE \ + TARGET_PAGE_ALIGN((1ull << TARGET_VIRT_ADDR_SPACE_BITS) / 3) + +/* arch/x86/include/asm/elf.h */ +#define ELF_ET_DYN_BASE (TASK_UNMAPPED_BASE * 2) + #include "../generic/target_mman.h" diff --git a/linux-user/xtensa/target_mman.h b/linux-user/xtensa/target_mman.h index 3933771b5b..8fa6337a97 100644 --- a/linux-user/xtensa/target_mman.h +++ b/linux-user/xtensa/target_mman.h @@ -14,6 +14,16 @@ #define TARGET_MAP_STACK 0x40000 #define TARGET_MAP_HUGETLB 0x80000 +/* + * arch/xtensa/include/asm/processor.h: + * TASK_UNMAPPED_BASE (TASK_SIZE / 2) + */ +#define TASK_UNMAPPED_BASE (1u << (TARGET_VIRT_ADDR_SPACE_BITS - 1)) + +/* arch/xtensa/include/asm/elf.h */ +#define ELF_ET_DYN_BASE \ + TARGET_PAGE_ALIGN((1u << TARGET_VIRT_ADDR_SPACE_BITS) / 3) + #include "../generic/target_mman.h" #endif diff --git a/stubs/colo.c b/stubs/colo.c index f33379d0fd..08c9f982d5 100644 --- a/stubs/colo.c +++ b/stubs/colo.c @@ -21,7 +21,7 @@ void colo_checkpoint_delay_set(void) void migrate_start_colo_process(MigrationState *s) { - error_report("Impossible happend: trying to start COLO when COLO " + error_report("Impossible happened: trying to start COLO when COLO " "module is not built in"); abort(); } diff --git a/target/hppa/cpu.h b/target/hppa/cpu.h index 9fe79b1242..75c5c0ccf7 100644 --- a/target/hppa/cpu.h +++ b/target/hppa/cpu.h @@ -168,6 +168,9 @@ typedef struct { } hppa_tlb_entry; typedef struct CPUArchState { + target_ureg iaoq_f; /* front */ + target_ureg iaoq_b; /* back, aka next instruction */ + target_ureg gr[32]; uint64_t fr[32]; uint64_t sr[8]; /* stored shifted into place for gva */ @@ -186,8 +189,6 @@ typedef struct CPUArchState { target_ureg psw_cb; /* in least significant bit of next nibble */ target_ureg psw_cb_msb; /* boolean */ - target_ureg iaoq_f; /* front */ - target_ureg iaoq_b; /* back, aka next instruction */ uint64_t iasq_f; uint64_t iasq_b; diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc index 46afd9960b..8f93a239dd 100644 --- a/target/i386/tcg/decode-new.c.inc +++ b/target/i386/tcg/decode-new.c.inc @@ -1803,16 +1803,18 @@ static void disas_insn_new(DisasContext *s, CPUState *cpu, int b) } break; - case X86_SPECIAL_MMX: - if (!(s->prefix & (PREFIX_REPZ | PREFIX_REPNZ | PREFIX_DATA))) { - gen_helper_enter_mmx(cpu_env); - } + default: break; } if (!validate_vex(s, &decode)) { return; } + if (decode.e.special == X86_SPECIAL_MMX && + !(s->prefix & 
(PREFIX_REPZ | PREFIX_REPNZ | PREFIX_DATA))) { + gen_helper_enter_mmx(cpu_env); + } + if (decode.op[0].has_ea || decode.op[1].has_ea || decode.op[2].has_ea) { gen_load_ea(s, &decode.mem, decode.e.vex_class == 12); } diff --git a/target/openrisc/interrupt.c b/target/openrisc/interrupt.c index 3887812810..d4fdb8ce8e 100644 --- a/target/openrisc/interrupt.c +++ b/target/openrisc/interrupt.c @@ -34,9 +34,7 @@ void openrisc_cpu_do_interrupt(CPUState *cs) int exception = cs->exception_index; env->epcr = env->pc; - if (exception == EXCP_SYSCALL) { - env->epcr += 4; - } + /* When we have an illegal instruction the error effective address shall be set to the illegal instruction address. */ if (exception == EXCP_ILLEGAL) { @@ -63,6 +61,9 @@ void openrisc_cpu_do_interrupt(CPUState *cs) env->epcr -= 4; } else { env->sr &= ~SR_DSX; + if (exception == EXCP_SYSCALL || exception == EXCP_FPE) { + env->epcr += 4; + } } if (exception > 0 && exception < EXCP_NR) { diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c index 003805b202..9aa8e46566 100644 --- a/target/ppc/excp_helper.c +++ b/target/ppc/excp_helper.c @@ -2685,6 +2685,12 @@ void helper_pminsn(CPUPPCState *env, uint32_t insn) env->resume_as_sreset = (insn != PPC_PM_STOP) || (env->spr[SPR_PSSCR] & PSSCR_EC); + /* HDECR is not supposed to wake from PM state; it may have already fired */ + if (env->resume_as_sreset) { + PowerPCCPU *cpu = env_archcpu(env); + ppc_set_irq(cpu, PPC_INTERRUPT_HDECR, 0); + } + ppc_maybe_interrupt(env); } #endif /* defined(TARGET_PPC64) */ diff --git a/target/ppc/mmu-hash64.c b/target/ppc/mmu-hash64.c index 900f906990..d645c0bb94 100644 --- a/target/ppc/mmu-hash64.c +++ b/target/ppc/mmu-hash64.c @@ -770,7 +770,8 @@ static bool ppc_hash64_use_vrma(CPUPPCState *env) } } -static void ppc_hash64_set_isi(CPUState *cs, int mmu_idx, uint64_t error_code) +static void ppc_hash64_set_isi(CPUState *cs, int mmu_idx, uint64_t slb_vsid, + uint64_t error_code) { CPUPPCState *env = &POWERPC_CPU(cs)->env; bool vpm; @@ -782,13 +783,15 @@ static void ppc_hash64_set_isi(CPUState *cs, int mmu_idx, uint64_t error_code) } if (vpm && !mmuidx_hv(mmu_idx)) { cs->exception_index = POWERPC_EXCP_HISI; + env->spr[SPR_ASDR] = slb_vsid; } else { cs->exception_index = POWERPC_EXCP_ISI; } env->error_code = error_code; } -static void ppc_hash64_set_dsi(CPUState *cs, int mmu_idx, uint64_t dar, uint64_t dsisr) +static void ppc_hash64_set_dsi(CPUState *cs, int mmu_idx, uint64_t slb_vsid, + uint64_t dar, uint64_t dsisr) { CPUPPCState *env = &POWERPC_CPU(cs)->env; bool vpm; @@ -802,6 +805,7 @@ static void ppc_hash64_set_dsi(CPUState *cs, int mmu_idx, uint64_t dar, uint64_t cs->exception_index = POWERPC_EXCP_HDSI; env->spr[SPR_HDAR] = dar; env->spr[SPR_HDSISR] = dsisr; + env->spr[SPR_ASDR] = slb_vsid; } else { cs->exception_index = POWERPC_EXCP_DSI; env->spr[SPR_DAR] = dar; @@ -870,12 +874,46 @@ static target_ulong rmls_limit(PowerPCCPU *cpu) return rma_sizes[rmls]; } -static int build_vrma_slbe(PowerPCCPU *cpu, ppc_slb_t *slb) +/* Return the LLP in SLB_VSID format */ +static uint64_t get_vrma_llp(PowerPCCPU *cpu) { CPUPPCState *env = &cpu->env; - target_ulong lpcr = env->spr[SPR_LPCR]; - uint32_t vrmasd = (lpcr & LPCR_VRMASD) >> LPCR_VRMASD_SHIFT; - target_ulong vsid = SLB_VSID_VRMA | ((vrmasd << 4) & SLB_VSID_LLP_MASK); + uint64_t llp; + + if (env->mmu_model == POWERPC_MMU_3_00) { + ppc_v3_pate_t pate; + uint64_t ps, l, lp; + + /* + * ISA v3.0 removes the LPCR[VRMASD] field and puts the VRMA base + * page size (L||LP equivalent) in the PS field in the HPT
partition + * table entry. + */ + if (!ppc64_v3_get_pate(cpu, cpu->env.spr[SPR_LPIDR], &pate)) { + error_report("Bad VRMA with no partition table entry"); + return 0; + } + ps = PATE0_GET_PS(pate.dw0); + /* PS has L||LP in 3 consecutive bits, put them into SLB LLP format */ + l = (ps >> 2) & 0x1; + lp = ps & 0x3; + llp = (l << SLB_VSID_L_SHIFT) | (lp << SLB_VSID_LP_SHIFT); + + } else { + uint64_t lpcr = env->spr[SPR_LPCR]; + target_ulong vrmasd = (lpcr & LPCR_VRMASD) >> LPCR_VRMASD_SHIFT; + + /* VRMASD LLP matches SLB format, just shift and mask it */ + llp = (vrmasd << SLB_VSID_LP_SHIFT) & SLB_VSID_LLP_MASK; + } + + return llp; +} + +static int build_vrma_slbe(PowerPCCPU *cpu, ppc_slb_t *slb) +{ + uint64_t llp = get_vrma_llp(cpu); + target_ulong vsid = SLB_VSID_VRMA | llp; int i; for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) { @@ -893,8 +931,7 @@ static int build_vrma_slbe(PowerPCCPU *cpu, ppc_slb_t *slb) } } - error_report("Bad page size encoding in LPCR[VRMASD]; LPCR=0x" - TARGET_FMT_lx, lpcr); + error_report("Bad VRMA page size encoding 0x" TARGET_FMT_lx, llp); return -1; } @@ -963,13 +1000,13 @@ bool ppc_hash64_xlate(PowerPCCPU *cpu, vaddr eaddr, MMUAccessType access_type, } switch (access_type) { case MMU_INST_FETCH: - ppc_hash64_set_isi(cs, mmu_idx, SRR1_PROTFAULT); + ppc_hash64_set_isi(cs, mmu_idx, 0, SRR1_PROTFAULT); break; case MMU_DATA_LOAD: - ppc_hash64_set_dsi(cs, mmu_idx, eaddr, DSISR_PROTFAULT); + ppc_hash64_set_dsi(cs, mmu_idx, 0, eaddr, DSISR_PROTFAULT); break; case MMU_DATA_STORE: - ppc_hash64_set_dsi(cs, mmu_idx, eaddr, + ppc_hash64_set_dsi(cs, mmu_idx, 0, eaddr, DSISR_PROTFAULT | DSISR_ISSTORE); break; default: @@ -1022,7 +1059,7 @@ bool ppc_hash64_xlate(PowerPCCPU *cpu, vaddr eaddr, MMUAccessType access_type, /* 3. Check for segment level no-execute violation */ if (access_type == MMU_INST_FETCH && (slb->vsid & SLB_VSID_N)) { if (guest_visible) { - ppc_hash64_set_isi(cs, mmu_idx, SRR1_NOEXEC_GUARD); + ppc_hash64_set_isi(cs, mmu_idx, slb->vsid, SRR1_NOEXEC_GUARD); } return false; } @@ -1035,13 +1072,14 @@ bool ppc_hash64_xlate(PowerPCCPU *cpu, vaddr eaddr, MMUAccessType access_type, } switch (access_type) { case MMU_INST_FETCH: - ppc_hash64_set_isi(cs, mmu_idx, SRR1_NOPTE); + ppc_hash64_set_isi(cs, mmu_idx, slb->vsid, SRR1_NOPTE); break; case MMU_DATA_LOAD: - ppc_hash64_set_dsi(cs, mmu_idx, eaddr, DSISR_NOPTE); + ppc_hash64_set_dsi(cs, mmu_idx, slb->vsid, eaddr, DSISR_NOPTE); break; case MMU_DATA_STORE: - ppc_hash64_set_dsi(cs, mmu_idx, eaddr, DSISR_NOPTE | DSISR_ISSTORE); + ppc_hash64_set_dsi(cs, mmu_idx, slb->vsid, eaddr, + DSISR_NOPTE | DSISR_ISSTORE); break; default: g_assert_not_reached(); @@ -1075,7 +1113,7 @@ bool ppc_hash64_xlate(PowerPCCPU *cpu, vaddr eaddr, MMUAccessType access_type, if (PAGE_EXEC & ~amr_prot) { srr1 |= SRR1_IAMR; /* Access violates virt pg class key prot */ } - ppc_hash64_set_isi(cs, mmu_idx, srr1); + ppc_hash64_set_isi(cs, mmu_idx, slb->vsid, srr1); } else { int dsisr = 0; if (need_prot & ~pp_prot) { @@ -1087,7 +1125,7 @@ bool ppc_hash64_xlate(PowerPCCPU *cpu, vaddr eaddr, MMUAccessType access_type, if (need_prot & ~amr_prot) { dsisr |= DSISR_AMR; } - ppc_hash64_set_dsi(cs, mmu_idx, eaddr, dsisr); + ppc_hash64_set_dsi(cs, mmu_idx, slb->vsid, eaddr, dsisr); } return false; } diff --git a/target/ppc/mmu-hash64.h b/target/ppc/mmu-hash64.h index 1496955d38..de653fcae5 100644 --- a/target/ppc/mmu-hash64.h +++ b/target/ppc/mmu-hash64.h @@ -41,8 +41,10 @@ void ppc_hash64_finalize(PowerPCCPU *cpu); #define SLB_VSID_KP 0x0000000000000400ULL #define 
SLB_VSID_N 0x0000000000000200ULL /* no-execute */ #define SLB_VSID_L 0x0000000000000100ULL +#define SLB_VSID_L_SHIFT PPC_BIT_NR(55) #define SLB_VSID_C 0x0000000000000080ULL /* class */ #define SLB_VSID_LP 0x0000000000000030ULL +#define SLB_VSID_LP_SHIFT PPC_BIT_NR(59) #define SLB_VSID_ATTR 0x0000000000000FFFULL #define SLB_VSID_LLP_MASK (SLB_VSID_L | SLB_VSID_LP) #define SLB_VSID_4K 0x0000000000000000ULL @@ -58,6 +60,9 @@ void ppc_hash64_finalize(PowerPCCPU *cpu); #define SDR_64_HTABSIZE 0x000000000000001FULL #define PATE0_HTABORG 0x0FFFFFFFFFFC0000ULL +#define PATE0_PS PPC_BITMASK(56, 58) +#define PATE0_GET_PS(dw0) (((dw0) & PATE0_PS) >> PPC_BIT_NR(58)) + #define HPTES_PER_GROUP 8 #define HASH_PTE_SIZE_64 16 #define HASH_PTEG_SIZE_64 (HASH_PTE_SIZE_64 * HPTES_PER_GROUP) diff --git a/tests/guest-debug/run-test.py b/tests/guest-debug/run-test.py index de6106a5e5..a032e01f79 100755 --- a/tests/guest-debug/run-test.py +++ b/tests/guest-debug/run-test.py @@ -69,13 +69,10 @@ if __name__ == '__main__': # Launch QEMU with binary if "system" in args.qemu: - cmd = "%s %s %s -gdb unix:path=%s,server=on" % (args.qemu, - args.qargs, - args.binary, - socket_name) + cmd = f'{args.qemu} {args.qargs} {args.binary}' \ + f' -S -gdb unix:path={socket_name},server=on' else: - cmd = "%s %s -g %s %s" % (args.qemu, args.qargs, socket_name, - args.binary) + cmd = f'{args.qemu} {args.qargs} -g {socket_name} {args.binary}' log(output, "QEMU CMD: %s" % (cmd)) inferior = subprocess.Popen(shlex.split(cmd)) diff --git a/tests/tcg/aarch64/Makefile.target b/tests/tcg/aarch64/Makefile.target index 617f821613..681dfa077c 100644 --- a/tests/tcg/aarch64/Makefile.target +++ b/tests/tcg/aarch64/Makefile.target @@ -9,7 +9,7 @@ AARCH64_SRC=$(SRC_PATH)/tests/tcg/aarch64 VPATH += $(AARCH64_SRC) # Base architecture tests -AARCH64_TESTS=fcvt pcalign-a64 +AARCH64_TESTS=fcvt pcalign-a64 lse2-fault fcvt: LDFLAGS+=-lm diff --git a/tests/tcg/aarch64/lse2-fault.c b/tests/tcg/aarch64/lse2-fault.c new file mode 100644 index 0000000000..2187219a08 --- /dev/null +++ b/tests/tcg/aarch64/lse2-fault.c @@ -0,0 +1,38 @@ +#include <sys/mman.h> +#include <sys/shm.h> +#include <unistd.h> +#include <stdio.h> + +int main() +{ + int psize = getpagesize(); + int id; + void *p; + + /* + * We need a shared mapping to enter CF_PARALLEL mode. + * The easiest way to get that is shmat. + */ + id = shmget(IPC_PRIVATE, 2 * psize, IPC_CREAT | 0600); + if (id < 0) { + perror("shmget"); + return 2; + } + p = shmat(id, NULL, 0); + if (p == MAP_FAILED) { + perror("shmat"); + return 2; + } + + /* Protect the second page. */ + if (mprotect(p + psize, psize, PROT_NONE) < 0) { + perror("mprotect"); + return 2; + } + + /* + * Load 4 bytes, 6 bytes from the end of the page. + * On success this will load 0 from the newly allocated shm. 
+ */ + return *(int *)(p + psize - 6); +} diff --git a/tests/tcg/multiarch/gdbstub/test-proc-mappings.py b/tests/tcg/multiarch/gdbstub/test-proc-mappings.py index 7b596ac21b..5e3e5a2fb7 100644 --- a/tests/tcg/multiarch/gdbstub/test-proc-mappings.py +++ b/tests/tcg/multiarch/gdbstub/test-proc-mappings.py @@ -33,7 +33,8 @@ def run_test(): return raise report(isinstance(mappings, str), "Fetched the mappings from the inferior") - report("/sha1" in mappings, "Found the test binary name in the mappings") + # Broken with host page size > guest page size + # report("/sha1" in mappings, "Found the test binary name in the mappings") def main(): diff --git a/ui/gtk-egl.c b/ui/gtk-egl.c index 4c29ac10d0..a1060fd80f 100644 --- a/ui/gtk-egl.c +++ b/ui/gtk-egl.c @@ -246,6 +246,7 @@ void gd_egl_scanout_texture(DisplayChangeListener *dcl, eglMakeCurrent(qemu_egl_display, vc->gfx.esurface, vc->gfx.esurface, vc->gfx.ectx); + gtk_egl_set_scanout_mode(vc, true); egl_fb_setup_for_tex(&vc->gfx.guest_fb, backing_width, backing_height, backing_id, false); } diff --git a/ui/gtk-gl-area.c b/ui/gtk-gl-area.c index 1ce34a249e..52dcac161e 100644 --- a/ui/gtk-gl-area.c +++ b/ui/gtk-gl-area.c @@ -268,6 +268,7 @@ void gd_gl_area_scanout_texture(DisplayChangeListener *dcl, return; } + gtk_gl_area_set_scanout_mode(vc, true); egl_fb_setup_for_tex(&vc->gfx.guest_fb, backing_width, backing_height, backing_id, false); } diff --git a/util/interval-tree.c b/util/interval-tree.c index f2866aa7d3..53465182e6 100644 --- a/util/interval-tree.c +++ b/util/interval-tree.c @@ -797,7 +797,7 @@ IntervalTreeNode *interval_tree_iter_first(IntervalTreeRoot *root, { IntervalTreeNode *node, *leftmost; - if (!root->rb_root.rb_node) { + if (!root || !root->rb_root.rb_node) { return NULL; } diff --git a/util/selfmap.c b/util/selfmap.c index 2c14f019ce..4db5b42651 100644 --- a/util/selfmap.c +++ b/util/selfmap.c @@ -10,74 +10,98 @@ #include "qemu/cutils.h" #include "qemu/selfmap.h" -GSList *read_self_maps(void) +IntervalTreeRoot *read_self_maps(void) { - gchar *maps; - GSList *map_info = NULL; + IntervalTreeRoot *root; + gchar *maps, **lines; + guint i, nlines; - if (g_file_get_contents("/proc/self/maps", &maps, NULL, NULL)) { - gchar **lines = g_strsplit(maps, "\n", 0); - int i, entries = g_strv_length(lines); + if (!g_file_get_contents("/proc/self/maps", &maps, NULL, NULL)) { + return NULL; + } + + root = g_new0(IntervalTreeRoot, 1); + lines = g_strsplit(maps, "\n", 0); + nlines = g_strv_length(lines); + + for (i = 0; i < nlines; i++) { + gchar **fields = g_strsplit(lines[i], " ", 6); + guint nfields = g_strv_length(fields); + + if (nfields > 4) { + uint64_t start, end, offset, inode; + int errors = 0; + const char *p; - for (i = 0; i < entries; i++) { - gchar **fields = g_strsplit(lines[i], " ", 6); - if (g_strv_length(fields) > 4) { - MapInfo *e = g_new0(MapInfo, 1); - int errors = 0; - const char *end; + errors |= qemu_strtou64(fields[0], &p, 16, &start); + errors |= qemu_strtou64(p + 1, NULL, 16, &end); + errors |= qemu_strtou64(fields[2], NULL, 16, &offset); + errors |= qemu_strtou64(fields[4], NULL, 10, &inode); + + if (!errors) { + size_t dev_len, path_len; + MapInfo *e; + + dev_len = strlen(fields[3]) + 1; + if (nfields == 6) { + p = fields[5]; + p += strspn(p, " "); + path_len = strlen(p) + 1; + } else { + p = NULL; + path_len = 0; + } - errors |= qemu_strtoul(fields[0], &end, 16, &e->start); - errors |= qemu_strtoul(end + 1, NULL, 16, &e->end); + e = g_malloc0(sizeof(*e) + dev_len + path_len); + + e->itree.start = start; + 
e->itree.last = end - 1; + e->offset = offset; + e->inode = inode; e->is_read = fields[1][0] == 'r'; e->is_write = fields[1][1] == 'w'; e->is_exec = fields[1][2] == 'x'; e->is_priv = fields[1][3] == 'p'; - errors |= qemu_strtoul(fields[2], NULL, 16, &e->offset); - e->dev = g_strdup(fields[3]); - errors |= qemu_strtou64(fields[4], NULL, 10, &e->inode); - - if (!errors) { - /* - * The last field may have leading spaces which we - * need to strip. - */ - if (g_strv_length(fields) == 6) { - e->path = g_strdup(g_strchug(fields[5])); - } - map_info = g_slist_prepend(map_info, e); - } else { - g_free(e->dev); - g_free(e); + memcpy(e->dev, fields[3], dev_len); + if (path_len) { + e->path = memcpy(e->dev + dev_len, p, path_len); } - } - g_strfreev(fields); + interval_tree_insert(&e->itree, root); + } } - g_strfreev(lines); - g_free(maps); + g_strfreev(fields); } + g_strfreev(lines); + g_free(maps); - /* ensure the map data is in the same order we collected it */ - return g_slist_reverse(map_info); + return root; } /** * free_self_maps: - * @info: a GSlist + * @root: an interval tree * - * Free a list of MapInfo structures. + * Free a tree of MapInfo structures. + * Since we allocated each MapInfo in one chunk, we need not consider the + * contents and can simply free each RBNode. */ -static void free_info(gpointer data) + +static void free_rbnode(RBNode *n) { - MapInfo *e = (MapInfo *) data; - g_free(e->dev); - g_free(e->path); - g_free(e); + if (n) { + free_rbnode(n->rb_left); + free_rbnode(n->rb_right); + g_free(n); + } } -void free_self_maps(GSList *info) +void free_self_maps(IntervalTreeRoot *root) { - g_slist_free_full(info, &free_info); + if (root) { + free_rbnode(root->rb_root.rb_node); + g_free(root); + } }
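The rewritten read_self_maps() above allocates each MapInfo header, its device string, and the optional path in a single g_malloc0() block, which is why free_self_maps() can free every node without looking at its contents. Below is a plain-libc sketch of that single-allocation layout; the names map_rec and map_rec_new are invented for the example and the error handling is minimal:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Fixed header plus two strings packed into one allocation.  'dev' is
 * a flexible array member holding the device string followed,
 * optionally, by the path; 'path' points into the same block. */
typedef struct {
    unsigned long start, last;
    const char *path;           /* NULL when no path field is present */
    char dev[];
} map_rec;

static map_rec *map_rec_new(unsigned long start, unsigned long last,
                            const char *dev, const char *path)
{
    size_t dev_len = strlen(dev) + 1;
    size_t path_len = path ? strlen(path) + 1 : 0;
    map_rec *e = calloc(1, sizeof(*e) + dev_len + path_len);

    if (e) {
        e->start = start;
        e->last = last;
        memcpy(e->dev, dev, dev_len);
        if (path) {
            e->path = memcpy(e->dev + dev_len, path, path_len);
        }
    }
    return e;
}

int main(void)
{
    map_rec *e = map_rec_new(0x1000, 0x1fff, "08:01", "/usr/bin/true");

    if (e) {
        printf("%lx-%lx %s %s\n", e->start, e->last, e->dev, e->path);
        free(e);   /* one free() releases the header and both strings */
    }
    return 0;
}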
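The thunk.c hunk earlier in this diff drops the all-zero sentinel row in favor of an explicit element count, so a row whose masks are zero is now a valid table entry rather than a terminator, and callers pass the table together with its length (the _len suffix suggests the old names survive as wrappers that supply the array size, though that is not visible in this hunk). A minimal, self-contained sketch of the same length-bounded, table-driven translation; the names xlat_pair and xlat_bits and the two-row example table are illustrative, not QEMU's:

#include <stddef.h>
#include <stdio.h>

/* One table row: when (src & src_mask) == src_bits, OR dst_bits into
 * the result.  Mirrors the shape of bitmask_transtbl without the
 * QEMU headers. */
typedef struct {
    unsigned src_mask, src_bits;
    unsigned dst_mask, dst_bits;
} xlat_pair;

/* Length-bounded walk: no sentinel row is required, so the loop
 * visits exactly len rows regardless of what their masks contain. */
static unsigned xlat_bits(unsigned src, const xlat_pair *tbl, size_t len)
{
    unsigned dst = 0;
    for (size_t i = 0; i < len; ++i) {
        if ((src & tbl[i].src_mask) == tbl[i].src_bits) {
            dst |= tbl[i].dst_bits;
        }
    }
    return dst;
}

int main(void)
{
    /* Hypothetical table mapping bit 0 to bit 4 and bit 1 to bit 5. */
    static const xlat_pair tbl[] = {
        { 0x1, 0x1, 0x10, 0x10 },
        { 0x2, 0x2, 0x20, 0x20 },
    };
    /* 0x3 has both source bits set, so this prints 0x30. */
    printf("0x%x\n", xlat_bits(0x3, tbl, sizeof(tbl) / sizeof(tbl[0])));
    return 0;
}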
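In the ISA v3.0 branch of get_vrma_llp() above, the PS field of the partition table entry carries L||LP in three consecutive bits, which must be repacked into the SLB_VSID layout. A standalone sketch of that repacking, using the bit positions implied by the new mmu-hash64.h definitions (SLB_VSID_L = 0x100, hence shift 8; SLB_VSID_LP = 0x30, hence shift 4); the function name ps_to_slb_llp is made up for illustration:

#include <stdio.h>

/* ps carries L||LP in its low three bits: bit 2 is L, bits 1:0 are LP.
 * Repack into SLB_VSID form: L at bit 8, LP at bits 5:4. */
static unsigned long ps_to_slb_llp(unsigned long ps)
{
    unsigned long l  = (ps >> 2) & 0x1;
    unsigned long lp = ps & 0x3;
    return (l << 8) | (lp << 4);   /* SLB_VSID_L | SLB_VSID_LP */
}

int main(void)
{
    /* ps = 0b101: L = 1, LP = 0b01, giving 0x100 | 0x10 = 0x110. */
    printf("0x%lx\n", ps_to_slb_llp(0x5));
    return 0;
}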