From c6cc059eca18d9f6e4e26bb8b6d1135ddb35d81a Mon Sep 17 00:00:00 2001 From: Akihiko Odaki Date: Wed, 2 Aug 2023 16:17:49 +0900 Subject: linux-user: Do not call get_errno() in do_brk() Later the returned value is compared with -1, and negated errno is not expected. Fixes: 00faf08c95 ("linux-user: Don't use MAP_FIXED in do_brk()") Reviewed-by: Helge Deller Signed-off-by: Akihiko Odaki Message-Id: <20230802071754.14876-4-akihiko.odaki@daynix.com> Signed-off-by: Richard Henderson --- linux-user/syscall.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'linux-user/syscall.c') diff --git a/linux-user/syscall.c b/linux-user/syscall.c index 95727a816a..b08276bdf8 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -862,9 +862,9 @@ abi_long do_brk(abi_ulong brk_val) */ if (new_host_brk_page > brk_page) { new_alloc_size = new_host_brk_page - brk_page; - mapped_addr = get_errno(target_mmap(brk_page, new_alloc_size, - PROT_READ|PROT_WRITE, - MAP_ANON|MAP_PRIVATE, 0, 0)); + mapped_addr = target_mmap(brk_page, new_alloc_size, + PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE, -1, 0); } else { new_alloc_size = 0; mapped_addr = brk_page; -- cgit 1.4.1 From e69e032d1a8ee8d754ca119009a3c2c997f8bb30 Mon Sep 17 00:00:00 2001 From: Akihiko Odaki Date: Wed, 2 Aug 2023 16:17:50 +0900 Subject: linux-user: Use MAP_FIXED_NOREPLACE for do_brk() MAP_FIXED_NOREPLACE can ensure the mapped address is fixed without concerning that the new mapping overwrites something else. Signed-off-by: Akihiko Odaki Message-Id: <20230802071754.14876-5-akihiko.odaki@daynix.com> Reviewed-by: Richard Henderson Signed-off-by: Richard Henderson --- linux-user/syscall.c | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) (limited to 'linux-user/syscall.c') diff --git a/linux-user/syscall.c b/linux-user/syscall.c index b08276bdf8..f64024273f 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -854,17 +854,12 @@ abi_long do_brk(abi_ulong brk_val) return target_brk; } - /* We need to allocate more memory after the brk... Note that - * we don't use MAP_FIXED because that will map over the top of - * any existing mapping (like the one with the host libc or qemu - * itself); instead we treat "mapped but at wrong address" as - * a failure and unmap again. - */ if (new_host_brk_page > brk_page) { new_alloc_size = new_host_brk_page - brk_page; mapped_addr = target_mmap(brk_page, new_alloc_size, PROT_READ | PROT_WRITE, - MAP_ANON | MAP_PRIVATE, -1, 0); + MAP_FIXED_NOREPLACE | MAP_ANON | MAP_PRIVATE, + -1, 0); } else { new_alloc_size = 0; mapped_addr = brk_page; @@ -883,12 +878,6 @@ abi_long do_brk(abi_ulong brk_val) target_brk = brk_val; brk_page = new_host_brk_page; return target_brk; - } else if (mapped_addr != -1) { - /* Mapped but at wrong address, meaning there wasn't actually - * enough space for this brk. - */ - target_munmap(mapped_addr, new_alloc_size); - mapped_addr = -1; } #if defined(TARGET_ALPHA) -- cgit 1.4.1 From cb9d5d1fda0bc2312fc0c779b4ea1d7bf826f31f Mon Sep 17 00:00:00 2001 From: Akihiko Odaki Date: Wed, 2 Aug 2023 16:17:51 +0900 Subject: linux-user: Do nothing if too small brk is specified Linux 6.4.7 does nothing when a value smaller than the initial brk is specified. Fixes: 86f04735ac ("linux-user: Fix brk() to release pages") Reviewed-by: Helge Deller Signed-off-by: Akihiko Odaki Message-Id: <20230802071754.14876-6-akihiko.odaki@daynix.com> Signed-off-by: Richard Henderson --- linux-user/syscall.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'linux-user/syscall.c') diff --git a/linux-user/syscall.c b/linux-user/syscall.c index f64024273f..e1436a3962 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -820,14 +820,14 @@ abi_long do_brk(abi_ulong brk_val) /* brk pointers are always untagged */ - /* return old brk value if brk_val unchanged or zero */ - if (!brk_val || brk_val == target_brk) { + /* return old brk value if brk_val unchanged */ + if (brk_val == target_brk) { return target_brk; } /* do not allow to shrink below initial brk value */ if (brk_val < initial_target_brk) { - brk_val = initial_target_brk; + return target_brk; } new_brk = TARGET_PAGE_ALIGN(brk_val); -- cgit 1.4.1 From 2aea137a425a87b930a33590177b04368fd7cc12 Mon Sep 17 00:00:00 2001 From: Akihiko Odaki Date: Wed, 2 Aug 2023 16:17:52 +0900 Subject: linux-user: Do not align brk with host page size do_brk() minimizes calls into target_mmap() by aligning the address with host page size, which is potentially larger than the target page size. However, the current implementation of this optimization has two bugs: - The start of brk is rounded up with the host page size while brk advertises an address aligned with the target page size as the beginning of brk. This makes the beginning of brk unmapped. - Content clearing after mapping is flawed. The size to clear is specified as HOST_PAGE_ALIGN(brk_page) - brk_page, but brk_page is aligned with the host page size so it is always zero. This optimization actually has no practical benefit. It makes difference when brk() is called multiple times with values in a range of the host page size. However, sophisticated memory allocators try to avoid to make such frequent brk() calls. For example, glibc 2.37 calls brk() to shrink the heap only when there is a room more than 128 KiB. It is rare to have a page size larger than 128 KiB if it happens. Let's remove the optimization to fix the bugs and make the code simpler. Fixes: 86f04735ac ("linux-user: Fix brk() to release pages") Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1616 Signed-off-by: Akihiko Odaki Message-Id: <20230802071754.14876-7-akihiko.odaki@daynix.com> Reviewed-by: Richard Henderson Signed-off-by: Richard Henderson --- linux-user/elfload.c | 4 ++-- linux-user/syscall.c | 54 ++++++++++++---------------------------------------- 2 files changed, 14 insertions(+), 44 deletions(-) (limited to 'linux-user/syscall.c') diff --git a/linux-user/elfload.c b/linux-user/elfload.c index a299ba7300..36e4026f05 100644 --- a/linux-user/elfload.c +++ b/linux-user/elfload.c @@ -3679,8 +3679,8 @@ int load_elf_binary(struct linux_binprm *bprm, struct image_info *info) * to mmap pages in this space. */ if (info->reserve_brk) { - abi_ulong start_brk = HOST_PAGE_ALIGN(info->brk); - abi_ulong end_brk = HOST_PAGE_ALIGN(info->brk + info->reserve_brk); + abi_ulong start_brk = TARGET_PAGE_ALIGN(info->brk); + abi_ulong end_brk = TARGET_PAGE_ALIGN(info->brk + info->reserve_brk); target_munmap(start_brk, end_brk - start_brk); } diff --git a/linux-user/syscall.c b/linux-user/syscall.c index e1436a3962..7c2c2f6e2f 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -802,81 +802,51 @@ static inline int host_to_target_sock_type(int host_type) } static abi_ulong target_brk, initial_target_brk; -static abi_ulong brk_page; void target_set_brk(abi_ulong new_brk) { target_brk = TARGET_PAGE_ALIGN(new_brk); initial_target_brk = target_brk; - brk_page = HOST_PAGE_ALIGN(target_brk); } /* do_brk() must return target values and target errnos. */ abi_long do_brk(abi_ulong brk_val) { abi_long mapped_addr; - abi_ulong new_alloc_size; - abi_ulong new_brk, new_host_brk_page; + abi_ulong new_brk; + abi_ulong old_brk; /* brk pointers are always untagged */ - /* return old brk value if brk_val unchanged */ - if (brk_val == target_brk) { - return target_brk; - } - /* do not allow to shrink below initial brk value */ if (brk_val < initial_target_brk) { return target_brk; } new_brk = TARGET_PAGE_ALIGN(brk_val); - new_host_brk_page = HOST_PAGE_ALIGN(brk_val); + old_brk = TARGET_PAGE_ALIGN(target_brk); - /* brk_val and old target_brk might be on the same page */ - if (new_brk == TARGET_PAGE_ALIGN(target_brk)) { - /* empty remaining bytes in (possibly larger) host page */ - memset(g2h_untagged(new_brk), 0, new_host_brk_page - new_brk); + /* new and old target_brk might be on the same page */ + if (new_brk == old_brk) { target_brk = brk_val; return target_brk; } /* Release heap if necesary */ - if (new_brk < target_brk) { - /* empty remaining bytes in (possibly larger) host page */ - memset(g2h_untagged(new_brk), 0, new_host_brk_page - new_brk); - - /* free unused host pages and set new brk_page */ - target_munmap(new_host_brk_page, brk_page - new_host_brk_page); - brk_page = new_host_brk_page; + if (new_brk < old_brk) { + target_munmap(new_brk, old_brk - new_brk); target_brk = brk_val; return target_brk; } - if (new_host_brk_page > brk_page) { - new_alloc_size = new_host_brk_page - brk_page; - mapped_addr = target_mmap(brk_page, new_alloc_size, - PROT_READ | PROT_WRITE, - MAP_FIXED_NOREPLACE | MAP_ANON | MAP_PRIVATE, - -1, 0); - } else { - new_alloc_size = 0; - mapped_addr = brk_page; - } - - if (mapped_addr == brk_page) { - /* Heap contents are initialized to zero, as for anonymous - * mapped pages. Technically the new pages are already - * initialized to zero since they *are* anonymous mapped - * pages, however we have to take care with the contents that - * come from the remaining part of the previous page: it may - * contains garbage data due to a previous heap usage (grown - * then shrunken). */ - memset(g2h_untagged(brk_page), 0, HOST_PAGE_ALIGN(brk_page) - brk_page); + mapped_addr = target_mmap(old_brk, new_brk - old_brk, + PROT_READ | PROT_WRITE, + MAP_FIXED_NOREPLACE | MAP_ANON | MAP_PRIVATE, + -1, 0); + if (mapped_addr == old_brk) { target_brk = brk_val; - brk_page = new_host_brk_page; return target_brk; } -- cgit 1.4.1