From 9651cead2f1bb34b9b72f9c2c5dc81baea2b082e Mon Sep 17 00:00:00 2001 From: Michael Vogt Date: Tue, 1 Oct 2024 17:14:53 +0200 Subject: linux-user: add openat2 support in linux-user This commit adds support for the `openat2()` syscall in the `linux-user` userspace emulator. It is implemented by extracting a new helper `maybe_do_fake_open()` out of the exiting `do_guest_openat()` and share that with the new `do_guest_openat2()`. Unfortunately we cannot just make do_guest_openat2() a superset of do_guest_openat() because the openat2() syscall is stricter with the argument checking and will return an error for invalid flags or mode combinations (which open()/openat() will ignore). The implementation is similar to SYSCALL_DEFINE(openat2), i.e. a new `copy_struct_from_user()` is used that works the same as the kernels version to support backwards-compatibility for struct syscall argument. Instead of including openat2.h we create a copy of `open_how` as `open_how_ver0` to ensure that if the structure grows we can log a LOG_UNIMP warning. Note that in this commit using openat2() for a "faked" file in /proc will honor the "resolve" flags for RESOLVE_NO_{MAGIC,SYM}LINKS for path based access to /proc/self/exe (which is the only magic link we support for faked files). Note it will not catch special access via e.g. dirfd. This is not great but it seems similar to the exiting behavior when openat() is called with a dirfd to "/proc". Here too the fake file lookup may not catch the special file because no dirfd is used to determine if the path is in /proc. Signed-off-by: Michael Vogt Buglink: https://github.com/osbuild/bootc-image-builder/issues/619 Reviewed-by: Laurent Vivier Message-ID: <1c2c8c9db3731ed4c6fd9b10c63637c3e4caf8f5.1727795334.git.mvogt@redhat.com> Signed-off-by: Richard Henderson --- linux-user/syscall.c | 105 ++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 103 insertions(+), 2 deletions(-) (limited to 'linux-user/syscall.c') diff --git a/linux-user/syscall.c b/linux-user/syscall.c index a666986189..2febc3bc3f 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -602,6 +602,34 @@ static int check_zeroed_user(abi_long addr, size_t ksize, size_t usize) return 1; } +/* + * Copies a target struct to a host struct, in a way that guarantees + * backwards-compatibility for struct syscall arguments. + * + * Similar to kernels uaccess.h:copy_struct_from_user() + */ +static int +copy_struct_from_user(void *dst, size_t ksize, abi_ptr src, size_t usize) +{ + size_t size = MIN(ksize, usize); + size_t rest = MAX(ksize, usize) - size; + + /* Deal with trailing bytes. */ + if (usize < ksize) { + memset(dst + size, 0, rest); + } else if (usize > ksize) { + int ret = check_zeroed_user(src, ksize, usize); + if (ret <= 0) { + return ret ?: -TARGET_E2BIG; + } + } + /* Copy the interoperable parts of the struct. */ + if (copy_from_user(dst, src, size)) { + return -TARGET_EFAULT; + } + return 0; +} + #define safe_syscall0(type, name) \ static type safe_##name(void) \ { \ @@ -653,6 +681,15 @@ safe_syscall3(ssize_t, read, int, fd, void *, buff, size_t, count) safe_syscall3(ssize_t, write, int, fd, const void *, buff, size_t, count) safe_syscall4(int, openat, int, dirfd, const char *, pathname, \ int, flags, mode_t, mode) + +struct open_how_ver0 { + __u64 flags; + __u64 mode; + __u64 resolve; +}; +safe_syscall4(int, openat2, int, dirfd, const char *, pathname, \ + const struct open_how_ver0 *, how, size_t, size) + #if defined(TARGET_NR_wait4) || defined(TARGET_NR_waitpid) safe_syscall4(pid_t, wait4, pid_t, pid, int *, status, int, options, \ struct rusage *, rusage) @@ -8332,8 +8369,9 @@ static int open_net_route(CPUArchState *cpu_env, int fd) } #endif -int do_guest_openat(CPUArchState *cpu_env, int dirfd, const char *fname, - int flags, mode_t mode, bool safe) +static int maybe_do_fake_open(CPUArchState *cpu_env, int dirfd, + const char *fname, int flags, mode_t mode, + int openat2_resolve, bool safe) { g_autofree char *proc_name = NULL; const char *pathname; @@ -8370,6 +8408,12 @@ int do_guest_openat(CPUArchState *cpu_env, int dirfd, const char *fname, } if (is_proc_myself(pathname, "exe")) { + /* Honor openat2 resolve flags */ + if ((openat2_resolve & RESOLVE_NO_MAGICLINKS) || + (openat2_resolve & RESOLVE_NO_SYMLINKS)) { + errno = ELOOP; + return -1; + } if (safe) { return safe_openat(dirfd, exec_path, flags, mode); } else { @@ -8416,6 +8460,17 @@ int do_guest_openat(CPUArchState *cpu_env, int dirfd, const char *fname, return fd; } + return -2; +} + +int do_guest_openat(CPUArchState *cpu_env, int dirfd, const char *pathname, + int flags, mode_t mode, bool safe) +{ + int fd = maybe_do_fake_open(cpu_env, dirfd, pathname, flags, mode, 0, safe); + if (fd > -2) { + return fd; + } + if (safe) { return safe_openat(dirfd, path(pathname), flags, mode); } else { @@ -8423,6 +8478,49 @@ int do_guest_openat(CPUArchState *cpu_env, int dirfd, const char *fname, } } + +static int do_openat2(CPUArchState *cpu_env, abi_long dirfd, + abi_ptr guest_pathname, abi_ptr guest_open_how, + abi_ulong guest_size) +{ + struct open_how_ver0 how = {0}; + char *pathname; + int ret; + + if (guest_size < sizeof(struct target_open_how_ver0)) { + return -TARGET_EINVAL; + } + ret = copy_struct_from_user(&how, sizeof(how), guest_open_how, guest_size); + if (ret) { + if (ret == -TARGET_E2BIG) { + qemu_log_mask(LOG_UNIMP, + "Unimplemented openat2 open_how size: " + TARGET_ABI_FMT_lu "\n", guest_size); + } + return ret; + } + pathname = lock_user_string(guest_pathname); + if (!pathname) { + return -TARGET_EFAULT; + } + + how.flags = target_to_host_bitmask(tswap64(how.flags), fcntl_flags_tbl); + how.mode = tswap64(how.mode); + how.resolve = tswap64(how.resolve); + int fd = maybe_do_fake_open(cpu_env, dirfd, pathname, how.flags, how.mode, + how.resolve, true); + if (fd > -2) { + ret = get_errno(fd); + } else { + ret = get_errno(safe_openat2(dirfd, pathname, &how, + sizeof(struct open_how_ver0))); + } + + fd_trans_unregister(ret); + unlock_user(pathname, guest_pathname, 0); + return ret; +} + ssize_t do_guest_readlink(const char *pathname, char *buf, size_t bufsiz) { ssize_t ret; @@ -9195,6 +9293,9 @@ static abi_long do_syscall1(CPUArchState *cpu_env, int num, abi_long arg1, fd_trans_unregister(ret); unlock_user(p, arg2, 0); return ret; + case TARGET_NR_openat2: + ret = do_openat2(cpu_env, arg1, arg2, arg3, arg4); + return ret; #if defined(TARGET_NR_name_to_handle_at) && defined(CONFIG_OPEN_BY_HANDLE) case TARGET_NR_name_to_handle_at: ret = do_name_to_handle_at(arg1, arg2, arg3, arg4, arg5); -- cgit 1.4.1 From 9729930344687c7077531ab07cb9dc275795b413 Mon Sep 17 00:00:00 2001 From: Michael Vogt Date: Tue, 1 Oct 2024 17:14:54 +0200 Subject: linux-user: add strace support for openat2 This commit adds support for the `openat2()` to `QEMU_STRACE`. It will use the `openat2.h` header if available to create user readable flags for the `resolve` argument but does not require the header otherwise. It also makes `copy_struct_from_user()` available via `qemu.h` and `open_how_ver0` via `syscall_defs.h` so that strace.c can use them. Signed-off-by: Michael Vogt Reviewed-by: Laurent Vivier Message-ID: [rth: Add braces around the expanded how structure, like strace(3)] Signed-off-by: Richard Henderson --- linux-user/qemu.h | 9 +++++++++ linux-user/strace.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++ linux-user/strace.list | 3 +++ linux-user/syscall.c | 8 +------- linux-user/syscall_defs.h | 5 +++++ meson.build | 1 + 6 files changed, 66 insertions(+), 7 deletions(-) (limited to 'linux-user/syscall.c') diff --git a/linux-user/qemu.h b/linux-user/qemu.h index 2e90a97175..98ad848ab2 100644 --- a/linux-user/qemu.h +++ b/linux-user/qemu.h @@ -313,6 +313,15 @@ static inline bool access_ok(CPUState *cpu, int type, int copy_from_user(void *hptr, abi_ulong gaddr, ssize_t len); int copy_to_user(abi_ulong gaddr, void *hptr, ssize_t len); +/* + * copy_struct_from_user() copies a target struct to a host struct, in + * a way that guarantees backwards-compatibility for struct syscall + * arguments. + * + * Similar to kernels uaccess.h:copy_struct_from_user() + */ +int copy_struct_from_user(void *dst, size_t ksize, abi_ptr src, size_t usize); + /* Functions for accessing guest memory. The tget and tput functions read/write single values, byteswapping as necessary. The lock_user function gets a pointer to a contiguous area of guest memory, but does not perform diff --git a/linux-user/strace.c b/linux-user/strace.c index b4d1098170..d3cdd09dc1 100644 --- a/linux-user/strace.c +++ b/linux-user/strace.c @@ -13,6 +13,9 @@ #include #include #include +#ifdef HAVE_OPENAT2_H +#include +#endif #include #include "qemu.h" #include "user-internals.h" @@ -1063,6 +1066,18 @@ UNUSED static const struct flags open_flags[] = { FLAG_END, }; +UNUSED static const struct flags openat2_resolve_flags[] = { +#ifdef HAVE_OPENAT2_H + FLAG_GENERIC(RESOLVE_NO_XDEV), + FLAG_GENERIC(RESOLVE_NO_MAGICLINKS), + FLAG_GENERIC(RESOLVE_NO_SYMLINKS), + FLAG_GENERIC(RESOLVE_BENEATH), + FLAG_GENERIC(RESOLVE_IN_ROOT), + FLAG_GENERIC(RESOLVE_CACHED), +#endif + FLAG_END, +}; + UNUSED static const struct flags mount_flags[] = { #ifdef MS_BIND FLAG_GENERIC(MS_BIND), @@ -3483,6 +3498,38 @@ print_openat(CPUArchState *cpu_env, const struct syscallname *name, } #endif +#ifdef TARGET_NR_openat2 +static void +print_openat2(CPUArchState *cpu_env, const struct syscallname *name, + abi_long arg0, abi_long arg1, abi_long arg2, + abi_long arg3, abi_long arg4, abi_long arg5) +{ + struct open_how_ver0 how; + + print_syscall_prologue(name); + print_at_dirfd(arg0, 0); + print_string(arg1, 0); + + if ((abi_ulong)arg3 >= sizeof(struct target_open_how_ver0) && + copy_struct_from_user(&how, sizeof(how), arg2, arg3) == 0) { + how.flags = tswap64(how.flags); + how.mode = tswap64(how.mode); + how.resolve = tswap64(how.resolve); + qemu_log("{"); + print_open_flags(how.flags, 0); + if (how.flags & TARGET_O_CREAT) { + print_file_mode(how.mode, 0); + } + print_flags(openat2_resolve_flags, how.resolve, 1); + qemu_log("},"); + } else { + print_pointer(arg2, 0); + } + print_raw_param(TARGET_ABI_FMT_lu, arg3, 1); + print_syscall_epilogue(name); +} +#endif + #ifdef TARGET_NR_pidfd_send_signal static void print_pidfd_send_signal(CPUArchState *cpu_env, const struct syscallname *name, diff --git a/linux-user/strace.list b/linux-user/strace.list index dfd4237d14..ef658224fc 100644 --- a/linux-user/strace.list +++ b/linux-user/strace.list @@ -715,6 +715,9 @@ #ifdef TARGET_NR_openat { TARGET_NR_openat, "openat" , NULL, print_openat, NULL }, #endif +#ifdef TARGET_NR_openat2 +{ TARGET_NR_openat2, "openat2" , NULL, print_openat2, NULL }, +#endif #ifdef TARGET_NR_osf_adjtime { TARGET_NR_osf_adjtime, "osf_adjtime" , NULL, NULL, NULL }, #endif diff --git a/linux-user/syscall.c b/linux-user/syscall.c index 2febc3bc3f..1354e75694 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -608,8 +608,7 @@ static int check_zeroed_user(abi_long addr, size_t ksize, size_t usize) * * Similar to kernels uaccess.h:copy_struct_from_user() */ -static int -copy_struct_from_user(void *dst, size_t ksize, abi_ptr src, size_t usize) +int copy_struct_from_user(void *dst, size_t ksize, abi_ptr src, size_t usize) { size_t size = MIN(ksize, usize); size_t rest = MAX(ksize, usize) - size; @@ -682,11 +681,6 @@ safe_syscall3(ssize_t, write, int, fd, const void *, buff, size_t, count) safe_syscall4(int, openat, int, dirfd, const char *, pathname, \ int, flags, mode_t, mode) -struct open_how_ver0 { - __u64 flags; - __u64 mode; - __u64 resolve; -}; safe_syscall4(int, openat2, int, dirfd, const char *, pathname, \ const struct open_how_ver0 *, how, size_t, size) diff --git a/linux-user/syscall_defs.h b/linux-user/syscall_defs.h index de5091c977..0ade83745e 100644 --- a/linux-user/syscall_defs.h +++ b/linux-user/syscall_defs.h @@ -2749,6 +2749,11 @@ struct target_sched_param { }; /* from kernel's include/uapi/linux/openat2.h */ +struct open_how_ver0 { + __u64 flags; + __u64 mode; + __u64 resolve; +}; struct target_open_how_ver0 { abi_ullong flags; abi_ullong mode; diff --git a/meson.build b/meson.build index 33954b3eba..4ea1984fc5 100644 --- a/meson.build +++ b/meson.build @@ -2481,6 +2481,7 @@ config_host_data.set('CONFIG_LINUX_MAGIC_H', cc.has_header('linux/magic.h')) config_host_data.set('CONFIG_VALGRIND_H', cc.has_header('valgrind/valgrind.h')) config_host_data.set('HAVE_BTRFS_H', cc.has_header('linux/btrfs.h')) config_host_data.set('HAVE_DRM_H', cc.has_header('libdrm/drm.h')) +config_host_data.set('HAVE_OPENAT2_H', cc.has_header('linux/openat2.h')) config_host_data.set('HAVE_PTY_H', cc.has_header('pty.h')) config_host_data.set('HAVE_SYS_DISK_H', cc.has_header('sys/disk.h')) config_host_data.set('HAVE_SYS_IOCCOM_H', cc.has_header('sys/ioccom.h')) -- cgit 1.4.1