diff options
| author | Arusekk <floss@arusekk.pl> | 2025-07-11 18:48:29 +0200 |
|---|---|---|
| committer | Richard Henderson <richard.henderson@linaro.org> | 2025-09-23 16:17:59 -0700 |
| commit | c8e5aed246914ff6438839350e414a3bea5a8041 (patch) | |
| tree | 7ce1d83c81e0014ec6644bda93cef60a48e6bbbe /linux-user/syscall.c | |
| parent | 2c75137623cb60f2ac3cedfff077d00dc977b842 (diff) | |
| download | focaccia-qemu-c8e5aed246914ff6438839350e414a3bea5a8041.tar.gz focaccia-qemu-c8e5aed246914ff6438839350e414a3bea5a8041.zip | |
linux-user: Add syscall dispatch support
This commit adds support for the `prctl(PR_SET_SYSCALL_USER_DISPATCH)` function in the Linux userspace emulator. It is implemented as a fully host-independent function, by forcing a SIGSYS early during syscall handling, if the PC is outside the allowed range. Since disabled SUD is indistinguishable from enabled SUD with always-allowed region length == ~0, this encoding is used instead of introducing a new flag. Tested on [uglendix][1], will probably also apply to software like tiny-wine, rpcsx, limbo, lazypoline, vicar, sysfail and endokernel, to name a few. [1]: https://sr.ht/~arusekk/uglendix Signed-off-by: Arusekk <floss@arusekk.pl> Message-ID: <20250711225226.14652-1-floss@arusekk.pl> [rth: Split out is_vdso_sigreturn region matching and other minor tweaks.] Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Diffstat (limited to 'linux-user/syscall.c')
| -rw-r--r-- | linux-user/syscall.c | 76 |
1 files changed, 75 insertions, 1 deletions
diff --git a/linux-user/syscall.c b/linux-user/syscall.c index 91360a072c..9098cdb9fa 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -6344,6 +6344,10 @@ abi_long do_arch_prctl(CPUX86State *env, int code, abi_ulong addr) #endif #ifndef PR_SET_SYSCALL_USER_DISPATCH # define PR_SET_SYSCALL_USER_DISPATCH 59 +# define PR_SYS_DISPATCH_OFF 0 +# define PR_SYS_DISPATCH_ON 1 +# define SYSCALL_DISPATCH_FILTER_ALLOW 0 +# define SYSCALL_DISPATCH_FILTER_BLOCK 1 #endif #ifndef PR_SME_SET_VL # define PR_SME_SET_VL 63 @@ -6398,6 +6402,36 @@ static abi_long do_prctl_inval1(CPUArchState *env, abi_long arg2) #define do_prctl_sme_set_vl do_prctl_inval1 #endif +static abi_long do_prctl_syscall_user_dispatch(CPUArchState *env, + abi_ulong arg2, abi_ulong arg3, + abi_ulong arg4, abi_ulong arg5) +{ + CPUState *cpu = env_cpu(env); + TaskState *ts = get_task_state(cpu); + + switch (arg2) { + case PR_SYS_DISPATCH_OFF: + if (arg3 || arg4 || arg5) { + return -TARGET_EINVAL; + } + ts->sys_dispatch_len = -1; + return 0; + case PR_SYS_DISPATCH_ON: + if (arg3 && arg3 + arg4 <= arg3) { + return -TARGET_EINVAL; + } + if (arg5 && !access_ok(cpu, VERIFY_READ, arg5, 1)) { + return -TARGET_EFAULT; + } + ts->sys_dispatch = arg3; + ts->sys_dispatch_len = arg4; + ts->sys_dispatch_selector = arg5; + return 0; + default: + return -TARGET_EINVAL; + } +} + static abi_long do_prctl(CPUArchState *env, abi_long option, abi_long arg2, abi_long arg3, abi_long arg4, abi_long arg5) { @@ -6473,6 +6507,9 @@ static abi_long do_prctl(CPUArchState *env, abi_long option, abi_long arg2, case PR_SET_UNALIGN: return do_prctl_set_unalign(env, arg2); + case PR_SET_SYSCALL_USER_DISPATCH: + return do_prctl_syscall_user_dispatch(env, arg2, arg3, arg4, arg5); + case PR_CAP_AMBIENT: case PR_CAPBSET_READ: case PR_CAPBSET_DROP: @@ -6527,7 +6564,6 @@ static abi_long do_prctl(CPUArchState *env, abi_long option, abi_long arg2, case PR_SET_MM: case PR_GET_SECCOMP: case PR_SET_SECCOMP: - case PR_SET_SYSCALL_USER_DISPATCH: case PR_GET_THP_DISABLE: case PR_SET_THP_DISABLE: case PR_GET_TSC: @@ -13897,12 +13933,46 @@ static abi_long do_syscall1(CPUArchState *cpu_env, int num, abi_long arg1, return ret; } +static bool sys_dispatch(CPUState *cpu, TaskState *ts) +{ + abi_ptr pc; + + if (likely(ts->sys_dispatch_len == -1)) { + return false; + } + + pc = cpu->cc->get_pc(cpu); + if (likely(pc - ts->sys_dispatch < ts->sys_dispatch_len)) { + return false; + } + if (unlikely(is_vdso_sigreturn(pc))) { + return false; + } + if (likely(ts->sys_dispatch_selector)) { + uint8_t sb; + if (get_user_u8(sb, ts->sys_dispatch_selector)) { + force_sig(TARGET_SIGSEGV); + return true; + } + if (likely(sb == SYSCALL_DISPATCH_FILTER_ALLOW)) { + return false; + } + if (unlikely(sb != SYSCALL_DISPATCH_FILTER_BLOCK)) { + force_sig(TARGET_SIGSYS); + return true; + } + } + force_sig_fault(TARGET_SIGSYS, TARGET_SYS_USER_DISPATCH, pc); + return true; +} + abi_long do_syscall(CPUArchState *cpu_env, int num, abi_long arg1, abi_long arg2, abi_long arg3, abi_long arg4, abi_long arg5, abi_long arg6, abi_long arg7, abi_long arg8) { CPUState *cpu = env_cpu(cpu_env); + TaskState *ts = get_task_state(cpu); abi_long ret; #ifdef DEBUG_ERESTARTSYS @@ -13919,6 +13989,10 @@ abi_long do_syscall(CPUArchState *cpu_env, int num, abi_long arg1, } #endif + if (sys_dispatch(cpu, ts)) { + return -QEMU_ESIGRETURN; + } + record_syscall_start(cpu, num, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8); |