From 6f2231e9b0931e1998d9ed0c509adf7aedc02db2 Mon Sep 17 00:00:00 2001 From: Marc-André Lureau Date: Wed, 22 Aug 2018 19:02:47 +0200 Subject: seccomp: use SIGSYS signal instead of killing the thread MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The seccomp action SCMP_ACT_KILL results in immediate termination of the thread that made the bad system call. However, qemu being multi-threaded, it keeps running. There is no easy way for parent process / management layer (libvirt) to know about that situation. Instead, the default SIGSYS handler when invoked with SCMP_ACT_TRAP will terminate the program and core dump. This may not be the most secure solution, but probably better than just killing the offending thread. SCMP_ACT_KILL_PROCESS has been added in Linux 4.14 to improve the situation, which I propose to use by default if available in the next patch. Related to: https://bugzilla.redhat.com/show_bug.cgi?id=1594456 Signed-off-by: Marc-André Lureau Reviewed-by: Daniel P. Berrangé Acked-by: Eduardo Otubo --- qemu-seccomp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'qemu-seccomp.c') diff --git a/qemu-seccomp.c b/qemu-seccomp.c index 9cd8eb9499..b117a92559 100644 --- a/qemu-seccomp.c +++ b/qemu-seccomp.c @@ -125,7 +125,7 @@ static int seccomp_start(uint32_t seccomp_opts) continue; } - rc = seccomp_rule_add_array(ctx, SCMP_ACT_KILL, blacklist[i].num, + rc = seccomp_rule_add_array(ctx, SCMP_ACT_TRAP, blacklist[i].num, blacklist[i].narg, blacklist[i].arg_cmp); if (rc < 0) { goto seccomp_return; -- cgit 1.4.1 From bda08a5764d470f101fa38635d30b41179a313e1 Mon Sep 17 00:00:00 2001 From: Marc-André Lureau Date: Wed, 22 Aug 2018 19:02:48 +0200 Subject: seccomp: prefer SCMP_ACT_KILL_PROCESS if available MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The upcoming libseccomp release should have SCMP_ACT_KILL_PROCESS action (https://github.com/seccomp/libseccomp/issues/96). SCMP_ACT_KILL_PROCESS is preferable to immediately terminate the offending process, rather than having the SIGSYS handler running. Use SECCOMP_GET_ACTION_AVAIL to check availability of kernel support, as libseccomp will fallback on SCMP_ACT_KILL otherwise, and we still prefer SCMP_ACT_TRAP. Signed-off-by: Marc-André Lureau Reviewed-by: Daniel P. Berrangé Acked-by: Eduardo Otubo --- qemu-seccomp.c | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) (limited to 'qemu-seccomp.c') diff --git a/qemu-seccomp.c b/qemu-seccomp.c index b117a92559..f0c833f3ca 100644 --- a/qemu-seccomp.c +++ b/qemu-seccomp.c @@ -20,6 +20,7 @@ #include #include #include "sysemu/seccomp.h" +#include /* For some architectures (notably ARM) cacheflush is not supported until * libseccomp 2.2.3, but configure enforces that we are using a more recent @@ -107,12 +108,40 @@ static const struct QemuSeccompSyscall blacklist[] = { { SCMP_SYS(sched_get_priority_min), QEMU_SECCOMP_SET_RESOURCECTL }, }; +static inline __attribute__((unused)) int +qemu_seccomp(unsigned int operation, unsigned int flags, void *args) +{ +#ifdef __NR_seccomp + return syscall(__NR_seccomp, operation, flags, args); +#else + errno = ENOSYS; + return -1; +#endif +} + +static uint32_t qemu_seccomp_get_kill_action(void) +{ +#if defined(SECCOMP_GET_ACTION_AVAIL) && defined(SCMP_ACT_KILL_PROCESS) && \ + defined(SECCOMP_RET_KILL_PROCESS) + { + uint32_t action = SECCOMP_RET_KILL_PROCESS; + + if (qemu_seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &action) == 0) { + return SCMP_ACT_KILL_PROCESS; + } + } +#endif + + return SCMP_ACT_TRAP; +} + static int seccomp_start(uint32_t seccomp_opts) { int rc = 0; unsigned int i = 0; scmp_filter_ctx ctx; + uint32_t action = qemu_seccomp_get_kill_action(); ctx = seccomp_init(SCMP_ACT_ALLOW); if (ctx == NULL) { @@ -125,7 +154,7 @@ static int seccomp_start(uint32_t seccomp_opts) continue; } - rc = seccomp_rule_add_array(ctx, SCMP_ACT_TRAP, blacklist[i].num, + rc = seccomp_rule_add_array(ctx, action, blacklist[i].num, blacklist[i].narg, blacklist[i].arg_cmp); if (rc < 0) { goto seccomp_return; -- cgit 1.4.1 From 70dfabeaa79ba4d7a3b699abe1a047c8012db114 Mon Sep 17 00:00:00 2001 From: Marc-André Lureau Date: Wed, 22 Aug 2018 19:02:50 +0200 Subject: seccomp: set the seccomp filter to all threads MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When using "-seccomp on", the seccomp policy is only applied to the main thread, the vcpu worker thread and other worker threads created after seccomp policy is applied; the seccomp policy is not applied to e.g. the RCU thread because it is created before the seccomp policy is applied and SECCOMP_FILTER_FLAG_TSYNC isn't used. This can be verified with for task in /proc/`pidof qemu`/task/*; do cat $task/status | grep Secc ; done Seccomp: 2 Seccomp: 0 Seccomp: 0 Seccomp: 2 Seccomp: 2 Seccomp: 2 Starting with libseccomp 2.2.0 and kernel >= 3.17, we can use seccomp_attr_set(ctx, > SCMP_FLTATR_CTL_TSYNC, 1) to update the policy on all threads. libseccomp requirement was bumped to 2.2.0 in previous patch. libseccomp should fail to set the filter if it can't honour SCMP_FLTATR_CTL_TSYNC (untested), and thus -sandbox will now fail on kernel < 3.17. Signed-off-by: Marc-André Lureau Acked-by: Eduardo Otubo --- qemu-seccomp.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'qemu-seccomp.c') diff --git a/qemu-seccomp.c b/qemu-seccomp.c index f0c833f3ca..4729eb107f 100644 --- a/qemu-seccomp.c +++ b/qemu-seccomp.c @@ -149,6 +149,11 @@ static int seccomp_start(uint32_t seccomp_opts) goto seccomp_return; } + rc = seccomp_attr_set(ctx, SCMP_FLTATR_CTL_TSYNC, 1); + if (rc != 0) { + goto seccomp_return; + } + for (i = 0; i < ARRAY_SIZE(blacklist); i++) { if (!(seccomp_opts & blacklist[i].set)) { continue; -- cgit 1.4.1