Skip to content

Commit b0c9bfb

Browse files
mixikees
authored andcommitted
selftests/seccomp: Add a test for the WAIT_KILLABLE_RECV fast reply race
If WAIT_KILLABLE_RECV was specified, and an event is received, the tracee's syscall is not supposed to be interruptible. This was not properly ensured if the reply was sent too fast, and an interrupting signal was received before the reply was processed on the tracee side. Add a test for this, that consists of: - a tracee with a timer that keeps sending it signals while repeatedly running a traced syscall in a loop, - a tracer that repeatedly handles all syscalls from the tracee in a loop, and - a shared pipe between both, on which the tracee sends one byte per syscall attempted and the tracer reads one byte per syscall handled. If the syscall for the tracee is restarted after the tracer received the event for it due to this bug, the tracee will not have sent a second token on the pipe, which the tracer will notice and fail the test. The tests also uses SECCOMP_IOCTL_NOTIF_ADDFD with SECCOMP_ADDFD_FLAG_SEND for the reply, as the fix for the bug has an additional code path change for handling addfd, which would not be exercised by a simple SECCOMP_IOCTL_NOTIF_SEND, and it is possible to fix the bug while leaving the same race intact for the addfd case. This test is not guaranteed to reproduce the bug on every run, but the parameters (signal frequency and number of repeated syscalls) have been chosen so that on my machine this test: - takes ~0.8s in the good case (+1s in the failure case), and - detects the bug in 999 of 1000 runs. Signed-off-by: Johannes Nixdorf <johannes@nixdorf.dev> Link: https://lore.kernel.org/r/20250725-seccomp-races-v2-2-cf8b9d139596@nixdorf.dev Signed-off-by: Kees Cook <kees@kernel.org>
1 parent cce436a commit b0c9bfb

1 file changed

Lines changed: 131 additions & 0 deletions

File tree

tools/testing/selftests/seccomp/seccomp_bpf.c

Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include <linux/filter.h>
2525
#include <sys/prctl.h>
2626
#include <sys/ptrace.h>
27+
#include <sys/time.h>
2728
#include <sys/user.h>
2829
#include <linux/prctl.h>
2930
#include <linux/ptrace.h>
@@ -3547,6 +3548,10 @@ static void signal_handler(int signal)
35473548
perror("write from signal");
35483549
}
35493550

3551+
static void signal_handler_nop(int signal)
3552+
{
3553+
}
3554+
35503555
TEST(user_notification_signal)
35513556
{
35523557
pid_t pid;
@@ -4819,6 +4824,132 @@ TEST(user_notification_wait_killable_fatal)
48194824
EXPECT_EQ(SIGTERM, WTERMSIG(status));
48204825
}
48214826

4827+
/* Ensure signals after the reply do not interrupt */
4828+
TEST(user_notification_wait_killable_after_reply)
4829+
{
4830+
int i, max_iter = 100000;
4831+
int listener, status;
4832+
int pipe_fds[2];
4833+
pid_t pid;
4834+
long ret;
4835+
4836+
ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
4837+
ASSERT_EQ(0, ret)
4838+
{
4839+
TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
4840+
}
4841+
4842+
listener = user_notif_syscall(
4843+
__NR_dup, SECCOMP_FILTER_FLAG_NEW_LISTENER |
4844+
SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV);
4845+
ASSERT_GE(listener, 0);
4846+
4847+
/*
4848+
* Used to count invocations. One token is transferred from the child
4849+
* to the parent per syscall invocation, the parent tries to take
4850+
* one token per successful RECV. If the syscall is restarted after
4851+
* RECV the parent will try to get two tokens while the child only
4852+
* provided one.
4853+
*/
4854+
ASSERT_EQ(pipe(pipe_fds), 0);
4855+
4856+
pid = fork();
4857+
ASSERT_GE(pid, 0);
4858+
4859+
if (pid == 0) {
4860+
struct sigaction new_action = {
4861+
.sa_handler = signal_handler_nop,
4862+
.sa_flags = SA_RESTART,
4863+
};
4864+
struct itimerval timer = {
4865+
.it_value = { .tv_usec = 1000 },
4866+
.it_interval = { .tv_usec = 1000 },
4867+
};
4868+
char c = 'a';
4869+
4870+
close(pipe_fds[0]);
4871+
4872+
/* Setup the sigaction with SA_RESTART */
4873+
if (sigaction(SIGALRM, &new_action, NULL)) {
4874+
perror("sigaction");
4875+
exit(1);
4876+
}
4877+
4878+
/*
4879+
* Kill with SIGALRM repeatedly, to try to hit the race when
4880+
* handling the syscall.
4881+
*/
4882+
if (setitimer(ITIMER_REAL, &timer, NULL) < 0)
4883+
perror("setitimer");
4884+
4885+
for (i = 0; i < max_iter; ++i) {
4886+
int fd;
4887+
4888+
/* Send one token per iteration to catch repeats. */
4889+
if (write(pipe_fds[1], &c, sizeof(c)) != 1) {
4890+
perror("write");
4891+
exit(1);
4892+
}
4893+
4894+
fd = syscall(__NR_dup, 0);
4895+
if (fd < 0) {
4896+
perror("dup");
4897+
exit(1);
4898+
}
4899+
close(fd);
4900+
}
4901+
4902+
exit(0);
4903+
}
4904+
4905+
close(pipe_fds[1]);
4906+
4907+
for (i = 0; i < max_iter; ++i) {
4908+
struct seccomp_notif req = {};
4909+
struct seccomp_notif_addfd addfd = {};
4910+
struct pollfd pfd = {
4911+
.fd = pipe_fds[0],
4912+
.events = POLLIN,
4913+
};
4914+
char c;
4915+
4916+
/*
4917+
* Try to receive one token. If it failed, one child syscall
4918+
* was restarted after RECV and needed to be handled twice.
4919+
*/
4920+
ASSERT_EQ(poll(&pfd, 1, 1000), 1)
4921+
kill(pid, SIGKILL);
4922+
4923+
ASSERT_EQ(read(pipe_fds[0], &c, sizeof(c)), 1)
4924+
kill(pid, SIGKILL);
4925+
4926+
/*
4927+
* Get the notification, reply to it as fast as possible to test
4928+
* whether the child wrongly skips going into the non-preemptible
4929+
* (TASK_KILLABLE) state.
4930+
*/
4931+
do
4932+
ret = ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req);
4933+
while (ret < 0 && errno == ENOENT); /* Accept interruptions before RECV */
4934+
ASSERT_EQ(ret, 0)
4935+
kill(pid, SIGKILL);
4936+
4937+
addfd.id = req.id;
4938+
addfd.flags = SECCOMP_ADDFD_FLAG_SEND;
4939+
addfd.srcfd = 0;
4940+
ASSERT_GE(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), 0)
4941+
kill(pid, SIGKILL);
4942+
}
4943+
4944+
/*
4945+
* Wait for the process to exit, and make sure the process terminated
4946+
* with a zero exit code..
4947+
*/
4948+
EXPECT_EQ(waitpid(pid, &status, 0), pid);
4949+
EXPECT_EQ(true, WIFEXITED(status));
4950+
EXPECT_EQ(0, WEXITSTATUS(status));
4951+
}
4952+
48224953
struct tsync_vs_thread_leader_args {
48234954
pthread_t leader;
48244955
};

0 commit comments

Comments
 (0)