|
| 1 | +// SPDX-License-Identifier: GPL-2.0 |
| 2 | +#define _GNU_SOURCE |
| 3 | +#include <errno.h> |
| 4 | +#include <sched.h> |
| 5 | +#include <signal.h> |
| 6 | +#include <stdio.h> |
| 7 | +#include <stdlib.h> |
| 8 | +#include <string.h> |
| 9 | +#include <sys/socket.h> |
| 10 | +#include <unistd.h> |
| 11 | +#include "../pidfd/pidfd.h" |
| 12 | +#include "../kselftest_harness.h" |
| 13 | + |
| 14 | +/* |
| 15 | + * Regression tests for the setns(pidfd) active reference counting bug. |
| 16 | + * |
| 17 | + * These tests are based on the reproducers that triggered the race condition |
| 18 | + * fixed by commit 1c465d0518dc ("ns: handle setns(pidfd, ...) cleanly"). |
| 19 | + * |
| 20 | + * The bug: When using setns() with a pidfd, if the target task exits between |
| 21 | + * prepare_nsset() and commit_nsset(), the namespaces would become inactive. |
| 22 | + * Then ns_ref_active_get() would increment from 0 without properly resurrecting |
| 23 | + * the owner chain, causing active reference count underflows. |
| 24 | + */ |
| 25 | + |
/*
 * Basic setns(pidfd) regression test built on create_child() + unshare().
 *
 * The child is created without any namespace flags, unshares its user, UTS,
 * IPC and network namespaces itself, reports readiness to the parent over a
 * socketpair and then exits. The parent calls setns() on the child's pidfd
 * once the readiness byte has arrived.
 *
 * Without the fix, this would trigger active refcount warnings when the
 * parent exits after doing setns(pidfd) on a child that has already exited.
 */
TEST(simple_pidfd_setns)
{
	const int unshare_flags = CLONE_NEWUTS | CLONE_NEWIPC | CLONE_NEWNET | CLONE_NEWUSER;
	const int setns_flags = CLONE_NEWUTS | CLONE_NEWIPC;
	int sv[2];
	int pidfd = -1;
	pid_t child_pid;
	char ready;
	int ret;

	/* With SIGCHLD ignored the exited child is reaped automatically. */
	ASSERT_NE(signal(SIGCHLD, SIG_IGN), SIG_ERR);

	/* sv[1] is the child's end, sv[0] the parent's end. */
	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sv), 0);

	/* No namespace flags here: the child unshares on its own below. */
	child_pid = create_child(&pidfd, 0);
	ASSERT_GE(child_pid, 0);

	if (!child_pid) {
		/* Child: exit status is 0 only if both steps succeeded. */
		int status = 1;

		close(sv[0]);

		/*
		 * The readiness byte is only written after a successful
		 * unshare(); on failure the parent just sees EOF.
		 */
		if (unshare(unshare_flags) >= 0 &&
		    write_nointr(sv[1], "1", 1) >= 0)
			status = 0;

		close(sv[1]);
		_exit(status);
	}

	/* Parent: drop the child's end and wait for the readiness byte. */
	ASSERT_GE(pidfd, 0);
	EXPECT_EQ(close(sv[1]), 0);

	ret = read_nointr(sv[0], &ready, 1);
	ASSERT_EQ(ret, 1);
	EXPECT_EQ(close(sv[0]), 0);

	/*
	 * Join the child's UTS and IPC namespaces via the pidfd. The result
	 * is only logged, not asserted.
	 */
	ret = setns(pidfd, setns_flags);
	TH_LOG("setns() returned %d", ret);
	close(pidfd);
}
| 78 | + |
/*
 * Basic setns(pidfd) regression test built on create_child() alone.
 *
 * Unlike the unshare() based variant, the namespace flags are handed to
 * create_child() directly, so the namespaces are created at clone time and
 * no handshake with the child is needed before calling setns().
 */
TEST(simple_pidfd_setns_clone)
{
	const int clone_flags = CLONE_NEWUSER | CLONE_NEWUTS | CLONE_NEWIPC | CLONE_NEWNET;
	const int setns_flags = CLONE_NEWUTS | CLONE_NEWIPC;
	int pidfd = -1;
	pid_t child_pid;
	int ret;

	/* With SIGCHLD ignored the exited child is reaped automatically. */
	ASSERT_NE(signal(SIGCHLD, SIG_IGN), SIG_ERR);

	/* The child starts out inside freshly created namespaces. */
	child_pid = create_child(&pidfd, clone_flags);
	ASSERT_GE(child_pid, 0);

	if (!child_pid) {
		/* Child: stay around briefly so the parent can setns() to us. */
		sleep(2);
		_exit(0);
	}

	/* Parent: create_child() has already filled in the pidfd. */
	ASSERT_GE(pidfd, 0);

	/*
	 * Join the child's UTS and IPC namespaces via the pidfd. The result
	 * is only logged, not asserted.
	 */
	ret = setns(pidfd, setns_flags);
	close(pidfd);
	TH_LOG("setns() returned %d", ret);
}
| 112 | + |
| 113 | +TEST_HARNESS_MAIN |
0 commit comments