Skip to content

Commit ae901e5

Browse files
committed
Merge patch series "ns: fixes for namespace iteration and active reference counting"
Christian Brauner <brauner@kernel.org> says: * Make sure to initialize the active reference count for the initial network namespace and prevent __ns_common_init() from returning too early. * Make sure that passive reference counts are dropped outside of rcu read locks as some namespaces such as the mount namespace do in fact sleep when putting the last reference. * The setns() system call supports: (1) namespace file descriptors (nsfd) (2) process file descriptors (pidfd) When using nsfds the namespaces will remain active because they are pinned by the vfs. However, when pidfds are used things are more complicated. When the target task exits and passes through exit_nsproxy_namespaces() or is reaped and thus also passes through exit_cred_namespaces() after the setns()'ing task has called prepare_nsset() but before the active reference count of the set of namespaces it wants to setns() to might have been dropped already: P1 P2 pid_p1 = clone(CLONE_NEWUSER | CLONE_NEWNET | CLONE_NEWNS) pidfd = pidfd_open(pid_p1) setns(pidfd, CLONE_NEWUSER | CLONE_NEWNET | CLONE_NEWNS) prepare_nsset() exit(0) // ns->__ns_active_ref == 1 // parent_ns->__ns_active_ref == 1 -> exit_nsproxy_namespaces() -> exit_cred_namespaces() // ns_active_ref_put() will also put // the reference on the owner of the // namespace. If the only reason the // owning namespace was alive was // because it was a parent of @ns // it's active reference count now goes // to zero... -------------------------------- // | // ns->__ns_active_ref == 0 | // parent_ns->__ns_active_ref == 0 | | commit_nsset() -----------------> // If setns() // now manages to install the namespaces // it will call ns_active_ref_get() // on them thus bumping the active reference // count from zero again but without also // taking the required reference on the owner. // Thus we get: // // ns->__ns_active_ref == 1 // parent_ns->__ns_active_ref == 0 When later someone does ns_active_ref_put() on @ns it will underflow parent_ns->__ns_active_ref leading to a splat from our asserts thinking there are still active references when in fact the counter just underflowed. So resurrect the ownership chain if necessary as well. If the caller succeeded to grab passive references to the set of namespaces the setns() should simply succeed even if the target task exists or gets reaped in the meantime. The race is rare and can only be triggered when using pidfs to setns() to namespaces. Also note that active reference on initial namespaces are nops. Since we now always handle parent references directly we can drop ns_ref_active_get_owner() when adding a namespace to a namespace tree. This is now all handled uniformly in the places where the new namespaces actually become active. * patches from https://patch.msgid.link/20251109-namespace-6-19-fixes-v1-0-ae8a4ad5a3b3@kernel.org: selftests/namespaces: test for efault selftests/namespaces: add active reference count regression test ns: add asserts for active refcount underflow ns: handle setns(pidfd, ...) cleanly ns: return EFAULT on put_user() error ns: make sure reference are dropped outside of rcu lock ns: don't increment or decrement initial namespaces ns: don't skip active reference count initialization Link: https://patch.msgid.link/20251109-namespace-6-19-fixes-v1-0-ae8a4ad5a3b3@kernel.org Signed-off-by: Christian Brauner <brauner@kernel.org>
2 parents 8ebfb98 + 07d7ad4 commit ae901e5

8 files changed

Lines changed: 724 additions & 74 deletions

File tree

fs/nsfs.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -430,7 +430,7 @@ static int nsfs_init_inode(struct inode *inode, void *data)
430430
* ioctl on such a socket will resurrect the relevant namespace
431431
* subtree.
432432
*/
433-
__ns_ref_active_resurrect(ns);
433+
__ns_ref_active_get(ns);
434434
return 0;
435435
}
436436

include/linux/ns_common.h

Lines changed: 13 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,12 @@ static __always_inline bool is_initial_namespace(struct ns_common *ns)
141141
IPC_NS_INIT_INO - MNT_NS_INIT_INO + 1));
142142
}
143143

144+
static __always_inline bool is_ns_init_id(const struct ns_common *ns)
145+
{
146+
VFS_WARN_ON_ONCE(ns->ns_id == 0);
147+
return ns->ns_id <= NS_LAST_INIT_ID;
148+
}
149+
144150
#define to_ns_common(__ns) \
145151
_Generic((__ns), \
146152
struct cgroup_namespace *: &(__ns)->ns, \
@@ -281,54 +287,25 @@ static __always_inline __must_check int __ns_ref_read(const struct ns_common *ns
281287
#define ns_ref_active_read(__ns) \
282288
((__ns) ? __ns_ref_active_read(to_ns_common(__ns)) : 0)
283289

284-
void __ns_ref_active_get_owner(struct ns_common *ns);
285-
286-
static __always_inline void __ns_ref_active_get(struct ns_common *ns)
287-
{
288-
WARN_ON_ONCE(atomic_add_negative(1, &ns->__ns_ref_active));
289-
VFS_WARN_ON_ONCE(is_initial_namespace(ns) && __ns_ref_active_read(ns) <= 0);
290-
}
291-
#define ns_ref_active_get(__ns) \
292-
do { if (__ns) __ns_ref_active_get(to_ns_common(__ns)); } while (0)
293-
294-
static __always_inline bool __ns_ref_active_get_not_zero(struct ns_common *ns)
295-
{
296-
if (atomic_inc_not_zero(&ns->__ns_ref_active)) {
297-
VFS_WARN_ON_ONCE(!__ns_ref_read(ns));
298-
return true;
299-
}
300-
return false;
301-
}
302-
303-
#define ns_ref_active_get_owner(__ns) \
304-
do { if (__ns) __ns_ref_active_get_owner(to_ns_common(__ns)); } while (0)
305-
306-
void __ns_ref_active_put_owner(struct ns_common *ns);
290+
void __ns_ref_active_put(struct ns_common *ns);
307291

308-
static __always_inline void __ns_ref_active_put(struct ns_common *ns)
309-
{
310-
if (atomic_dec_and_test(&ns->__ns_ref_active)) {
311-
VFS_WARN_ON_ONCE(is_initial_namespace(ns));
312-
VFS_WARN_ON_ONCE(!__ns_ref_read(ns));
313-
__ns_ref_active_put_owner(ns);
314-
}
315-
}
316292
#define ns_ref_active_put(__ns) \
317293
do { if (__ns) __ns_ref_active_put(to_ns_common(__ns)); } while (0)
318294

319295
static __always_inline struct ns_common *__must_check ns_get_unless_inactive(struct ns_common *ns)
320296
{
321-
VFS_WARN_ON_ONCE(__ns_ref_active_read(ns) && !__ns_ref_read(ns));
322-
if (!__ns_ref_active_read(ns))
297+
if (!__ns_ref_active_read(ns)) {
298+
VFS_WARN_ON_ONCE(is_ns_init_id(ns));
323299
return NULL;
300+
}
324301
if (!__ns_ref_get(ns))
325302
return NULL;
326303
return ns;
327304
}
328305

329-
void __ns_ref_active_resurrect(struct ns_common *ns);
306+
void __ns_ref_active_get(struct ns_common *ns);
330307

331-
#define ns_ref_active_resurrect(__ns) \
332-
do { if (__ns) __ns_ref_active_resurrect(to_ns_common(__ns)); } while (0)
308+
#define ns_ref_active_get(__ns) \
309+
do { if (__ns) __ns_ref_active_get(to_ns_common(__ns)); } while (0)
333310

334311
#endif

kernel/nscommon.c

Lines changed: 35 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ static void ns_debug(struct ns_common *ns, const struct proc_ns_operations *ops)
5454

5555
int __ns_common_init(struct ns_common *ns, u32 ns_type, const struct proc_ns_operations *ops, int inum)
5656
{
57-
int ret;
57+
int ret = 0;
5858

5959
refcount_set(&ns->__ns_ref, 1);
6060
ns->stashed = NULL;
@@ -74,11 +74,10 @@ int __ns_common_init(struct ns_common *ns, u32 ns_type, const struct proc_ns_ope
7474
ns_debug(ns, ops);
7575
#endif
7676

77-
if (inum) {
77+
if (inum)
7878
ns->inum = inum;
79-
return 0;
80-
}
81-
ret = proc_alloc_inum(&ns->inum);
79+
else
80+
ret = proc_alloc_inum(&ns->inum);
8281
if (ret)
8382
return ret;
8483
/*
@@ -115,13 +114,6 @@ struct ns_common *__must_check ns_owner(struct ns_common *ns)
115114
return to_ns_common(owner);
116115
}
117116

118-
void __ns_ref_active_get_owner(struct ns_common *ns)
119-
{
120-
ns = ns_owner(ns);
121-
if (ns)
122-
WARN_ON_ONCE(atomic_add_negative(1, &ns->__ns_ref_active));
123-
}
124-
125117
/*
126118
* The active reference count works by having each namespace that gets
127119
* created take a single active reference on its owning user namespace.
@@ -172,14 +164,29 @@ void __ns_ref_active_get_owner(struct ns_common *ns)
172164
* The iteration stops once we reach a namespace that still has active
173165
* references.
174166
*/
175-
void __ns_ref_active_put_owner(struct ns_common *ns)
167+
void __ns_ref_active_put(struct ns_common *ns)
176168
{
169+
/* Initial namespaces are always active. */
170+
if (is_ns_init_id(ns))
171+
return;
172+
173+
if (!atomic_dec_and_test(&ns->__ns_ref_active)) {
174+
VFS_WARN_ON_ONCE(__ns_ref_active_read(ns) < 0);
175+
return;
176+
}
177+
178+
VFS_WARN_ON_ONCE(is_ns_init_id(ns));
179+
VFS_WARN_ON_ONCE(!__ns_ref_read(ns));
180+
177181
for (;;) {
178182
ns = ns_owner(ns);
179183
if (!ns)
180184
return;
181-
if (!atomic_dec_and_test(&ns->__ns_ref_active))
185+
VFS_WARN_ON_ONCE(is_ns_init_id(ns));
186+
if (!atomic_dec_and_test(&ns->__ns_ref_active)) {
187+
VFS_WARN_ON_ONCE(__ns_ref_active_read(ns) < 0);
182188
return;
189+
}
183190
}
184191
}
185192

@@ -275,10 +282,18 @@ void __ns_ref_active_put_owner(struct ns_common *ns)
275282
* it also needs to take another reference on its owning user namespace
276283
* and so on.
277284
*/
278-
void __ns_ref_active_resurrect(struct ns_common *ns)
285+
void __ns_ref_active_get(struct ns_common *ns)
279286
{
287+
int prev;
288+
289+
/* Initial namespaces are always active. */
290+
if (is_ns_init_id(ns))
291+
return;
292+
280293
/* If we didn't resurrect the namespace we're done. */
281-
if (atomic_fetch_add(1, &ns->__ns_ref_active))
294+
prev = atomic_fetch_add(1, &ns->__ns_ref_active);
295+
VFS_WARN_ON_ONCE(prev < 0);
296+
if (likely(prev))
282297
return;
283298

284299
/*
@@ -290,7 +305,10 @@ void __ns_ref_active_resurrect(struct ns_common *ns)
290305
if (!ns)
291306
return;
292307

293-
if (atomic_fetch_add(1, &ns->__ns_ref_active))
308+
VFS_WARN_ON_ONCE(is_ns_init_id(ns));
309+
prev = atomic_fetch_add(1, &ns->__ns_ref_active);
310+
VFS_WARN_ON_ONCE(prev < 0);
311+
if (likely(prev))
294312
return;
295313
}
296314
}

kernel/nstree.c

Lines changed: 25 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -173,14 +173,6 @@ void __ns_tree_add_raw(struct ns_common *ns, struct ns_tree *ns_tree)
173173
write_sequnlock(&ns_tree_lock);
174174

175175
VFS_WARN_ON_ONCE(node);
176-
177-
/*
178-
* Take an active reference on the owner namespace. This ensures
179-
* that the owner remains visible while any of its child namespaces
180-
* are active. For init namespaces this is a no-op as ns_owner()
181-
* returns NULL for namespaces owned by init_user_ns.
182-
*/
183-
__ns_ref_active_get_owner(ns);
184176
}
185177

186178
void __ns_tree_remove(struct ns_common *ns, struct ns_tree *ns_tree)
@@ -505,13 +497,13 @@ static inline bool __must_check may_list_ns(const struct klistns *kls,
505497
return false;
506498
}
507499

508-
static void __ns_put(struct ns_common *ns)
500+
static inline void ns_put(struct ns_common *ns)
509501
{
510-
if (ns->ops)
502+
if (ns && ns->ops)
511503
ns->ops->put(ns);
512504
}
513505

514-
DEFINE_FREE(ns_put, struct ns_common *, if (!IS_ERR_OR_NULL(_T)) __ns_put(_T))
506+
DEFINE_FREE(ns_put, struct ns_common *, if (!IS_ERR_OR_NULL(_T)) ns_put(_T))
515507

516508
static inline struct ns_common *__must_check legitimize_ns(const struct klistns *kls,
517509
struct ns_common *candidate)
@@ -535,7 +527,7 @@ static ssize_t do_listns_userns(struct klistns *kls)
535527
{
536528
u64 __user *ns_ids = kls->uns_ids;
537529
size_t nr_ns_ids = kls->nr_ns_ids;
538-
struct ns_common *ns = NULL, *first_ns = NULL;
530+
struct ns_common *ns = NULL, *first_ns = NULL, *prev = NULL;
539531
const struct list_head *head;
540532
ssize_t ret;
541533

@@ -568,25 +560,33 @@ static ssize_t do_listns_userns(struct klistns *kls)
568560

569561
if (!first_ns)
570562
first_ns = list_entry_rcu(head->next, typeof(*ns), ns_owner_entry);
563+
571564
for (ns = first_ns; &ns->ns_owner_entry != head && nr_ns_ids;
572565
ns = list_entry_rcu(ns->ns_owner_entry.next, typeof(*ns), ns_owner_entry)) {
573-
struct ns_common *valid __free(ns_put);
566+
struct ns_common *valid;
574567

575568
valid = legitimize_ns(kls, ns);
576569
if (!valid)
577570
continue;
578571

579572
rcu_read_unlock();
580573

581-
if (put_user(valid->ns_id, ns_ids + ret))
582-
return -EINVAL;
574+
ns_put(prev);
575+
prev = valid;
576+
577+
if (put_user(valid->ns_id, ns_ids + ret)) {
578+
ns_put(prev);
579+
return -EFAULT;
580+
}
581+
583582
nr_ns_ids--;
584583
ret++;
585584

586585
rcu_read_lock();
587586
}
588587

589588
rcu_read_unlock();
589+
ns_put(prev);
590590
return ret;
591591
}
592592

@@ -668,7 +668,7 @@ static ssize_t do_listns(struct klistns *kls)
668668
{
669669
u64 __user *ns_ids = kls->uns_ids;
670670
size_t nr_ns_ids = kls->nr_ns_ids;
671-
struct ns_common *ns, *first_ns = NULL;
671+
struct ns_common *ns, *first_ns = NULL, *prev = NULL;
672672
struct ns_tree *ns_tree = NULL;
673673
const struct list_head *head;
674674
u32 ns_type;
@@ -705,16 +705,21 @@ static ssize_t do_listns(struct klistns *kls)
705705

706706
for (ns = first_ns; !ns_common_is_head(ns, head, ns_tree) && nr_ns_ids;
707707
ns = next_ns_common(ns, ns_tree)) {
708-
struct ns_common *valid __free(ns_put);
708+
struct ns_common *valid;
709709

710710
valid = legitimize_ns(kls, ns);
711711
if (!valid)
712712
continue;
713713

714714
rcu_read_unlock();
715715

716-
if (put_user(valid->ns_id, ns_ids + ret))
717-
return -EINVAL;
716+
ns_put(prev);
717+
prev = valid;
718+
719+
if (put_user(valid->ns_id, ns_ids + ret)) {
720+
ns_put(prev);
721+
return -EFAULT;
722+
}
718723

719724
nr_ns_ids--;
720725
ret++;
@@ -723,6 +728,7 @@ static ssize_t do_listns(struct klistns *kls)
723728
}
724729

725730
rcu_read_unlock();
731+
ns_put(prev);
726732
return ret;
727733
}
728734

tools/testing/selftests/namespaces/.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,9 @@ init_ino_test
44
ns_active_ref_test
55
listns_test
66
listns_permissions_test
7+
listns_efault_test
78
siocgskns_test
89
cred_change_test
910
stress_test
1011
listns_pagination_bug
12+
regression_pidfd_setns_test

tools/testing/selftests/namespaces/Makefile

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,18 +8,22 @@ TEST_GEN_PROGS := nsid_test \
88
ns_active_ref_test \
99
listns_test \
1010
listns_permissions_test \
11+
listns_efault_test \
1112
siocgskns_test \
1213
cred_change_test \
1314
stress_test \
14-
listns_pagination_bug
15+
listns_pagination_bug \
16+
regression_pidfd_setns_test
1517

1618
include ../lib.mk
1719

1820
$(OUTPUT)/ns_active_ref_test: ../filesystems/utils.c
1921
$(OUTPUT)/listns_test: ../filesystems/utils.c
2022
$(OUTPUT)/listns_permissions_test: ../filesystems/utils.c
23+
$(OUTPUT)/listns_efault_test: ../filesystems/utils.c
2124
$(OUTPUT)/siocgskns_test: ../filesystems/utils.c
2225
$(OUTPUT)/cred_change_test: ../filesystems/utils.c
2326
$(OUTPUT)/stress_test: ../filesystems/utils.c
2427
$(OUTPUT)/listns_pagination_bug: ../filesystems/utils.c
28+
$(OUTPUT)/regression_pidfd_setns_test: ../filesystems/utils.c
2529

0 commit comments

Comments
 (0)