Skip to content

Commit 18b5c40

Browse files
committed
Merge patch series "ns: header cleanups and initial namespace reference count improvements"
Christian Brauner <brauner@kernel.org> says: Clean up the namespace headers by splitting them into types and helpers. Better separate common namespace types and functions from namespace tree types and functions. Fix the reference counts of initial namespaces so we don't do any pointless cacheline ping-pong for them when we know they can never go away. Add a bunch of asserts for both the passive and active reference counts to catch any changes that would break it. * patches from https://patch.msgid.link/20251110-work-namespace-nstree-fixes-v1-0-e8a9264e0fb9@kernel.org: selftests/namespaces: fix nsid tests ns: drop custom reference count initialization for initial namespaces pid: rely on common reference count behavior ns: add asserts for initial namespace active reference counts ns: add asserts for initial namespace reference counts ns: make all reference counts on initial namespace a nop ipc: enable is_ns_init_id() assertions fs: use boolean to indicate anonymous mount namespace ns: rename is_initial_namespace() ns: make is_initial_namespace() argument const nstree: use guards for ns_tree_lock nstree: simplify owner list iteration nstree: switch to new structures nstree: add helper to operate on struct ns_tree_{node,root} nstree: move nstree types into separate header nstree: decouple from ns_common header ns: move namespace types into separate header Link: https://patch.msgid.link/20251110-work-namespace-nstree-fixes-v1-0-e8a9264e0fb9@kernel.org Signed-off-by: Christian Brauner <brauner@kernel.org>
2 parents a67ee4e + 6453937 commit 18b5c40

18 files changed

Lines changed: 576 additions & 437 deletions

File tree

fs/mount.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ struct mnt_namespace {
2727
unsigned int nr_mounts; /* # of mounts in the namespace */
2828
unsigned int pending_mounts;
2929
refcount_t passive; /* number references not pinning @mounts */
30+
bool is_anon;
3031
} __randomize_layout;
3132

3233
struct mnt_pcp {
@@ -175,7 +176,7 @@ static inline bool is_local_mountpoint(const struct dentry *dentry)
175176

176177
/*
 * Report whether @ns is an anonymous mount namespace.
 *
 * This hunk replaces the old test (ns_id == 0, the "-" line) with a read
 * of the dedicated is_anon boolean (the "+" line).
 */
static inline bool is_anon_ns(struct mnt_namespace *ns)
177178
{
178-
return ns->ns.ns_id == 0;
179+
return ns->is_anon;
179180
}
180181

181182
static inline bool anon_ns_root(const struct mount *m)

fs/namespace.c

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,7 @@ static inline struct mnt_namespace *node_to_mnt_ns(const struct rb_node *node)
138138

139139
if (!node)
140140
return NULL;
141-
ns = rb_entry(node, struct ns_common, ns_tree_node);
141+
ns = rb_entry(node, struct ns_common, ns_tree_node.ns_node);
142142
return container_of(ns, struct mnt_namespace, ns);
143143
}
144144

@@ -4093,8 +4093,9 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns, bool a
40934093
dec_mnt_namespaces(ucounts);
40944094
return ERR_PTR(ret);
40954095
}
4096-
if (!anon)
4097-
ns_tree_gen_id(new_ns);
4096+
ns_tree_gen_id(new_ns);
4097+
4098+
new_ns->is_anon = anon;
40984099
refcount_set(&new_ns->passive, 1);
40994100
new_ns->mounts = RB_ROOT;
41004101
init_waitqueue_head(&new_ns->poll);
@@ -5985,7 +5986,7 @@ SYSCALL_DEFINE4(listmount, const struct mnt_id_req __user *, req,
59855986
}
59865987

59875988
struct mnt_namespace init_mnt_ns = {
5988-
.ns = NS_COMMON_INIT(init_mnt_ns, 1),
5989+
.ns = NS_COMMON_INIT(init_mnt_ns),
59895990
.user_ns = &init_user_ns,
59905991
.passive = REFCOUNT_INIT(1),
59915992
.mounts = RB_ROOT,

include/linux/ns/ns_common_types.h

Lines changed: 196 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,196 @@
1+
/* SPDX-License-Identifier: GPL-2.0 */
2+
#ifndef _LINUX_NS_COMMON_TYPES_H
3+
#define _LINUX_NS_COMMON_TYPES_H
4+
5+
#include <linux/atomic.h>
6+
#include <linux/ns/nstree_types.h>
7+
#include <linux/rbtree.h>
8+
#include <linux/refcount.h>
9+
#include <linux/types.h>
10+
11+
struct cgroup_namespace;
12+
struct dentry;
13+
struct ipc_namespace;
14+
struct mnt_namespace;
15+
struct net;
16+
struct pid_namespace;
17+
struct proc_ns_operations;
18+
struct time_namespace;
19+
struct user_namespace;
20+
struct uts_namespace;
21+
22+
extern struct cgroup_namespace init_cgroup_ns;
23+
extern struct ipc_namespace init_ipc_ns;
24+
extern struct mnt_namespace init_mnt_ns;
25+
extern struct net init_net;
26+
extern struct pid_namespace init_pid_ns;
27+
extern struct time_namespace init_time_ns;
28+
extern struct user_namespace init_user_ns;
29+
extern struct uts_namespace init_uts_ns;
30+
31+
extern const struct proc_ns_operations cgroupns_operations;
32+
extern const struct proc_ns_operations ipcns_operations;
33+
extern const struct proc_ns_operations mntns_operations;
34+
extern const struct proc_ns_operations netns_operations;
35+
extern const struct proc_ns_operations pidns_operations;
36+
extern const struct proc_ns_operations pidns_for_children_operations;
37+
extern const struct proc_ns_operations timens_operations;
38+
extern const struct proc_ns_operations timens_for_children_operations;
39+
extern const struct proc_ns_operations userns_operations;
40+
extern const struct proc_ns_operations utsns_operations;
41+
42+
/*
43+
* Namespace lifetimes are managed via a two-tier reference counting model:
44+
*
45+
* (1) __ns_ref (refcount_t): Main reference count tracking memory
46+
* lifetime. Controls when the namespace structure itself is freed.
47+
* It also pins the namespace on the namespace trees whereas (2)
48+
* only regulates their visibility to userspace.
49+
*
50+
* (2) __ns_ref_active (atomic_t): Reference count tracking active users.
51+
* Controls visibility of the namespace in the namespace trees.
52+
* Any live task that uses the namespace (via nsproxy or cred) holds
53+
* an active reference. Any open file descriptor or bind-mount of
54+
* the namespace holds an active reference. Once all tasks have
55+
* exited their namespaces and all file descriptors and
56+
* bind-mounts have been released the active reference count drops
57+
* to zero and the namespace becomes inactive. IOW, the namespace
58+
* cannot be listed or opened via file handles anymore.
59+
*
60+
* Note that it is valid to transition from active to inactive and
61+
* back from inactive to active e.g., when resurrecting an inactive
62+
* namespace tree via the SIOCGSKNS ioctl().
63+
*
64+
* Relationship and lifecycle states:
65+
*
66+
* - Active (__ns_ref_active > 0):
67+
* Namespace is actively used and visible to userspace. The namespace
68+
* can be reopened via /proc/<pid>/ns/<ns_type>, via namespace file
69+
* handles, or discovered via listns().
70+
*
71+
* - Inactive (__ns_ref_active == 0, __ns_ref > 0):
72+
* No tasks are actively using the namespace and it isn't pinned by
73+
* any bind-mounts or open file descriptors anymore. But the namespace
74+
* is still kept alive by internal references. For example, the user
75+
* namespace could be pinned by an open file through file->f_cred
76+
* references when one of the now defunct tasks had opened a file and
77+
* handed the file descriptor off to another process via a UNIX
78+
* socket. Such references keep the namespace structure alive through
79+
* __ns_ref but will not hold an active reference.
80+
*
81+
* - Destroyed (__ns_ref == 0):
82+
* No references remain. The namespace is removed from the tree and freed.
83+
*
84+
* State transitions:
85+
*
86+
* Active -> Inactive:
87+
* When the last task using the namespace exits it drops its active
88+
* references to all namespaces. However, user and pid namespaces
89+
* remain accessible until the task has been reaped.
90+
*
91+
* Inactive -> Active:
92+
* An inactive namespace tree might be resurrected due to e.g., the
93+
* SIOCGSKNS ioctl() on a socket.
94+
*
95+
* Inactive -> Destroyed:
96+
* When __ns_ref drops to zero the namespace is removed from the
97+
* namespace trees and the memory is freed (after RCU grace period).
98+
*
99+
* Initial namespaces:
100+
* Boot-time namespaces (init_net, init_pid_ns, etc.) start with
101+
* __ns_ref_active = 1 and remain active forever.
102+
*
103+
* @ns_type: type of namespace (e.g., CLONE_NEWNET)
104+
* @stashed: cached dentry to be used by the vfs
105+
* @ops: namespace operations
106+
* @inum: namespace inode number (quickly recycled for non-initial namespaces)
107+
* @__ns_ref: main reference count (do not use directly)
108+
* @ns_tree: namespace tree nodes and active reference count
109+
*/
110+
struct ns_common {
111+
u32 ns_type;
112+
struct dentry *stashed;
113+
const struct proc_ns_operations *ops;
114+
unsigned int inum;
115+
refcount_t __ns_ref; /* do not use directly */
116+
/*
 * The tree state and the RCU head overlay each other in this anonymous
 * union. "struct ns_tree;" is an unnamed member, so its fields are
 * reached directly through ns_common (e.g. ns_id, ns_tree_node).
 * NOTE(review): an unnamed member declared with a struct tag is a
 * compiler extension - confirm the build enables it.
 */
union {
117+
struct ns_tree;
118+
struct rcu_head ns_rcu;
119+
};
120+
};
121+
122+
/*
 * to_ns_common - get the embedded ns_common of a namespace
 * @__ns: pointer to one of the namespace types listed below
 *
 * Type-generic selection that resolves to &(__ns)->ns. Both the const and
 * the non-const pointer variant of every listed type are accepted; any
 * other argument type has no matching association.
 */
#define to_ns_common(__ns) \
123+
_Generic((__ns), \
124+
struct cgroup_namespace *: &(__ns)->ns, \
125+
const struct cgroup_namespace *: &(__ns)->ns, \
126+
struct ipc_namespace *: &(__ns)->ns, \
127+
const struct ipc_namespace *: &(__ns)->ns, \
128+
struct mnt_namespace *: &(__ns)->ns, \
129+
const struct mnt_namespace *: &(__ns)->ns, \
130+
struct net *: &(__ns)->ns, \
131+
const struct net *: &(__ns)->ns, \
132+
struct pid_namespace *: &(__ns)->ns, \
133+
const struct pid_namespace *: &(__ns)->ns, \
134+
struct time_namespace *: &(__ns)->ns, \
135+
const struct time_namespace *: &(__ns)->ns, \
136+
struct user_namespace *: &(__ns)->ns, \
137+
const struct user_namespace *: &(__ns)->ns, \
138+
struct uts_namespace *: &(__ns)->ns, \
139+
const struct uts_namespace *: &(__ns)->ns)
140+
141+
/*
 * ns_init_inum - select the *_NS_INIT_INO constant for a namespace type
 * @__ns: non-const pointer to one of the namespace types listed below
 *
 * Only the static type of @__ns is used to pick the constant; const
 * pointer types are not listed and therefore do not match.
 */
#define ns_init_inum(__ns) \
142+
_Generic((__ns), \
143+
struct cgroup_namespace *: CGROUP_NS_INIT_INO, \
144+
struct ipc_namespace *: IPC_NS_INIT_INO, \
145+
struct mnt_namespace *: MNT_NS_INIT_INO, \
146+
struct net *: NET_NS_INIT_INO, \
147+
struct pid_namespace *: PID_NS_INIT_INO, \
148+
struct time_namespace *: TIME_NS_INIT_INO, \
149+
struct user_namespace *: USER_NS_INIT_INO, \
150+
struct uts_namespace *: UTS_NS_INIT_INO)
151+
152+
/*
 * ns_init_ns - select the initial namespace object for a namespace type
 * @__ns: non-const pointer to one of the namespace types listed below
 *
 * Resolves to the address of the matching init_* object (init_net for
 * struct net). Only the static type of @__ns is used for the selection.
 */
#define ns_init_ns(__ns) \
153+
_Generic((__ns), \
154+
struct cgroup_namespace *: &init_cgroup_ns, \
155+
struct ipc_namespace *: &init_ipc_ns, \
156+
struct mnt_namespace *: &init_mnt_ns, \
157+
struct net *: &init_net, \
158+
struct pid_namespace *: &init_pid_ns, \
159+
struct time_namespace *: &init_time_ns, \
160+
struct user_namespace *: &init_user_ns, \
161+
struct uts_namespace *: &init_uts_ns)
162+
163+
/*
 * ns_init_id - select the *_NS_INIT_ID constant for a namespace type
 * @__ns: non-const pointer to one of the namespace types listed below
 *
 * Only the static type of @__ns is used to pick the constant.
 */
#define ns_init_id(__ns) \
164+
_Generic((__ns), \
165+
struct cgroup_namespace *: CGROUP_NS_INIT_ID, \
166+
struct ipc_namespace *: IPC_NS_INIT_ID, \
167+
struct mnt_namespace *: MNT_NS_INIT_ID, \
168+
struct net *: NET_NS_INIT_ID, \
169+
struct pid_namespace *: PID_NS_INIT_ID, \
170+
struct time_namespace *: TIME_NS_INIT_ID, \
171+
struct user_namespace *: USER_NS_INIT_ID, \
172+
struct uts_namespace *: UTS_NS_INIT_ID)
173+
174+
/*
 * to_ns_operations - select the proc_ns_operations for a namespace type
 * @__ns: non-const pointer to one of the namespace types listed below
 *
 * Resolves to the address of the matching *ns_operations table. For every
 * type except struct mnt_namespace the result is NULL when the associated
 * CONFIG_* option is not enabled; the mount namespace entry is
 * unconditional.
 */
#define to_ns_operations(__ns) \
175+
_Generic((__ns), \
176+
struct cgroup_namespace *: (IS_ENABLED(CONFIG_CGROUPS) ? &cgroupns_operations : NULL), \
177+
struct ipc_namespace *: (IS_ENABLED(CONFIG_IPC_NS) ? &ipcns_operations : NULL), \
178+
struct mnt_namespace *: &mntns_operations, \
179+
struct net *: (IS_ENABLED(CONFIG_NET_NS) ? &netns_operations : NULL), \
180+
struct pid_namespace *: (IS_ENABLED(CONFIG_PID_NS) ? &pidns_operations : NULL), \
181+
struct time_namespace *: (IS_ENABLED(CONFIG_TIME_NS) ? &timens_operations : NULL), \
182+
struct user_namespace *: (IS_ENABLED(CONFIG_USER_NS) ? &userns_operations : NULL), \
183+
struct uts_namespace *: (IS_ENABLED(CONFIG_UTS_NS) ? &utsns_operations : NULL))
184+
185+
/*
 * ns_common_type - select the CLONE_NEW* constant for a namespace type
 * @__ns: non-const pointer to one of the namespace types listed below
 *
 * Only the static type of @__ns is used to pick the constant.
 */
#define ns_common_type(__ns) \
186+
_Generic((__ns), \
187+
struct cgroup_namespace *: CLONE_NEWCGROUP, \
188+
struct ipc_namespace *: CLONE_NEWIPC, \
189+
struct mnt_namespace *: CLONE_NEWNS, \
190+
struct net *: CLONE_NEWNET, \
191+
struct pid_namespace *: CLONE_NEWPID, \
192+
struct time_namespace *: CLONE_NEWTIME, \
193+
struct user_namespace *: CLONE_NEWUSER, \
194+
struct uts_namespace *: CLONE_NEWUTS)
195+
196+
#endif /* _LINUX_NS_COMMON_TYPES_H */

include/linux/ns/nstree_types.h

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
/* SPDX-License-Identifier: GPL-2.0 */
2+
/* Copyright (c) 2025 Christian Brauner <brauner@kernel.org> */
3+
#ifndef _LINUX_NSTREE_TYPES_H
4+
#define _LINUX_NSTREE_TYPES_H
5+
6+
#include <linux/rbtree.h>
7+
#include <linux/list.h>
8+
9+
/**
10+
* struct ns_tree_root - Root of a namespace tree
11+
* @ns_rb: Red-black tree root for efficient lookups
12+
* @ns_list_head: List head for sequential iteration
13+
*
14+
* Each namespace tree maintains both an rbtree (for O(log n) lookups)
15+
* and a list (for efficient sequential iteration). The list is kept in
16+
* the same sorted order as the rbtree.
17+
*/
18+
struct ns_tree_root {
19+
struct rb_root ns_rb;
20+
struct list_head ns_list_head;
21+
};
22+
23+
/**
24+
* struct ns_tree_node - Node in a namespace tree
25+
* @ns_node: Red-black tree node
26+
* @ns_list_entry: List entry for sequential iteration
27+
*
28+
* Represents a namespace's position in a tree. Each namespace has
29+
* multiple tree nodes for different trees (unified, per-type, owner).
30+
*/
31+
struct ns_tree_node {
32+
struct rb_node ns_node;
33+
struct list_head ns_list_entry;
34+
};
35+
36+
/**
37+
* struct ns_tree - Namespace tree nodes and active reference count
38+
* @ns_id: Unique namespace identifier
39+
* @__ns_ref_active: Active reference count (do not use directly)
40+
* @ns_unified_node: Node in the global namespace tree
41+
* @ns_tree_node: Node in the per-type namespace tree
42+
* @ns_owner_node: Node in the owner namespace's tree of owned namespaces
43+
* @ns_owner_root: Root of the tree of namespaces owned by this namespace
44+
* (only used when this namespace is an owner)
45+
*/
46+
struct ns_tree {
47+
u64 ns_id;
48+
atomic_t __ns_ref_active;
49+
struct ns_tree_node ns_unified_node;
50+
struct ns_tree_node ns_tree_node;
51+
struct ns_tree_node ns_owner_node;
52+
struct ns_tree_root ns_owner_root;
53+
};
54+
55+
#endif /* _LINUX_NSTREE_TYPES_H */

0 commit comments

Comments
 (0)