Skip to content

Commit 5222470

Browse files
committed
nsfs: support file handles
A while ago we added support for file handles to pidfs so pidfds can be encoded and decoded as file handles. Userspace has adopted this quickly and it's proven very useful. Implement file handles for namespaces as well. A process is not always able to open /proc/self/ns/. That requires procfs to be mounted and for /proc/self/ or /proc/self/ns/ to not be overmounted. However, userspace can always derive a namespace fd from a pidfd. And that always works for a task's own namespace. There's no need to introduce unnecessary behavioral differences between /proc/self/ns/ fds, pidfd-derived namespace fds, and file-handle-derived namespace fds. So namespace file handles are always decodable if the caller is located in the namespace the file handle refers to. This also allows a task to, e.g., store a set of file handles to its namespaces in a file on disk so it can verify, when it gets re-executed, that they're still valid and so on. This is akin to the pidfd use case. It is also useful purely for namespace comparison, where a file handle to the task's own namespace can easily be compared against others. Reviewed-by: Amir Goldstein <amir73il@gmail.com> Signed-off-by: Christian Brauner <brauner@kernel.org>
1 parent d2afdb7 commit 5222470

3 files changed

Lines changed: 173 additions & 0 deletions

File tree

fs/nsfs.c

Lines changed: 158 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,12 @@
1313
#include <linux/nsfs.h>
1414
#include <linux/uaccess.h>
1515
#include <linux/mnt_namespace.h>
16+
#include <linux/ipc_namespace.h>
17+
#include <linux/time_namespace.h>
18+
#include <linux/utsname.h>
19+
#include <linux/exportfs.h>
20+
#include <linux/nstree.h>
21+
#include <net/net_namespace.h>
1622

1723
#include "mount.h"
1824
#include "internal.h"
@@ -417,12 +423,164 @@ static const struct stashed_operations nsfs_stashed_ops = {
417423
.put_data = nsfs_put_data,
418424
};
419425

426+
/*
 * nsfs file handle sizes expressed in u32 units instead of bytes.
 * nsfs_encode_fh() and nsfs_fh_to_dentry() compare the handle lengths they
 * are given against these values.
 */
#define NSFS_FID_SIZE_U32_VER0 (NSFS_FILE_HANDLE_SIZE_VER0 / sizeof(u32))
427+
#define NSFS_FID_SIZE_U32_LATEST (NSFS_FILE_HANDLE_SIZE_LATEST / sizeof(u32))
428+
429+
/*
 * nsfs_encode_fh - fill in an nsfs file handle for a namespace inode
 * @inode:   nsfs inode; its i_private is used as the struct ns_common
 * @fh:      output buffer, written as a struct nsfs_file_handle
 * @max_len: in: buffer size in u32 units; out: set to the latest handle
 *           size when the buffer is too small or larger than needed
 * @parent:  must be NULL; a non-NULL parent is rejected
 *
 * Return: FILEID_NSFS on success, FILEID_INVALID if @parent is set or the
 * buffer is smaller than the first published handle layout.
 */
static int nsfs_encode_fh(struct inode *inode, u32 *fh, int *max_len,
			  struct inode *parent)
{
	struct ns_common *ns = inode->i_private;
	struct nsfs_file_handle *handle = (struct nsfs_file_handle *)fh;
	const int avail = *max_len;

	if (parent)
		return FILEID_INVALID;

	/* Too small: report the size the caller needs to provide. */
	if (avail < NSFS_FID_SIZE_U32_VER0) {
		*max_len = NSFS_FID_SIZE_U32_LATEST;
		return FILEID_INVALID;
	}

	/* Larger than needed: report how much is actually used. */
	if (avail > NSFS_FID_SIZE_U32_LATEST)
		*max_len = NSFS_FID_SIZE_U32_LATEST;

	handle->ns_inum = inode->i_ino;
	handle->ns_type = ns->ops->type;
	handle->ns_id = ns->ns_id;

	return FILEID_NSFS;
}
451+
452+
static struct dentry *nsfs_fh_to_dentry(struct super_block *sb, struct fid *fh,
453+
int fh_len, int fh_type)
454+
{
455+
struct path path __free(path_put) = {};
456+
struct nsfs_file_handle *fid = (struct nsfs_file_handle *)fh;
457+
struct user_namespace *owning_ns = NULL;
458+
struct ns_common *ns;
459+
int ret;
460+
461+
if (fh_len < NSFS_FID_SIZE_U32_VER0)
462+
return NULL;
463+
464+
/* Check that any trailing bytes are zero. */
465+
if ((fh_len > NSFS_FID_SIZE_U32_LATEST) &&
466+
memchr_inv((void *)fid + NSFS_FID_SIZE_U32_LATEST, 0,
467+
fh_len - NSFS_FID_SIZE_U32_LATEST))
468+
return NULL;
469+
470+
switch (fh_type) {
471+
case FILEID_NSFS:
472+
break;
473+
default:
474+
return NULL;
475+
}
476+
477+
scoped_guard(rcu) {
478+
ns = ns_tree_lookup_rcu(fid->ns_id, fid->ns_type);
479+
if (!ns)
480+
return NULL;
481+
482+
VFS_WARN_ON_ONCE(ns->ns_id != fid->ns_id);
483+
VFS_WARN_ON_ONCE(ns->ops->type != fid->ns_type);
484+
VFS_WARN_ON_ONCE(ns->inum != fid->ns_inum);
485+
486+
if (!refcount_inc_not_zero(&ns->count))
487+
return NULL;
488+
}
489+
490+
switch (ns->ops->type) {
491+
#ifdef CONFIG_CGROUPS
492+
case CLONE_NEWCGROUP:
493+
if (!current_in_namespace(to_cg_ns(ns)))
494+
owning_ns = to_cg_ns(ns)->user_ns;
495+
break;
496+
#endif
497+
#ifdef CONFIG_IPC_NS
498+
case CLONE_NEWIPC:
499+
if (!current_in_namespace(to_ipc_ns(ns)))
500+
owning_ns = to_ipc_ns(ns)->user_ns;
501+
break;
502+
#endif
503+
case CLONE_NEWNS:
504+
if (!current_in_namespace(to_mnt_ns(ns)))
505+
owning_ns = to_mnt_ns(ns)->user_ns;
506+
break;
507+
#ifdef CONFIG_NET_NS
508+
case CLONE_NEWNET:
509+
if (!current_in_namespace(to_net_ns(ns)))
510+
owning_ns = to_net_ns(ns)->user_ns;
511+
break;
512+
#endif
513+
#ifdef CONFIG_PID_NS
514+
case CLONE_NEWPID:
515+
if (!current_in_namespace(to_pid_ns(ns))) {
516+
owning_ns = to_pid_ns(ns)->user_ns;
517+
} else if (!READ_ONCE(to_pid_ns(ns)->child_reaper)) {
518+
ns->ops->put(ns);
519+
return ERR_PTR(-EPERM);
520+
}
521+
break;
522+
#endif
523+
#ifdef CONFIG_TIME_NS
524+
case CLONE_NEWTIME:
525+
if (!current_in_namespace(to_time_ns(ns)))
526+
owning_ns = to_time_ns(ns)->user_ns;
527+
break;
528+
#endif
529+
#ifdef CONFIG_USER_NS
530+
case CLONE_NEWUSER:
531+
if (!current_in_namespace(to_user_ns(ns)))
532+
owning_ns = to_user_ns(ns);
533+
break;
534+
#endif
535+
#ifdef CONFIG_UTS_NS
536+
case CLONE_NEWUTS:
537+
if (!current_in_namespace(to_uts_ns(ns)))
538+
owning_ns = to_uts_ns(ns)->user_ns;
539+
break;
540+
#endif
541+
default:
542+
return ERR_PTR(-EOPNOTSUPP);
543+
}
544+
545+
if (owning_ns && !ns_capable(owning_ns, CAP_SYS_ADMIN)) {
546+
ns->ops->put(ns);
547+
return ERR_PTR(-EPERM);
548+
}
549+
550+
/* path_from_stashed() unconditionally consumes the reference. */
551+
ret = path_from_stashed(&ns->stashed, nsfs_mnt, ns, &path);
552+
if (ret)
553+
return ERR_PTR(ret);
554+
555+
return no_free_ptr(path.dentry);
556+
}
557+
558+
/*
 * nsfs_export_permission - permission hook for opening nsfs file handles
 * @ctx:    handle-to-path context (not inspected)
 * @oflags: open flags (not inspected)
 *
 * Nothing is checked here because nsfs_fh_to_dentry() performs all
 * permission checks while decoding the handle.
 *
 * Return: always 0.
 */
static int nsfs_export_permission(struct handle_to_path_ctx *ctx,
				  unsigned int oflags)
{
	return 0;
}
564+
565+
/*
 * nsfs_export_open - open a decoded nsfs file handle
 * @path:   path produced by decoding the handle
 * @oflags: open flags passed through unchanged
 *
 * Return: whatever file_open_root() returns for @path with an empty name,
 * @oflags and a mode of 0.
 */
static struct file *nsfs_export_open(struct path *path, unsigned int oflags)
{
	struct file *filp;

	filp = file_open_root(path, "", oflags, 0);
	return filp;
}
569+
570+
static const struct export_operations nsfs_export_operations = {
571+
.encode_fh = nsfs_encode_fh,
572+
.fh_to_dentry = nsfs_fh_to_dentry,
573+
.open = nsfs_export_open,
574+
.permission = nsfs_export_permission,
575+
};
576+
420577
static int nsfs_init_fs_context(struct fs_context *fc)
421578
{
422579
struct pseudo_fs_context *ctx = init_pseudo(fc, NSFS_MAGIC);
423580
if (!ctx)
424581
return -ENOMEM;
425582
ctx->ops = &nsfs_ops;
583+
ctx->eops = &nsfs_export_operations;
426584
ctx->dops = &ns_dentry_operations;
427585
fc->s_fs_info = (void *)&nsfs_stashed_ops;
428586
return 0;

include/linux/exportfs.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,12 @@ enum fid_type {
122122
FILEID_BCACHEFS_WITHOUT_PARENT = 0xb1,
123123
FILEID_BCACHEFS_WITH_PARENT = 0xb2,
124124

125+
/*
126+
* Namespace (nsfs) file handle:
127+
* 64 bit namespace identifier, 32 bit namespace type, 32 bit inode number.
128+
*/
129+
FILEID_NSFS = 0xf1,
130+
125131
/*
126132
* 64 bit unique kernfs id
127133
*/

include/uapi/linux/nsfs.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,4 +53,13 @@ enum init_ns_ino {
5353
MNT_NS_INIT_INO = 0xEFFFFFF8U,
5454
};
5555

56+
/*
 * Payload of a namespace file handle: a 64 bit namespace identifier,
 * a 32 bit namespace type and a 32 bit inode number.
 */
struct nsfs_file_handle {
57+
__u64 ns_id; /* 64 bit namespace identifier */
58+
__u32 ns_type; /* 32 bit namespace type */
59+
__u32 ns_inum; /* 32 bit inode number of the namespace */
60+
};
61+
62+
/*
 * Sizes in bytes of struct nsfs_file_handle. VER0 is fixed at the size of
 * the first published layout (8 + 4 + 4 bytes); LATEST follows the struct
 * as currently defined.
 */
#define NSFS_FILE_HANDLE_SIZE_VER0 16 /* sizeof first published struct */
63+
#define NSFS_FILE_HANDLE_SIZE_LATEST sizeof(struct nsfs_file_handle) /* sizeof latest published struct */
64+
5665
#endif /* __LINUX_NSFS_H */

0 commit comments

Comments
 (0)