@@ -75,6 +75,17 @@ static int __init initramfs_options_setup(char *str)
7575
7676__setup ("initramfs_options=" , initramfs_options_setup );
7777
78+ bool nullfs_rootfs = false; /* set by the "nullfs_rootfs" boot option; read by init_mount_tree() to decide whether rootfs is stacked on a nullfs mount */
79+
80+ static int __init nullfs_rootfs_setup (char * str ) /* __setup handler for "nullfs_rootfs"; str is presumably whatever follows the option name on the command line - confirm against __setup() */
81+ {
82+ if (* str ) /* something follows the option name (e.g. "=..." or a longer word): not a bare "nullfs_rootfs" */
83+ return 0 ; /* flag left untouched; NOTE(review): 0 presumably reports the option as not handled - confirm */
84+ nullfs_rootfs = true;
85+ return 1 ; /* NOTE(review): 1 presumably reports the option as consumed - confirm */
86+ }
87+ __setup ("nullfs_rootfs" , nullfs_rootfs_setup );
88+
7889static u64 event ;
7990static DEFINE_XARRAY_FLAGS (mnt_id_xa , XA_FLAGS_ALLOC ) ;
8091static DEFINE_IDA (mnt_group_ida );
@@ -221,7 +232,7 @@ static int mnt_alloc_id(struct mount *mnt)
221232 int res ;
222233
223234 xa_lock (& mnt_id_xa );
224- res = __xa_alloc (& mnt_id_xa , & mnt -> mnt_id , mnt , XA_LIMIT ( 1 , INT_MAX ) , GFP_KERNEL );
235+ res = __xa_alloc (& mnt_id_xa , & mnt -> mnt_id , mnt , xa_limit_31b , GFP_KERNEL );
225236 if (!res )
226237 mnt -> mnt_id_unique = ++ mnt_id_ctr ;
227238 xa_unlock (& mnt_id_xa );
@@ -4498,65 +4509,27 @@ bool path_is_under(const struct path *path1, const struct path *path2)
44984509}
44994510EXPORT_SYMBOL (path_is_under );
45004511
4501- /*
4502- * pivot_root Semantics:
4503- * Moves the root file system of the current process to the directory put_old,
4504- * makes new_root as the new root file system of the current process, and sets
4505- * root/cwd of all processes which had them on the current root to new_root.
4506- *
4507- * Restrictions:
4508- * The new_root and put_old must be directories, and must not be on the
4509- * same file system as the current process root. The put_old must be
4510- * underneath new_root, i.e. adding a non-zero number of /.. to the string
4511- * pointed to by put_old must yield the same directory as new_root. No other
4512- * file system may be mounted on put_old. After all, new_root is a mountpoint.
4513- *
4514- * Also, the current root cannot be on the 'rootfs' (initial ramfs) filesystem.
4515- * See Documentation/filesystems/ramfs-rootfs-initramfs.rst for alternatives
4516- * in this situation.
4517- *
4518- * Notes:
4519- * - we don't move root/cwd if they are not at the root (reason: if something
4520- * cared enough to change them, it's probably wrong to force them elsewhere)
4521- * - it's okay to pick a root that isn't the root of a file system, e.g.
4522- * /nfs/my_root where /nfs is the mount point. It must be a mountpoint,
4523- * though, so you may need to say mount --bind /nfs/my_root /nfs/my_root
4524- * first.
4525- */
4526- SYSCALL_DEFINE2 (pivot_root , const char __user * , new_root ,
4527- const char __user * , put_old )
4512+ int path_pivot_root (struct path * new , struct path * old )
45284513{
4529- struct path new __free (path_put ) = {};
4530- struct path old __free (path_put ) = {};
45314514 struct path root __free (path_put ) = {};
45324515 struct mount * new_mnt , * root_mnt , * old_mnt , * root_parent , * ex_parent ;
45334516 int error ;
45344517
45354518 if (!may_mount ())
45364519 return - EPERM ;
45374520
4538- error = user_path_at (AT_FDCWD , new_root ,
4539- LOOKUP_FOLLOW | LOOKUP_DIRECTORY , & new );
4540- if (error )
4541- return error ;
4542-
4543- error = user_path_at (AT_FDCWD , put_old ,
4544- LOOKUP_FOLLOW | LOOKUP_DIRECTORY , & old );
4545- if (error )
4546- return error ;
4547-
4548- error = security_sb_pivotroot (& old , & new );
4521+ error = security_sb_pivotroot (old , new );
45494522 if (error )
45504523 return error ;
45514524
45524525 get_fs_root (current -> fs , & root );
45534526
4554- LOCK_MOUNT (old_mp , & old );
4527+ LOCK_MOUNT (old_mp , old );
45554528 old_mnt = old_mp .parent ;
45564529 if (IS_ERR (old_mnt ))
45574530 return PTR_ERR (old_mnt );
45584531
4559- new_mnt = real_mount (new . mnt );
4532+ new_mnt = real_mount (new -> mnt );
45604533 root_mnt = real_mount (root .mnt );
45614534 ex_parent = new_mnt -> mnt_parent ;
45624535 root_parent = root_mnt -> mnt_parent ;
@@ -4568,23 +4541,23 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
45684541 return - EINVAL ;
45694542 if (new_mnt -> mnt .mnt_flags & MNT_LOCKED )
45704543 return - EINVAL ;
4571- if (d_unlinked (new . dentry ))
4544+ if (d_unlinked (new -> dentry ))
45724545 return - ENOENT ;
45734546 if (new_mnt == root_mnt || old_mnt == root_mnt )
45744547 return - EBUSY ; /* loop, on the same file system */
45754548 if (!path_mounted (& root ))
45764549 return - EINVAL ; /* not a mountpoint */
45774550 if (!mnt_has_parent (root_mnt ))
45784551 return - EINVAL ; /* absolute root */
4579- if (!path_mounted (& new ))
4552+ if (!path_mounted (new ))
45804553 return - EINVAL ; /* not a mountpoint */
45814554 if (!mnt_has_parent (new_mnt ))
45824555 return - EINVAL ; /* absolute root */
45834556 /* make sure we can reach put_old from new_root */
4584- if (!is_path_reachable (old_mnt , old_mp .mp -> m_dentry , & new ))
4557+ if (!is_path_reachable (old_mnt , old_mp .mp -> m_dentry , new ))
45854558 return - EINVAL ;
45864559 /* make certain new is below the root */
4587- if (!is_path_reachable (new_mnt , new . dentry , & root ))
4560+ if (!is_path_reachable (new_mnt , new -> dentry , & root ))
45884561 return - EINVAL ;
45894562 lock_mount_hash ();
45904563 umount_mnt (new_mnt );
@@ -4603,10 +4576,56 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
46034576 unlock_mount_hash ();
46044577 mnt_notify_add (root_mnt );
46054578 mnt_notify_add (new_mnt );
4606- chroot_fs_refs (& root , & new );
4579+ chroot_fs_refs (& root , new );
46074580 return 0 ;
46084581}
46094582
4583+ /*
4584+ * pivot_root Semantics:
4585+ * Moves the root file system of the current process to the directory put_old,
4586+ * makes new_root the new root file system of the current process, and sets
4587+ * root/cwd of all processes which had them on the current root to new_root.
4588+ *
4589+ * Restrictions:
4590+ * The new_root and put_old must be directories, and must not be on the
4591+ * same file system as the current process root. The put_old must be
4592+ * underneath new_root, i.e. adding a non-zero number of /.. to the string
4593+ * pointed to by put_old must yield the same directory as new_root. No other
4594+ * file system may be mounted on put_old. After all, new_root is a mountpoint.
4595+ *
4596+ * Also, the current root cannot be on the 'rootfs' (initial ramfs) filesystem
4597+ * unless the kernel was booted with "nullfs_rootfs". See
4598+ * Documentation/filesystems/ramfs-rootfs-initramfs.rst for alternatives
4599+ * in this situation.
4600+ *
4601+ * Notes:
4602+ * - we don't move root/cwd if they are not at the root (reason: if something
4603+ * cared enough to change them, it's probably wrong to force them elsewhere)
4604+ * - it's okay to pick a root that isn't the root of a file system, e.g.
4605+ * /nfs/my_root where /nfs is the mount point. It must be a mountpoint,
4606+ * though, so you may need to say mount --bind /nfs/my_root /nfs/my_root
4607+ * first.
4608+ */
4609+ SYSCALL_DEFINE2 (pivot_root , const char __user * , new_root ,
4610+ const char __user * , put_old )
4611+ {
4612+ struct path new __free (path_put ) = {}; /* resolved new_root; __free(path_put) presumably releases it when the scope ends - confirm */
4613+ struct path old __free (path_put ) = {}; /* resolved put_old; same cleanup annotation as above */
4614+ int error ;
4615+
4616+ error = user_path_at (AT_FDCWD , new_root , /* user-supplied new_root string is looked up into 'new' */
4617+ LOOKUP_FOLLOW | LOOKUP_DIRECTORY , & new ); /* flag names suggest: follow symlinks, require a directory */
4618+ if (error )
4619+ return error ; /* lookup failure is returned to the caller unchanged */
4620+
4621+ error = user_path_at (AT_FDCWD , put_old , /* same lookup for put_old, into 'old' */
4622+ LOOKUP_FOLLOW | LOOKUP_DIRECTORY , & old );
4623+ if (error )
4624+ return error ;
4625+
4626+ return path_pivot_root (& new , & old ); /* may_mount(), the LSM hook and the actual pivot all live in path_pivot_root() */
4627+ }
4628+
46104629static unsigned int recalc_flags (struct mount_kattr * kattr , struct mount * mnt )
46114630{
46124631 unsigned int flags = mnt -> mnt .mnt_flags ;
@@ -5969,24 +5988,72 @@ struct mnt_namespace init_mnt_ns = {
59695988
59705989static void __init init_mount_tree (void )
59715990{
5972- struct vfsmount * mnt ;
5973- struct mount * m ;
5991+ struct vfsmount * mnt , * nullfs_mnt ;
5992+ struct mount * mnt_root ;
59745993 struct path root ;
59755994
5995+ /*
5996+ * When nullfs is used, we create two mounts:
5997+ *
5998+ * (1) nullfs with mount id 1
5999+ * (2) mutable rootfs with mount id 2
6000+ *
6001+ * with (2) mounted on top of (1).
6002+ */
6003+ if (nullfs_rootfs ) {
6004+ nullfs_mnt = vfs_kern_mount (& nullfs_fs_type , 0 , "nullfs" , NULL );
6005+ if (IS_ERR (nullfs_mnt ))
6006+ panic ("VFS: Failed to create nullfs" );
6007+ }
6008+
59766009 mnt = vfs_kern_mount (& rootfs_fs_type , 0 , "rootfs" , initramfs_options );
59776010 if (IS_ERR (mnt ))
59786011 panic ("Can't create rootfs" );
59796012
5980- m = real_mount (mnt );
5981- init_mnt_ns .root = m ;
5982- init_mnt_ns .nr_mounts = 1 ;
5983- mnt_add_to_ns (& init_mnt_ns , m );
6013+ if (nullfs_rootfs ) {
6014+ VFS_WARN_ON_ONCE (real_mount (nullfs_mnt )-> mnt_id != 1 );
6015+ VFS_WARN_ON_ONCE (real_mount (mnt )-> mnt_id != 2 );
6016+
6017+ /* The namespace root is the nullfs mnt. */
6018+ mnt_root = real_mount (nullfs_mnt );
6019+ init_mnt_ns .root = mnt_root ;
6020+
6021+ /* Mount mutable rootfs on top of nullfs. */
6022+ root .mnt = nullfs_mnt ;
6023+ root .dentry = nullfs_mnt -> mnt_root ;
6024+
6025+ LOCK_MOUNT_EXACT (mp , & root );
6026+ if (unlikely (IS_ERR (mp .parent )))
6027+ panic ("VFS: Failed to mount rootfs on nullfs" );
6028+ scoped_guard (mount_writer )
6029+ attach_mnt (real_mount (mnt ), mp .parent , mp .mp );
6030+
6031+ pr_info ("VFS: Finished mounting rootfs on nullfs\n" );
6032+ } else {
6033+ VFS_WARN_ON_ONCE (real_mount (mnt )-> mnt_id != 1 );
6034+
6035+ /* The namespace root is the mutable rootfs. */
6036+ mnt_root = real_mount (mnt );
6037+ init_mnt_ns .root = mnt_root ;
6038+ }
6039+
6040+ /*
6041+ * We've dropped all locks here but that's fine. Not only are we
6042+ * the only task that's running, there's also no other mount
6043+ * namespace in existence and the initial mount namespace is
6044+ * completely empty until we add the mounts we just created.
6045+ */
6046+ for (struct mount * p = mnt_root ; p ; p = next_mnt (p , mnt_root )) {
6047+ mnt_add_to_ns (& init_mnt_ns , p );
6048+ init_mnt_ns .nr_mounts ++ ;
6049+ }
6050+
59846051 init_task .nsproxy -> mnt_ns = & init_mnt_ns ;
59856052 get_mnt_ns (& init_mnt_ns );
59866053
5987- root . mnt = mnt ;
5988- root .dentry = mnt -> mnt_root ;
5989-
6054+ /* The root and pwd always point to the mutable rootfs. */
6055+ root .mnt = mnt ;
6056+ root . dentry = mnt -> mnt_root ;
59906057 set_fs_pwd (current -> fs , & root );
59916058 set_fs_root (current -> fs , & root );
59926059
0 commit comments