Skip to content

Commit 90006f2

Browse files
author
Al Viro
committed
do_lock_mount(): don't modify path.
Currently do_lock_mount() has the target path switched to whatever might be overmounting it. We _do_ want to have the parent mount/mountpoint chosen on top of the overmounting pile; however, the way it's done has unpleasant races - if umount propagation removes the overmount while we'd been trying to set the environment up, we might end up failing if our target path strays into that overmount just before the overmount gets kicked out. Users of do_lock_mount() do not need the target path changed - they have all information in res->{parent,mp}; only one place (in do_move_mount()) currently uses the resulting path->mnt, and that value is trivial to reconstruct by the original value of path->mnt + chosen parent mount. Let's keep the target path unchanged; it avoids a bunch of subtle races and it's not hard to do:

do
	as mount_locked_reader
		find the prospective parent mount/mountpoint dentry
		grab references if it's not the original target
	lock the prospective mountpoint dentry
	take namespace_sem exclusive
	if prospective parent/mountpoint would be different now
		err = -EAGAIN
	else if location has been unmounted
		err = -ENOENT
	else if mountpoint dentry is not allowed to be mounted on
		err = -ENOENT
	else if beneath and the top of the pile was the absolute root
		err = -EINVAL
	else
		try to get struct mountpoint (by dentry); set err to 0
		on success and to -ENO{MEM,ENT} on failure
	if err != 0
		res->parent = ERR_PTR(err)
		drop locks
	else
		res->parent = prospective parent
	drop temporary references
while err == -EAGAIN

A somewhat subtle part is that dropping the temporary references is allowed. Neither mounts nor dentries should be evicted by a thread that holds namespace_sem. On success we are dropping those references under namespace_sem, so we need to be sure that these are not the last references remaining.
However, on success we'd already verified (under namespace_sem) that original target is still mounted and that mount and dentry we are about to drop are still reachable from it via the mount tree. That guarantees that we are not about to drop the last remaining references. Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
1 parent 25423ed commit 90006f2

1 file changed

Lines changed: 63 additions & 56 deletions

File tree

fs/namespace.c

Lines changed: 63 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -2727,6 +2727,27 @@ static int attach_recursive_mnt(struct mount *source_mnt,
27272727
return err;
27282728
}
27292729

2730+
static inline struct mount *where_to_mount(const struct path *path,
2731+
struct dentry **dentry,
2732+
bool beneath)
2733+
{
2734+
struct mount *m;
2735+
2736+
if (unlikely(beneath)) {
2737+
m = topmost_overmount(real_mount(path->mnt));
2738+
*dentry = m->mnt_mountpoint;
2739+
return m->mnt_parent;
2740+
}
2741+
m = __lookup_mnt(path->mnt, path->dentry);
2742+
if (unlikely(m)) {
2743+
m = topmost_overmount(m);
2744+
*dentry = m->mnt.mnt_root;
2745+
return m;
2746+
}
2747+
*dentry = path->dentry;
2748+
return real_mount(path->mnt);
2749+
}
2750+
27302751
/**
27312752
* do_lock_mount - acquire environment for mounting
27322753
* @path: target path
@@ -2758,81 +2779,65 @@ static int attach_recursive_mnt(struct mount *source_mnt,
27582779
* case we also require the location to be at the root of a mount
27592780
* that has a parent (i.e. is not a root of some namespace).
27602781
*/
2761-
static void do_lock_mount(struct path *path, struct pinned_mountpoint *res, bool beneath)
2782+
static void do_lock_mount(const struct path *path,
2783+
struct pinned_mountpoint *res,
2784+
bool beneath)
27622785
{
2763-
struct vfsmount *mnt = path->mnt;
2764-
struct dentry *dentry;
2765-
struct path under = {};
2766-
int err = -ENOENT;
2786+
int err;
27672787

27682788
if (unlikely(beneath) && !path_mounted(path)) {
27692789
res->parent = ERR_PTR(-EINVAL);
27702790
return;
27712791
}
27722792

2773-
for (;;) {
2774-
struct mount *m = real_mount(mnt);
2775-
2776-
if (beneath) {
2777-
path_put(&under);
2778-
read_seqlock_excl(&mount_lock);
2779-
if (unlikely(!mnt_has_parent(m))) {
2780-
read_sequnlock_excl(&mount_lock);
2781-
res->parent = ERR_PTR(-EINVAL);
2782-
return;
2793+
do {
2794+
struct dentry *dentry, *d;
2795+
struct mount *m, *n;
2796+
2797+
scoped_guard(mount_locked_reader) {
2798+
m = where_to_mount(path, &dentry, beneath);
2799+
if (&m->mnt != path->mnt) {
2800+
mntget(&m->mnt);
2801+
dget(dentry);
27832802
}
2784-
under.mnt = mntget(&m->mnt_parent->mnt);
2785-
under.dentry = dget(m->mnt_mountpoint);
2786-
read_sequnlock_excl(&mount_lock);
2787-
dentry = under.dentry;
2788-
} else {
2789-
dentry = path->dentry;
27902803
}
27912804

27922805
inode_lock(dentry->d_inode);
27932806
namespace_lock();
27942807

2795-
if (unlikely(cant_mount(dentry) || !is_mounted(mnt)))
2796-
break; // not to be mounted on
2808+
// check if the chain of mounts (if any) has changed.
2809+
scoped_guard(mount_locked_reader)
2810+
n = where_to_mount(path, &d, beneath);
27972811

2798-
if (beneath && unlikely(m->mnt_mountpoint != dentry ||
2799-
&m->mnt_parent->mnt != under.mnt)) {
2800-
namespace_unlock();
2801-
inode_unlock(dentry->d_inode);
2802-
continue; // got moved
2803-
}
2812+
if (unlikely(n != m || dentry != d))
2813+
err = -EAGAIN; // something moved, retry
2814+
else if (unlikely(cant_mount(dentry) || !is_mounted(path->mnt)))
2815+
err = -ENOENT; // not to be mounted on
2816+
else if (beneath && &m->mnt == path->mnt && !m->overmount)
2817+
err = -EINVAL;
2818+
else
2819+
err = get_mountpoint(dentry, res);
28042820

2805-
mnt = lookup_mnt(path);
2806-
if (unlikely(mnt)) {
2821+
if (unlikely(err)) {
2822+
res->parent = ERR_PTR(err);
28072823
namespace_unlock();
28082824
inode_unlock(dentry->d_inode);
2809-
path_put(path);
2810-
path->mnt = mnt;
2811-
path->dentry = dget(mnt->mnt_root);
2812-
continue; // got overmounted
2825+
} else {
2826+
res->parent = m;
28132827
}
2814-
err = get_mountpoint(dentry, res);
2815-
if (err)
2816-
break;
2817-
if (beneath) {
2818-
/*
2819-
* @under duplicates the references that will stay
2820-
* at least until namespace_unlock(), so the path_put()
2821-
* below is safe (and OK to do under namespace_lock -
2822-
* we are not dropping the final references here).
2823-
*/
2824-
path_put(&under);
2825-
res->parent = real_mount(path->mnt)->mnt_parent;
2826-
return;
2828+
/*
2829+
* Drop the temporary references. This is subtle - on success
2830+
* we are doing that under namespace_sem, which would normally
2831+
* be forbidden. However, in that case we are guaranteed that
2832+
* refcounts won't reach zero, since we know that path->mnt
2833+
* is mounted and thus all mounts reachable from it are pinned
2834+
* and stable, along with their mountpoints and roots.
2835+
*/
2836+
if (&m->mnt != path->mnt) {
2837+
dput(dentry);
2838+
mntput(&m->mnt);
28272839
}
2828-
res->parent = real_mount(path->mnt);
2829-
return;
2830-
}
2831-
namespace_unlock();
2832-
inode_unlock(dentry->d_inode);
2833-
if (beneath)
2834-
path_put(&under);
2835-
res->parent = ERR_PTR(err);
2840+
} while (err == -EAGAIN);
28362841
}
28372842

28382843
static void __unlock_mount(struct pinned_mountpoint *m)
@@ -3613,6 +3618,8 @@ static int do_move_mount(struct path *old_path,
36133618
if (beneath) {
36143619
struct mount *over = real_mount(new_path->mnt);
36153620

3621+
if (mp.parent != over->mnt_parent)
3622+
over = mp.parent->overmount;
36163623
err = can_move_mount_beneath(old, over, mp.mp);
36173624
if (err)
36183625
return err;

0 commit comments

Comments
 (0)