Skip to content

Commit 2bd5d41

Browse files
committed
Merge tag 'fuse-update-6.0' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse
Pull fuse updates from Miklos Szeredi: - Fix an issue with reusing the bdi in case of block based filesystems - Allow root (in init namespace) to access fuse filesystems in user namespaces if explicitly enabled with a module param - Misc fixes * tag 'fuse-update-6.0' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse: fuse: retire block-device-based superblock on force unmount vfs: function to prevent re-use of block-device-based superblocks virtio_fs: Modify format for virtio_fs_direct_access virtiofs: delete unused parameter for virtio_fs_cleanup_vqs fuse: Add module param for CAP_SYS_ADMIN access bypassing allow_other fuse: Remove the control interface for virtio-fs fuse: ioctl: translate ENOSYS fuse: limit nsec fuse: avoid unnecessary spinlock bump fuse: fix deadlock between atomic O_TRUNC and page invalidation fuse: write inode in fuse_release()
2 parents 65512eb + 247861c commit 2bd5d41

10 files changed

Lines changed: 132 additions & 33 deletions

File tree

Documentation/filesystems/fuse.rst

Lines changed: 24 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -279,7 +279,7 @@ How are requirements fulfilled?
279279
the filesystem or not.
280280

281281
Note that the *ptrace* check is not strictly necessary to
282-
prevent B/2/i, it is enough to check if mount owner has enough
282+
prevent C/2/i, it is enough to check if mount owner has enough
283283
privilege to send signal to the process accessing the
284284
filesystem, since *SIGSTOP* can be used to get a similar effect.
285285

@@ -288,10 +288,29 @@ I think these limitations are unacceptable?
288288

289289
If a sysadmin trusts the users enough, or can ensure through other
290290
measures, that system processes will never enter non-privileged
291-
mounts, it can relax the last limitation with a 'user_allow_other'
292-
config option. If this config option is set, the mounting user can
293-
add the 'allow_other' mount option which disables the check for other
294-
users' processes.
291+
mounts, it can relax the last limitation in several ways:
292+
293+
- With the 'user_allow_other' config option. If this config option is
294+
set, the mounting user can add the 'allow_other' mount option which
295+
disables the check for other users' processes.
296+
297+
User namespaces have an unintuitive interaction with 'allow_other':
298+
an unprivileged user - normally restricted from mounting with
299+
'allow_other' - could do so in a user namespace where they're
300+
privileged. If any process could access such an 'allow_other' mount
301+
this would give the mounting user the ability to manipulate
302+
processes in user namespaces where they're unprivileged. For this
303+
reason 'allow_other' restricts access to users in the same userns
304+
or a descendant.
305+
306+
- With the 'allow_sys_admin_access' module option. If this option is
307+
set, super user's processes have unrestricted access to mounts
308+
irrespective of allow_other setting or user namespace of the
309+
mounting user.
310+
311+
Note that both of these relaxations expose the system to potential
312+
information leak or *DoS* as described in points B and C/2/i-ii in the
313+
preceding section.
295314

296315
Kernel - userspace interface
297316
============================

fs/fuse/control.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -258,7 +258,7 @@ int fuse_ctl_add_conn(struct fuse_conn *fc)
258258
struct dentry *parent;
259259
char name[32];
260260

261-
if (!fuse_control_sb)
261+
if (!fuse_control_sb || fc->no_control)
262262
return 0;
263263

264264
parent = fuse_control_sb->s_root;
@@ -296,7 +296,7 @@ void fuse_ctl_remove_conn(struct fuse_conn *fc)
296296
{
297297
int i;
298298

299-
if (!fuse_control_sb)
299+
if (!fuse_control_sb || fc->no_control)
300300
return;
301301

302302
for (i = fc->ctl_ndents - 1; i >= 0; i--) {

fs/fuse/dax.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -138,9 +138,9 @@ static struct fuse_dax_mapping *alloc_dax_mapping(struct fuse_conn_dax *fcd)
138138
WARN_ON(fcd->nr_free_ranges <= 0);
139139
fcd->nr_free_ranges--;
140140
}
141+
__kick_dmap_free_worker(fcd, 0);
141142
spin_unlock(&fcd->lock);
142143

143-
kick_dmap_free_worker(fcd, 0);
144144
return dmap;
145145
}
146146

fs/fuse/dir.c

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include <linux/pagemap.h>
1212
#include <linux/file.h>
1313
#include <linux/fs_context.h>
14+
#include <linux/moduleparam.h>
1415
#include <linux/sched.h>
1516
#include <linux/namei.h>
1617
#include <linux/slab.h>
@@ -21,6 +22,11 @@
2122
#include <linux/types.h>
2223
#include <linux/kernel.h>
2324

25+
static bool __read_mostly allow_sys_admin_access;
26+
module_param(allow_sys_admin_access, bool, 0644);
27+
MODULE_PARM_DESC(allow_sys_admin_access,
28+
"Allow users with CAP_SYS_ADMIN in initial userns to bypass allow_other access check");
29+
2430
static void fuse_advise_use_readdirplus(struct inode *dir)
2531
{
2632
struct fuse_inode *fi = get_fuse_inode(dir);
@@ -537,6 +543,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
537543
struct fuse_file *ff;
538544
void *security_ctx = NULL;
539545
u32 security_ctxlen;
546+
bool trunc = flags & O_TRUNC;
540547

541548
/* Userspace expects S_IFREG in create mode */
542549
BUG_ON((mode & S_IFMT) != S_IFREG);
@@ -561,7 +568,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
561568
inarg.mode = mode;
562569
inarg.umask = current_umask();
563570

564-
if (fm->fc->handle_killpriv_v2 && (flags & O_TRUNC) &&
571+
if (fm->fc->handle_killpriv_v2 && trunc &&
565572
!(flags & O_EXCL) && !capable(CAP_FSETID)) {
566573
inarg.open_flags |= FUSE_OPEN_KILL_SUIDGID;
567574
}
@@ -623,6 +630,10 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
623630
} else {
624631
file->private_data = ff;
625632
fuse_finish_open(inode, file);
633+
if (fm->fc->atomic_o_trunc && trunc)
634+
truncate_pagecache(inode, 0);
635+
else if (!(ff->open_flags & FOPEN_KEEP_CACHE))
636+
invalidate_inode_pages2(inode->i_mapping);
626637
}
627638
return err;
628639

@@ -1224,6 +1235,9 @@ int fuse_allow_current_process(struct fuse_conn *fc)
12241235
{
12251236
const struct cred *cred;
12261237

1238+
if (allow_sys_admin_access && capable(CAP_SYS_ADMIN))
1239+
return 1;
1240+
12271241
if (fc->allow_other)
12281242
return current_in_userns(fc->user_ns);
12291243

fs/fuse/file.c

Lines changed: 26 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -210,13 +210,9 @@ void fuse_finish_open(struct inode *inode, struct file *file)
210210
fi->attr_version = atomic64_inc_return(&fc->attr_version);
211211
i_size_write(inode, 0);
212212
spin_unlock(&fi->lock);
213-
truncate_pagecache(inode, 0);
214213
file_update_time(file);
215214
fuse_invalidate_attr_mask(inode, FUSE_STATX_MODSIZE);
216-
} else if (!(ff->open_flags & FOPEN_KEEP_CACHE)) {
217-
invalidate_inode_pages2(inode->i_mapping);
218215
}
219-
220216
if ((file->f_mode & FMODE_WRITE) && fc->writeback_cache)
221217
fuse_link_write_file(file);
222218
}
@@ -239,30 +235,38 @@ int fuse_open_common(struct inode *inode, struct file *file, bool isdir)
239235
if (err)
240236
return err;
241237

242-
if (is_wb_truncate || dax_truncate) {
238+
if (is_wb_truncate || dax_truncate)
243239
inode_lock(inode);
244-
fuse_set_nowrite(inode);
245-
}
246240

247241
if (dax_truncate) {
248242
filemap_invalidate_lock(inode->i_mapping);
249243
err = fuse_dax_break_layouts(inode, 0, 0);
250244
if (err)
251-
goto out;
245+
goto out_inode_unlock;
252246
}
253247

248+
if (is_wb_truncate || dax_truncate)
249+
fuse_set_nowrite(inode);
250+
254251
err = fuse_do_open(fm, get_node_id(inode), file, isdir);
255252
if (!err)
256253
fuse_finish_open(inode, file);
257254

258-
out:
255+
if (is_wb_truncate || dax_truncate)
256+
fuse_release_nowrite(inode);
257+
if (!err) {
258+
struct fuse_file *ff = file->private_data;
259+
260+
if (fc->atomic_o_trunc && (file->f_flags & O_TRUNC))
261+
truncate_pagecache(inode, 0);
262+
else if (!(ff->open_flags & FOPEN_KEEP_CACHE))
263+
invalidate_inode_pages2(inode->i_mapping);
264+
}
259265
if (dax_truncate)
260266
filemap_invalidate_unlock(inode->i_mapping);
261-
262-
if (is_wb_truncate | dax_truncate) {
263-
fuse_release_nowrite(inode);
267+
out_inode_unlock:
268+
if (is_wb_truncate || dax_truncate)
264269
inode_unlock(inode);
265-
}
266270

267271
return err;
268272
}
@@ -338,6 +342,15 @@ static int fuse_open(struct inode *inode, struct file *file)
338342

339343
static int fuse_release(struct inode *inode, struct file *file)
340344
{
345+
struct fuse_conn *fc = get_fuse_conn(inode);
346+
347+
/*
348+
* Dirty pages might remain despite write_inode_now() call from
349+
* fuse_flush() due to writes racing with the close.
350+
*/
351+
if (fc->writeback_cache)
352+
write_inode_now(inode, 1);
353+
341354
fuse_release_common(file, false);
342355

343356
/* return value is ignored by VFS */

fs/fuse/inode.c

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,12 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
180180
inode->i_uid = make_kuid(fc->user_ns, attr->uid);
181181
inode->i_gid = make_kgid(fc->user_ns, attr->gid);
182182
inode->i_blocks = attr->blocks;
183+
184+
/* Sanitize nsecs */
185+
attr->atimensec = min_t(u32, attr->atimensec, NSEC_PER_SEC - 1);
186+
attr->mtimensec = min_t(u32, attr->mtimensec, NSEC_PER_SEC - 1);
187+
attr->ctimensec = min_t(u32, attr->ctimensec, NSEC_PER_SEC - 1);
188+
183189
inode->i_atime.tv_sec = attr->atime;
184190
inode->i_atime.tv_nsec = attr->atimensec;
185191
/* mtime from server may be stale due to local buffered write */
@@ -476,8 +482,14 @@ static void fuse_umount_begin(struct super_block *sb)
476482
{
477483
struct fuse_conn *fc = get_fuse_conn_super(sb);
478484

479-
if (!fc->no_force_umount)
480-
fuse_abort_conn(fc);
485+
if (fc->no_force_umount)
486+
return;
487+
488+
fuse_abort_conn(fc);
489+
490+
// Only retire block-device-based superblocks.
491+
if (sb->s_bdev != NULL)
492+
retire_super(sb);
481493
}
482494

483495
static void fuse_send_destroy(struct fuse_mount *fm)

fs/fuse/ioctl.c

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,17 @@
99
#include <linux/compat.h>
1010
#include <linux/fileattr.h>
1111

12+
static ssize_t fuse_send_ioctl(struct fuse_mount *fm, struct fuse_args *args)
13+
{
14+
ssize_t ret = fuse_simple_request(fm, args);
15+
16+
/* Translate ENOSYS, which shouldn't be returned from fs */
17+
if (ret == -ENOSYS)
18+
ret = -ENOTTY;
19+
20+
return ret;
21+
}
22+
1223
/*
1324
* CUSE servers compiled on 32bit broke on 64bit kernels because the
1425
* ABI was defined to be 'struct iovec' which is different on 32bit
@@ -259,7 +270,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
259270
ap.args.out_pages = true;
260271
ap.args.out_argvar = true;
261272

262-
transferred = fuse_simple_request(fm, &ap.args);
273+
transferred = fuse_send_ioctl(fm, &ap.args);
263274
err = transferred;
264275
if (transferred < 0)
265276
goto out;
@@ -393,7 +404,7 @@ static int fuse_priv_ioctl(struct inode *inode, struct fuse_file *ff,
393404
args.out_args[1].size = inarg.out_size;
394405
args.out_args[1].value = ptr;
395406

396-
err = fuse_simple_request(fm, &args);
407+
err = fuse_send_ioctl(fm, &args);
397408
if (!err) {
398409
if (outarg.result < 0)
399410
err = outarg.result;

fs/fuse/virtio_fs.c

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -741,8 +741,7 @@ static int virtio_fs_setup_vqs(struct virtio_device *vdev,
741741
}
742742

743743
/* Free virtqueues (device must already be reset) */
744-
static void virtio_fs_cleanup_vqs(struct virtio_device *vdev,
745-
struct virtio_fs *fs)
744+
static void virtio_fs_cleanup_vqs(struct virtio_device *vdev)
746745
{
747746
vdev->config->del_vqs(vdev);
748747
}
@@ -757,7 +756,7 @@ static long virtio_fs_direct_access(struct dax_device *dax_dev, pgoff_t pgoff,
757756
{
758757
struct virtio_fs *fs = dax_get_private(dax_dev);
759758
phys_addr_t offset = PFN_PHYS(pgoff);
760-
size_t max_nr_pages = fs->window_len/PAGE_SIZE - pgoff;
759+
size_t max_nr_pages = fs->window_len / PAGE_SIZE - pgoff;
761760

762761
if (kaddr)
763762
*kaddr = fs->window_kaddr + offset;
@@ -895,7 +894,7 @@ static int virtio_fs_probe(struct virtio_device *vdev)
895894

896895
out_vqs:
897896
virtio_reset_device(vdev);
898-
virtio_fs_cleanup_vqs(vdev, fs);
897+
virtio_fs_cleanup_vqs(vdev);
899898
kfree(fs->vqs);
900899

901900
out:
@@ -927,7 +926,7 @@ static void virtio_fs_remove(struct virtio_device *vdev)
927926
virtio_fs_stop_all_queues(fs);
928927
virtio_fs_drain_all_queues_locked(fs);
929928
virtio_reset_device(vdev);
930-
virtio_fs_cleanup_vqs(vdev, fs);
929+
virtio_fs_cleanup_vqs(vdev);
931930

932931
vdev->priv = NULL;
933932
/* Put device reference on virtio_fs object */

fs/super.c

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -422,6 +422,35 @@ bool trylock_super(struct super_block *sb)
422422
return false;
423423
}
424424

425+
/**
426+
* retire_super - prevents superblock from being reused
427+
* @sb: superblock to retire
428+
*
429+
* The function marks superblock to be ignored in superblock test, which
430+
* prevents it from being reused for any new mounts. If the superblock has
431+
* a private bdi, it also unregisters it, but doesn't reduce the refcount
432+
* of the superblock to prevent potential races. The refcount is reduced
433+
* by generic_shutdown_super(). The function can not be called
434+
* concurrently with generic_shutdown_super(). It is safe to call the
435+
* function multiple times, subsequent calls have no effect.
436+
*
437+
* The marker will affect the re-use only for block-device-based
438+
* superblocks. Other superblocks will still get marked if this function
439+
* is used, but that will not affect their reusability.
440+
*/
441+
void retire_super(struct super_block *sb)
442+
{
443+
WARN_ON(!sb->s_bdev);
444+
down_write(&sb->s_umount);
445+
if (sb->s_iflags & SB_I_PERSB_BDI) {
446+
bdi_unregister(sb->s_bdi);
447+
sb->s_iflags &= ~SB_I_PERSB_BDI;
448+
}
449+
sb->s_iflags |= SB_I_RETIRED;
450+
up_write(&sb->s_umount);
451+
}
452+
EXPORT_SYMBOL(retire_super);
453+
425454
/**
426455
* generic_shutdown_super - common helper for ->kill_sb()
427456
* @sb: superblock to kill
@@ -1216,7 +1245,7 @@ static int set_bdev_super_fc(struct super_block *s, struct fs_context *fc)
12161245

12171246
static int test_bdev_super_fc(struct super_block *s, struct fs_context *fc)
12181247
{
1219-
return s->s_bdev == fc->sget_key;
1248+
return !(s->s_iflags & SB_I_RETIRED) && s->s_bdev == fc->sget_key;
12201249
}
12211250

12221251
/**
@@ -1309,7 +1338,7 @@ EXPORT_SYMBOL(get_tree_bdev);
13091338

13101339
static int test_bdev_super(struct super_block *s, void *data)
13111340
{
1312-
return (void *)s->s_bdev == data;
1341+
return !(s->s_iflags & SB_I_RETIRED) && (void *)s->s_bdev == data;
13131342
}
13141343

13151344
struct dentry *mount_bdev(struct file_system_type *fs_type,

include/linux/fs.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1433,6 +1433,7 @@ extern int send_sigurg(struct fown_struct *fown);
14331433
#define SB_I_SKIP_SYNC 0x00000100 /* Skip superblock at global sync */
14341434
#define SB_I_PERSB_BDI 0x00000200 /* has a per-sb bdi */
14351435
#define SB_I_TS_EXPIRY_WARNED 0x00000400 /* warned about timestamp range expiry */
1436+
#define SB_I_RETIRED 0x00000800 /* superblock shouldn't be reused */
14361437

14371438
/* Possible states of 'frozen' field */
14381439
enum {
@@ -2565,6 +2566,7 @@ extern struct dentry *mount_nodev(struct file_system_type *fs_type,
25652566
int flags, void *data,
25662567
int (*fill_super)(struct super_block *, void *, int));
25672568
extern struct dentry *mount_subtree(struct vfsmount *mnt, const char *path);
2569+
void retire_super(struct super_block *sb);
25682570
void generic_shutdown_super(struct super_block *sb);
25692571
void kill_block_super(struct super_block *sb);
25702572
void kill_anon_super(struct super_block *sb);

0 commit comments

Comments
 (0)