Skip to content

Commit 76c63ff

Browse files
committed
Merge patch series "vfs: recall-only directory delegations for knfsd"
Jeff Layton <jlayton@kernel.org> says:

At the fall NFS Bakeathon last week, the NFS client and server maintainers had a discussion about how to merge support for directory delegations. We decided to start by merging just simple, recallable-only directory delegation support, for a number of reasons:

1/ RFC8881 has some gaps in coverage that we are hoping to have addressed in RFC8881bis. In particular, it's written such that CB_NOTIFY callbacks require directory position information. That will be hard to do properly under Linux, so we're planning to extend the spec to allow that information to be omitted.

2/ Client-side support for CB_NOTIFY still lags a bit. The client side is tricky, as it involves heuristics about when to request a delegation.

3/ We have some early indication that simple, recallable-only delegations can help performance in some cases. Anna mentioned seeing a multi-minute speedup in xfstests runs with them enabled. This needs more investigation, but it's promising and seems like enough justification to merge support.

This patchset is quite similar to the set I initially posted back in early 2024. We've merged some GET_DIR_DELEGATION handling patches since then, but the VFS layer support is basically the same.

One thing that I want to make clear is that with this patchset, userspace can request a read lease on a directory that will be recalled on conflicting accesses. I saw no reason to prevent this, and I think it may be something useful for applications like Samba. As always, users can disable leases altogether via the fs.leases-enable sysctl if this is an issue, but I wanted to point this out in case anyone sees footguns here.
* patches from https://patch.msgid.link/20251111-dir-deleg-ro-v6-0-52f3feebb2f2@kernel.org:
  vfs: expose delegation support to userland
  nfsd: wire up GET_DIR_DELEGATION handling
  nfsd: allow DELEGRETURN on directories
  nfsd: allow filecache to hold S_IFDIR files
  filelock: lift the ban on directory leases in generic_setlease
  vfs: make vfs_symlink break delegations on parent dir
  vfs: make vfs_mknod break delegations on parent directory
  vfs: make vfs_create break delegations on parent directory
  vfs: clean up argument list for vfs_create()
  vfs: break parent dir delegations in open(..., O_CREAT) codepath
  vfs: allow rmdir to wait for delegation break on parent
  vfs: allow mkdir to wait for delegation break on parent
  vfs: add try_break_deleg calls for parents to vfs_{link,rename,unlink}
  filelock: push the S_ISREG check down to ->setlease handlers
  filelock: add struct delegated_inode
  filelock: rework the __break_lease API to use flags
  filelock: make lease_alloc() take a flags argument

Link: https://patch.msgid.link/20251111-dir-deleg-ro-v6-0-52f3feebb2f2@kernel.org
Signed-off-by: Christian Brauner <brauner@kernel.org>
2 parents 3a86608 + 1602bad commit 76c63ff

32 files changed

Lines changed: 543 additions & 176 deletions

File tree

drivers/base/devtmpfs.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -180,7 +180,7 @@ static int dev_mkdir(const char *name, umode_t mode)
180180
if (IS_ERR(dentry))
181181
return PTR_ERR(dentry);
182182

183-
dentry = vfs_mkdir(&nop_mnt_idmap, d_inode(path.dentry), dentry, mode);
183+
dentry = vfs_mkdir(&nop_mnt_idmap, d_inode(path.dentry), dentry, mode, NULL);
184184
if (!IS_ERR(dentry))
185185
/* mark as kernel-created inode */
186186
d_inode(dentry)->i_private = &thread;
@@ -231,7 +231,7 @@ static int handle_create(const char *nodename, umode_t mode, kuid_t uid,
231231
return PTR_ERR(dentry);
232232

233233
err = vfs_mknod(&nop_mnt_idmap, d_inode(path.dentry), dentry, mode,
234-
dev->devt);
234+
dev->devt, NULL);
235235
if (!err) {
236236
struct iattr newattrs;
237237

@@ -261,7 +261,7 @@ static int dev_rmdir(const char *name)
261261
return PTR_ERR(dentry);
262262
if (d_inode(dentry)->i_private == &thread)
263263
err = vfs_rmdir(&nop_mnt_idmap, d_inode(parent.dentry),
264-
dentry);
264+
dentry, NULL);
265265
else
266266
err = -EPERM;
267267

fs/attr.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -415,7 +415,7 @@ EXPORT_SYMBOL(may_setattr);
415415
* performed on the raw inode simply pass @nop_mnt_idmap.
416416
*/
417417
int notify_change(struct mnt_idmap *idmap, struct dentry *dentry,
418-
struct iattr *attr, struct inode **delegated_inode)
418+
struct iattr *attr, struct delegated_inode *delegated_inode)
419419
{
420420
struct inode *inode = dentry->d_inode;
421421
umode_t mode = inode->i_mode;

fs/cachefiles/namei.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,7 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache,
130130
goto mkdir_error;
131131
ret = cachefiles_inject_write_error();
132132
if (ret == 0)
133-
subdir = vfs_mkdir(&nop_mnt_idmap, d_inode(dir), subdir, 0700);
133+
subdir = vfs_mkdir(&nop_mnt_idmap, d_inode(dir), subdir, 0700, NULL);
134134
else
135135
subdir = ERR_PTR(ret);
136136
if (IS_ERR(subdir)) {

fs/ecryptfs/inode.c

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -188,8 +188,7 @@ ecryptfs_do_create(struct inode *directory_inode,
188188

189189
rc = lock_parent(ecryptfs_dentry, &lower_dentry, &lower_dir);
190190
if (!rc)
191-
rc = vfs_create(&nop_mnt_idmap, lower_dir,
192-
lower_dentry, mode, true);
191+
rc = vfs_create(&nop_mnt_idmap, lower_dentry, mode, NULL);
193192
if (rc) {
194193
printk(KERN_ERR "%s: Failure to create dentry in lower fs; "
195194
"rc = [%d]\n", __func__, rc);
@@ -480,7 +479,7 @@ static int ecryptfs_symlink(struct mnt_idmap *idmap,
480479
if (rc)
481480
goto out_lock;
482481
rc = vfs_symlink(&nop_mnt_idmap, lower_dir, lower_dentry,
483-
encoded_symname);
482+
encoded_symname, NULL);
484483
kfree(encoded_symname);
485484
if (rc || d_really_is_negative(lower_dentry))
486485
goto out_lock;
@@ -508,7 +507,7 @@ static struct dentry *ecryptfs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
508507
goto out;
509508

510509
lower_dentry = vfs_mkdir(&nop_mnt_idmap, lower_dir,
511-
lower_dentry, mode);
510+
lower_dentry, mode, NULL);
512511
rc = PTR_ERR(lower_dentry);
513512
if (IS_ERR(lower_dentry))
514513
goto out;
@@ -540,7 +539,7 @@ static int ecryptfs_rmdir(struct inode *dir, struct dentry *dentry)
540539
if (d_unhashed(lower_dentry))
541540
rc = -EINVAL;
542541
else
543-
rc = vfs_rmdir(&nop_mnt_idmap, lower_dir, lower_dentry);
542+
rc = vfs_rmdir(&nop_mnt_idmap, lower_dir, lower_dentry, NULL);
544543
}
545544
if (!rc) {
546545
clear_nlink(d_inode(dentry));
@@ -565,7 +564,7 @@ ecryptfs_mknod(struct mnt_idmap *idmap, struct inode *dir,
565564
rc = lock_parent(dentry, &lower_dentry, &lower_dir);
566565
if (!rc)
567566
rc = vfs_mknod(&nop_mnt_idmap, lower_dir,
568-
lower_dentry, mode, dev);
567+
lower_dentry, mode, dev, NULL);
569568
if (rc || d_really_is_negative(lower_dentry))
570569
goto out;
571570
rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb);

fs/fcntl.c

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -445,6 +445,7 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
445445
struct file *filp)
446446
{
447447
void __user *argp = (void __user *)arg;
448+
struct delegation deleg;
448449
int argi = (int)arg;
449450
struct flock flock;
450451
long err = -EINVAL;
@@ -550,6 +551,18 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
550551
case F_SET_RW_HINT:
551552
err = fcntl_set_rw_hint(filp, arg);
552553
break;
554+
case F_GETDELEG:
555+
if (copy_from_user(&deleg, argp, sizeof(deleg)))
556+
return -EFAULT;
557+
err = fcntl_getdeleg(filp, &deleg);
558+
if (!err && copy_to_user(argp, &deleg, sizeof(deleg)))
559+
return -EFAULT;
560+
break;
561+
case F_SETDELEG:
562+
if (copy_from_user(&deleg, argp, sizeof(deleg)))
563+
return -EFAULT;
564+
err = fcntl_setdeleg(fd, filp, &deleg);
565+
break;
553566
default:
554567
break;
555568
}

fs/fuse/dir.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2230,6 +2230,7 @@ static const struct file_operations fuse_dir_operations = {
22302230
.fsync = fuse_dir_fsync,
22312231
.unlocked_ioctl = fuse_dir_ioctl,
22322232
.compat_ioctl = fuse_dir_compat_ioctl,
2233+
.setlease = simple_nosetlease,
22332234
};
22342235

22352236
static const struct inode_operations fuse_common_inode_operations = {

fs/init.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,7 @@ int __init init_mknod(const char *filename, umode_t mode, unsigned int dev)
157157
error = security_path_mknod(&path, dentry, mode, dev);
158158
if (!error)
159159
error = vfs_mknod(mnt_idmap(path.mnt), path.dentry->d_inode,
160-
dentry, mode, new_decode_dev(dev));
160+
dentry, mode, new_decode_dev(dev), NULL);
161161
end_creating_path(&path, dentry);
162162
return error;
163163
}
@@ -209,7 +209,7 @@ int __init init_symlink(const char *oldname, const char *newname)
209209
error = security_path_symlink(&path, dentry, oldname);
210210
if (!error)
211211
error = vfs_symlink(mnt_idmap(path.mnt), path.dentry->d_inode,
212-
dentry, oldname);
212+
dentry, oldname, NULL);
213213
end_creating_path(&path, dentry);
214214
return error;
215215
}
@@ -233,7 +233,7 @@ int __init init_mkdir(const char *pathname, umode_t mode)
233233
error = security_path_mkdir(&path, dentry, mode);
234234
if (!error) {
235235
dentry = vfs_mkdir(mnt_idmap(path.mnt), path.dentry->d_inode,
236-
dentry, mode);
236+
dentry, mode, NULL);
237237
if (IS_ERR(dentry))
238238
error = PTR_ERR(dentry);
239239
}

fs/locks.c

Lines changed: 75 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -585,7 +585,7 @@ static const struct lease_manager_operations lease_manager_ops = {
585585
/*
586586
* Initialize a lease, use the default lock manager operations
587587
*/
588-
static int lease_init(struct file *filp, int type, struct file_lease *fl)
588+
static int lease_init(struct file *filp, unsigned int flags, int type, struct file_lease *fl)
589589
{
590590
if (assign_type(&fl->c, type) != 0)
591591
return -EINVAL;
@@ -594,21 +594,21 @@ static int lease_init(struct file *filp, int type, struct file_lease *fl)
594594
fl->c.flc_pid = current->tgid;
595595

596596
fl->c.flc_file = filp;
597-
fl->c.flc_flags = FL_LEASE;
597+
fl->c.flc_flags = flags;
598598
fl->fl_lmops = &lease_manager_ops;
599599
return 0;
600600
}
601601

602602
/* Allocate a file_lock initialised to this type of lease */
603-
static struct file_lease *lease_alloc(struct file *filp, int type)
603+
static struct file_lease *lease_alloc(struct file *filp, unsigned int flags, int type)
604604
{
605605
struct file_lease *fl = locks_alloc_lease();
606606
int error = -ENOMEM;
607607

608608
if (fl == NULL)
609609
return ERR_PTR(error);
610610

611-
error = lease_init(filp, type, fl);
611+
error = lease_init(filp, flags, type, fl);
612612
if (error) {
613613
locks_free_lease(fl);
614614
return ERR_PTR(error);
@@ -1529,29 +1529,35 @@ any_leases_conflict(struct inode *inode, struct file_lease *breaker)
15291529
/**
15301530
* __break_lease - revoke all outstanding leases on file
15311531
* @inode: the inode of the file to return
1532-
* @mode: O_RDONLY: break only write leases; O_WRONLY or O_RDWR:
1533-
* break all leases
1534-
* @type: FL_LEASE: break leases and delegations; FL_DELEG: break
1535-
* only delegations
1532+
* @flags: LEASE_BREAK_* flags
15361533
*
15371534
* break_lease (inlined for speed) has checked there already is at least
15381535
* some kind of lock (maybe a lease) on this file. Leases are broken on
1539-
* a call to open() or truncate(). This function can sleep unless you
1540-
* specified %O_NONBLOCK to your open().
1536+
* a call to open() or truncate(). This function can block waiting for the
1537+
* lease break unless you specify LEASE_BREAK_NONBLOCK.
15411538
*/
1542-
int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
1539+
int __break_lease(struct inode *inode, unsigned int flags)
15431540
{
1544-
int error = 0;
1545-
struct file_lock_context *ctx;
15461541
struct file_lease *new_fl, *fl, *tmp;
1542+
struct file_lock_context *ctx;
15471543
unsigned long break_time;
1548-
int want_write = (mode & O_ACCMODE) != O_RDONLY;
1544+
unsigned int type;
15491545
LIST_HEAD(dispose);
1546+
bool want_write = !(flags & LEASE_BREAK_OPEN_RDONLY);
1547+
int error = 0;
15501548

1551-
new_fl = lease_alloc(NULL, want_write ? F_WRLCK : F_RDLCK);
1549+
if (flags & LEASE_BREAK_LEASE)
1550+
type = FL_LEASE;
1551+
else if (flags & LEASE_BREAK_DELEG)
1552+
type = FL_DELEG;
1553+
else if (flags & LEASE_BREAK_LAYOUT)
1554+
type = FL_LAYOUT;
1555+
else
1556+
return -EINVAL;
1557+
1558+
new_fl = lease_alloc(NULL, type, want_write ? F_WRLCK : F_RDLCK);
15521559
if (IS_ERR(new_fl))
15531560
return PTR_ERR(new_fl);
1554-
new_fl->c.flc_flags = type;
15551561

15561562
/* typically we will check that ctx is non-NULL before calling */
15571563
ctx = locks_inode_context(inode);
@@ -1596,7 +1602,7 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
15961602
if (list_empty(&ctx->flc_lease))
15971603
goto out;
15981604

1599-
if (mode & O_NONBLOCK) {
1605+
if (flags & LEASE_BREAK_NONBLOCK) {
16001606
trace_break_lease_noblock(inode, new_fl);
16011607
error = -EWOULDBLOCK;
16021608
goto out;
@@ -1697,7 +1703,7 @@ EXPORT_SYMBOL(lease_get_mtime);
16971703
* XXX: sfr & willy disagree over whether F_INPROGRESS
16981704
* should be returned to userspace.
16991705
*/
1700-
int fcntl_getlease(struct file *filp)
1706+
static int __fcntl_getlease(struct file *filp, unsigned int flavor)
17011707
{
17021708
struct file_lease *fl;
17031709
struct inode *inode = file_inode(filp);
@@ -1713,7 +1719,8 @@ int fcntl_getlease(struct file *filp)
17131719
list_for_each_entry(fl, &ctx->flc_lease, c.flc_list) {
17141720
if (fl->c.flc_file != filp)
17151721
continue;
1716-
type = target_leasetype(fl);
1722+
if (fl->c.flc_flags & flavor)
1723+
type = target_leasetype(fl);
17171724
break;
17181725
}
17191726
spin_unlock(&ctx->flc_lock);
@@ -1724,6 +1731,19 @@ int fcntl_getlease(struct file *filp)
17241731
return type;
17251732
}
17261733

1734+
int fcntl_getlease(struct file *filp)
1735+
{
1736+
return __fcntl_getlease(filp, FL_LEASE);
1737+
}
1738+
1739+
int fcntl_getdeleg(struct file *filp, struct delegation *deleg)
1740+
{
1741+
if (deleg->d_flags != 0 || deleg->__pad != 0)
1742+
return -EINVAL;
1743+
deleg->d_type = __fcntl_getlease(filp, FL_DELEG);
1744+
return 0;
1745+
}
1746+
17271747
/**
17281748
* check_conflicting_open - see if the given file points to an inode that has
17291749
* an existing open that would conflict with the
@@ -1929,11 +1949,19 @@ static int generic_delete_lease(struct file *filp, void *owner)
19291949
int generic_setlease(struct file *filp, int arg, struct file_lease **flp,
19301950
void **priv)
19311951
{
1952+
struct inode *inode = file_inode(filp);
1953+
1954+
if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
1955+
return -EINVAL;
1956+
19321957
switch (arg) {
19331958
case F_UNLCK:
19341959
return generic_delete_lease(filp, *priv);
1935-
case F_RDLCK:
19361960
case F_WRLCK:
1961+
if (S_ISDIR(inode->i_mode))
1962+
return -EINVAL;
1963+
fallthrough;
1964+
case F_RDLCK:
19371965
if (!(*flp)->fl_lmops->lm_break) {
19381966
WARN_ON_ONCE(1);
19391967
return -ENOLCK;
@@ -2018,22 +2046,20 @@ vfs_setlease(struct file *filp, int arg, struct file_lease **lease, void **priv)
20182046

20192047
if ((!vfsuid_eq_kuid(vfsuid, current_fsuid())) && !capable(CAP_LEASE))
20202048
return -EACCES;
2021-
if (!S_ISREG(inode->i_mode))
2022-
return -EINVAL;
20232049
error = security_file_lock(filp, arg);
20242050
if (error)
20252051
return error;
20262052
return kernel_setlease(filp, arg, lease, priv);
20272053
}
20282054
EXPORT_SYMBOL_GPL(vfs_setlease);
20292055

2030-
static int do_fcntl_add_lease(unsigned int fd, struct file *filp, int arg)
2056+
static int do_fcntl_add_lease(unsigned int fd, struct file *filp, unsigned int flavor, int arg)
20312057
{
20322058
struct file_lease *fl;
20332059
struct fasync_struct *new;
20342060
int error;
20352061

2036-
fl = lease_alloc(filp, arg);
2062+
fl = lease_alloc(filp, flavor, arg);
20372063
if (IS_ERR(fl))
20382064
return PTR_ERR(fl);
20392065

@@ -2064,9 +2090,33 @@ static int do_fcntl_add_lease(unsigned int fd, struct file *filp, int arg)
20642090
*/
20652091
int fcntl_setlease(unsigned int fd, struct file *filp, int arg)
20662092
{
2093+
if (S_ISDIR(file_inode(filp)->i_mode))
2094+
return -EINVAL;
2095+
20672096
if (arg == F_UNLCK)
20682097
return vfs_setlease(filp, F_UNLCK, NULL, (void **)&filp);
2069-
return do_fcntl_add_lease(fd, filp, arg);
2098+
return do_fcntl_add_lease(fd, filp, FL_LEASE, arg);
2099+
}
2100+
2101+
/**
2102+
* fcntl_setdeleg - sets a delegation on an open file
2103+
* @fd: open file descriptor
2104+
* @filp: file pointer
2105+
* @deleg: delegation request from userland
2106+
*
2107+
* Call this fcntl to establish a delegation on the file.
2108+
* Note that you also need to call %F_SETSIG to
2109+
* receive a signal when the lease is broken.
2110+
*/
2111+
int fcntl_setdeleg(unsigned int fd, struct file *filp, struct delegation *deleg)
2112+
{
2113+
/* For now, no flags are supported */
2114+
if (deleg->d_flags != 0 || deleg->__pad != 0)
2115+
return -EINVAL;
2116+
2117+
if (deleg->d_type == F_UNLCK)
2118+
return vfs_setlease(filp, F_UNLCK, NULL, (void **)&filp);
2119+
return do_fcntl_add_lease(fd, filp, FL_DELEG, deleg->d_type);
20702120
}
20712121

20722122
/**

0 commit comments

Comments
 (0)