Skip to content

Commit 887e977

Browse files
mjguzikbrauner
authored andcommitted
fs: track the inode having file locks with a flag in ->i_opflags
Opening and closing an inode dirties the ->i_readcount field. Depending on the alignment of the inode, it may happen to false-share with other fields loaded both for both operations to various extent. This notably concerns the ->i_flctx field. Since most inodes don't have the field populated, this bit can be managed with a flag in ->i_opflags instead which bypasses the problem. Here are results I obtained while opening a file read-only in a loop with 24 cores doing the work on Sapphire Rapids. Utilizing the flag as opposed to reading ->i_flctx field was toggled at runtime as the benchmark was running, to make sure both results come from the same alignment. before: 3233740 after: 3373346 (+4%) before: 3284313 after: 3518711 (+7%) before: 3505545 after: 4092806 (+16%) Or to put it differently, this varies wildly depending on how (un)lucky you get. The primary bottleneck before and after is the avoidable lockref trip in do_dentry_open(). Reviewed-by: Jeff Layton <jlayton@kernel.org> Signed-off-by: Mateusz Guzik <mjguzik@gmail.com> Link: https://patch.msgid.link/20251203094837.290654-2-mjguzik@gmail.com Signed-off-by: Christian Brauner <brauner@kernel.org>
1 parent 1fa4e69 commit 887e977

3 files changed

Lines changed: 24 additions & 6 deletions

File tree

fs/locks.c

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -178,7 +178,6 @@ locks_get_lock_context(struct inode *inode, int type)
178178
{
179179
struct file_lock_context *ctx;
180180

181-
/* paired with cmpxchg() below */
182181
ctx = locks_inode_context(inode);
183182
if (likely(ctx) || type == F_UNLCK)
184183
goto out;
@@ -196,7 +195,18 @@ locks_get_lock_context(struct inode *inode, int type)
196195
* Assign the pointer if it's not already assigned. If it is, then
197196
* free the context we just allocated.
198197
*/
199-
if (cmpxchg(&inode->i_flctx, NULL, ctx)) {
198+
spin_lock(&inode->i_lock);
199+
if (!(inode->i_opflags & IOP_FLCTX)) {
200+
VFS_BUG_ON_INODE(inode->i_flctx, inode);
201+
WRITE_ONCE(inode->i_flctx, ctx);
202+
/*
203+
* Paired with locks_inode_context().
204+
*/
205+
smp_store_release(&inode->i_opflags, inode->i_opflags | IOP_FLCTX);
206+
spin_unlock(&inode->i_lock);
207+
} else {
208+
VFS_BUG_ON_INODE(!inode->i_flctx, inode);
209+
spin_unlock(&inode->i_lock);
200210
kmem_cache_free(flctx_cache, ctx);
201211
ctx = locks_inode_context(inode);
202212
}

include/linux/filelock.h

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -242,8 +242,12 @@ static inline struct file_lock_context *
242242
locks_inode_context(const struct inode *inode)
243243
{
244244
/*
245-
* Paired with the fence in locks_get_lock_context().
245+
* Paired with smp_store_release in locks_get_lock_context().
246+
*
247+
* Ensures ->i_flctx will be visible if we spotted the flag.
246248
*/
249+
if (likely(!(smp_load_acquire(&inode->i_opflags) & IOP_FLCTX)))
250+
return NULL;
247251
return READ_ONCE(inode->i_flctx);
248252
}
249253

@@ -471,7 +475,7 @@ static inline int break_lease(struct inode *inode, unsigned int mode)
471475
* could end up racing with tasks trying to set a new lease on this
472476
* file.
473477
*/
474-
flctx = READ_ONCE(inode->i_flctx);
478+
flctx = locks_inode_context(inode);
475479
if (!flctx)
476480
return 0;
477481
smp_mb();
@@ -490,7 +494,7 @@ static inline int break_deleg(struct inode *inode, unsigned int flags)
490494
* could end up racing with tasks trying to set a new lease on this
491495
* file.
492496
*/
493-
flctx = READ_ONCE(inode->i_flctx);
497+
flctx = locks_inode_context(inode);
494498
if (!flctx)
495499
return 0;
496500
smp_mb();
@@ -535,8 +539,11 @@ static inline int break_deleg_wait(struct delegated_inode *di)
535539

536540
static inline int break_layout(struct inode *inode, bool wait)
537541
{
542+
struct file_lock_context *flctx;
543+
538544
smp_mb();
539-
if (inode->i_flctx && !list_empty_careful(&inode->i_flctx->flc_lease)) {
545+
flctx = locks_inode_context(inode);
546+
if (flctx && !list_empty_careful(&flctx->flc_lease)) {
540547
unsigned int flags = LEASE_BREAK_LAYOUT;
541548

542549
if (!wait)

include/linux/fs.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -631,6 +631,7 @@ is_uncached_acl(struct posix_acl *acl)
631631
#define IOP_MGTIME 0x0020
632632
#define IOP_CACHED_LINK 0x0040
633633
#define IOP_FASTPERM_MAY_EXEC 0x0080
634+
#define IOP_FLCTX 0x0100
634635

635636
/*
636637
* Inode state bits. Protected by inode->i_lock

0 commit comments

Comments
 (0)