Skip to content

Commit 4af6ccb

Browse files
committed
Pull simple offset series from Chuck Lever. In an effort to address slab fragmentation issues reported a few months ago, I've replaced the use of xarrays for the directory offset map in "simple" file systems (including tmpfs). Thanks to Liam Howlett for helping me get this working with Maple Trees. * series 'Use Maple Trees for simple_offset utilities' of https://lore.kernel.org/r/170820083431.6328.16233178852085891453.stgit@91.116.238.104.host.secureserver.net: (6 commits) libfs: Convert simple directory offsets to use a Maple Tree test_maple_tree: testing the cyclic allocation maple_tree: Add mtree_alloc_cyclic() libfs: Add simple_offset_empty() libfs: Define a minimum directory offset libfs: Re-arrange locking in offset_iterate_dir() Signed-off-by: Christian Brauner <brauner@kernel.org>
2 parents bae8bc4 + 0e4a862 commit 4af6ccb

6 files changed

Lines changed: 215 additions & 35 deletions

File tree

fs/libfs.c

Lines changed: 65 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -240,17 +240,22 @@ const struct inode_operations simple_dir_inode_operations = {
240240
};
241241
EXPORT_SYMBOL(simple_dir_inode_operations);
242242

243-
static void offset_set(struct dentry *dentry, u32 offset)
243+
/* 0 is '.', 1 is '..', so always start with offset 2 or more */
244+
enum {
245+
DIR_OFFSET_MIN = 2,
246+
};
247+
248+
static void offset_set(struct dentry *dentry, long offset)
244249
{
245-
dentry->d_fsdata = (void *)((uintptr_t)(offset));
250+
dentry->d_fsdata = (void *)offset;
246251
}
247252

248-
static u32 dentry2offset(struct dentry *dentry)
253+
static long dentry2offset(struct dentry *dentry)
249254
{
250-
return (u32)((uintptr_t)(dentry->d_fsdata));
255+
return (long)dentry->d_fsdata;
251256
}
252257

253-
static struct lock_class_key simple_offset_xa_lock;
258+
static struct lock_class_key simple_offset_lock_class;
254259

255260
/**
256261
* simple_offset_init - initialize an offset_ctx
@@ -259,32 +264,29 @@ static struct lock_class_key simple_offset_xa_lock;
259264
*/
260265
void simple_offset_init(struct offset_ctx *octx)
261266
{
262-
xa_init_flags(&octx->xa, XA_FLAGS_ALLOC1);
263-
lockdep_set_class(&octx->xa.xa_lock, &simple_offset_xa_lock);
264-
265-
/* 0 is '.', 1 is '..', so always start with offset 2 */
266-
octx->next_offset = 2;
267+
mt_init_flags(&octx->mt, MT_FLAGS_ALLOC_RANGE);
268+
lockdep_set_class(&octx->mt.ma_lock, &simple_offset_lock_class);
269+
octx->next_offset = DIR_OFFSET_MIN;
267270
}
268271

269272
/**
270273
* simple_offset_add - Add an entry to a directory's offset map
271274
* @octx: directory offset ctx to be updated
272275
* @dentry: new dentry being added
273276
*
274-
* Returns zero on success. @so_ctx and the dentry offset are updated.
277+
* Returns zero on success. @octx and the dentry's offset are updated.
275278
* Otherwise, a negative errno value is returned.
276279
*/
277280
int simple_offset_add(struct offset_ctx *octx, struct dentry *dentry)
278281
{
279-
static const struct xa_limit limit = XA_LIMIT(2, U32_MAX);
280-
u32 offset;
282+
unsigned long offset;
281283
int ret;
282284

283285
if (dentry2offset(dentry) != 0)
284286
return -EBUSY;
285287

286-
ret = xa_alloc_cyclic(&octx->xa, &offset, dentry, limit,
287-
&octx->next_offset, GFP_KERNEL);
288+
ret = mtree_alloc_cyclic(&octx->mt, &offset, dentry, DIR_OFFSET_MIN,
289+
LONG_MAX, &octx->next_offset, GFP_KERNEL);
288290
if (ret < 0)
289291
return ret;
290292

@@ -300,16 +302,48 @@ int simple_offset_add(struct offset_ctx *octx, struct dentry *dentry)
300302
*/
301303
void simple_offset_remove(struct offset_ctx *octx, struct dentry *dentry)
302304
{
303-
u32 offset;
305+
long offset;
304306

305307
offset = dentry2offset(dentry);
306308
if (offset == 0)
307309
return;
308310

309-
xa_erase(&octx->xa, offset);
311+
mtree_erase(&octx->mt, offset);
310312
offset_set(dentry, 0);
311313
}
312314

315+
/**
316+
* simple_offset_empty - Check if a dentry can be unlinked
317+
* @dentry: dentry to be tested
318+
*
319+
* Returns 0 if @dentry is a non-empty directory; otherwise returns 1.
320+
*/
321+
int simple_offset_empty(struct dentry *dentry)
322+
{
323+
struct inode *inode = d_inode(dentry);
324+
struct offset_ctx *octx;
325+
struct dentry *child;
326+
unsigned long index;
327+
int ret = 1;
328+
329+
if (!inode || !S_ISDIR(inode->i_mode))
330+
return ret;
331+
332+
index = DIR_OFFSET_MIN;
333+
octx = inode->i_op->get_offset_ctx(inode);
334+
mt_for_each(&octx->mt, child, index, LONG_MAX) {
335+
spin_lock(&child->d_lock);
336+
if (simple_positive(child)) {
337+
spin_unlock(&child->d_lock);
338+
ret = 0;
339+
break;
340+
}
341+
spin_unlock(&child->d_lock);
342+
}
343+
344+
return ret;
345+
}
346+
313347
/**
314348
* simple_offset_rename_exchange - exchange rename with directory offsets
315349
* @old_dir: parent of dentry being moved
@@ -327,8 +361,8 @@ int simple_offset_rename_exchange(struct inode *old_dir,
327361
{
328362
struct offset_ctx *old_ctx = old_dir->i_op->get_offset_ctx(old_dir);
329363
struct offset_ctx *new_ctx = new_dir->i_op->get_offset_ctx(new_dir);
330-
u32 old_index = dentry2offset(old_dentry);
331-
u32 new_index = dentry2offset(new_dentry);
364+
long old_index = dentry2offset(old_dentry);
365+
long new_index = dentry2offset(new_dentry);
332366
int ret;
333367

334368
simple_offset_remove(old_ctx, old_dentry);
@@ -354,9 +388,9 @@ int simple_offset_rename_exchange(struct inode *old_dir,
354388

355389
out_restore:
356390
offset_set(old_dentry, old_index);
357-
xa_store(&old_ctx->xa, old_index, old_dentry, GFP_KERNEL);
391+
mtree_store(&old_ctx->mt, old_index, old_dentry, GFP_KERNEL);
358392
offset_set(new_dentry, new_index);
359-
xa_store(&new_ctx->xa, new_index, new_dentry, GFP_KERNEL);
393+
mtree_store(&new_ctx->mt, new_index, new_dentry, GFP_KERNEL);
360394
return ret;
361395
}
362396

@@ -369,7 +403,7 @@ int simple_offset_rename_exchange(struct inode *old_dir,
369403
*/
370404
void simple_offset_destroy(struct offset_ctx *octx)
371405
{
372-
xa_destroy(&octx->xa);
406+
mtree_destroy(&octx->mt);
373407
}
374408

375409
/**
@@ -399,15 +433,16 @@ static loff_t offset_dir_llseek(struct file *file, loff_t offset, int whence)
399433

400434
/* In this case, ->private_data is protected by f_pos_lock */
401435
file->private_data = NULL;
402-
return vfs_setpos(file, offset, U32_MAX);
436+
return vfs_setpos(file, offset, LONG_MAX);
403437
}
404438

405-
static struct dentry *offset_find_next(struct xa_state *xas)
439+
static struct dentry *offset_find_next(struct offset_ctx *octx, loff_t offset)
406440
{
441+
MA_STATE(mas, &octx->mt, offset, offset);
407442
struct dentry *child, *found = NULL;
408443

409444
rcu_read_lock();
410-
child = xas_next_entry(xas, U32_MAX);
445+
child = mas_find(&mas, LONG_MAX);
411446
if (!child)
412447
goto out;
413448
spin_lock(&child->d_lock);
@@ -421,21 +456,20 @@ static struct dentry *offset_find_next(struct xa_state *xas)
421456

422457
static bool offset_dir_emit(struct dir_context *ctx, struct dentry *dentry)
423458
{
424-
u32 offset = dentry2offset(dentry);
425459
struct inode *inode = d_inode(dentry);
460+
long offset = dentry2offset(dentry);
426461

427462
return ctx->actor(ctx, dentry->d_name.name, dentry->d_name.len, offset,
428463
inode->i_ino, fs_umode_to_dtype(inode->i_mode));
429464
}
430465

431466
static void *offset_iterate_dir(struct inode *inode, struct dir_context *ctx)
432467
{
433-
struct offset_ctx *so_ctx = inode->i_op->get_offset_ctx(inode);
434-
XA_STATE(xas, &so_ctx->xa, ctx->pos);
468+
struct offset_ctx *octx = inode->i_op->get_offset_ctx(inode);
435469
struct dentry *dentry;
436470

437471
while (true) {
438-
dentry = offset_find_next(&xas);
472+
dentry = offset_find_next(octx, ctx->pos);
439473
if (!dentry)
440474
return ERR_PTR(-ENOENT);
441475

@@ -444,8 +478,8 @@ static void *offset_iterate_dir(struct inode *inode, struct dir_context *ctx)
444478
break;
445479
}
446480

481+
ctx->pos = dentry2offset(dentry) + 1;
447482
dput(dentry);
448-
ctx->pos = xas.xa_index + 1;
449483
}
450484
return NULL;
451485
}
@@ -481,7 +515,7 @@ static int offset_readdir(struct file *file, struct dir_context *ctx)
481515
return 0;
482516

483517
/* In this case, ->private_data is protected by f_pos_lock */
484-
if (ctx->pos == 2)
518+
if (ctx->pos == DIR_OFFSET_MIN)
485519
file->private_data = NULL;
486520
else if (file->private_data == ERR_PTR(-ENOENT))
487521
return 0;

include/linux/fs.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
#include <linux/cred.h>
4444
#include <linux/mnt_idmapping.h>
4545
#include <linux/slab.h>
46+
#include <linux/maple_tree.h>
4647

4748
#include <asm/byteorder.h>
4849
#include <uapi/linux/fs.h>
@@ -3288,13 +3289,14 @@ extern ssize_t simple_write_to_buffer(void *to, size_t available, loff_t *ppos,
32883289
const void __user *from, size_t count);
32893290

32903291
struct offset_ctx {
3291-
struct xarray xa;
3292-
u32 next_offset;
3292+
struct maple_tree mt;
3293+
unsigned long next_offset;
32933294
};
32943295

32953296
void simple_offset_init(struct offset_ctx *octx);
32963297
int simple_offset_add(struct offset_ctx *octx, struct dentry *dentry);
32973298
void simple_offset_remove(struct offset_ctx *octx, struct dentry *dentry);
3299+
int simple_offset_empty(struct dentry *dentry);
32983300
int simple_offset_rename_exchange(struct inode *old_dir,
32993301
struct dentry *old_dentry,
33003302
struct inode *new_dir,

include/linux/maple_tree.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,7 @@ enum maple_type {
171171
#define MT_FLAGS_LOCK_IRQ 0x100
172172
#define MT_FLAGS_LOCK_BH 0x200
173173
#define MT_FLAGS_LOCK_EXTERN 0x300
174+
#define MT_FLAGS_ALLOC_WRAPPED 0x0800
174175

175176
#define MAPLE_HEIGHT_MAX 31
176177

@@ -319,6 +320,9 @@ int mtree_insert_range(struct maple_tree *mt, unsigned long first,
319320
int mtree_alloc_range(struct maple_tree *mt, unsigned long *startp,
320321
void *entry, unsigned long size, unsigned long min,
321322
unsigned long max, gfp_t gfp);
323+
int mtree_alloc_cyclic(struct maple_tree *mt, unsigned long *startp,
324+
void *entry, unsigned long range_lo, unsigned long range_hi,
325+
unsigned long *next, gfp_t gfp);
322326
int mtree_alloc_rrange(struct maple_tree *mt, unsigned long *startp,
323327
void *entry, unsigned long size, unsigned long min,
324328
unsigned long max, gfp_t gfp);
@@ -499,6 +503,9 @@ void *mas_find_range(struct ma_state *mas, unsigned long max);
499503
void *mas_find_rev(struct ma_state *mas, unsigned long min);
500504
void *mas_find_range_rev(struct ma_state *mas, unsigned long max);
501505
int mas_preallocate(struct ma_state *mas, void *entry, gfp_t gfp);
506+
int mas_alloc_cyclic(struct ma_state *mas, unsigned long *startp,
507+
void *entry, unsigned long range_lo, unsigned long range_hi,
508+
unsigned long *next, gfp_t gfp);
502509

503510
bool mas_nomem(struct ma_state *mas, gfp_t gfp);
504511
void mas_pause(struct ma_state *mas);

lib/maple_tree.c

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4290,6 +4290,56 @@ static inline void *mas_insert(struct ma_state *mas, void *entry)
42904290

42914291
}
42924292

4293+
/**
4294+
* mas_alloc_cyclic() - Internal call to find somewhere to store an entry
4295+
* @mas: The maple state.
4296+
* @startp: Pointer to ID.
4297+
* @range_lo: Lower bound of range to search.
4298+
* @range_hi: Upper bound of range to search.
4299+
* @entry: The entry to store.
4300+
* @next: Pointer to next ID to allocate.
4301+
* @gfp: The GFP_FLAGS to use for allocations.
4302+
*
4303+
* Return: 0 if the allocation succeeded without wrapping, 1 if the
4304+
* allocation succeeded after wrapping, or -EBUSY if there are no
4305+
* free entries.
4306+
*/
4307+
int mas_alloc_cyclic(struct ma_state *mas, unsigned long *startp,
4308+
void *entry, unsigned long range_lo, unsigned long range_hi,
4309+
unsigned long *next, gfp_t gfp)
4310+
{
4311+
unsigned long min = range_lo;
4312+
int ret = 0;
4313+
4314+
range_lo = max(min, *next);
4315+
ret = mas_empty_area(mas, range_lo, range_hi, 1);
4316+
if ((mas->tree->ma_flags & MT_FLAGS_ALLOC_WRAPPED) && ret == 0) {
4317+
mas->tree->ma_flags &= ~MT_FLAGS_ALLOC_WRAPPED;
4318+
ret = 1;
4319+
}
4320+
if (ret < 0 && range_lo > min) {
4321+
ret = mas_empty_area(mas, min, range_hi, 1);
4322+
if (ret == 0)
4323+
ret = 1;
4324+
}
4325+
if (ret < 0)
4326+
return ret;
4327+
4328+
do {
4329+
mas_insert(mas, entry);
4330+
} while (mas_nomem(mas, gfp));
4331+
if (mas_is_err(mas))
4332+
return xa_err(mas->node);
4333+
4334+
*startp = mas->index;
4335+
*next = *startp + 1;
4336+
if (*next == 0)
4337+
mas->tree->ma_flags |= MT_FLAGS_ALLOC_WRAPPED;
4338+
4339+
return ret;
4340+
}
4341+
EXPORT_SYMBOL(mas_alloc_cyclic);
4342+
42934343
static __always_inline void mas_rewalk(struct ma_state *mas, unsigned long index)
42944344
{
42954345
retry:
@@ -6443,6 +6493,49 @@ int mtree_alloc_range(struct maple_tree *mt, unsigned long *startp,
64436493
}
64446494
EXPORT_SYMBOL(mtree_alloc_range);
64456495

6496+
/**
6497+
* mtree_alloc_cyclic() - Find somewhere to store this entry in the tree.
6498+
* @mt: The maple tree.
6499+
* @startp: Pointer to ID.
6500+
* @range_lo: Lower bound of range to search.
6501+
* @range_hi: Upper bound of range to search.
6502+
* @entry: The entry to store.
6503+
* @next: Pointer to next ID to allocate.
6504+
* @gfp: The GFP_FLAGS to use for allocations.
6505+
*
6506+
* Finds an empty entry in @mt after @next, stores the new index into
6507+
* the @id pointer, stores the entry at that index, then updates @next.
6508+
*
6509+
* @mt must be initialized with the MT_FLAGS_ALLOC_RANGE flag.
6510+
*
6511+
* Context: Any context. Takes and releases the mt.lock. May sleep if
6512+
* the @gfp flags permit.
6513+
*
6514+
* Return: 0 if the allocation succeeded without wrapping, 1 if the
6515+
* allocation succeeded after wrapping, -ENOMEM if memory could not be
6516+
* allocated, -EINVAL if @mt cannot be used, or -EBUSY if there are no
6517+
* free entries.
6518+
*/
6519+
int mtree_alloc_cyclic(struct maple_tree *mt, unsigned long *startp,
6520+
void *entry, unsigned long range_lo, unsigned long range_hi,
6521+
unsigned long *next, gfp_t gfp)
6522+
{
6523+
int ret;
6524+
6525+
MA_STATE(mas, mt, 0, 0);
6526+
6527+
if (!mt_is_alloc(mt))
6528+
return -EINVAL;
6529+
if (WARN_ON_ONCE(mt_is_reserved(entry)))
6530+
return -EINVAL;
6531+
mtree_lock(mt);
6532+
ret = mas_alloc_cyclic(&mas, startp, entry, range_lo, range_hi,
6533+
next, gfp);
6534+
mtree_unlock(mt);
6535+
return ret;
6536+
}
6537+
EXPORT_SYMBOL(mtree_alloc_cyclic);
6538+
64466539
int mtree_alloc_rrange(struct maple_tree *mt, unsigned long *startp,
64476540
void *entry, unsigned long size, unsigned long min,
64486541
unsigned long max, gfp_t gfp)

0 commit comments

Comments
 (0)