Skip to content

Commit dffc7f2

Browse files
fs/ntfs3: allow readdir() to finish after directory mutations without rewinddir()
This patch introduces a per-directory version counter that increments on each directory modification (indx_insert_entry() / indx_delete_entry()). ntfs_readdir() uses this version to detect whether the directory has changed since enumeration began. If readdir() reaches end-of-directory but the version has changed, the walk restarts from the beginning of the index tree instead of returning prematurely. This provides rmdir-like behavior for tools that remove entries as they enumerate them. Prior to this change, bonnie++ directory operations could fail due to premature termination of readdir() during concurrent index updates. With this patch applied, bonnie++ completes successfully with no errors. Signed-off-by: Konstantin Komarov <almaz.alexandrovich@paragon-software.com>
1 parent 989e294 commit dffc7f2

3 files changed

Lines changed: 76 additions & 29 deletions

File tree

fs/ntfs3/dir.c

Lines changed: 73 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -392,33 +392,77 @@ static int ntfs_read_hdr(struct ntfs_sb_info *sbi, struct ntfs_inode *ni,
392392
* ntfs_readdir - file_operations::iterate_shared
393393
*
394394
* Use non sorted enumeration.
395-
* We have an example of broken volume where sorted enumeration
396-
* counts each name twice.
395+
* Sorted enumeration may result infinite loop if names tree contains loop.
397396
*/
398397
static int ntfs_readdir(struct file *file, struct dir_context *ctx)
399398
{
400399
const struct INDEX_ROOT *root;
401-
u64 vbo;
402400
size_t bit;
403-
loff_t eod;
404401
int err = 0;
405402
struct inode *dir = file_inode(file);
406403
struct ntfs_inode *ni = ntfs_i(dir);
407404
struct super_block *sb = dir->i_sb;
408405
struct ntfs_sb_info *sbi = sb->s_fs_info;
409406
loff_t i_size = i_size_read(dir);
410-
u32 pos = ctx->pos;
407+
u64 pos = ctx->pos;
411408
u8 *name = NULL;
412409
struct indx_node *node = NULL;
413410
u8 index_bits = ni->dir.index_bits;
411+
size_t max_bit = i_size >> ni->dir.index_bits;
412+
loff_t eod = i_size + sbi->record_size;
414413

415414
/* Name is a buffer of PATH_MAX length. */
416415
static_assert(NTFS_NAME_LEN * 4 < PATH_MAX);
417416

418-
eod = i_size + sbi->record_size;
417+
if (!pos) {
418+
/*
419+
* ni->dir.version increments each directory change.
420+
* Save the initial value of ni->dir.version.
421+
*/
422+
file->private_data = (void *)ni->dir.version;
423+
}
419424

420-
if (pos >= eod)
421-
return 0;
425+
if (pos >= eod) {
426+
if (file->private_data == (void *)ni->dir.version) {
427+
/* No changes since first readdir. */
428+
return 0;
429+
}
430+
431+
/*
432+
* Handle directories that changed after the initial readdir().
433+
*
434+
* Some user space code implements recursive removal like this instead
435+
* of calling rmdir(2) directly:
436+
*
437+
* fd = opendir(path);
438+
* while ((dent = readdir(fd)))
439+
* unlinkat(dirfd(fd), dent->d_name, 0);
440+
* closedir(fd);
441+
*
442+
* POSIX leaves unspecified what readdir() should return once the
443+
* directory has been modified after opendir()/rewinddir(), so this
444+
* pattern is not guaranteed to work on all filesystems or platforms.
445+
*
446+
* In ntfs3 the internal name tree may be reshaped while entries are
447+
* being removed, so there is no stable anchor for continuing a
448+
* single-pass walk based on the original readdir() order.
449+
*
450+
* In practice some widely used tools (for example certain rm(1)
451+
* implementations) have used this readdir()/unlink() loop, and some
452+
* filesystems behave in a way that effectively makes it work in the
453+
* common case.
454+
*
455+
* The code below follows that practice and tries to provide
456+
* "rmdir-like" behaviour for such callers on ntfs3, even though the
457+
* situation is not strictly defined by the APIs.
458+
*
459+
* Apple documents the same readdir()/unlink() issue and a workaround
460+
* for HFS file systems in:
461+
* https://web.archive.org/web/20220122122948/https:/support.apple.com/kb/TA21420?locale=en_US
462+
*/
463+
ctx->pos = pos = 3;
464+
file->private_data = (void *)ni->dir.version;
465+
}
422466

423467
if (!dir_emit_dots(file, ctx))
424468
return 0;
@@ -454,35 +498,31 @@ static int ntfs_readdir(struct file *file, struct dir_context *ctx)
454498
if (pos >= sbi->record_size) {
455499
bit = (pos - sbi->record_size) >> index_bits;
456500
} else {
501+
/*
502+
* Add each name from root in 'ctx'.
503+
*/
457504
err = ntfs_read_hdr(sbi, ni, &root->ihdr, 0, pos, name, ctx);
458505
if (err)
459506
goto out;
460507
bit = 0;
461508
}
462509

463-
if (!i_size) {
464-
ctx->pos = eod;
465-
goto out;
466-
}
467-
468-
for (;;) {
469-
vbo = (u64)bit << index_bits;
470-
if (vbo >= i_size) {
471-
ctx->pos = eod;
472-
goto out;
473-
}
474-
510+
/*
511+
* Enumerate indexes until the end of dir.
512+
*/
513+
for (; bit < max_bit; bit += 1) {
514+
/* Get the next used index. */
475515
err = indx_used_bit(&ni->dir, ni, &bit);
476516
if (err)
477517
goto out;
478518

479519
if (bit == MINUS_ONE_T) {
480-
ctx->pos = eod;
481-
goto out;
520+
/* no more used indexes. end of dir. */
521+
break;
482522
}
483523

484-
vbo = (u64)bit << index_bits;
485-
if (vbo >= i_size) {
524+
if (bit >= max_bit) {
525+
/* Corrupted directory. */
486526
err = -EINVAL;
487527
goto out;
488528
}
@@ -492,20 +532,24 @@ static int ntfs_readdir(struct file *file, struct dir_context *ctx)
492532
if (err)
493533
goto out;
494534

535+
/*
536+
* Add each name from index in 'ctx'.
537+
*/
495538
err = ntfs_read_hdr(sbi, ni, &node->index->ihdr,
496-
vbo + sbi->record_size, pos, name, ctx);
539+
((u64)bit << index_bits) + sbi->record_size,
540+
pos, name, ctx);
497541
if (err)
498542
goto out;
499-
500-
bit += 1;
501543
}
502544

503545
out:
504-
505546
__putname(name);
506547
put_indx_node(node);
507548

508-
if (err == 1) {
549+
if (!err) {
550+
/* End of directory. */
551+
ctx->pos = eod;
552+
} else if (err == 1) {
509553
/* 'ctx' is full. */
510554
err = 0;
511555
} else if (err == -ENOENT) {

fs/ntfs3/index.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2002,6 +2002,7 @@ int indx_insert_entry(struct ntfs_index *indx, struct ntfs_inode *ni,
20022002
fnd->level - 1, fnd);
20032003
}
20042004

2005+
indx->version += 1;
20052006
out:
20062007
fnd_put(fnd_a);
20072008
out1:
@@ -2649,6 +2650,7 @@ int indx_delete_entry(struct ntfs_index *indx, struct ntfs_inode *ni,
26492650
mi->dirty = true;
26502651
}
26512652

2653+
indx->version += 1;
26522654
out:
26532655
fnd_put(fnd2);
26542656
out1:

fs/ntfs3/ntfs_fs.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,7 @@ struct ntfs_index {
191191
struct runs_tree alloc_run;
192192
/* read/write access to 'bitmap_run'/'alloc_run' while ntfs_readdir */
193193
struct rw_semaphore run_lock;
194+
size_t version; /* increment each change */
194195

195196
/*TODO: Remove 'cmp'. */
196197
NTFS_CMP_FUNC cmp;

0 commit comments

Comments
 (0)