Skip to content

Commit 278d9a2

Browse files
Zhihao Cheng authored and richardweinberger committed
ubifs: Rename whiteout atomically
Currently, rename whiteout has 3 steps: 1. create a tmpfile (which associates the old dentry with the tmpfile inode) for the whiteout, and store the tmpfile on disk 2. link the whiteout, associate the whiteout inode with the old dentry again, and store the old dentry, old inode and new dentry on disk 3. write back the dirty whiteout inode to disk A sudden power cut or an error (e.g. ENOSPC returned by budgeting, memory allocation failure) during the above steps may cause several kinds of problems: Problem 1: ENOSPC is returned by the whiteout space budget (before step 2); the old dentry will disappear after the rename syscall, and the whiteout file cannot be found either. ls dir // we get file, whiteout rename(dir/file, dir/whiteout, RENAME_WHITEOUT) ENOSPC = ubifs_budget_space(&wht_req) // return ls dir // empty (no file, no whiteout) Problem 2: A power cut happens before step 3; the whiteout inode with 'nlink=1' is not stored on disk, but the whiteout dentry (old dentry) is written on disk, so the whiteout file is lost on the next mount (we get "dead directory entry" after executing 'ls -l' on the whiteout file). Now, we use the following 3 steps to finish rename whiteout: 1. create an in-mem inode with 'nlink = 1' as the whiteout 2. ubifs_jnl_rename (write on disk to finish associating the old dentry with the whiteout inode, and associating the new dentry with the old inode) 3. iput(whiteout) We rely on ubifs_jnl_rename() writing the in-mem inode on disk to finish rename whiteout, which avoids intermediate on-disk states caused by a sudden power cut or an error. Fixes: 9e0a1ff ("ubifs: Implement RENAME_WHITEOUT") Signed-off-by: Zhihao Cheng <chengzhihao1@huawei.com> Signed-off-by: Richard Weinberger <richard@nod.at>
1 parent 716b457 commit 278d9a2

2 files changed

Lines changed: 136 additions & 60 deletions

File tree

fs/ubifs/dir.c

Lines changed: 92 additions & 52 deletions
Original file line number | Diff line number | Diff line change
@@ -349,8 +349,56 @@ static int ubifs_create(struct user_namespace *mnt_userns, struct inode *dir,
349349
return err;
350350
}
351351

352-
static int do_tmpfile(struct inode *dir, struct dentry *dentry,
353-
umode_t mode, struct inode **whiteout)
352+
static struct inode *create_whiteout(struct inode *dir, struct dentry *dentry)
353+
{
354+
int err;
355+
umode_t mode = S_IFCHR | WHITEOUT_MODE;
356+
struct inode *inode;
357+
struct ubifs_info *c = dir->i_sb->s_fs_info;
358+
struct fscrypt_name nm;
359+
360+
/*
361+
* Create an inode('nlink = 1') for whiteout without updating journal,
362+
* let ubifs_jnl_rename() store it on flash to complete rename whiteout
363+
* atomically.
364+
*/
365+
366+
dbg_gen("dent '%pd', mode %#hx in dir ino %lu",
367+
dentry, mode, dir->i_ino);
368+
369+
err = fscrypt_setup_filename(dir, &dentry->d_name, 0, &nm);
370+
if (err)
371+
return ERR_PTR(err);
372+
373+
inode = ubifs_new_inode(c, dir, mode);
374+
if (IS_ERR(inode)) {
375+
err = PTR_ERR(inode);
376+
goto out_free;
377+
}
378+
379+
init_special_inode(inode, inode->i_mode, WHITEOUT_DEV);
380+
ubifs_assert(c, inode->i_op == &ubifs_file_inode_operations);
381+
382+
err = ubifs_init_security(dir, inode, &dentry->d_name);
383+
if (err)
384+
goto out_inode;
385+
386+
/* The dir size is updated by do_rename. */
387+
insert_inode_hash(inode);
388+
389+
return inode;
390+
391+
out_inode:
392+
make_bad_inode(inode);
393+
iput(inode);
394+
out_free:
395+
fscrypt_free_filename(&nm);
396+
ubifs_err(c, "cannot create whiteout file, error %d", err);
397+
return ERR_PTR(err);
398+
}
399+
400+
static int ubifs_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
401+
struct dentry *dentry, umode_t mode)
354402
{
355403
struct inode *inode;
356404
struct ubifs_info *c = dir->i_sb->s_fs_info;
@@ -392,25 +440,13 @@ static int do_tmpfile(struct inode *dir, struct dentry *dentry,
392440
}
393441
ui = ubifs_inode(inode);
394442

395-
if (whiteout) {
396-
init_special_inode(inode, inode->i_mode, WHITEOUT_DEV);
397-
ubifs_assert(c, inode->i_op == &ubifs_file_inode_operations);
398-
}
399-
400443
err = ubifs_init_security(dir, inode, &dentry->d_name);
401444
if (err)
402445
goto out_inode;
403446

404447
mutex_lock(&ui->ui_mutex);
405448
insert_inode_hash(inode);
406-
407-
if (whiteout) {
408-
mark_inode_dirty(inode);
409-
drop_nlink(inode);
410-
*whiteout = inode;
411-
} else {
412-
d_tmpfile(dentry, inode);
413-
}
449+
d_tmpfile(dentry, inode);
414450
ubifs_assert(c, ui->dirty);
415451

416452
instantiated = 1;
@@ -432,8 +468,6 @@ static int do_tmpfile(struct inode *dir, struct dentry *dentry,
432468
make_bad_inode(inode);
433469
if (!instantiated)
434470
iput(inode);
435-
else if (whiteout)
436-
iput(*whiteout);
437471
out_budg:
438472
ubifs_release_budget(c, &req);
439473
if (!instantiated)
@@ -443,12 +477,6 @@ static int do_tmpfile(struct inode *dir, struct dentry *dentry,
443477
return err;
444478
}
445479

446-
static int ubifs_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
447-
struct dentry *dentry, umode_t mode)
448-
{
449-
return do_tmpfile(dir, dentry, mode, NULL);
450-
}
451-
452480
/**
453481
* vfs_dent_type - get VFS directory entry type.
454482
* @type: UBIFS directory entry type
@@ -1266,17 +1294,19 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry,
12661294
.dirtied_ino = 3 };
12671295
struct ubifs_budget_req ino_req = { .dirtied_ino = 1,
12681296
.dirtied_ino_d = ALIGN(old_inode_ui->data_len, 8) };
1297+
struct ubifs_budget_req wht_req;
12691298
struct timespec64 time;
12701299
unsigned int saved_nlink;
12711300
struct fscrypt_name old_nm, new_nm;
12721301

12731302
/*
1274-
* Budget request settings: deletion direntry, new direntry, removing
1275-
* the old inode, and changing old and new parent directory inodes.
1303+
* Budget request settings:
1304+
* req: deletion direntry, new direntry, removing the old inode,
1305+
* and changing old and new parent directory inodes.
12761306
*
1277-
* However, this operation also marks the target inode as dirty and
1278-
* does not write it, so we allocate budget for the target inode
1279-
* separately.
1307+
* wht_req: new whiteout inode for RENAME_WHITEOUT.
1308+
*
1309+
* ino_req: marks the target inode as dirty and does not write it.
12801310
*/
12811311

12821312
dbg_gen("dent '%pd' ino %lu in dir ino %lu to dent '%pd' in dir ino %lu flags 0x%x",
@@ -1326,39 +1356,45 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry,
13261356

13271357
if (flags & RENAME_WHITEOUT) {
13281358
union ubifs_dev_desc *dev = NULL;
1329-
struct ubifs_budget_req wht_req;
13301359

13311360
dev = kmalloc(sizeof(union ubifs_dev_desc), GFP_NOFS);
13321361
if (!dev) {
13331362
err = -ENOMEM;
13341363
goto out_release;
13351364
}
13361365

1337-
err = do_tmpfile(old_dir, old_dentry, S_IFCHR | WHITEOUT_MODE, &whiteout);
1338-
if (err) {
1366+
/*
1367+
* The whiteout inode without dentry is pinned in memory,
1368+
* umount won't happen during rename process because we
1369+
* got parent dentry.
1370+
*/
1371+
whiteout = create_whiteout(old_dir, old_dentry);
1372+
if (IS_ERR(whiteout)) {
1373+
err = PTR_ERR(whiteout);
13391374
kfree(dev);
13401375
goto out_release;
13411376
}
13421377

1343-
spin_lock(&whiteout->i_lock);
1344-
whiteout->i_state |= I_LINKABLE;
1345-
spin_unlock(&whiteout->i_lock);
1346-
13471378
whiteout_ui = ubifs_inode(whiteout);
13481379
whiteout_ui->data = dev;
13491380
whiteout_ui->data_len = ubifs_encode_dev(dev, MKDEV(0, 0));
13501381
ubifs_assert(c, !whiteout_ui->dirty);
13511382

13521383
memset(&wht_req, 0, sizeof(struct ubifs_budget_req));
1353-
wht_req.dirtied_ino = 1;
1354-
wht_req.dirtied_ino_d = ALIGN(whiteout_ui->data_len, 8);
1384+
wht_req.new_ino = 1;
1385+
wht_req.new_ino_d = ALIGN(whiteout_ui->data_len, 8);
13551386
/*
13561387
* To avoid deadlock between space budget (holds ui_mutex and
13571388
* waits wb work) and writeback work(waits ui_mutex), do space
13581389
* budget before ubifs inodes locked.
13591390
*/
13601391
err = ubifs_budget_space(c, &wht_req);
13611392
if (err) {
1393+
/*
1394+
* Whiteout inode can not be written on flash by
1395+
* ubifs_jnl_write_inode(), because it's neither
1396+
* dirty nor zero-nlink.
1397+
*/
13621398
iput(whiteout);
13631399
goto out_release;
13641400
}
@@ -1433,17 +1469,11 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry,
14331469
sync = IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir);
14341470
if (unlink && IS_SYNC(new_inode))
14351471
sync = 1;
1436-
}
1437-
1438-
if (whiteout) {
1439-
inc_nlink(whiteout);
1440-
mark_inode_dirty(whiteout);
1441-
1442-
spin_lock(&whiteout->i_lock);
1443-
whiteout->i_state &= ~I_LINKABLE;
1444-
spin_unlock(&whiteout->i_lock);
1445-
1446-
iput(whiteout);
1472+
/*
1473+
* S_SYNC flag of whiteout inherits from the old_dir, and we
1474+
* have already checked the old dir inode. So there is no need
1475+
* to check whiteout.
1476+
*/
14471477
}
14481478

14491479
err = ubifs_jnl_rename(c, old_dir, old_inode, &old_nm, new_dir,
@@ -1454,6 +1484,11 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry,
14541484
unlock_4_inodes(old_dir, new_dir, new_inode, whiteout);
14551485
ubifs_release_budget(c, &req);
14561486

1487+
if (whiteout) {
1488+
ubifs_release_budget(c, &wht_req);
1489+
iput(whiteout);
1490+
}
1491+
14571492
mutex_lock(&old_inode_ui->ui_mutex);
14581493
release = old_inode_ui->dirty;
14591494
mark_inode_dirty_sync(old_inode);
@@ -1462,11 +1497,16 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry,
14621497
if (release)
14631498
ubifs_release_budget(c, &ino_req);
14641499
if (IS_SYNC(old_inode))
1465-
err = old_inode->i_sb->s_op->write_inode(old_inode, NULL);
1500+
/*
1501+
* Rename finished here. Although old inode cannot be updated
1502+
* on flash, old ctime is not a big problem, don't return err
1503+
* code to userspace.
1504+
*/
1505+
old_inode->i_sb->s_op->write_inode(old_inode, NULL);
14661506

14671507
fscrypt_free_filename(&old_nm);
14681508
fscrypt_free_filename(&new_nm);
1469-
return err;
1509+
return 0;
14701510

14711511
out_cancel:
14721512
if (unlink) {
@@ -1487,11 +1527,11 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry,
14871527
inc_nlink(old_dir);
14881528
}
14891529
}
1530+
unlock_4_inodes(old_dir, new_dir, new_inode, whiteout);
14901531
if (whiteout) {
1491-
drop_nlink(whiteout);
1532+
ubifs_release_budget(c, &wht_req);
14921533
iput(whiteout);
14931534
}
1494-
unlock_4_inodes(old_dir, new_dir, new_inode, whiteout);
14951535
out_release:
14961536
ubifs_release_budget(c, &ino_req);
14971537
ubifs_release_budget(c, &req);

fs/ubifs/journal.c

Lines changed: 44 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1207,9 +1207,9 @@ int ubifs_jnl_xrename(struct ubifs_info *c, const struct inode *fst_dir,
12071207
* @sync: non-zero if the write-buffer has to be synchronized
12081208
*
12091209
* This function implements the re-name operation which may involve writing up
1210-
* to 4 inodes and 2 directory entries. It marks the written inodes as clean
1211-
* and returns zero on success. In case of failure, a negative error code is
1212-
* returned.
1210+
* to 4 inodes(new inode, whiteout inode, old and new parent directory inodes)
1211+
* and 2 directory entries. It marks the written inodes as clean and returns
1212+
* zero on success. In case of failure, a negative error code is returned.
12131213
*/
12141214
int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir,
12151215
const struct inode *old_inode,
@@ -1222,14 +1222,15 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir,
12221222
void *p;
12231223
union ubifs_key key;
12241224
struct ubifs_dent_node *dent, *dent2;
1225-
int err, dlen1, dlen2, ilen, lnum, offs, len, orphan_added = 0;
1225+
int err, dlen1, dlen2, ilen, wlen, lnum, offs, len, orphan_added = 0;
12261226
int aligned_dlen1, aligned_dlen2, plen = UBIFS_INO_NODE_SZ;
12271227
int last_reference = !!(new_inode && new_inode->i_nlink == 0);
12281228
int move = (old_dir != new_dir);
1229-
struct ubifs_inode *new_ui;
1229+
struct ubifs_inode *new_ui, *whiteout_ui;
12301230
u8 hash_old_dir[UBIFS_HASH_ARR_SZ];
12311231
u8 hash_new_dir[UBIFS_HASH_ARR_SZ];
12321232
u8 hash_new_inode[UBIFS_HASH_ARR_SZ];
1233+
u8 hash_whiteout_inode[UBIFS_HASH_ARR_SZ];
12331234
u8 hash_dent1[UBIFS_HASH_ARR_SZ];
12341235
u8 hash_dent2[UBIFS_HASH_ARR_SZ];
12351236

@@ -1249,9 +1250,20 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir,
12491250
} else
12501251
ilen = 0;
12511252

1253+
if (whiteout) {
1254+
whiteout_ui = ubifs_inode(whiteout);
1255+
ubifs_assert(c, mutex_is_locked(&whiteout_ui->ui_mutex));
1256+
ubifs_assert(c, whiteout->i_nlink == 1);
1257+
ubifs_assert(c, !whiteout_ui->dirty);
1258+
wlen = UBIFS_INO_NODE_SZ;
1259+
wlen += whiteout_ui->data_len;
1260+
} else
1261+
wlen = 0;
1262+
12521263
aligned_dlen1 = ALIGN(dlen1, 8);
12531264
aligned_dlen2 = ALIGN(dlen2, 8);
1254-
len = aligned_dlen1 + aligned_dlen2 + ALIGN(ilen, 8) + ALIGN(plen, 8);
1265+
len = aligned_dlen1 + aligned_dlen2 + ALIGN(ilen, 8) +
1266+
ALIGN(wlen, 8) + ALIGN(plen, 8);
12551267
if (move)
12561268
len += plen;
12571269

@@ -1313,6 +1325,15 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir,
13131325
p += ALIGN(ilen, 8);
13141326
}
13151327

1328+
if (whiteout) {
1329+
pack_inode(c, p, whiteout, 0);
1330+
err = ubifs_node_calc_hash(c, p, hash_whiteout_inode);
1331+
if (err)
1332+
goto out_release;
1333+
1334+
p += ALIGN(wlen, 8);
1335+
}
1336+
13161337
if (!move) {
13171338
pack_inode(c, p, old_dir, 1);
13181339
err = ubifs_node_calc_hash(c, p, hash_old_dir);
@@ -1352,6 +1373,9 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir,
13521373
if (new_inode)
13531374
ubifs_wbuf_add_ino_nolock(&c->jheads[BASEHD].wbuf,
13541375
new_inode->i_ino);
1376+
if (whiteout)
1377+
ubifs_wbuf_add_ino_nolock(&c->jheads[BASEHD].wbuf,
1378+
whiteout->i_ino);
13551379
}
13561380
release_head(c, BASEHD);
13571381

@@ -1368,8 +1392,6 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir,
13681392
err = ubifs_tnc_add_nm(c, &key, lnum, offs, dlen2, hash_dent2, old_nm);
13691393
if (err)
13701394
goto out_ro;
1371-
1372-
ubifs_delete_orphan(c, whiteout->i_ino);
13731395
} else {
13741396
err = ubifs_add_dirt(c, lnum, dlen2);
13751397
if (err)
@@ -1390,6 +1412,15 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir,
13901412
offs += ALIGN(ilen, 8);
13911413
}
13921414

1415+
if (whiteout) {
1416+
ino_key_init(c, &key, whiteout->i_ino);
1417+
err = ubifs_tnc_add(c, &key, lnum, offs, wlen,
1418+
hash_whiteout_inode);
1419+
if (err)
1420+
goto out_ro;
1421+
offs += ALIGN(wlen, 8);
1422+
}
1423+
13931424
ino_key_init(c, &key, old_dir->i_ino);
13941425
err = ubifs_tnc_add(c, &key, lnum, offs, plen, hash_old_dir);
13951426
if (err)
@@ -1410,6 +1441,11 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir,
14101441
new_ui->synced_i_size = new_ui->ui_size;
14111442
spin_unlock(&new_ui->ui_lock);
14121443
}
1444+
/*
1445+
* No need to mark whiteout inode clean.
1446+
* Whiteout doesn't have non-zero size, no need to update
1447+
* synced_i_size for whiteout_ui.
1448+
*/
14131449
mark_inode_clean(c, ubifs_inode(old_dir));
14141450
if (move)
14151451
mark_inode_clean(c, ubifs_inode(new_dir));

0 commit comments

Comments
 (0)