Skip to content

Commit 83a21c1

Browse files
author
Chandan Babu R
committed
xfs: Directory's data fork extent counter can never overflow
The maximum file size that can be represented by the data fork extent counter in the worst case occurs when all extents are 1 block in length and each block is 1KB in size. With XFS_MAX_EXTCNT_DATA_FORK_SMALL representing the maximum extent count and with 1KB sized blocks, a file can reach up to (2^31) * 1KB = 2TB. This is much larger than the theoretical maximum size of a directory, i.e. XFS_DIR2_SPACE_SIZE * 3 = ~96GB. Since a directory's inode can never overflow its data fork extent counter, this commit removes all the overflow checks associated with it. xfs_dinode_verify() now performs a rough check to verify if a directory's data fork is larger than 96GB. Reviewed-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Darrick J. Wong <djwong@kernel.org> Signed-off-by: Chandan Babu R <chandan.babu@oracle.com>
1 parent 52a4a14 commit 83a21c1

9 files changed

Lines changed: 28 additions & 91 deletions

File tree

fs/xfs/libxfs/xfs_bmap.c

Lines changed: 0 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -5147,26 +5147,6 @@ xfs_bmap_del_extent_real(
51475147
* Deleting the middle of the extent.
51485148
*/
51495149

5150-
/*
5151-
* For directories, -ENOSPC is returned since a directory entry
5152-
* remove operation must not fail due to low extent count
5153-
* availability. -ENOSPC will be handled by higher layers of XFS
5154-
* by letting the corresponding empty Data/Free blocks to linger
5155-
* until a future remove operation. Dabtree blocks would be
5156-
* swapped with the last block in the leaf space and then the
5157-
* new last block will be unmapped.
5158-
*
5159-
* The above logic also applies to the source directory entry of
5160-
* a rename operation.
5161-
*/
5162-
error = xfs_iext_count_may_overflow(ip, whichfork, 1);
5163-
if (error) {
5164-
ASSERT(S_ISDIR(VFS_I(ip)->i_mode) &&
5165-
whichfork == XFS_DATA_FORK);
5166-
error = -ENOSPC;
5167-
goto done;
5168-
}
5169-
51705150
old = got;
51715151

51725152
got.br_blockcount = del->br_startoff - got.br_startoff;

fs/xfs/libxfs/xfs_da_btree.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ struct xfs_da_geometry {
3030
unsigned int free_hdr_size; /* dir2 free header size */
3131
unsigned int free_max_bests; /* # of bests entries in dir2 free */
3232
xfs_dablk_t freeblk; /* blockno of free data v2 */
33+
xfs_extnum_t max_extents; /* Max. extents in corresponding fork */
3334

3435
xfs_dir2_data_aoff_t data_first_offset;
3536
size_t data_entry_offset;

fs/xfs/libxfs/xfs_da_format.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -277,6 +277,7 @@ xfs_dir2_sf_firstentry(struct xfs_dir2_sf_hdr *hdr)
277277
* Directory address space divided into sections,
278278
* spaces separated by 32GB.
279279
*/
280+
#define XFS_DIR2_MAX_SPACES 3
280281
#define XFS_DIR2_SPACE_SIZE (1ULL << (32 + XFS_DIR2_DATA_ALIGN_LOG))
281282
#define XFS_DIR2_DATA_SPACE 0
282283
#define XFS_DIR2_DATA_OFFSET (XFS_DIR2_DATA_SPACE * XFS_DIR2_SPACE_SIZE)

fs/xfs/libxfs/xfs_dir2.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,8 @@ xfs_da_mount(
150150
dageo->freeblk = xfs_dir2_byte_to_da(dageo, XFS_DIR2_FREE_OFFSET);
151151
dageo->node_ents = (dageo->blksize - dageo->node_hdr_size) /
152152
(uint)sizeof(xfs_da_node_entry_t);
153+
dageo->max_extents = (XFS_DIR2_MAX_SPACES * XFS_DIR2_SPACE_SIZE) >>
154+
mp->m_sb.sb_blocklog;
153155
dageo->magicpct = (dageo->blksize * 37) / 100;
154156

155157
/* set up attribute geometry - single fsb only */
@@ -161,6 +163,12 @@ xfs_da_mount(
161163
dageo->node_hdr_size = mp->m_dir_geo->node_hdr_size;
162164
dageo->node_ents = (dageo->blksize - dageo->node_hdr_size) /
163165
(uint)sizeof(xfs_da_node_entry_t);
166+
167+
if (xfs_has_large_extent_counts(mp))
168+
dageo->max_extents = XFS_MAX_EXTCNT_ATTR_FORK_LARGE;
169+
else
170+
dageo->max_extents = XFS_MAX_EXTCNT_ATTR_FORK_SMALL;
171+
164172
dageo->magicpct = (dageo->blksize * 37) / 100;
165173
return 0;
166174
}

fs/xfs/libxfs/xfs_format.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -915,6 +915,19 @@ enum xfs_dinode_fmt {
915915
*
916916
* Rounding up 47 to the nearest multiple of bits-per-byte results in 48. Hence
917917
* 2^48 was chosen as the maximum data fork extent count.
918+
*
919+
* The maximum file size that can be represented by the data fork extent counter
920+
* in the worst case occurs when all extents are 1 block in length and each
921+
* block is 1KB in size.
922+
*
923+
* With XFS_MAX_EXTCNT_DATA_FORK_SMALL representing maximum extent count and
924+
* with 1KB sized blocks, a file can reach upto,
925+
* 1KB * (2^31) = 2TB
926+
*
927+
* This is much larger than the theoretical maximum size of a directory
928+
* i.e. XFS_DIR2_SPACE_SIZE * XFS_DIR2_MAX_SPACES = ~96GB.
929+
*
930+
* Hence, a directory inode can never overflow its data fork extent counter.
918931
*/
919932
#define XFS_MAX_EXTCNT_DATA_FORK_LARGE ((xfs_extnum_t)((1ULL << 48) - 1))
920933
#define XFS_MAX_EXTCNT_ATTR_FORK_LARGE ((xfs_extnum_t)((1ULL << 32) - 1))

fs/xfs/libxfs/xfs_inode_buf.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -491,6 +491,9 @@ xfs_dinode_verify(
491491
if (mode && nextents + naextents > nblocks)
492492
return __this_address;
493493

494+
if (S_ISDIR(mode) && nextents > mp->m_dir_geo->max_extents)
495+
return __this_address;
496+
494497
if (mode && XFS_DFORK_BOFF(dip) > mp->m_sb.sb_inodesize)
495498
return __this_address;
496499

fs/xfs/libxfs/xfs_inode_fork.h

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -39,19 +39,6 @@ struct xfs_ifork {
3939
*/
4040
#define XFS_IEXT_PUNCH_HOLE_CNT (1)
4141

42-
/*
43-
* Directory entry addition can cause the following,
44-
* 1. Data block can be added/removed.
45-
* A new extent can cause extent count to increase by 1.
46-
* 2. Free disk block can be added/removed.
47-
* Same behaviour as described above for Data block.
48-
* 3. Dabtree blocks.
49-
* XFS_DA_NODE_MAXDEPTH blocks can be added. Each of these can be new
50-
* extents. Hence extent count can increase by XFS_DA_NODE_MAXDEPTH.
51-
*/
52-
#define XFS_IEXT_DIR_MANIP_CNT(mp) \
53-
((XFS_DA_NODE_MAXDEPTH + 1 + 1) * (mp)->m_dir_geo->fsbcount)
54-
5542
/*
5643
* Adding/removing an xattr can cause XFS_DA_NODE_MAXDEPTH extents to
5744
* be added. One extra extent for dabtree in case a local attr is

fs/xfs/xfs_inode.c

Lines changed: 2 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -1024,11 +1024,6 @@ xfs_create(
10241024
xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
10251025
unlock_dp_on_error = true;
10261026

1027-
error = xfs_iext_count_may_overflow(dp, XFS_DATA_FORK,
1028-
XFS_IEXT_DIR_MANIP_CNT(mp));
1029-
if (error)
1030-
goto out_trans_cancel;
1031-
10321027
/*
10331028
* A newly created regular or special file just has one directory
10341029
* entry pointing to them, but a directory also the "." entry
@@ -1242,11 +1237,6 @@ xfs_link(
12421237
if (error)
12431238
goto std_return;
12441239

1245-
error = xfs_iext_count_may_overflow(tdp, XFS_DATA_FORK,
1246-
XFS_IEXT_DIR_MANIP_CNT(mp));
1247-
if (error)
1248-
goto error_return;
1249-
12501240
/*
12511241
* If we are using project inheritance, we only allow hard link
12521242
* creation in our tree when the project IDs are the same; else
@@ -3210,35 +3200,6 @@ xfs_rename(
32103200
/*
32113201
* Check for expected errors before we dirty the transaction
32123202
* so we can return an error without a transaction abort.
3213-
*
3214-
* Extent count overflow check:
3215-
*
3216-
* From the perspective of src_dp, a rename operation is essentially a
3217-
* directory entry remove operation. Hence the only place where we check
3218-
* for extent count overflow for src_dp is in
3219-
* xfs_bmap_del_extent_real(). xfs_bmap_del_extent_real() returns
3220-
* -ENOSPC when it detects a possible extent count overflow and in
3221-
* response, the higher layers of directory handling code do the
3222-
* following:
3223-
* 1. Data/Free blocks: XFS lets these blocks linger until a
3224-
* future remove operation removes them.
3225-
* 2. Dabtree blocks: XFS swaps the blocks with the last block in the
3226-
* Leaf space and unmaps the last block.
3227-
*
3228-
* For target_dp, there are two cases depending on whether the
3229-
* destination directory entry exists or not.
3230-
*
3231-
* When destination directory entry does not exist (i.e. target_ip ==
3232-
* NULL), extent count overflow check is performed only when transaction
3233-
* has a non-zero sized space reservation associated with it. With a
3234-
* zero-sized space reservation, XFS allows a rename operation to
3235-
* continue only when the directory has sufficient free space in its
3236-
* data/leaf/free space blocks to hold the new entry.
3237-
*
3238-
* When destination directory entry exists (i.e. target_ip != NULL), all
3239-
* we need to do is change the inode number associated with the already
3240-
* existing entry. Hence there is no need to perform an extent count
3241-
* overflow check.
32423203
*/
32433204
if (target_ip == NULL) {
32443205
/*
@@ -3249,12 +3210,6 @@ xfs_rename(
32493210
error = xfs_dir_canenter(tp, target_dp, target_name);
32503211
if (error)
32513212
goto out_trans_cancel;
3252-
} else {
3253-
error = xfs_iext_count_may_overflow(target_dp,
3254-
XFS_DATA_FORK,
3255-
XFS_IEXT_DIR_MANIP_CNT(mp));
3256-
if (error)
3257-
goto out_trans_cancel;
32583213
}
32593214
} else {
32603215
/*
@@ -3422,18 +3377,12 @@ xfs_rename(
34223377
* inode number of the whiteout inode rather than removing it
34233378
* altogether.
34243379
*/
3425-
if (wip) {
3380+
if (wip)
34263381
error = xfs_dir_replace(tp, src_dp, src_name, wip->i_ino,
34273382
spaceres);
3428-
} else {
3429-
/*
3430-
* NOTE: We don't need to check for extent count overflow here
3431-
* because the dir remove name code will leave the dir block in
3432-
* place if the extent count would overflow.
3433-
*/
3383+
else
34343384
error = xfs_dir_removename(tp, src_dp, src_name, src_ip->i_ino,
34353385
spaceres);
3436-
}
34373386

34383387
if (error)
34393388
goto out_trans_cancel;

fs/xfs/xfs_symlink.c

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -226,11 +226,6 @@ xfs_symlink(
226226
goto out_trans_cancel;
227227
}
228228

229-
error = xfs_iext_count_may_overflow(dp, XFS_DATA_FORK,
230-
XFS_IEXT_DIR_MANIP_CNT(mp));
231-
if (error)
232-
goto out_trans_cancel;
233-
234229
/*
235230
* Allocate an inode for the symlink.
236231
*/

0 commit comments

Comments
 (0)