@@ -2480,6 +2480,7 @@ static int emit_fiemap_extent(struct fiemap_extent_info *fieinfo,
24802480 struct fiemap_cache * cache ,
24812481 u64 offset , u64 phys , u64 len , u32 flags )
24822482{
2483+ u64 cache_end ;
24832484 int ret = 0 ;
24842485
24852486 /* Set at the end of extent_fiemap(). */
@@ -2489,15 +2490,102 @@ static int emit_fiemap_extent(struct fiemap_extent_info *fieinfo,
24892490 goto assign ;
24902491
24912492 /*
2492- * Sanity check, extent_fiemap() should have ensured that new
2493- * fiemap extent won't overlap with cached one.
2494- * Not recoverable.
2493+ * When iterating the extents of the inode, at extent_fiemap(), we may
2494+ * find an extent that starts at an offset behind the end offset of the
2495+ * previous extent we processed. This happens if fiemap is called
2496+ * without FIEMAP_FLAG_SYNC and there are ordered extents completing
2497+ * while we call btrfs_next_leaf() (through fiemap_next_leaf_item()).
24952498 *
2496- * NOTE: Physical address can overlap, due to compression
2499+ * For example we are in leaf X processing its last item, which is the
2500+ * file extent item for file range [512K, 1M[, and after
2501+ * btrfs_next_leaf() releases the path, there's an ordered extent that
2502+ * completes for the file range [768K, 2M[, and that results in trimming
2503+ * the file extent item so that it now corresponds to the file range
2504+ * [512K, 768K[ and a new file extent item is inserted for the file
2505+ * range [768K, 2M[, which may end up as the last item of leaf X or as
2506+ * the first item of the next leaf - in either case btrfs_next_leaf()
2507+ * will leave us with a path pointing to the new extent item, for the
2508+ * file range [768K, 2M[, since that's the first key that follows the
2509+ * last one we processed. So in order not to report overlapping extents
2510+ * to user space, we trim the length of the previously cached extent and
2511+ * emit it.
2512+ *
2513+ * Upon calling btrfs_next_leaf() we may also find an extent with an
2514+ * offset smaller than or equal to cache->offset, and this happens
2515+ * when we had a hole or prealloc extent with several delalloc ranges in
2516+ * it, but after btrfs_next_leaf() released the path, delalloc was
2517+ * flushed and the resulting ordered extents were completed, so we can
2518+ * now have found a file extent item for an offset that is smaller than
2519+ * or equal to what we have in cache->offset. We deal with this as
2520+ * described below.
24972521 */
2498- if (cache -> offset + cache -> len > offset ) {
2499- WARN_ON (1 );
2500- return - EINVAL ;
2522+ cache_end = cache -> offset + cache -> len ;
2523+ if (cache_end > offset ) {
2524+ if (offset == cache -> offset ) {
2525+ /*
2526+ * We cached a delalloc range (found in the io tree) for
2527+ * a hole or prealloc extent and we have now found a
2528+ * file extent item for the same offset. What we have
2529+ * now is more recent and up to date, so discard what
2530+ * we had in the cache and use what we have just found.
2531+ */
2532+ goto assign ;
2533+ } else if (offset > cache -> offset ) {
2534+ /*
2535+ * The extent range we previously found ends after the
2536+ * offset of the file extent item we found and that
2537+ * offset falls somewhere in the middle of that previous
2538+ * extent range. So adjust the range we previously found
2539+ * to end at the offset of the file extent item we have
2540+ * just found, since this extent is more up to date.
2541+ * Emit that adjusted range and cache the file extent
2542+ * item we have just found. This corresponds to the case
2543+ * where a previously found file extent item was split
2544+ * due to an ordered extent completing.
2545+ */
2546+ cache -> len = offset - cache -> offset ;
2547+ goto emit ;
2548+ } else {
2549+ const u64 range_end = offset + len ;
2550+
2551+ /*
2552+ * The offset of the file extent item we have just found
2553+ * is behind the cached offset. This means we were
2554+ * processing a hole or prealloc extent for which we
2555+ * have found delalloc ranges (in the io tree), so what
2556+ * we have in the cache is the last delalloc range we
2557+ * found while the file extent item we found can be
2558+ * either for a whole delalloc range we previously
2559+ * emitted or only a part of that range.
2560+ *
2561+ * We have two cases here:
2562+ *
2563+ * 1) The file extent item's range ends at or behind the
2564+ * cached extent's end. In this case just ignore the
2565+ * current file extent item because we don't want to
2566+ * overlap with previous ranges that may have been
2567+ * emitted already;
2568+ *
2569+ * 2) The file extent item starts behind the currently
2570+ * cached extent but its end offset goes beyond the
2571+ * end offset of the cached extent. We don't want to
2572+ * overlap with a previous range that may have been
2573+ * emitted already, so we emit the currently cached
2574+ * extent and then partially store the current file
2575+ * extent item's range in the cache, for the subrange
2576+ * going from the cached extent's end to the end of the
2577+ * file extent item.
2578+ */
2579+ if (range_end <= cache_end )
2580+ return 0 ;
2581+
2582+ if (!(flags & (FIEMAP_EXTENT_ENCODED | FIEMAP_EXTENT_DELALLOC )))
2583+ phys += cache_end - offset ;
2584+
2585+ offset = cache_end ;
2586+ len = range_end - cache_end ;
2587+ goto emit ;
2588+ }
25012589 }
25022590
25032591 /*
@@ -2517,6 +2605,7 @@ static int emit_fiemap_extent(struct fiemap_extent_info *fieinfo,
25172605 return 0 ;
25182606 }
25192607
2608+ emit :
25202609 /* Not mergeable, need to submit cached one */
25212610 ret = fiemap_fill_next_extent (fieinfo , cache -> offset , cache -> phys ,
25222611 cache -> len , cache -> flags );
@@ -2907,17 +2996,15 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
29072996 range_end = round_up (start + len , sectorsize );
29082997 prev_extent_end = range_start ;
29092998
2910- btrfs_inode_lock (inode , BTRFS_ILOCK_SHARED );
2911-
29122999 ret = fiemap_find_last_extent_offset (inode , path , & last_extent_end );
29133000 if (ret < 0 )
2914- goto out_unlock ;
3001+ goto out ;
29153002 btrfs_release_path (path );
29163003
29173004 path -> reada = READA_FORWARD ;
29183005 ret = fiemap_search_slot (inode , path , range_start );
29193006 if (ret < 0 ) {
2920- goto out_unlock ;
3007+ goto out ;
29213008 } else if (ret > 0 ) {
29223009 /*
29233010 * No file extent item found, but we may have delalloc between
@@ -2964,7 +3051,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
29643051 backref_ctx , 0 , 0 , 0 ,
29653052 prev_extent_end , hole_end );
29663053 if (ret < 0 ) {
2967- goto out_unlock ;
3054+ goto out ;
29683055 } else if (ret > 0 ) {
29693056 /* fiemap_fill_next_extent() told us to stop. */
29703057 stopped = true;
@@ -3020,7 +3107,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
30203107 extent_gen ,
30213108 backref_ctx );
30223109 if (ret < 0 )
3023- goto out_unlock ;
3110+ goto out ;
30243111 else if (ret > 0 )
30253112 flags |= FIEMAP_EXTENT_SHARED ;
30263113 }
@@ -3031,7 +3118,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
30313118 }
30323119
30333120 if (ret < 0 ) {
3034- goto out_unlock ;
3121+ goto out ;
30353122 } else if (ret > 0 ) {
30363123 /* fiemap_fill_next_extent() told us to stop. */
30373124 stopped = true;
@@ -3042,12 +3129,12 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
30423129next_item :
30433130 if (fatal_signal_pending (current )) {
30443131 ret = - EINTR ;
3045- goto out_unlock ;
3132+ goto out ;
30463133 }
30473134
30483135 ret = fiemap_next_leaf_item (inode , path );
30493136 if (ret < 0 ) {
3050- goto out_unlock ;
3137+ goto out ;
30513138 } else if (ret > 0 ) {
30523139 /* No more file extent items for this inode. */
30533140 break ;
@@ -3071,7 +3158,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
30713158 & delalloc_cached_state , backref_ctx ,
30723159 0 , 0 , 0 , prev_extent_end , range_end - 1 );
30733160 if (ret < 0 )
3074- goto out_unlock ;
3161+ goto out ;
30753162 prev_extent_end = range_end ;
30763163 }
30773164
@@ -3109,9 +3196,6 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
31093196 }
31103197
31113198 ret = emit_last_fiemap_cache (fieinfo , & cache );
3112-
3113- out_unlock :
3114- btrfs_inode_unlock (inode , BTRFS_ILOCK_SHARED );
31153199out :
31163200 free_extent_state (delalloc_cached_state );
31173201 btrfs_free_backref_share_ctx (backref_ctx );
0 commit comments