@@ -29,11 +29,6 @@ struct kmem_cache *xfs_buf_cache;
 /*
  * Locking orders
  *
- * xfs_buf_ioacct_inc:
- * xfs_buf_ioacct_dec:
- *	b_sema (caller holds)
- *	  b_lock
- *
  * xfs_buf_stale:
  *	b_sema (caller holds)
  *	  b_lock
@@ -81,51 +76,6 @@ xfs_buf_vmap_len(
 	return (bp->b_page_count * PAGE_SIZE);
 }
 
-/*
- * Bump the I/O in flight count on the buftarg if we haven't yet done so for
- * this buffer. The count is incremented once per buffer (per hold cycle)
- * because the corresponding decrement is deferred to buffer release. Buffers
- * can undergo I/O multiple times in a hold-release cycle and per buffer I/O
- * tracking adds unnecessary overhead. This is used for sychronization purposes
- * with unmount (see xfs_buftarg_drain()), so all we really need is a count of
- * in-flight buffers.
- *
- * Buffers that are never released (e.g., superblock, iclog buffers) must set
- * the XBF_NO_IOACCT flag before I/O submission. Otherwise, the buftarg count
- * never reaches zero and unmount hangs indefinitely.
- */
-static inline void
-xfs_buf_ioacct_inc(
-	struct xfs_buf	*bp)
-{
-	if (bp->b_flags & XBF_NO_IOACCT)
-		return;
-
-	ASSERT(bp->b_flags & XBF_ASYNC);
-	spin_lock(&bp->b_lock);
-	if (!(bp->b_state & XFS_BSTATE_IN_FLIGHT)) {
-		bp->b_state |= XFS_BSTATE_IN_FLIGHT;
-		percpu_counter_inc(&bp->b_target->bt_io_count);
-	}
-	spin_unlock(&bp->b_lock);
-}
-
-/*
- * Clear the in-flight state on a buffer about to be released to the LRU or
- * freed and unaccount from the buftarg.
- */
-static inline void
-__xfs_buf_ioacct_dec(
-	struct xfs_buf	*bp)
-{
-	lockdep_assert_held(&bp->b_lock);
-
-	if (bp->b_state & XFS_BSTATE_IN_FLIGHT) {
-		bp->b_state &= ~XFS_BSTATE_IN_FLIGHT;
-		percpu_counter_dec(&bp->b_target->bt_io_count);
-	}
-}
-
 /*
  * When we mark a buffer stale, we remove the buffer from the LRU and clear the
  * b_lru_ref count so that the buffer is freed immediately when the buffer
@@ -149,15 +99,7 @@ xfs_buf_stale(
 	 */
 	bp->b_flags &= ~_XBF_DELWRI_Q;
 
-	/*
-	 * Once the buffer is marked stale and unlocked, a subsequent lookup
-	 * could reset b_flags. There is no guarantee that the buffer is
-	 * unaccounted (released to LRU) before that occurs. Drop in-flight
-	 * status now to preserve accounting consistency.
-	 */
 	spin_lock(&bp->b_lock);
-	__xfs_buf_ioacct_dec(bp);
-
 	atomic_set(&bp->b_lru_ref, 0);
 	if (!(bp->b_state & XFS_BSTATE_DISPOSE) &&
 	    (list_lru_del_obj(&bp->b_target->bt_lru, &bp->b_lru)))
@@ -794,18 +736,13 @@ xfs_buf_get_map(
 
 int
 _xfs_buf_read(
-	struct xfs_buf		*bp,
-	xfs_buf_flags_t		flags)
+	struct xfs_buf		*bp)
 {
-	ASSERT(!(flags & XBF_WRITE));
 	ASSERT(bp->b_maps[0].bm_bn != XFS_BUF_DADDR_NULL);
 
 	bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_READ_AHEAD | XBF_DONE);
-	bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | XBF_READ_AHEAD);
-
+	bp->b_flags |= XBF_READ;
 	xfs_buf_submit(bp);
-	if (flags & XBF_ASYNC)
-		return 0;
 	return xfs_buf_iowait(bp);
 }
 
@@ -857,6 +794,8 @@ xfs_buf_read_map(
 	struct xfs_buf		*bp;
 	int			error;
 
+	ASSERT(!(flags & (XBF_WRITE | XBF_ASYNC | XBF_READ_AHEAD)));
+
 	flags |= XBF_READ;
 	*bpp = NULL;
 
@@ -870,21 +809,11 @@ xfs_buf_read_map(
 		/* Initiate the buffer read and wait. */
 		XFS_STATS_INC(target->bt_mount, xb_get_read);
 		bp->b_ops = ops;
-		error = _xfs_buf_read(bp, flags);
-
-		/* Readahead iodone already dropped the buffer, so exit. */
-		if (flags & XBF_ASYNC)
-			return 0;
+		error = _xfs_buf_read(bp);
 	} else {
 		/* Buffer already read; all we need to do is check it. */
 		error = xfs_buf_reverify(bp, ops);
 
-		/* Readahead already finished; drop the buffer and exit. */
-		if (flags & XBF_ASYNC) {
-			xfs_buf_relse(bp);
-			return 0;
-		}
-
 		/* We do not want read in the flags */
 		bp->b_flags &= ~XBF_READ;
 		ASSERT(bp->b_ops != NULL || ops == NULL);
@@ -936,6 +865,7 @@ xfs_buf_readahead_map(
 	int			nmaps,
 	const struct xfs_buf_ops *ops)
 {
+	const xfs_buf_flags_t flags = XBF_READ | XBF_ASYNC | XBF_READ_AHEAD;
 	struct xfs_buf		*bp;
 
 	/*
@@ -945,9 +875,21 @@ xfs_buf_readahead_map(
 	if (xfs_buftarg_is_mem(target))
 		return;
 
-	xfs_buf_read_map(target, map, nmaps,
-		     XBF_TRYLOCK | XBF_ASYNC | XBF_READ_AHEAD, &bp, ops,
-		     __this_address);
+	if (xfs_buf_get_map(target, map, nmaps, flags | XBF_TRYLOCK, &bp))
+		return;
+	trace_xfs_buf_readahead(bp, 0, _RET_IP_);
+
+	if (bp->b_flags & XBF_DONE) {
+		xfs_buf_reverify(bp, ops);
+		xfs_buf_relse(bp);
+		return;
+	}
+	XFS_STATS_INC(target->bt_mount, xb_get_read);
+	bp->b_ops = ops;
+	bp->b_flags &= ~(XBF_WRITE | XBF_DONE);
+	bp->b_flags |= flags;
+	percpu_counter_inc(&target->bt_readahead_count);
+	xfs_buf_submit(bp);
 }
 
 /*
@@ -1003,10 +945,12 @@ xfs_buf_get_uncached(
 	struct xfs_buf		*bp;
 	DEFINE_SINGLE_BUF_MAP(map, XFS_BUF_DADDR_NULL, numblks);
 
+	/* there are currently no valid flags for xfs_buf_get_uncached */
+	ASSERT(flags == 0);
+
 	*bpp = NULL;
 
-	/* flags might contain irrelevant bits, pass only what we care about */
-	error = _xfs_buf_alloc(target, &map, 1, flags & XBF_NO_IOACCT, &bp);
+	error = _xfs_buf_alloc(target, &map, 1, flags, &bp);
 	if (error)
 		return error;
 
@@ -1060,7 +1004,6 @@ xfs_buf_rele_uncached(
 		spin_unlock(&bp->b_lock);
 		return;
 	}
-	__xfs_buf_ioacct_dec(bp);
 	spin_unlock(&bp->b_lock);
 	xfs_buf_free(bp);
 }
@@ -1079,20 +1022,12 @@ xfs_buf_rele_cached(
 	spin_lock(&bp->b_lock);
 	ASSERT(bp->b_hold >= 1);
 	if (bp->b_hold > 1) {
-		/*
-		 * Drop the in-flight state if the buffer is already on the LRU
-		 * and it holds the only reference. This is racy because we
-		 * haven't acquired the pag lock, but the use of _XBF_IN_FLIGHT
-		 * ensures the decrement occurs only once per-buf.
-		 */
-		if (--bp->b_hold == 1 && !list_empty(&bp->b_lru))
-			__xfs_buf_ioacct_dec(bp);
+		bp->b_hold--;
 		goto out_unlock;
 	}
 
 	/* we are asked to drop the last reference */
-	__xfs_buf_ioacct_dec(bp);
-	if (!(bp->b_flags & XBF_STALE) && atomic_read(&bp->b_lru_ref)) {
+	if (atomic_read(&bp->b_lru_ref)) {
 		/*
 		 * If the buffer is added to the LRU, keep the reference to the
 		 * buffer for the LRU and clear the (now stale) dispose list
@@ -1345,6 +1280,7 @@ xfs_buf_ioend_handle_error(
 resubmit:
 	xfs_buf_ioerror(bp, 0);
 	bp->b_flags |= (XBF_DONE | XBF_WRITE_FAIL);
+	reinit_completion(&bp->b_iowait);
 	xfs_buf_submit(bp);
 	return true;
 out_stale:
@@ -1355,8 +1291,9 @@ xfs_buf_ioend_handle_error(
 	return false;
 }
 
-static void
-xfs_buf_ioend(
+/* returns false if the caller needs to resubmit the I/O, else true */
+static bool
+__xfs_buf_ioend(
 	struct xfs_buf	*bp)
 {
 	trace_xfs_buf_iodone(bp, _RET_IP_);
@@ -1369,14 +1306,16 @@ xfs_buf_ioend(
 			bp->b_ops->verify_read(bp);
 		if (!bp->b_error)
 			bp->b_flags |= XBF_DONE;
+		if (bp->b_flags & XBF_READ_AHEAD)
+			percpu_counter_dec(&bp->b_target->bt_readahead_count);
 	} else {
 		if (!bp->b_error) {
 			bp->b_flags &= ~XBF_WRITE_FAIL;
 			bp->b_flags |= XBF_DONE;
 		}
 
 		if (unlikely(bp->b_error) && xfs_buf_ioend_handle_error(bp))
-			return;
+			return false;
 
 		/* clear the retry state */
 		bp->b_last_error = 0;
@@ -1397,7 +1336,15 @@ xfs_buf_ioend(
 
 	bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD |
 			 _XBF_LOGRECOVERY);
+	return true;
+}
 
+static void
+xfs_buf_ioend(
+	struct xfs_buf	*bp)
+{
+	if (!__xfs_buf_ioend(bp))
+		return;
 	if (bp->b_flags & XBF_ASYNC)
 		xfs_buf_relse(bp);
 	else
@@ -1411,15 +1358,8 @@ xfs_buf_ioend_work(
 	struct xfs_buf		*bp =
 		container_of(work, struct xfs_buf, b_ioend_work);
 
-	xfs_buf_ioend(bp);
-}
-
-static void
-xfs_buf_ioend_async(
-	struct xfs_buf	*bp)
-{
-	INIT_WORK(&bp->b_ioend_work, xfs_buf_ioend_work);
-	queue_work(bp->b_mount->m_buf_workqueue, &bp->b_ioend_work);
+	if (__xfs_buf_ioend(bp))
+		xfs_buf_relse(bp);
 }
 
 void
@@ -1491,7 +1431,13 @@ xfs_buf_bio_end_io(
 	    XFS_TEST_ERROR(false, bp->b_mount, XFS_ERRTAG_BUF_IOERROR))
 		xfs_buf_ioerror(bp, -EIO);
 
-	xfs_buf_ioend_async(bp);
+	if (bp->b_flags & XBF_ASYNC) {
+		INIT_WORK(&bp->b_ioend_work, xfs_buf_ioend_work);
+		queue_work(bp->b_mount->m_buf_workqueue, &bp->b_ioend_work);
+	} else {
+		complete(&bp->b_iowait);
+	}
+
 	bio_put(bio);
 }
 
@@ -1568,9 +1514,11 @@ xfs_buf_iowait(
 {
 	ASSERT(!(bp->b_flags & XBF_ASYNC));
 
-	trace_xfs_buf_iowait(bp, _RET_IP_);
-	wait_for_completion(&bp->b_iowait);
-	trace_xfs_buf_iowait_done(bp, _RET_IP_);
+	do {
+		trace_xfs_buf_iowait(bp, _RET_IP_);
+		wait_for_completion(&bp->b_iowait);
+		trace_xfs_buf_iowait_done(bp, _RET_IP_);
+	} while (!__xfs_buf_ioend(bp));
 
 	return bp->b_error;
 }
@@ -1648,9 +1596,6 @@ xfs_buf_submit(
 	 */
 	bp->b_error = 0;
 
-	if (bp->b_flags & XBF_ASYNC)
-		xfs_buf_ioacct_inc(bp);
-
 	if ((bp->b_flags & XBF_WRITE) && !xfs_buf_verify_write(bp)) {
 		xfs_force_shutdown(bp->b_mount, SHUTDOWN_CORRUPT_INCORE);
 		xfs_buf_ioend(bp);
@@ -1776,9 +1721,8 @@ xfs_buftarg_wait(
 	struct xfs_buftarg	*btp)
 {
 	/*
-	 * First wait on the buftarg I/O count for all in-flight buffers to be
-	 * released. This is critical as new buffers do not make the LRU until
-	 * they are released.
+	 * First wait for all in-flight readahead buffers to be released. This is
+	 * critical as new buffers do not make the LRU until they are released.
 	 *
 	 * Next, flush the buffer workqueue to ensure all completion processing
 	 * has finished. Just waiting on buffer locks is not sufficient for
@@ -1787,7 +1731,7 @@ xfs_buftarg_wait(
 	 * all reference counts have been dropped before we start walking the
 	 * LRU list.
 	 */
-	while (percpu_counter_sum(&btp->bt_io_count))
+	while (percpu_counter_sum(&btp->bt_readahead_count))
 		delay(100);
 	flush_workqueue(btp->bt_mount->m_buf_workqueue);
 }
@@ -1904,8 +1848,8 @@ xfs_destroy_buftarg(
 	struct xfs_buftarg	*btp)
 {
 	shrinker_free(btp->bt_shrinker);
-	ASSERT(percpu_counter_sum(&btp->bt_io_count) == 0);
-	percpu_counter_destroy(&btp->bt_io_count);
+	ASSERT(percpu_counter_sum(&btp->bt_readahead_count) == 0);
+	percpu_counter_destroy(&btp->bt_readahead_count);
 	list_lru_destroy(&btp->bt_lru);
 }
 
@@ -1959,7 +1903,7 @@ xfs_init_buftarg(
 
 	if (list_lru_init(&btp->bt_lru))
 		return -ENOMEM;
-	if (percpu_counter_init(&btp->bt_io_count, 0, GFP_KERNEL))
+	if (percpu_counter_init(&btp->bt_readahead_count, 0, GFP_KERNEL))
 		goto out_destroy_lru;
 
 	btp->bt_shrinker =
@@ -1973,7 +1917,7 @@ xfs_init_buftarg(
 	return 0;
 
 out_destroy_io_count:
-	percpu_counter_destroy(&btp->bt_io_count);
+	percpu_counter_destroy(&btp->bt_readahead_count);
 out_destroy_lru:
 	list_lru_destroy(&btp->bt_lru);
 	return -ENOMEM;
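For context, a minimal caller-side sketch of the two read paths touched above, assuming only the post-change prototypes visible in this diff: xfs_buf_readahead_map() now owns the whole async readahead path (and accounts the buffer in bt_readahead_count until completion), while xfs_buf_read_map() is strictly synchronous and asserts that XBF_WRITE, XBF_ASYNC and XBF_READ_AHEAD are not passed in. The helper name example_read_block() is hypothetical and not part of the patch.

/*
 * Hypothetical caller sketch (not part of this patch): issue an asynchronous
 * readahead hint for a block range, then perform the blocking read later.
 */
static int
example_read_block(
	struct xfs_buftarg		*target,
	xfs_daddr_t			daddr,
	size_t				numblks,
	const struct xfs_buf_ops	*ops,
	struct xfs_buf			**bpp)
{
	DEFINE_SINGLE_BUF_MAP(map, daddr, numblks);

	/* fire-and-forget hint; readahead errors are deliberately ignored */
	xfs_buf_readahead_map(target, &map, 1, ops);

	/* later: synchronous read, which waits via xfs_buf_iowait() */
	return xfs_buf_read_map(target, &map, 1, 0, bpp, ops,
			__this_address);
}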