@@ -1610,30 +1610,34 @@ static void iomap_writepage_end_bio(struct bio *bio)
  * Submit the final bio for an ioend.
  *
  * If @error is non-zero, it means that we have a situation where some part of
- * the submission process has failed after we've marked pages for writeback
- * and unlocked them. In this situation, we need to fail the bio instead of
- * submitting it. This typically only happens on a filesystem shutdown.
+ * the submission process has failed after we've marked pages for writeback.
+ * We cannot cancel the ioend directly in that case, so call the bio end I/O
+ * handler with the error status here to run the normal I/O completion handler
+ * to clear the writeback bit and let the file system process the errors.
  */
-static int
-iomap_submit_ioend(struct iomap_writepage_ctx *wpc, struct iomap_ioend *ioend,
-		int error)
+static int iomap_submit_ioend(struct iomap_writepage_ctx *wpc, int error)
 {
+	if (!wpc->ioend)
+		return error;
+
+	/*
+	 * Let the file systems prepare the I/O submission and hook in an I/O
+	 * completion handler.  This also needs to happen after a failure has
+	 * occurred so that the file system end I/O handler gets called to
+	 * clean up.
+	 */
 	if (wpc->ops->prepare_ioend)
-		error = wpc->ops->prepare_ioend(ioend, error);
+		error = wpc->ops->prepare_ioend(wpc->ioend, error);
+
 	if (error) {
-		/*
-		 * If we're failing the IO now, just mark the ioend with an
-		 * error and finish it. This will run IO completion immediately
-		 * as there is only one reference to the ioend at this point in
-		 * time.
-		 */
-		ioend->io_bio.bi_status = errno_to_blk_status(error);
-		bio_endio(&ioend->io_bio);
-		return error;
+		wpc->ioend->io_bio.bi_status = errno_to_blk_status(error);
+		bio_endio(&wpc->ioend->io_bio);
+	} else {
+		submit_bio(&wpc->ioend->io_bio);
 	}
 
-	submit_bio(&ioend->io_bio);
-	return 0;
+	wpc->ioend = NULL;
+	return error;
 }
 
 static struct iomap_ioend *iomap_alloc_ioend(struct iomap_writepage_ctx *wpc,
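
For orientation, ->prepare_ioend is the optional per-filesystem hook in struct iomap_writeback_ops that runs just before submission; its return value is fed back into @error above. A minimal sketch of such a hook, with the myfs_* names purely hypothetical:

	static int myfs_prepare_ioend(struct iomap_ioend *ioend, int status)
	{
		/*
		 * Route the completion (including an error forced in by
		 * iomap_submit_ioend()) through a filesystem-specific
		 * bi_end_io, which is then responsible for finishing the
		 * ioend.
		 */
		ioend->io_bio.bi_end_io = myfs_end_bio;	/* hypothetical handler */
		return status;
	}
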
@@ -1687,19 +1691,28 @@ static bool iomap_can_add_to_ioend(struct iomap_writepage_ctx *wpc, loff_t pos)
 /*
  * Test to see if we have an existing ioend structure that we could append to
  * first; otherwise finish off the current ioend and start another.
+ *
+ * If a new ioend is created and cached, the old ioend is submitted to the block
+ * layer instantly.  Batching optimisations are provided by higher level block
+ * plugging.
+ *
+ * At the end of a writeback pass, there will be a cached ioend remaining on the
+ * writepage context that the caller will need to submit.
  */
-static void iomap_add_to_ioend(struct iomap_writepage_ctx *wpc,
+static int iomap_add_to_ioend(struct iomap_writepage_ctx *wpc,
 		struct writeback_control *wbc, struct folio *folio,
-		struct inode *inode, loff_t pos, struct list_head *iolist)
+		struct inode *inode, loff_t pos)
 {
 	struct iomap_folio_state *ifs = folio->private;
 	unsigned len = i_blocksize(inode);
 	size_t poff = offset_in_folio(folio, pos);
+	int error;
 
 	if (!wpc->ioend || !iomap_can_add_to_ioend(wpc, pos)) {
 new_ioend:
-		if (wpc->ioend)
-			list_add(&wpc->ioend->io_list, iolist);
+		error = iomap_submit_ioend(wpc, 0);
+		if (error)
+			return error;
 		wpc->ioend = iomap_alloc_ioend(wpc, wbc, inode, pos);
 	}
 
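
The "higher level block plugging" mentioned in the new comment lives in the core writeback code rather than in iomap; the effect is roughly the pattern below (an illustration, not the actual fs-writeback code), so submitting one bio per ioend here still results in batched I/O:

	struct blk_plug plug;
	int ret;

	blk_start_plug(&plug);
	ret = do_writepages(mapping, wbc);	/* ends up in iomap_writepages() */
	blk_finish_plug(&plug);			/* the per-ioend bios are dispatched in a batch */
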
@@ -1710,12 +1723,12 @@ static void iomap_add_to_ioend(struct iomap_writepage_ctx *wpc,
 		atomic_add(len, &ifs->write_bytes_pending);
 	wpc->ioend->io_size += len;
 	wbc_account_cgroup_owner(wbc, &folio->page, len);
+	return 0;
 }
 
 static int iomap_writepage_map_blocks(struct iomap_writepage_ctx *wpc,
 		struct writeback_control *wbc, struct folio *folio,
-		struct inode *inode, u64 pos, unsigned *count,
-		struct list_head *submit_list)
+		struct inode *inode, u64 pos, unsigned *count)
 {
 	int error;
 
@@ -1732,8 +1745,9 @@ static int iomap_writepage_map_blocks(struct iomap_writepage_ctx *wpc,
 	case IOMAP_HOLE:
 		break;
 	default:
-		iomap_add_to_ioend(wpc, wbc, folio, inode, pos, submit_list);
-		(*count)++;
+		error = iomap_add_to_ioend(wpc, wbc, folio, inode, pos);
+		if (!error)
+			(*count)++;
 	}
 
 fail:
@@ -1809,35 +1823,21 @@ static bool iomap_writepage_handle_eof(struct folio *folio, struct inode *inode,
 	return true;
 }
 
-/*
- * We implement an immediate ioend submission policy here to avoid needing to
- * chain multiple ioends and hence nest mempool allocations which can violate
- * the forward progress guarantees we need to provide. The current ioend we're
- * adding blocks to is cached in the writepage context, and if the new block
- * doesn't append to the cached ioend, it will create a new ioend and cache that
- * instead.
- *
- * If a new ioend is created and cached, the old ioend is returned and queued
- * locally for submission once the entire page is processed or an error has been
- * detected. While ioends are submitted immediately after they are completed,
- * batching optimisations are provided by higher level block plugging.
- *
- * At the end of a writeback pass, there will be a cached ioend remaining on the
- * writepage context that the caller will need to submit.
- */
 static int iomap_writepage_map(struct iomap_writepage_ctx *wpc,
 		struct writeback_control *wbc, struct folio *folio)
 {
 	struct iomap_folio_state *ifs = folio->private;
 	struct inode *inode = folio->mapping->host;
-	struct iomap_ioend *ioend, *next;
 	unsigned len = i_blocksize(inode);
 	unsigned nblocks = i_blocks_per_folio(inode, folio);
 	u64 pos = folio_pos(folio);
 	u64 end_pos = pos + folio_size(folio);
 	unsigned count = 0;
 	int error = 0, i;
-	LIST_HEAD(submit_list);
+
+	WARN_ON_ONCE(!folio_test_locked(folio));
+	WARN_ON_ONCE(folio_test_dirty(folio));
+	WARN_ON_ONCE(folio_test_writeback(folio));
 
 	trace_iomap_writepage(inode, pos, folio_size(folio));
 
@@ -1847,12 +1847,27 @@ static int iomap_writepage_map(struct iomap_writepage_ctx *wpc,
 	}
 	WARN_ON_ONCE(end_pos <= pos);
 
-	if (!ifs && nblocks > 1) {
-		ifs = ifs_alloc(inode, folio, 0);
-		iomap_set_range_dirty(folio, 0, end_pos - pos);
+	if (nblocks > 1) {
+		if (!ifs) {
+			ifs = ifs_alloc(inode, folio, 0);
+			iomap_set_range_dirty(folio, 0, end_pos - pos);
+		}
+
+		/*
+		 * Keep the I/O completion handler from clearing the writeback
+		 * bit until we have submitted all blocks by adding a bias to
+		 * ifs->write_bytes_pending, which is dropped after submitting
+		 * all blocks.
+		 */
+		WARN_ON_ONCE(atomic_read(&ifs->write_bytes_pending) != 0);
+		atomic_inc(&ifs->write_bytes_pending);
 	}
 
-	WARN_ON_ONCE(ifs && atomic_read(&ifs->write_bytes_pending) != 0);
+	/*
+	 * Set the writeback bit ASAP, as the I/O completion for the single
+	 * block per folio case happens as soon as we submit the bio.
+	 */
+	folio_start_writeback(folio);
 
 	/*
 	 * Walk through the folio to find areas to write back. If we
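
Condensed, the bias taken above works together with the drop in the final hunk of iomap_writepage_map() below roughly as follows; this is an illustration of the pattern, not the kernel code, with the per-block counts added in iomap_add_to_ioend() and subtracted by the completion handler:

	atomic_inc(&ifs->write_bytes_pending);			/* bias: submission in progress */

	for (i = 0; i < nblocks; i++)				/* for each dirty block ... */
		atomic_add(len, &ifs->write_bytes_pending);	/* ... account the submitted bytes */

	/*
	 * Completions may already be subtracting concurrently, but the counter
	 * cannot reach zero while the bias is held, so the writeback bit stays
	 * set until every block has been submitted.
	 */
	if (atomic_dec_and_test(&ifs->write_bytes_pending))	/* drop the bias */
		folio_end_writeback(folio);
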
@@ -1863,18 +1878,13 @@ static int iomap_writepage_map(struct iomap_writepage_ctx *wpc,
 		if (ifs && !ifs_block_is_dirty(folio, ifs, i))
 			continue;
 		error = iomap_writepage_map_blocks(wpc, wbc, folio, inode, pos,
-				&count, &submit_list);
+				&count);
 		if (error)
 			break;
 	}
 	if (count)
 		wpc->nr_folios++;
 
-	WARN_ON_ONCE(!wpc->ioend && !list_empty(&submit_list));
-	WARN_ON_ONCE(!folio_test_locked(folio));
-	WARN_ON_ONCE(folio_test_writeback(folio));
-	WARN_ON_ONCE(folio_test_dirty(folio));
-
 	/*
 	 * We can have dirty bits set past end of file in page_mkwrite path
 	 * while mapping the last partial folio. Hence it's better to clear
@@ -1883,38 +1893,20 @@ static int iomap_writepage_map(struct iomap_writepage_ctx *wpc,
 	iomap_clear_range_dirty(folio, 0, folio_size(folio));
 
 	/*
-	 * If the page hasn't been added to the ioend, it won't be affected by
-	 * I/O completion and we must unlock it now.
+	 * Usually the writeback bit is cleared by the I/O completion handler.
+	 * But we may end up not writing any blocks at all, or (when there are
+	 * multiple blocks in a folio) all I/O may already have finished at
+	 * this point.  In that case we need to clear the writeback bit
+	 * ourselves right after unlocking the page.
 	 */
-	if (error && !count) {
-		folio_unlock(folio);
-		goto done;
-	}
-
-	folio_start_writeback(folio);
 	folio_unlock(folio);
-
-	/*
-	 * Preserve the original error if there was one; catch
-	 * submission errors here and propagate into subsequent ioend
-	 * submissions.
-	 */
-	list_for_each_entry_safe(ioend, next, &submit_list, io_list) {
-		int error2;
-
-		list_del_init(&ioend->io_list);
-		error2 = iomap_submit_ioend(wpc, ioend, error);
-		if (error2 && !error)
-			error = error2;
+	if (ifs) {
+		if (atomic_dec_and_test(&ifs->write_bytes_pending))
+			folio_end_writeback(folio);
+	} else {
+		if (!count)
+			folio_end_writeback(folio);
 	}
-
-	/*
-	 * We can end up here with no error and nothing to write only if we race
-	 * with a partial page truncate on a sub-page block sized filesystem.
-	 */
-	if (!count)
-		folio_end_writeback(folio);
-done:
 	mapping_set_error(inode->i_mapping, error);
 	return error;
 }
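
For completeness, the completion side referred to in the comment above decrements the same counter per finished block; a minimal sketch, loosely modelled on iomap_finish_folio_write() in this file (details may differ), with the myfs_* name hypothetical:

	static void myfs_finish_folio_write(struct inode *inode, struct folio *folio,
			size_t len, int error)
	{
		struct iomap_folio_state *ifs = folio->private;

		if (error)
			mapping_set_error(inode->i_mapping, error);

		/*
		 * Whichever decrement reaches zero - the last block completing
		 * here, or the submission path dropping its bias - ends
		 * writeback on the folio.
		 */
		if (!ifs || atomic_sub_and_test(len, &ifs->write_bytes_pending))
			folio_end_writeback(folio);
	}
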
@@ -1942,9 +1934,7 @@ iomap_writepages(struct address_space *mapping, struct writeback_control *wbc,
 
 	wpc->ops = ops;
 	ret = write_cache_pages(mapping, wbc, iomap_do_writepage, wpc);
-	if (!wpc->ioend)
-		return ret;
-	return iomap_submit_ioend(wpc, wpc->ioend, ret);
+	return iomap_submit_ioend(wpc, ret);
 }
 EXPORT_SYMBOL_GPL(iomap_writepages);
 
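
Finally, a hypothetical caller showing how a filesystem wires its ->writepages into this path; the myfs_* names are made up, and the ops table is expected to supply ->map_blocks plus, optionally, ->prepare_ioend and ->discard_folio:

	static int myfs_writepages(struct address_space *mapping,
			struct writeback_control *wbc)
	{
		struct iomap_writepage_ctx wpc = { };

		return iomap_writepages(mapping, wbc, &wpc, &myfs_writeback_ops);
	}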