2121
2222#include "../internal.h"
2323
24+ #define IOEND_BATCH_SIZE 4096
25+
2426/*
2527 * Structure allocated for each folio when block size < folio size
2628 * to track sub-folio uptodate status and I/O completions.
@@ -1039,7 +1041,7 @@ static void iomap_finish_folio_write(struct inode *inode, struct folio *folio,
10391041 * state, release holds on bios, and finally free up memory. Do not use the
10401042 * ioend after this.
10411043 */
1042- static void
1044+ static u32
10431045iomap_finish_ioend (struct iomap_ioend * ioend , int error )
10441046{
10451047 struct inode * inode = ioend -> io_inode ;
@@ -1048,6 +1050,7 @@ iomap_finish_ioend(struct iomap_ioend *ioend, int error)
10481050 u64 start = bio -> bi_iter .bi_sector ;
10491051 loff_t offset = ioend -> io_offset ;
10501052 bool quiet = bio_flagged (bio , BIO_QUIET );
1053+ u32 folio_count = 0 ;
10511054
10521055 for (bio = & ioend -> io_inline_bio ; bio ; bio = next ) {
10531056 struct folio_iter fi ;
@@ -1062,9 +1065,11 @@ iomap_finish_ioend(struct iomap_ioend *ioend, int error)
10621065 next = bio -> bi_private ;
10631066
10641067 /* walk all folios in bio, ending page IO on them */
1065- bio_for_each_folio_all (fi , bio )
1068+ bio_for_each_folio_all (fi , bio ) {
10661069 iomap_finish_folio_write (inode , fi .folio , fi .length ,
10671070 error );
1071+ folio_count ++ ;
1072+ }
10681073 bio_put (bio );
10691074 }
10701075 /* The ioend has been freed by bio_put() */
@@ -1074,20 +1079,36 @@ iomap_finish_ioend(struct iomap_ioend *ioend, int error)
10741079"%s: writeback error on inode %lu, offset %lld, sector %llu" ,
10751080 inode -> i_sb -> s_id , inode -> i_ino , offset , start );
10761081 }
1082+ return folio_count ;
10771083}
10781084
1085+ /*
1086+ * Ioend completion routine for merged bios. This can only be called from task
1087+ * contexts as merged ioends can be of unbound length. Hence we have to break up
1088+ * the writeback completions into manageable chunks to avoid long scheduler
1089+ * holdoffs. We aim to keep scheduler holdoffs down below 10ms so that we get
1090+ * good batch processing throughput without creating adverse scheduler latency
1091+ * conditions.
1092+ */
10791093void
10801094iomap_finish_ioends (struct iomap_ioend * ioend , int error )
10811095{
10821096 struct list_head tmp ;
1097+ u32 completions ;
1098+
1099+ might_sleep ();
10831100
10841101 list_replace_init (& ioend -> io_list , & tmp );
1085- iomap_finish_ioend (ioend , error );
1102+ completions = iomap_finish_ioend (ioend , error );
10861103
10871104 while (!list_empty (& tmp )) {
1105+ if (completions > IOEND_BATCH_SIZE * 8 ) {
1106+ cond_resched ();
1107+ completions = 0 ;
1108+ }
10881109 ioend = list_first_entry (& tmp , struct iomap_ioend , io_list );
10891110 list_del_init (& ioend -> io_list );
1090- iomap_finish_ioend (ioend , error );
1111+ completions += iomap_finish_ioend (ioend , error );
10911112 }
10921113}
10931114EXPORT_SYMBOL_GPL (iomap_finish_ioends );
@@ -1108,6 +1129,18 @@ iomap_ioend_can_merge(struct iomap_ioend *ioend, struct iomap_ioend *next)
11081129 return false;
11091130 if (ioend -> io_offset + ioend -> io_size != next -> io_offset )
11101131 return false;
1132+ /*
1133+ * Do not merge physically discontiguous ioends. The filesystem
1134+ * completion functions will have to iterate the physical
1135+ * discontiguities even if we merge the ioends at a logical level, so
1136+ * we don't gain anything by merging physical discontiguities here.
1137+ *
1138+ * We cannot use bio->bi_iter.bi_sector here as it is modified during
1139+ * submission so does not point to the start sector of the bio at
1140+ * completion.
1141+ */
1142+ if (ioend -> io_sector + (ioend -> io_size >> 9 ) != next -> io_sector )
1143+ return false;
11111144 return true;
11121145}
11131146
@@ -1209,8 +1242,10 @@ iomap_alloc_ioend(struct inode *inode, struct iomap_writepage_ctx *wpc,
12091242 ioend -> io_flags = wpc -> iomap .flags ;
12101243 ioend -> io_inode = inode ;
12111244 ioend -> io_size = 0 ;
1245+ ioend -> io_folios = 0 ;
12121246 ioend -> io_offset = offset ;
12131247 ioend -> io_bio = bio ;
1248+ ioend -> io_sector = sector ;
12141249 return ioend ;
12151250}
12161251
@@ -1251,6 +1286,13 @@ iomap_can_add_to_ioend(struct iomap_writepage_ctx *wpc, loff_t offset,
12511286 return false;
12521287 if (sector != bio_end_sector (wpc -> ioend -> io_bio ))
12531288 return false;
1289+ /*
1290+ * Limit ioend bio chain lengths to minimise IO completion latency. This
1291+ * also prevents long tight loops ending page writeback on all the
1292+ * folios in the ioend.
1293+ */
1294+ if (wpc -> ioend -> io_folios >= IOEND_BATCH_SIZE )
1295+ return false;
12541296 return true;
12551297}
12561298
@@ -1335,6 +1377,8 @@ iomap_writepage_map(struct iomap_writepage_ctx *wpc,
13351377 & submit_list );
13361378 count ++ ;
13371379 }
1380+ if (count )
1381+ wpc -> ioend -> io_folios ++ ;
13381382
13391383 WARN_ON_ONCE (!wpc -> ioend && !list_empty (& submit_list ));
13401384 WARN_ON_ONCE (!folio_test_locked (folio ));
0 commit comments