 
 typedef int (*iomap_punch_t)(struct inode *inode, loff_t offset, loff_t length);
 /*
- * Structure allocated for each folio to track per-block uptodate state
+ * Structure allocated for each folio to track per-block uptodate, dirty state
  * and I/O completions.
  */
 struct iomap_folio_state {
 	atomic_t		read_bytes_pending;
 	atomic_t		write_bytes_pending;
 	spinlock_t		state_lock;
+
+	/*
+	 * Each block has two bits in this bitmap:
+	 * Bits [0..blocks_per_folio) has the uptodate status.
+	 * Bits [b_p_f...(2*b_p_f)) has the dirty status.
+	 */
 	unsigned long		state[];
 };
 
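The split layout gives each block an uptodate bit in the first half of state[] and a dirty bit in the second half. A quick sketch of the index math (illustrative, not part of the patch; numbers assume a 64K folio over 4K blocks, so blocks_per_folio = 16):

	/* block 3 of a 16-block folio */
	unsigned int uptodate_bit = 3;		/* first region */
	unsigned int dirty_bit = 16 + 3;	/* second region: bit 19 */

test_bit(dirty_bit, ifs->state) is exactly what the new ifs_block_is_dirty() helper below computes.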
@@ -78,6 +84,61 @@ static void iomap_set_range_uptodate(struct folio *folio, size_t off,
 	folio_mark_uptodate(folio);
 }
 
+static inline bool ifs_block_is_dirty(struct folio *folio,
+		struct iomap_folio_state *ifs, int block)
+{
+	struct inode *inode = folio->mapping->host;
+	unsigned int blks_per_folio = i_blocks_per_folio(inode, folio);
+
+	return test_bit(block + blks_per_folio, ifs->state);
+}
+
+static void ifs_clear_range_dirty(struct folio *folio,
+		struct iomap_folio_state *ifs, size_t off, size_t len)
+{
+	struct inode *inode = folio->mapping->host;
+	unsigned int blks_per_folio = i_blocks_per_folio(inode, folio);
+	unsigned int first_blk = (off >> inode->i_blkbits);
+	unsigned int last_blk = (off + len - 1) >> inode->i_blkbits;
+	unsigned int nr_blks = last_blk - first_blk + 1;
+	unsigned long flags;
+
+	spin_lock_irqsave(&ifs->state_lock, flags);
+	bitmap_clear(ifs->state, first_blk + blks_per_folio, nr_blks);
+	spin_unlock_irqrestore(&ifs->state_lock, flags);
+}
+
+static void iomap_clear_range_dirty(struct folio *folio, size_t off, size_t len)
+{
+	struct iomap_folio_state *ifs = folio->private;
+
+	if (ifs)
+		ifs_clear_range_dirty(folio, ifs, off, len);
+}
+
+static void ifs_set_range_dirty(struct folio *folio,
+		struct iomap_folio_state *ifs, size_t off, size_t len)
+{
+	struct inode *inode = folio->mapping->host;
+	unsigned int blks_per_folio = i_blocks_per_folio(inode, folio);
+	unsigned int first_blk = (off >> inode->i_blkbits);
+	unsigned int last_blk = (off + len - 1) >> inode->i_blkbits;
+	unsigned int nr_blks = last_blk - first_blk + 1;
+	unsigned long flags;
+
+	spin_lock_irqsave(&ifs->state_lock, flags);
+	bitmap_set(ifs->state, first_blk + blks_per_folio, nr_blks);
+	spin_unlock_irqrestore(&ifs->state_lock, flags);
+}
+
+static void iomap_set_range_dirty(struct folio *folio, size_t off, size_t len)
+{
+	struct iomap_folio_state *ifs = folio->private;
+
+	if (ifs)
+		ifs_set_range_dirty(folio, ifs, off, len);
+}
+
 static struct iomap_folio_state *ifs_alloc(struct inode *inode,
 		struct folio *folio, unsigned int flags)
 {
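All four range helpers share the same byte-to-block conversion before touching the dirty half of the bitmap under state_lock. Traced once with assumed numbers (4K blocks, i_blkbits = 12, blks_per_folio = 16; not part of the patch):

	size_t off = 5000, len = 9000;
	unsigned int first_blk = off >> 12;			/* 1 */
	unsigned int last_blk = (off + len - 1) >> 12;		/* 13999 >> 12 = 3 */
	unsigned int nr_blks = last_blk - first_blk + 1;	/* 3 */
	/* ifs_set_range_dirty() sets bits 16 + 1 through 16 + 3, i.e. 17..19 */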
@@ -93,14 +154,24 @@ static struct iomap_folio_state *ifs_alloc(struct inode *inode,
 	else
 		gfp = GFP_NOFS | __GFP_NOFAIL;
 
-	ifs = kzalloc(struct_size(ifs, state, BITS_TO_LONGS(nr_blocks)),
-			gfp);
-	if (ifs) {
-		spin_lock_init(&ifs->state_lock);
-		if (folio_test_uptodate(folio))
-			bitmap_fill(ifs->state, nr_blocks);
-		folio_attach_private(folio, ifs);
-	}
+	/*
+	 * ifs->state tracks two sets of state flags when the
+	 * filesystem block size is smaller than the folio size.
+	 * The first state tracks per-block uptodate and the
+	 * second tracks per-block dirty state.
+	 */
+	ifs = kzalloc(struct_size(ifs, state,
+		      BITS_TO_LONGS(2 * nr_blocks)), gfp);
+	if (!ifs)
+		return ifs;
+
+	spin_lock_init(&ifs->state_lock);
+	if (folio_test_uptodate(folio))
+		bitmap_set(ifs->state, 0, nr_blocks);
+	if (folio_test_dirty(folio))
+		bitmap_set(ifs->state, nr_blocks, nr_blocks);
+	folio_attach_private(folio, ifs);
+
 	return ifs;
 }
 
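Seeding both regions from the folio flags keeps folio-level and per-block state consistent when tracking starts after the folio is already uptodate or dirty. On the sizing arithmetic: for a 2M folio over 4K blocks, nr_blocks = 512, so the old BITS_TO_LONGS(512) = 8 longs grows to BITS_TO_LONGS(2 * 512) = 16 longs on a 64-bit build; 64 extra bytes per large folio buys the ability to skip clean blocks at writeback time.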
@@ -519,6 +590,17 @@ void iomap_invalidate_folio(struct folio *folio, size_t offset, size_t len)
 }
 EXPORT_SYMBOL_GPL(iomap_invalidate_folio);
 
+bool iomap_dirty_folio(struct address_space *mapping, struct folio *folio)
+{
+	struct inode *inode = mapping->host;
+	size_t len = folio_size(folio);
+
+	ifs_alloc(inode, folio, 0);
+	iomap_set_range_dirty(folio, 0, len);
+	return filemap_dirty_folio(mapping, folio);
+}
+EXPORT_SYMBOL_GPL(iomap_dirty_folio);
+
 static void
 iomap_write_failed(struct inode *inode, loff_t pos, unsigned len)
 {
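Filesystems opt in by pointing their ->dirty_folio() at this new export instead of filemap_dirty_folio(). A minimal sketch of the wiring, assuming an iomap-based filesystem (the aops name is illustrative):

	static const struct address_space_operations example_aops = {
		.read_folio		= iomap_read_folio,
		.readahead		= iomap_readahead,
		.dirty_folio		= iomap_dirty_folio,
		.release_folio		= iomap_release_folio,
		.invalidate_folio	= iomap_invalidate_folio,
	};

iomap_dirty_folio() still dirties every block in the folio (this path has no sub-folio information), but the write_end and writeback paths below can now be more precise.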
@@ -723,6 +805,7 @@ static size_t __iomap_write_end(struct inode *inode, loff_t pos, size_t len,
 	if (unlikely(copied < len && !folio_test_uptodate(folio)))
 		return 0;
 	iomap_set_range_uptodate(folio, offset_in_folio(folio, pos), len);
+	iomap_set_range_dirty(folio, offset_in_folio(folio, pos), copied);
 	filemap_dirty_folio(inode->i_mapping, folio);
 	return copied;
 }
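Note the asymmetry: the uptodate range spans len, while the dirty range spans only copied, since a short copy into an already-uptodate folio modifies just the copied bytes. For example, with 4K blocks, pos = 0, len = 8192 and copied = 4096, only block 0 gets its dirty bit set; blocks that were merely prepared for the write stay clean.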
@@ -892,6 +975,43 @@ iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *i,
 }
 EXPORT_SYMBOL_GPL(iomap_file_buffered_write);
 
+static int iomap_write_delalloc_ifs_punch(struct inode *inode,
+		struct folio *folio, loff_t start_byte, loff_t end_byte,
+		iomap_punch_t punch)
+{
+	unsigned int first_blk, last_blk, i;
+	loff_t last_byte;
+	u8 blkbits = inode->i_blkbits;
+	struct iomap_folio_state *ifs;
+	int ret = 0;
+
+	/*
+	 * When we have per-block dirty tracking, there can be
+	 * blocks within a folio which are marked uptodate
+	 * but not dirty. In that case it is necessary to punch
+	 * out such blocks to avoid leaking any delalloc blocks.
+	 */
+	ifs = folio->private;
+	if (!ifs)
+		return ret;
+
+	last_byte = min_t(loff_t, end_byte - 1,
+			folio_pos(folio) + folio_size(folio) - 1);
+	first_blk = offset_in_folio(folio, start_byte) >> blkbits;
+	last_blk = offset_in_folio(folio, last_byte) >> blkbits;
+	for (i = first_blk; i <= last_blk; i++) {
+		if (!ifs_block_is_dirty(folio, ifs, i)) {
+			ret = punch(inode, folio_pos(folio) + (i << blkbits),
+				    1 << blkbits);
+			if (ret)
+				return ret;
+		}
+	}
+
+	return ret;
+}
+
+
 static int iomap_write_delalloc_punch(struct inode *inode, struct folio *folio,
 		loff_t *punch_start_byte, loff_t start_byte, loff_t end_byte,
 		iomap_punch_t punch)
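The punch callback is supplied by the filesystem (iomap_punch_t, declared at the top of the file). A hypothetical callback plus one traced case, under assumed numbers (4K blocks, folio at offset 0, 64K folio, start_byte = 0, end_byte = 100K; the function name is illustrative, not from the patch):

	/* Illustrative iomap_punch_t implementation */
	static int example_punch(struct inode *inode, loff_t offset, loff_t length)
	{
		/* a real filesystem would drop delalloc extents in this byte range */
		return 0;
	}

Here last_byte clamps to the folio end (65535), so first_blk = 0 and last_blk = 15, and every block whose dirty bit is clear triggers punch(inode, i << 12, 4096).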
@@ -909,6 +1029,12 @@ static int iomap_write_delalloc_punch(struct inode *inode, struct folio *folio,
 		return ret;
 	}
 
+	/* Punch non-dirty blocks within folio */
+	ret = iomap_write_delalloc_ifs_punch(inode, folio, start_byte,
+			end_byte, punch);
+	if (ret)
+		return ret;
+
 	/*
 	 * Make sure the next punch start is correctly bound to
 	 * the end of this data range, not the end of the folio.
@@ -1639,14 +1765,21 @@ iomap_writepage_map(struct iomap_writepage_ctx *wpc,
 		struct writeback_control *wbc, struct inode *inode,
 		struct folio *folio, u64 end_pos)
 {
-	struct iomap_folio_state *ifs = ifs_alloc(inode, folio, 0);
+	struct iomap_folio_state *ifs = folio->private;
 	struct iomap_ioend *ioend, *next;
 	unsigned len = i_blocksize(inode);
 	unsigned nblocks = i_blocks_per_folio(inode, folio);
 	u64 pos = folio_pos(folio);
 	int error = 0, count = 0, i;
 	LIST_HEAD(submit_list);
 
+	WARN_ON_ONCE(end_pos <= pos);
+
+	if (!ifs && nblocks > 1) {
+		ifs = ifs_alloc(inode, folio, 0);
+		iomap_set_range_dirty(folio, 0, end_pos - pos);
+	}
+
 	WARN_ON_ONCE(ifs && atomic_read(&ifs->write_bytes_pending) != 0);
 
 	/*
@@ -1655,7 +1788,7 @@ iomap_writepage_map(struct iomap_writepage_ctx *wpc,
 	 * invalid, grab a new one.
 	 */
 	for (i = 0; i < nblocks && pos < end_pos; i++, pos += len) {
-		if (ifs && !ifs_block_is_uptodate(ifs, i))
+		if (ifs && !ifs_block_is_dirty(folio, ifs, i))
 			continue;
 
 		error = wpc->ops->map_blocks(wpc, inode, pos);
@@ -1699,6 +1832,12 @@ iomap_writepage_map(struct iomap_writepage_ctx *wpc,
 		}
 	}
 
+	/*
+	 * We can have dirty bits set past end of file in page_mkwrite path
+	 * while mapping the last partial folio. Hence it's better to clear
+	 * all the dirty bits in the folio here.
+	 */
+	iomap_clear_range_dirty(folio, 0, folio_size(folio));
 	folio_start_writeback(folio);
 	folio_unlock(folio);
 
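To see why past-EOF dirty bits arise, assume i_size = 70000 with a 64K folio at pos = 65536 and 4K blocks: page_mkwrite dirties all 16 blocks via iomap_dirty_folio(), but the map loop above only walks blocks while pos < end_pos = 70000, i.e. blocks 0 and 1. Clearing the full folio_size() here, under the folio lock and before folio_start_writeback(), keeps blocks 2..15 from being left permanently dirty; a racing redirty after folio_unlock() simply re-marks the blocks for a later writeback pass.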