@@ -306,80 +306,38 @@ static ssize_t ext4_buffered_write_iter(struct kiocb *iocb,
306306}
307307
308308static ssize_t ext4_handle_inode_extension (struct inode * inode , loff_t offset ,
309- ssize_t written , size_t count )
309+ ssize_t count )
310310{
311311 handle_t * handle ;
312- bool truncate = false;
313- u8 blkbits = inode -> i_blkbits ;
314- ext4_lblk_t written_blk , end_blk ;
315- int ret ;
316-
317- /*
318- * Note that EXT4_I(inode)->i_disksize can get extended up to
319- * inode->i_size while the I/O was running due to writeback of delalloc
320- * blocks. But, the code in ext4_iomap_alloc() is careful to use
321- * zeroed/unwritten extents if this is possible; thus we won't leave
322- * uninitialized blocks in a file even if we didn't succeed in writing
323- * as much as we intended.
324- */
325- WARN_ON_ONCE (i_size_read (inode ) < EXT4_I (inode )-> i_disksize );
326- if (offset + count <= EXT4_I (inode )-> i_disksize ) {
327- /*
328- * We need to ensure that the inode is removed from the orphan
329- * list if it has been added prematurely, due to writeback of
330- * delalloc blocks.
331- */
332- if (!list_empty (& EXT4_I (inode )-> i_orphan ) && inode -> i_nlink ) {
333- handle = ext4_journal_start (inode , EXT4_HT_INODE , 2 );
334-
335- if (IS_ERR (handle )) {
336- ext4_orphan_del (NULL , inode );
337- return PTR_ERR (handle );
338- }
339-
340- ext4_orphan_del (handle , inode );
341- ext4_journal_stop (handle );
342- }
343-
344- return written ;
345- }
346-
347- if (written < 0 )
348- goto truncate ;
349312
313+ lockdep_assert_held_write (& inode -> i_rwsem );
350314 handle = ext4_journal_start (inode , EXT4_HT_INODE , 2 );
351- if (IS_ERR (handle )) {
352- written = PTR_ERR (handle );
353- goto truncate ;
354- }
315+ if (IS_ERR (handle ))
316+ return PTR_ERR (handle );
355317
356- if (ext4_update_inode_size (inode , offset + written )) {
357- ret = ext4_mark_inode_dirty (handle , inode );
318+ if (ext4_update_inode_size (inode , offset + count )) {
319+ int ret = ext4_mark_inode_dirty (handle , inode );
358320 if (unlikely (ret )) {
359- written = ret ;
360321 ext4_journal_stop (handle );
361- goto truncate ;
322+ return ret ;
362323 }
363324 }
364325
365- /*
366- * We may need to truncate allocated but not written blocks beyond EOF.
367- */
368- written_blk = ALIGN (offset + written , 1 << blkbits );
369- end_blk = ALIGN (offset + count , 1 << blkbits );
370- if (written_blk < end_blk && ext4_can_truncate (inode ))
371- truncate = true;
372-
373- /*
374- * Remove the inode from the orphan list if it has been extended and
375- * everything went OK.
376- */
377- if (!truncate && inode -> i_nlink )
326+ if (inode -> i_nlink )
378327 ext4_orphan_del (handle , inode );
379328 ext4_journal_stop (handle );
380329
381- if (truncate ) {
382- truncate :
330+ return count ;
331+ }
332+
333+ /*
334+ * Clean up the inode after DIO or DAX extending write has completed and the
335+ * inode size has been updated using ext4_handle_inode_extension().
336+ */
337+ static void ext4_inode_extension_cleanup (struct inode * inode , ssize_t count )
338+ {
339+ lockdep_assert_held_write (& inode -> i_rwsem );
340+ if (count < 0 ) {
383341 ext4_truncate_failed_write (inode );
384342 /*
385343 * If the truncate operation failed early, then the inode may
@@ -388,9 +346,28 @@ static ssize_t ext4_handle_inode_extension(struct inode *inode, loff_t offset,
388346 */
389347 if (inode -> i_nlink )
390348 ext4_orphan_del (NULL , inode );
349+ return ;
391350 }
351+ /*
352+ * If i_disksize got extended due to writeback of delalloc blocks while
353+ * the DIO was running we could fail to cleanup the orphan list in
354+ * ext4_handle_inode_extension(). Do it now.
355+ */
356+ if (!list_empty (& EXT4_I (inode )-> i_orphan ) && inode -> i_nlink ) {
357+ handle_t * handle = ext4_journal_start (inode , EXT4_HT_INODE , 2 );
392358
393- return written ;
359+ if (IS_ERR (handle )) {
360+ /*
361+ * The write has successfully completed. Not much to
362+ * do with the error here so just cleanup the orphan
363+ * list and hope for the best.
364+ */
365+ ext4_orphan_del (NULL , inode );
366+ return ;
367+ }
368+ ext4_orphan_del (handle , inode );
369+ ext4_journal_stop (handle );
370+ }
394371}
395372
396373static int ext4_dio_write_end_io (struct kiocb * iocb , ssize_t size ,
@@ -399,31 +376,22 @@ static int ext4_dio_write_end_io(struct kiocb *iocb, ssize_t size,
399376 loff_t pos = iocb -> ki_pos ;
400377 struct inode * inode = file_inode (iocb -> ki_filp );
401378
379+ if (!error && size && flags & IOMAP_DIO_UNWRITTEN )
380+ error = ext4_convert_unwritten_extents (NULL , inode , pos , size );
402381 if (error )
403382 return error ;
404-
405- if (size && flags & IOMAP_DIO_UNWRITTEN ) {
406- error = ext4_convert_unwritten_extents (NULL , inode , pos , size );
407- if (error < 0 )
408- return error ;
409- }
410383 /*
411- * If we are extending the file, we have to update i_size here before
412- * page cache gets invalidated in iomap_dio_rw(). Otherwise racing
413- * buffered reads could zero out too much from page cache pages. Update
414- * of on-disk size will happen later in ext4_dio_write_iter() where
415- * we have enough information to also perform orphan list handling etc.
416- * Note that we perform all extending writes synchronously under
417- * i_rwsem held exclusively so i_size update is safe here in that case.
418- * If the write was not extending, we cannot see pos > i_size here
419- * because operations reducing i_size like truncate wait for all
420- * outstanding DIO before updating i_size.
384+ * Note that EXT4_I(inode)->i_disksize can get extended up to
385+ * inode->i_size while the I/O was running due to writeback of delalloc
386+ * blocks. But the code in ext4_iomap_alloc() is careful to use
387+ * zeroed/unwritten extents if this is possible; thus we won't leave
388+ * uninitialized blocks in a file even if we didn't succeed in writing
389+ * as much as we intended.
421390 */
422- pos += size ;
423- if (pos > i_size_read (inode ))
424- i_size_write (inode , pos );
425-
426- return 0 ;
391+ WARN_ON_ONCE (i_size_read (inode ) < READ_ONCE (EXT4_I (inode )-> i_disksize ));
392+ if (pos + size <= READ_ONCE (EXT4_I (inode )-> i_disksize ))
393+ return size ;
394+ return ext4_handle_inode_extension (inode , pos , size );
427395}
428396
429397static const struct iomap_dio_ops ext4_dio_write_ops = {
@@ -608,9 +576,16 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
608576 dio_flags , NULL , 0 );
609577 if (ret == - ENOTBLK )
610578 ret = 0 ;
611-
612- if (extend )
613- ret = ext4_handle_inode_extension (inode , offset , ret , count );
579+ if (extend ) {
580+ /*
581+ * We always perform extending DIO write synchronously so by
582+ * now the IO is completed and ext4_handle_inode_extension()
583+ * was called. Cleanup the inode in case of error or race with
584+ * writeback of delalloc blocks.
585+ */
586+ WARN_ON_ONCE (ret == - EIOCBQUEUED );
587+ ext4_inode_extension_cleanup (inode , ret );
588+ }
614589
615590out :
616591 if (ilock_shared )
@@ -691,8 +666,10 @@ ext4_dax_write_iter(struct kiocb *iocb, struct iov_iter *from)
691666
692667 ret = dax_iomap_rw (iocb , from , & ext4_iomap_ops );
693668
694- if (extend )
695- ret = ext4_handle_inode_extension (inode , offset , ret , count );
669+ if (extend ) {
670+ ret = ext4_handle_inode_extension (inode , offset , ret );
671+ ext4_inode_extension_cleanup (inode , ret );
672+ }
696673out :
697674 inode_unlock (inode );
698675 if (ret > 0 )
0 commit comments