@@ -181,7 +181,6 @@ const struct address_space_operations zonefs_file_aops = {
181181 .migrate_folio = filemap_migrate_folio ,
182182 .is_partially_uptodate = iomap_is_partially_uptodate ,
183183 .error_remove_page = generic_error_remove_page ,
184- .direct_IO = noop_direct_IO ,
185184 .swap_activate = zonefs_swap_activate ,
186185};
187186
@@ -342,6 +341,77 @@ static loff_t zonefs_file_llseek(struct file *file, loff_t offset, int whence)
342341 return generic_file_llseek_size (file , offset , whence , isize , isize );
343342}
344343
344+ struct zonefs_zone_append_bio {
345+ /* The target inode of the BIO */
346+ struct inode * inode ;
347+
348+ /* For sync writes, the target append write offset */
349+ u64 append_offset ;
350+
351+ /*
352+ * This member must come last, bio_alloc_bioset will allocate enough
353+ * bytes for entire zonefs_bio but relies on bio being last.
354+ */
355+ struct bio bio ;
356+ };
357+
358+ static inline struct zonefs_zone_append_bio *
359+ zonefs_zone_append_bio (struct bio * bio )
360+ {
361+ return container_of (bio , struct zonefs_zone_append_bio , bio );
362+ }
363+
364+ static void zonefs_file_zone_append_dio_bio_end_io (struct bio * bio )
365+ {
366+ struct zonefs_zone_append_bio * za_bio = zonefs_zone_append_bio (bio );
367+ struct zonefs_zone * z = zonefs_inode_zone (za_bio -> inode );
368+ sector_t za_sector ;
369+
370+ if (bio -> bi_status != BLK_STS_OK )
371+ goto bio_end ;
372+
373+ /*
374+ * If the file zone was written underneath the file system, the zone
375+ * append operation can still succedd (if the zone is not full) but
376+ * the write append location will not be where we expect it to be.
377+ * Check that we wrote where we intended to, that is, at z->z_wpoffset.
378+ */
379+ za_sector = z -> z_sector + (za_bio -> append_offset >> SECTOR_SHIFT );
380+ if (bio -> bi_iter .bi_sector != za_sector ) {
381+ zonefs_warn (za_bio -> inode -> i_sb ,
382+ "Invalid write sector %llu for zone at %llu\n" ,
383+ bio -> bi_iter .bi_sector , z -> z_sector );
384+ bio -> bi_status = BLK_STS_IOERR ;
385+ }
386+
387+ bio_end :
388+ iomap_dio_bio_end_io (bio );
389+ }
390+
391+ static void zonefs_file_zone_append_dio_submit_io (const struct iomap_iter * iter ,
392+ struct bio * bio ,
393+ loff_t file_offset )
394+ {
395+ struct zonefs_zone_append_bio * za_bio = zonefs_zone_append_bio (bio );
396+ struct inode * inode = iter -> inode ;
397+ struct zonefs_zone * z = zonefs_inode_zone (inode );
398+
399+ /*
400+ * Issue a zone append BIO to process sync dio writes. The append
401+ * file offset is saved to check the zone append write location
402+ * on completion of the BIO.
403+ */
404+ za_bio -> inode = inode ;
405+ za_bio -> append_offset = file_offset ;
406+
407+ bio -> bi_opf &= ~REQ_OP_WRITE ;
408+ bio -> bi_opf |= REQ_OP_ZONE_APPEND ;
409+ bio -> bi_iter .bi_sector = z -> z_sector ;
410+ bio -> bi_end_io = zonefs_file_zone_append_dio_bio_end_io ;
411+
412+ submit_bio (bio );
413+ }
414+
345415static int zonefs_file_write_dio_end_io (struct kiocb * iocb , ssize_t size ,
346416 int error , unsigned int flags )
347417{
@@ -372,93 +442,17 @@ static int zonefs_file_write_dio_end_io(struct kiocb *iocb, ssize_t size,
372442 return 0 ;
373443}
374444
375- static const struct iomap_dio_ops zonefs_write_dio_ops = {
376- .end_io = zonefs_file_write_dio_end_io ,
377- };
/*
 * BIO set for zone append direct writes: referenced by
 * zonefs_zone_append_dio_ops and set up by zonefs_file_bioset_init().
 */
445+ static struct bio_set zonefs_zone_append_bio_set ;
378446
379- static ssize_t zonefs_file_dio_append (struct kiocb * iocb , struct iov_iter * from )
380- {
381- struct inode * inode = file_inode (iocb -> ki_filp );
382- struct zonefs_zone * z = zonefs_inode_zone (inode );
383- struct block_device * bdev = inode -> i_sb -> s_bdev ;
384- unsigned int max = bdev_max_zone_append_sectors (bdev );
385- pgoff_t start , end ;
386- struct bio * bio ;
387- ssize_t size = 0 ;
388- int nr_pages ;
389- ssize_t ret ;
390-
391- max = ALIGN_DOWN (max << SECTOR_SHIFT , inode -> i_sb -> s_blocksize );
392- iov_iter_truncate (from , max );
393-
394- /*
395- * If the inode block size (zone write granularity) is smaller than the
396- * page size, we may be appending data belonging to the last page of the
397- * inode straddling inode->i_size, with that page already cached due to
398- * a buffered read or readahead. So make sure to invalidate that page.
399- * This will always be a no-op for the case where the block size is
400- * equal to the page size.
401- */
402- start = iocb -> ki_pos >> PAGE_SHIFT ;
403- end = (iocb -> ki_pos + iov_iter_count (from ) - 1 ) >> PAGE_SHIFT ;
404- if (invalidate_inode_pages2_range (inode -> i_mapping , start , end ))
405- return - EBUSY ;
406-
407- nr_pages = iov_iter_npages (from , BIO_MAX_VECS );
408- if (!nr_pages )
409- return 0 ;
410-
411- bio = bio_alloc (bdev , nr_pages ,
412- REQ_OP_ZONE_APPEND | REQ_SYNC | REQ_IDLE , GFP_NOFS );
413- bio -> bi_iter .bi_sector = z -> z_sector ;
414- bio -> bi_ioprio = iocb -> ki_ioprio ;
415- if (iocb_is_dsync (iocb ))
416- bio -> bi_opf |= REQ_FUA ;
417-
418- ret = bio_iov_iter_get_pages (bio , from );
419- if (unlikely (ret ))
420- goto out_release ;
421-
422- size = bio -> bi_iter .bi_size ;
423- task_io_account_write (size );
424-
425- if (iocb -> ki_flags & IOCB_HIPRI )
426- bio_set_polled (bio , iocb );
427-
428- ret = submit_bio_wait (bio );
429-
430- /*
431- * If the file zone was written underneath the file system, the zone
432- * write pointer may not be where we expect it to be, but the zone
433- * append write can still succeed. So check manually that we wrote where
434- * we intended to, that is, at zi->i_wpoffset.
435- */
436- if (!ret ) {
437- sector_t wpsector =
438- z -> z_sector + (z -> z_wpoffset >> SECTOR_SHIFT );
439-
440- if (bio -> bi_iter .bi_sector != wpsector ) {
441- zonefs_warn (inode -> i_sb ,
442- "Corrupted write pointer %llu for zone at %llu\n" ,
443- bio -> bi_iter .bi_sector , z -> z_sector );
444- ret = - EIO ;
445- }
446- }
447-
448- zonefs_file_write_dio_end_io (iocb , size , ret , 0 );
449- trace_zonefs_file_dio_append (inode , size , ret );
450-
451- out_release :
452- bio_release_pages (bio , false);
453- bio_put (bio );
454-
455- if (ret >= 0 ) {
456- iocb -> ki_pos += size ;
457- return size ;
458- }
447+ static const struct iomap_dio_ops zonefs_zone_append_dio_ops = {
448+ .submit_io = zonefs_file_zone_append_dio_submit_io ,
449+ .end_io = zonefs_file_write_dio_end_io ,
450+ .bio_set = & zonefs_zone_append_bio_set ,
451+ };
459452
460- return ret ;
461- }
453+ static const struct iomap_dio_ops zonefs_write_dio_ops = {
454+ .end_io = zonefs_file_write_dio_end_io ,
455+ };
462456
463457/*
464458 * Do not exceed the LFS limits nor the file zone size. If pos is under the
@@ -539,6 +533,7 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from)
539533 struct zonefs_inode_info * zi = ZONEFS_I (inode );
540534 struct zonefs_zone * z = zonefs_inode_zone (inode );
541535 struct super_block * sb = inode -> i_sb ;
536+ const struct iomap_dio_ops * dio_ops ;
542537 bool sync = is_sync_kiocb (iocb );
543538 bool append = false;
544539 ssize_t ret , count ;
@@ -582,20 +577,26 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from)
582577 }
583578
584579 if (append ) {
585- ret = zonefs_file_dio_append (iocb , from );
580+ unsigned int max = bdev_max_zone_append_sectors (sb -> s_bdev );
581+
582+ max = ALIGN_DOWN (max << SECTOR_SHIFT , sb -> s_blocksize );
583+ iov_iter_truncate (from , max );
584+
585+ dio_ops = & zonefs_zone_append_dio_ops ;
586586 } else {
587- /*
588- * iomap_dio_rw() may return ENOTBLK if there was an issue with
589- * page invalidation. Overwrite that error code with EBUSY to
590- * be consistent with zonefs_file_dio_append() return value for
591- * similar issues.
592- */
593- ret = iomap_dio_rw (iocb , from , & zonefs_write_iomap_ops ,
594- & zonefs_write_dio_ops , 0 , NULL , 0 );
595- if (ret == - ENOTBLK )
596- ret = - EBUSY ;
587+ dio_ops = & zonefs_write_dio_ops ;
597588 }
598589
590+ /*
591+ * iomap_dio_rw() may return ENOTBLK if there was an issue with
592+ * page invalidation. Overwrite that error code with EBUSY so that
593+ * the user can make sense of the error.
594+ */
595+ ret = iomap_dio_rw (iocb , from , & zonefs_write_iomap_ops ,
596+ dio_ops , 0 , NULL , 0 );
597+ if (ret == - ENOTBLK )
598+ ret = - EBUSY ;
599+
599600 if (zonefs_zone_is_seq (z ) &&
600601 (ret > 0 || ret == - EIOCBQUEUED )) {
601602 if (ret > 0 )
@@ -813,6 +814,7 @@ static int zonefs_file_open(struct inode *inode, struct file *file)
813814{
814815 int ret ;
815816
817+ file -> f_mode |= FMODE_CAN_ODIRECT ;
816818 ret = generic_file_open (inode , file );
817819 if (ret )
818820 return ret ;
@@ -900,3 +902,15 @@ const struct file_operations zonefs_file_operations = {
900902 .splice_write = iter_file_splice_write ,
901903 .iopoll = iocb_bio_iopoll ,
902904};
905+
906+ int zonefs_file_bioset_init (void )
907+ {
908+ return bioset_init (& zonefs_zone_append_bio_set , BIO_POOL_SIZE ,
909+ offsetof(struct zonefs_zone_append_bio , bio ),
910+ BIOSET_NEED_BVECS );
911+ }
912+
913+ void zonefs_file_bioset_exit (void )
914+ {
915+ bioset_exit (& zonefs_zone_append_bio_set );
916+ }
0 commit comments