@@ -342,6 +342,77 @@ static loff_t zonefs_file_llseek(struct file *file, loff_t offset, int whence)
342342 return generic_file_llseek_size (file , offset , whence , isize , isize );
343343}
344344
345+ struct zonefs_zone_append_bio {
346+ /* The target inode of the BIO */
347+ struct inode * inode ;
348+
349+ /* For sync writes, the target append write offset */
350+ u64 append_offset ;
351+
352+ /*
353+ * This member must come last, bio_alloc_bioset will allocate enough
354+ * bytes for entire zonefs_bio but relies on bio being last.
355+ */
356+ struct bio bio ;
357+ };
358+
359+ static inline struct zonefs_zone_append_bio *
360+ zonefs_zone_append_bio (struct bio * bio )
361+ {
362+ return container_of (bio , struct zonefs_zone_append_bio , bio );
363+ }
364+
365+ static void zonefs_file_zone_append_dio_bio_end_io (struct bio * bio )
366+ {
367+ struct zonefs_zone_append_bio * za_bio = zonefs_zone_append_bio (bio );
368+ struct zonefs_zone * z = zonefs_inode_zone (za_bio -> inode );
369+ sector_t za_sector ;
370+
371+ if (bio -> bi_status != BLK_STS_OK )
372+ goto bio_end ;
373+
374+ /*
375+ * If the file zone was written underneath the file system, the zone
376+ * append operation can still succedd (if the zone is not full) but
377+ * the write append location will not be where we expect it to be.
378+ * Check that we wrote where we intended to, that is, at z->z_wpoffset.
379+ */
380+ za_sector = z -> z_sector + (za_bio -> append_offset >> SECTOR_SHIFT );
381+ if (bio -> bi_iter .bi_sector != za_sector ) {
382+ zonefs_warn (za_bio -> inode -> i_sb ,
383+ "Invalid write sector %llu for zone at %llu\n" ,
384+ bio -> bi_iter .bi_sector , z -> z_sector );
385+ bio -> bi_status = BLK_STS_IOERR ;
386+ }
387+
388+ bio_end :
389+ iomap_dio_bio_end_io (bio );
390+ }
391+
392+ static void zonefs_file_zone_append_dio_submit_io (const struct iomap_iter * iter ,
393+ struct bio * bio ,
394+ loff_t file_offset )
395+ {
396+ struct zonefs_zone_append_bio * za_bio = zonefs_zone_append_bio (bio );
397+ struct inode * inode = iter -> inode ;
398+ struct zonefs_zone * z = zonefs_inode_zone (inode );
399+
400+ /*
401+ * Issue a zone append BIO to process sync dio writes. The append
402+ * file offset is saved to check the zone append write location
403+ * on completion of the BIO.
404+ */
405+ za_bio -> inode = inode ;
406+ za_bio -> append_offset = file_offset ;
407+
408+ bio -> bi_opf &= ~REQ_OP_WRITE ;
409+ bio -> bi_opf |= REQ_OP_ZONE_APPEND ;
410+ bio -> bi_iter .bi_sector = z -> z_sector ;
411+ bio -> bi_end_io = zonefs_file_zone_append_dio_bio_end_io ;
412+
413+ submit_bio (bio );
414+ }
415+
345416static int zonefs_file_write_dio_end_io (struct kiocb * iocb , ssize_t size ,
346417 int error , unsigned int flags )
347418{
@@ -372,93 +443,17 @@ static int zonefs_file_write_dio_end_io(struct kiocb *iocb, ssize_t size,
372443 return 0 ;
373444}
374445
375- static const struct iomap_dio_ops zonefs_write_dio_ops = {
376- .end_io = zonefs_file_write_dio_end_io ,
377- };
446+ static struct bio_set zonefs_zone_append_bio_set ;
378447
379- static ssize_t zonefs_file_dio_append (struct kiocb * iocb , struct iov_iter * from )
380- {
381- struct inode * inode = file_inode (iocb -> ki_filp );
382- struct zonefs_zone * z = zonefs_inode_zone (inode );
383- struct block_device * bdev = inode -> i_sb -> s_bdev ;
384- unsigned int max = bdev_max_zone_append_sectors (bdev );
385- pgoff_t start , end ;
386- struct bio * bio ;
387- ssize_t size = 0 ;
388- int nr_pages ;
389- ssize_t ret ;
390-
391- max = ALIGN_DOWN (max << SECTOR_SHIFT , inode -> i_sb -> s_blocksize );
392- iov_iter_truncate (from , max );
393-
394- /*
395- * If the inode block size (zone write granularity) is smaller than the
396- * page size, we may be appending data belonging to the last page of the
397- * inode straddling inode->i_size, with that page already cached due to
398- * a buffered read or readahead. So make sure to invalidate that page.
399- * This will always be a no-op for the case where the block size is
400- * equal to the page size.
401- */
402- start = iocb -> ki_pos >> PAGE_SHIFT ;
403- end = (iocb -> ki_pos + iov_iter_count (from ) - 1 ) >> PAGE_SHIFT ;
404- if (invalidate_inode_pages2_range (inode -> i_mapping , start , end ))
405- return - EBUSY ;
406-
407- nr_pages = iov_iter_npages (from , BIO_MAX_VECS );
408- if (!nr_pages )
409- return 0 ;
410-
411- bio = bio_alloc (bdev , nr_pages ,
412- REQ_OP_ZONE_APPEND | REQ_SYNC | REQ_IDLE , GFP_NOFS );
413- bio -> bi_iter .bi_sector = z -> z_sector ;
414- bio -> bi_ioprio = iocb -> ki_ioprio ;
415- if (iocb_is_dsync (iocb ))
416- bio -> bi_opf |= REQ_FUA ;
417-
418- ret = bio_iov_iter_get_pages (bio , from );
419- if (unlikely (ret ))
420- goto out_release ;
421-
422- size = bio -> bi_iter .bi_size ;
423- task_io_account_write (size );
424-
425- if (iocb -> ki_flags & IOCB_HIPRI )
426- bio_set_polled (bio , iocb );
427-
428- ret = submit_bio_wait (bio );
429-
430- /*
431- * If the file zone was written underneath the file system, the zone
432- * write pointer may not be where we expect it to be, but the zone
433- * append write can still succeed. So check manually that we wrote where
434- * we intended to, that is, at zi->i_wpoffset.
435- */
436- if (!ret ) {
437- sector_t wpsector =
438- z -> z_sector + (z -> z_wpoffset >> SECTOR_SHIFT );
439-
440- if (bio -> bi_iter .bi_sector != wpsector ) {
441- zonefs_warn (inode -> i_sb ,
442- "Corrupted write pointer %llu for zone at %llu\n" ,
443- bio -> bi_iter .bi_sector , z -> z_sector );
444- ret = - EIO ;
445- }
446- }
447-
448- zonefs_file_write_dio_end_io (iocb , size , ret , 0 );
449- trace_zonefs_file_dio_append (inode , size , ret );
450-
451- out_release :
452- bio_release_pages (bio , false);
453- bio_put (bio );
454-
455- if (ret >= 0 ) {
456- iocb -> ki_pos += size ;
457- return size ;
458- }
448+ static const struct iomap_dio_ops zonefs_zone_append_dio_ops = {
449+ .submit_io = zonefs_file_zone_append_dio_submit_io ,
450+ .end_io = zonefs_file_write_dio_end_io ,
451+ .bio_set = & zonefs_zone_append_bio_set ,
452+ };
459453
460- return ret ;
461- }
454+ static const struct iomap_dio_ops zonefs_write_dio_ops = {
455+ .end_io = zonefs_file_write_dio_end_io ,
456+ };
462457
463458/*
464459 * Do not exceed the LFS limits nor the file zone size. If pos is under the
@@ -539,6 +534,7 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from)
539534 struct zonefs_inode_info * zi = ZONEFS_I (inode );
540535 struct zonefs_zone * z = zonefs_inode_zone (inode );
541536 struct super_block * sb = inode -> i_sb ;
537+ const struct iomap_dio_ops * dio_ops ;
542538 bool sync = is_sync_kiocb (iocb );
543539 bool append = false;
544540 ssize_t ret , count ;
@@ -582,20 +578,26 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from)
582578 }
583579
584580 if (append ) {
585- ret = zonefs_file_dio_append (iocb , from );
581+ unsigned int max = bdev_max_zone_append_sectors (sb -> s_bdev );
582+
583+ max = ALIGN_DOWN (max << SECTOR_SHIFT , sb -> s_blocksize );
584+ iov_iter_truncate (from , max );
585+
586+ dio_ops = & zonefs_zone_append_dio_ops ;
586587 } else {
587- /*
588- * iomap_dio_rw() may return ENOTBLK if there was an issue with
589- * page invalidation. Overwrite that error code with EBUSY to
590- * be consistent with zonefs_file_dio_append() return value for
591- * similar issues.
592- */
593- ret = iomap_dio_rw (iocb , from , & zonefs_write_iomap_ops ,
594- & zonefs_write_dio_ops , 0 , NULL , 0 );
595- if (ret == - ENOTBLK )
596- ret = - EBUSY ;
588+ dio_ops = & zonefs_write_dio_ops ;
597589 }
598590
591+ /*
592+ * iomap_dio_rw() may return ENOTBLK if there was an issue with
593+ * page invalidation. Overwrite that error code with EBUSY so that
594+ * the user can make sense of the error.
595+ */
596+ ret = iomap_dio_rw (iocb , from , & zonefs_write_iomap_ops ,
597+ dio_ops , 0 , NULL , 0 );
598+ if (ret == - ENOTBLK )
599+ ret = - EBUSY ;
600+
599601 if (zonefs_zone_is_seq (z ) &&
600602 (ret > 0 || ret == - EIOCBQUEUED )) {
601603 if (ret > 0 )
@@ -900,3 +902,15 @@ const struct file_operations zonefs_file_operations = {
900902 .splice_write = iter_file_splice_write ,
901903 .iopoll = iocb_bio_iopoll ,
902904};
905+
906+ int zonefs_file_bioset_init (void )
907+ {
908+ return bioset_init (& zonefs_zone_append_bio_set , BIO_POOL_SIZE ,
909+ offsetof(struct zonefs_zone_append_bio , bio ),
910+ BIOSET_NEED_BVECS );
911+ }
912+
913+ void zonefs_file_bioset_exit (void )
914+ {
915+ bioset_exit (& zonefs_zone_append_bio_set );
916+ }
0 commit comments