@@ -1254,6 +1254,136 @@ static int wait_for_concurrent_writes(struct file *file)
12541254 return err ;
12551255}
12561256
1257+ struct nfsd_write_dio_seg {
1258+ struct iov_iter iter ;
1259+ int flags ;
1260+ };
1261+
1262+ static unsigned long
1263+ iov_iter_bvec_offset (const struct iov_iter * iter )
1264+ {
1265+ return (unsigned long )(iter -> bvec -> bv_offset + iter -> iov_offset );
1266+ }
1267+
1268+ static void
1269+ nfsd_write_dio_seg_init (struct nfsd_write_dio_seg * segment ,
1270+ struct bio_vec * bvec , unsigned int nvecs ,
1271+ unsigned long total , size_t start , size_t len ,
1272+ struct kiocb * iocb )
1273+ {
1274+ iov_iter_bvec (& segment -> iter , ITER_SOURCE , bvec , nvecs , total );
1275+ if (start )
1276+ iov_iter_advance (& segment -> iter , start );
1277+ iov_iter_truncate (& segment -> iter , len );
1278+ segment -> flags = iocb -> ki_flags ;
1279+ }
1280+
1281+ static unsigned int
1282+ nfsd_write_dio_iters_init (struct nfsd_file * nf , struct bio_vec * bvec ,
1283+ unsigned int nvecs , struct kiocb * iocb ,
1284+ unsigned long total ,
1285+ struct nfsd_write_dio_seg segments [3 ])
1286+ {
1287+ u32 offset_align = nf -> nf_dio_offset_align ;
1288+ loff_t prefix_end , orig_end , middle_end ;
1289+ u32 mem_align = nf -> nf_dio_mem_align ;
1290+ size_t prefix , middle , suffix ;
1291+ loff_t offset = iocb -> ki_pos ;
1292+ unsigned int nsegs = 0 ;
1293+
1294+ /*
1295+ * Check if direct I/O is feasible for this write request.
1296+ * If alignments are not available, the write is too small,
1297+ * or no alignment can be found, fall back to buffered I/O.
1298+ */
1299+ if (unlikely (!mem_align || !offset_align ) ||
1300+ unlikely (total < max (offset_align , mem_align )))
1301+ goto no_dio ;
1302+
1303+ prefix_end = round_up (offset , offset_align );
1304+ orig_end = offset + total ;
1305+ middle_end = round_down (orig_end , offset_align );
1306+
1307+ prefix = prefix_end - offset ;
1308+ middle = middle_end - prefix_end ;
1309+ suffix = orig_end - middle_end ;
1310+
1311+ if (!middle )
1312+ goto no_dio ;
1313+
1314+ if (prefix )
1315+ nfsd_write_dio_seg_init (& segments [nsegs ++ ], bvec ,
1316+ nvecs , total , 0 , prefix , iocb );
1317+
1318+ nfsd_write_dio_seg_init (& segments [nsegs ], bvec , nvecs ,
1319+ total , prefix , middle , iocb );
1320+
1321+ /*
1322+ * Check if the bvec iterator is aligned for direct I/O.
1323+ *
1324+ * bvecs generated from RPC receive buffers are contiguous: After
1325+ * the first bvec, all subsequent bvecs start at bv_offset zero
1326+ * (page-aligned). Therefore, only the first bvec is checked.
1327+ */
1328+ if (iov_iter_bvec_offset (& segments [nsegs ].iter ) & (mem_align - 1 ))
1329+ goto no_dio ;
1330+ segments [nsegs ].flags |= IOCB_DIRECT ;
1331+ nsegs ++ ;
1332+
1333+ if (suffix )
1334+ nfsd_write_dio_seg_init (& segments [nsegs ++ ], bvec , nvecs , total ,
1335+ prefix + middle , suffix , iocb );
1336+
1337+ return nsegs ;
1338+
1339+ no_dio :
1340+ /* No DIO alignment possible - pack into single non-DIO segment. */
1341+ nfsd_write_dio_seg_init (& segments [0 ], bvec , nvecs , total , 0 ,
1342+ total , iocb );
1343+ return 1 ;
1344+ }
1345+
1346+ static noinline_for_stack int
1347+ nfsd_direct_write (struct svc_rqst * rqstp , struct svc_fh * fhp ,
1348+ struct nfsd_file * nf , unsigned int nvecs ,
1349+ unsigned long * cnt , struct kiocb * kiocb )
1350+ {
1351+ struct nfsd_write_dio_seg segments [3 ];
1352+ struct file * file = nf -> nf_file ;
1353+ unsigned int nsegs , i ;
1354+ ssize_t host_err ;
1355+
1356+ nsegs = nfsd_write_dio_iters_init (nf , rqstp -> rq_bvec , nvecs ,
1357+ kiocb , * cnt , segments );
1358+
1359+ * cnt = 0 ;
1360+ for (i = 0 ; i < nsegs ; i ++ ) {
1361+ kiocb -> ki_flags = segments [i ].flags ;
1362+ if (kiocb -> ki_flags & IOCB_DIRECT )
1363+ trace_nfsd_write_direct (rqstp , fhp , kiocb -> ki_pos ,
1364+ segments [i ].iter .count );
1365+ else {
1366+ trace_nfsd_write_vector (rqstp , fhp , kiocb -> ki_pos ,
1367+ segments [i ].iter .count );
1368+ /*
1369+ * Mark the I/O buffer as evict-able to reduce
1370+ * memory contention.
1371+ */
1372+ if (nf -> nf_file -> f_op -> fop_flags & FOP_DONTCACHE )
1373+ kiocb -> ki_flags |= IOCB_DONTCACHE ;
1374+ }
1375+
1376+ host_err = vfs_iocb_iter_write (file , kiocb , & segments [i ].iter );
1377+ if (host_err < 0 )
1378+ return host_err ;
1379+ * cnt += host_err ;
1380+ if (host_err < segments [i ].iter .count )
1381+ break ; /* partial write */
1382+ }
1383+
1384+ return 0 ;
1385+ }
1386+
12571387/**
12581388 * nfsd_vfs_write - write data to an already-open file
12591389 * @rqstp: RPC execution context
@@ -1328,25 +1458,32 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp,
13281458 }
13291459
13301460 nvecs = xdr_buf_to_bvec (rqstp -> rq_bvec , rqstp -> rq_maxpages , payload );
1331- iov_iter_bvec ( & iter , ITER_SOURCE , rqstp -> rq_bvec , nvecs , * cnt );
1461+
13321462 since = READ_ONCE (file -> f_wb_err );
13331463 if (verf )
13341464 nfsd_copy_write_verifier (verf , nn );
13351465
13361466 switch (nfsd_io_cache_write ) {
1337- case NFSD_IO_BUFFERED :
1467+ case NFSD_IO_DIRECT :
1468+ host_err = nfsd_direct_write (rqstp , fhp , nf , nvecs ,
1469+ cnt , & kiocb );
13381470 break ;
13391471 case NFSD_IO_DONTCACHE :
13401472 if (file -> f_op -> fop_flags & FOP_DONTCACHE )
13411473 kiocb .ki_flags |= IOCB_DONTCACHE ;
1474+ fallthrough ;
1475+ case NFSD_IO_BUFFERED :
1476+ iov_iter_bvec (& iter , ITER_SOURCE , rqstp -> rq_bvec , nvecs , * cnt );
1477+ host_err = vfs_iocb_iter_write (file , & kiocb , & iter );
1478+ if (host_err < 0 )
1479+ break ;
1480+ * cnt = host_err ;
13421481 break ;
13431482 }
1344- host_err = vfs_iocb_iter_write (file , & kiocb , & iter );
13451483 if (host_err < 0 ) {
13461484 commit_reset_write_verifier (nn , rqstp , host_err );
13471485 goto out_nfserr ;
13481486 }
1349- * cnt = host_err ;
13501487 nfsd_stats_io_write_add (nn , exp , * cnt );
13511488 fsnotify_modify (file );
13521489 host_err = filemap_check_wb_err (file -> f_mapping , since );
0 commit comments