 	RDMA_RW_MULTI_WR,
 	RDMA_RW_MR,
 	RDMA_RW_SIG_MR,
+	RDMA_RW_IOVA,
 };
 
 static bool rdma_rw_force_mr;
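
The new RDMA_RW_IOVA value tags a corresponding member of the union in struct rdma_rw_ctx. That member is not part of this diff; the sketch below is inferred from the fields the added code touches (ctx->iova.state, .mapped_len, .sge, .wr) and is a hypothetical layout, not the committed definition:

	/* Hypothetical union member, inferred from the accesses in this patch. */
	struct {
		struct dma_iova_state	state;      /* IOVA allocation; holds the DMA address */
		size_t			mapped_len; /* bytes actually linked into the range */
		struct ib_sge		sge;        /* single SGE covering the whole range */
		struct ib_rdma_wr	wr;         /* single RDMA READ/WRITE work request */
	} iova;
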
@@ -383,6 +384,87 @@ static int rdma_rw_init_map_wrs_bvec(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
 	return -ENOMEM;
 }
 
+/*
+ * Try to use the two-step IOVA API to map bvecs into a contiguous DMA range.
+ * This reduces IOTLB sync overhead by doing one sync at the end instead of
+ * one per bvec, and produces a contiguous DMA address range that can be
+ * described by a single SGE.
+ *
+ * Returns the number of WQEs (always 1) on success, -EOPNOTSUPP if IOVA
+ * mapping is not available, or another negative error code on failure.
+ */
+static int rdma_rw_init_iova_wrs_bvec(struct rdma_rw_ctx *ctx,
+		struct ib_qp *qp, const struct bio_vec *bvec,
+		struct bvec_iter *iter, u64 remote_addr, u32 rkey,
+		enum dma_data_direction dir)
+{
+	struct ib_device *dev = qp->pd->device;
+	struct device *dma_dev = dev->dma_device;
+	size_t total_len = iter->bi_size;
+	struct bio_vec first_bv;
+	size_t mapped_len = 0;
+	int ret;
+
+	/* Virtual DMA devices cannot support IOVA allocators */
+	if (ib_uses_virt_dma(dev))
+		return -EOPNOTSUPP;
+
+	/* Try to allocate contiguous IOVA space */
+	first_bv = mp_bvec_iter_bvec(bvec, *iter);
+	if (!dma_iova_try_alloc(dma_dev, &ctx->iova.state,
+				bvec_phys(&first_bv), total_len))
+		return -EOPNOTSUPP;
+
+	/* Link all bvecs into the IOVA space */
+	while (iter->bi_size) {
+		struct bio_vec bv = mp_bvec_iter_bvec(bvec, *iter);
+
+		ret = dma_iova_link(dma_dev, &ctx->iova.state, bvec_phys(&bv),
+				mapped_len, bv.bv_len, dir, 0);
+		if (ret)
+			goto out_destroy;
+
+		mapped_len += bv.bv_len;
+		bvec_iter_advance(bvec, iter, bv.bv_len);
+	}
+
+	/* Sync the IOTLB once for all linked pages */
+	ret = dma_iova_sync(dma_dev, &ctx->iova.state, 0, mapped_len);
+	if (ret)
+		goto out_destroy;
+
+	ctx->iova.mapped_len = mapped_len;
+
+	/* Single SGE covers the entire contiguous IOVA range */
+	ctx->iova.sge.addr = ctx->iova.state.addr;
+	ctx->iova.sge.length = mapped_len;
+	ctx->iova.sge.lkey = qp->pd->local_dma_lkey;
+
+	/* Single WR for the whole transfer */
+	memset(&ctx->iova.wr, 0, sizeof(ctx->iova.wr));
+	if (dir == DMA_TO_DEVICE)
+		ctx->iova.wr.wr.opcode = IB_WR_RDMA_WRITE;
+	else
+		ctx->iova.wr.wr.opcode = IB_WR_RDMA_READ;
+	ctx->iova.wr.wr.num_sge = 1;
+	ctx->iova.wr.wr.sg_list = &ctx->iova.sge;
+	ctx->iova.wr.remote_addr = remote_addr;
+	ctx->iova.wr.rkey = rkey;
+
+	ctx->type = RDMA_RW_IOVA;
+	ctx->nr_ops = 1;
+	return 1;
+
+out_destroy:
+	/*
+	 * dma_iova_destroy() expects the actual mapped length, not the
+	 * total allocation size. It unlinks only the successfully linked
+	 * range and frees the entire IOVA allocation.
+	 */
+	dma_iova_destroy(dma_dev, &ctx->iova.state, mapped_len, dir, 0);
+	return ret;
+}
+
 /**
  * rdma_rw_ctx_init - initialize a RDMA READ/WRITE context
  * @ctx: context to initialize
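
Stripped of the RDMA plumbing, the allocate/link/sync/destroy pattern the new function follows looks like this. A minimal sketch against the dma_iova_* API used above; the chunk arrays and device are placeholders, not part of this patch:

	/* Sketch: map 'nr' physical chunks into one contiguous IOVA range. */
	static dma_addr_t map_chunks(struct device *dev, struct dma_iova_state *state,
			const phys_addr_t *phys, const size_t *len, int nr,
			size_t total, enum dma_data_direction dir)
	{
		size_t off = 0;
		int i;

		/* Step 1: reserve IOVA space; fails without a supporting IOMMU. */
		if (!dma_iova_try_alloc(dev, state, phys[0], total))
			return DMA_MAPPING_ERROR;

		/* Step 2: link each physical chunk at its offset in the range. */
		for (i = 0; i < nr; i++) {
			if (dma_iova_link(dev, state, phys[i], off, len[i], dir, 0))
				goto err;
			off += len[i];
		}

		/* One IOTLB sync for the whole range instead of one per chunk. */
		if (dma_iova_sync(dev, state, 0, off))
			goto err;
		return state->addr;

	err:
		/* Unlinks the 'off' bytes linked so far and frees the allocation. */
		dma_iova_destroy(dev, state, off, dir, 0);
		return DMA_MAPPING_ERROR;
	}
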
@@ -485,6 +567,8 @@ int rdma_rw_ctx_init_bvec(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
 		struct bvec_iter iter, u64 remote_addr, u32 rkey,
 		enum dma_data_direction dir)
 {
+	int ret;
+
 	if (nr_bvec == 0 || iter.bi_size == 0)
 		return -EINVAL;
 
@@ -495,6 +579,16 @@ int rdma_rw_ctx_init_bvec(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
 	if (nr_bvec == 1)
 		return rdma_rw_init_single_wr_bvec(ctx, qp, bvecs, &iter,
 				remote_addr, rkey, dir);
+
+	/*
+	 * Try IOVA-based mapping first for multi-bvec transfers.
+	 * This reduces IOTLB sync overhead by batching all mappings.
+	 */
+	ret = rdma_rw_init_iova_wrs_bvec(ctx, qp, bvecs, &iter, remote_addr,
+			rkey, dir);
+	if (ret != -EOPNOTSUPP)
+		return ret;
+
 	return rdma_rw_init_map_wrs_bvec(ctx, qp, bvecs, nr_bvec, &iter,
 			remote_addr, rkey, dir);
 }
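
For context, a ULP drives this the same way as the scatterlist path: initialize the context, then post the WRs and tear the context down from the completion path. A hedged caller sketch; the exact rdma_rw_ctx_init_bvec() and rdma_rw_ctx_destroy_bvec() prototypes are only partially visible in this diff, so the argument lists below are assumptions modeled on rdma_rw_ctx_init() and rdma_rw_ctx_destroy():

	/* Sketch: issue one RDMA READ over a bvec array. */
	static int issue_rdma_read(struct ib_qp *qp, u32 port_num,
			struct bio_vec *bvecs, unsigned int nr_bvec,
			struct bvec_iter iter, u64 remote_addr, u32 rkey,
			struct rdma_rw_ctx *ctx, struct ib_cqe *cqe)
	{
		int ret;

		/* Picks RDMA_RW_IOVA automatically when the IOVA path is usable. */
		ret = rdma_rw_ctx_init_bvec(ctx, qp, bvecs, nr_bvec, iter,
				remote_addr, rkey, DMA_FROM_DEVICE);
		if (ret < 0)
			return ret;

		/* Chains the context's WRs and posts them in one call. */
		ret = rdma_rw_ctx_post(ctx, qp, port_num, cqe, NULL);
		if (ret)
			rdma_rw_ctx_destroy_bvec(ctx, qp, port_num, bvecs,
					nr_bvec, DMA_FROM_DEVICE);
		return ret;
	}
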
@@ -671,6 +765,10 @@ struct ib_send_wr *rdma_rw_ctx_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
 		first_wr = &ctx->reg[0].reg_wr.wr;
 		last_wr = &ctx->reg[ctx->nr_ops - 1].wr.wr;
 		break;
+	case RDMA_RW_IOVA:
+		first_wr = &ctx->iova.wr.wr;
+		last_wr = &ctx->iova.wr.wr;
+		break;
 	case RDMA_RW_MULTI_WR:
 		first_wr = &ctx->map.wrs[0].wr;
 		last_wr = &ctx->map.wrs[ctx->nr_ops - 1].wr;
@@ -745,6 +843,10 @@ void rdma_rw_ctx_destroy(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
 		break;
 	case RDMA_RW_SINGLE_WR:
 		break;
+	case RDMA_RW_IOVA:
+		/* IOVA contexts must use rdma_rw_ctx_destroy_bvec() */
+		WARN_ON_ONCE(1);
+		return;
 	default:
 		BUG();
 		break;
@@ -778,6 +880,10 @@ void rdma_rw_ctx_destroy_bvec(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
 	u32 i;
 
 	switch (ctx->type) {
+	case RDMA_RW_IOVA:
+		dma_iova_destroy(dev->dma_device, &ctx->iova.state,
+				 ctx->iova.mapped_len, dir, 0);
+		break;
 	case RDMA_RW_MULTI_WR:
 		for (i = 0; i < nr_bvec; i++)
 			ib_dma_unmap_bvec(dev, ctx->map.sges[i].addr,
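
Note the asymmetry the two destroy hunks enforce: scatterlist contexts go through rdma_rw_ctx_destroy(), while bvec contexts, including the new IOVA type, must use rdma_rw_ctx_destroy_bvec(), where a single dma_iova_destroy() call unlinks the linked range and frees the whole allocation. A hedged completion-side sketch; struct my_io and its fields are hypothetical, and the rdma_rw_ctx_destroy_bvec() argument list is assumed as above:

	/* Sketch: tear down the bvec context once the READ completes. */
	static void rdma_read_done(struct ib_cq *cq, struct ib_wc *wc)
	{
		struct my_io *io = container_of(wc->wr_cqe, struct my_io, cqe);

		/*
		 * For RDMA_RW_IOVA this is one dma_iova_destroy() call; for
		 * RDMA_RW_MULTI_WR it unmaps each bvec individually.
		 */
		rdma_rw_ctx_destroy_bvec(&io->ctx, io->qp, io->port_num,
				io->bvecs, io->nr_bvec, DMA_FROM_DEVICE);
		complete(&io->done);
	}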