@@ -74,7 +74,8 @@ static const u64 vhost_net_features[VIRTIO_FEATURES_DWORDS] = {
 		  (1ULL << VHOST_NET_F_VIRTIO_NET_HDR) |
 		  (1ULL << VIRTIO_NET_F_MRG_RXBUF) |
 		  (1ULL << VIRTIO_F_ACCESS_PLATFORM) |
-		  (1ULL << VIRTIO_F_RING_RESET),
+		  (1ULL << VIRTIO_F_RING_RESET) |
+		  (1ULL << VIRTIO_F_IN_ORDER),
 	VIRTIO_BIT(VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO) |
 	VIRTIO_BIT(VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO),
 };
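For context: VIRTIO_F_IN_ORDER (feature bit 35 in the virtio spec) tells the driver that the device consumes buffers in the order they were made available, which lets the device acknowledge a whole run of descriptor chains with a single used-ring entry. A minimal userspace sketch of probing whether a vhost-net device offers the bit; the device path and error handling are illustrative, and a kernel without this change simply won't advertise it:

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/vhost.h>

#define VIRTIO_F_IN_ORDER 35	/* from include/uapi/linux/virtio_config.h */

int main(void)
{
	uint64_t features = 0;
	int fd = open("/dev/vhost-net", O_RDWR);	/* illustrative path */

	if (fd < 0 || ioctl(fd, VHOST_GET_FEATURES, &features) < 0) {
		perror("vhost-net");
		return 1;
	}
	printf("VIRTIO_F_IN_ORDER %s\n",
	       features & (1ULL << VIRTIO_F_IN_ORDER) ? "offered" : "absent");
	return 0;
}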
@@ -376,7 +377,8 @@ static void vhost_zerocopy_signal_used(struct vhost_net *net,
 	while (j) {
 		add = min(UIO_MAXIOV - nvq->done_idx, j);
 		vhost_add_used_and_signal_n(vq->dev, vq,
-					    &vq->heads[nvq->done_idx], add);
+					    &vq->heads[nvq->done_idx],
+					    NULL, add);
 		nvq->done_idx = (nvq->done_idx + add) % UIO_MAXIOV;
 		j -= add;
 	}
@@ -451,15 +453,17 @@ static int vhost_net_enable_vq(struct vhost_net *n,
 	return vhost_poll_start(poll, sock->file);
 }
 
-static void vhost_net_signal_used(struct vhost_net_virtqueue *nvq)
+static void vhost_net_signal_used(struct vhost_net_virtqueue *nvq,
+				  unsigned int count)
 {
 	struct vhost_virtqueue *vq = &nvq->vq;
 	struct vhost_dev *dev = vq->dev;
 
 	if (!nvq->done_idx)
 		return;
 
-	vhost_add_used_and_signal_n(dev, vq, vq->heads, nvq->done_idx);
+	vhost_add_used_and_signal_n(dev, vq, vq->heads,
+				    vq->nheads, count);
 	nvq->done_idx = 0;
 }
 
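vhost_add_used_and_signal_n() now takes a parallel nheads array alongside heads, and count names how many used elements to push rather than how many chains completed. A standalone sketch of that bookkeeping, with illustrative ids and counts (the real structs live in the vring ABI; this only models the idea):

#include <stdint.h>
#include <stdio.h>

struct used_elem { uint32_t id, len; };

/* Models the two reporting modes vhost_net_signal_used() now supports:
 * one used element per completed chain, or a single element whose
 * nheads[] count covers the whole batch. */
static unsigned signal_used(const struct used_elem *heads,
			    const uint16_t *nheads, unsigned count)
{
	unsigned i, chains = 0;

	for (i = 0; i < count; i++) {
		printf("used entry: id=%u len=%u covers %u chain(s)\n",
		       heads[i].id, heads[i].len, (unsigned)nheads[i]);
		chains += nheads[i];
	}
	return chains;
}

int main(void)
{
	/* In-order: three chains completed, acknowledged by one entry. */
	struct used_elem heads[1] = { { .id = 7, .len = 0 } };
	uint16_t nheads[1] = { 3 };

	printf("%u chains acknowledged\n", signal_used(heads, nheads, 1));
	return 0;
}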
@@ -468,13 +472,20 @@ static void vhost_tx_batch(struct vhost_net *net,
 			   struct socket *sock,
 			   struct msghdr *msghdr)
 {
+	struct vhost_virtqueue *vq = &nvq->vq;
+	bool in_order = vhost_has_feature(vq, VIRTIO_F_IN_ORDER);
 	struct tun_msg_ctl ctl = {
 		.type = TUN_MSG_PTR,
 		.num = nvq->batched_xdp,
 		.ptr = nvq->xdp,
 	};
 	int i, err;
 
+	if (in_order) {
+		vq->heads[0].len = 0;
+		vq->nheads[0] = nvq->done_idx;
+	}
+
 	if (nvq->batched_xdp == 0)
 		goto signal_used;
 
@@ -496,7 +507,7 @@ static void vhost_tx_batch(struct vhost_net *net,
 	}
 
 signal_used:
-	vhost_net_signal_used(nvq);
+	vhost_net_signal_used(nvq, in_order ? 1 : nvq->done_idx);
 	nvq->batched_xdp = 0;
 }
 
@@ -750,6 +761,7 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock)
 	int sent_pkts = 0;
 	bool sock_can_batch = (sock->sk->sk_sndbuf == INT_MAX);
 	bool busyloop_intr;
+	bool in_order = vhost_has_feature(vq, VIRTIO_F_IN_ORDER);
 
 	do {
 		busyloop_intr = false;
@@ -786,11 +798,13 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock)
 				break;
 			}
 
-			/* We can't build XDP buff, go for single
-			 * packet path but let's flush batched
-			 * packets.
-			 */
-			vhost_tx_batch(net, nvq, sock, &msg);
+			if (nvq->batched_xdp) {
+				/* We can't build XDP buff, go for single
+				 * packet path but let's flush batched
+				 * packets.
+				 */
+				vhost_tx_batch(net, nvq, sock, &msg);
+			}
 			msg.msg_control = NULL;
 		} else {
 			if (tx_can_batch(vq, total_len))
@@ -811,8 +825,12 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock)
 			pr_debug("Truncated TX packet: len %d != %zd\n",
 				 err, len);
 done:
-		vq->heads[nvq->done_idx].id = cpu_to_vhost32(vq, head);
-		vq->heads[nvq->done_idx].len = 0;
+		if (in_order) {
+			vq->heads[0].id = cpu_to_vhost32(vq, head);
+		} else {
+			vq->heads[nvq->done_idx].id = cpu_to_vhost32(vq, head);
+			vq->heads[nvq->done_idx].len = 0;
+		}
 		++nvq->done_idx;
 	} while (likely(!vhost_exceeds_weight(vq, ++sent_pkts, total_len)));
 
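On the in-order TX path only slot 0 of vq->heads stays live: every completed copy rewrites heads[0].id, so after a batch it names the most recent chain, and vhost_tx_batch() above pairs it with nheads[0] = done_idx before signalling a single used element (note also that the flush is now skipped entirely when no XDP buffs are batched). A standalone sketch of that overwrite-and-count pattern, with made-up head ids:

#include <stdint.h>
#include <stdio.h>

struct used_elem { uint32_t id, len; };

int main(void)
{
	struct used_elem heads[1];
	uint32_t batch[3] = { 4, 5, 6 };	/* made-up chain head ids */
	uint16_t done_idx = 0;

	/* Each completion overwrites the same slot, as in handle_tx_copy(). */
	for (unsigned i = 0; i < 3; i++) {
		heads[0].id = batch[i];
		++done_idx;
	}
	heads[0].len = 0;	/* TX never writes back into guest buffers */
	printf("one used entry: id=%u len=%u covering %u chains\n",
	       heads[0].id, heads[0].len, (unsigned)done_idx);
	return 0;
}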
@@ -991,7 +1009,7 @@ static int peek_head_len(struct vhost_net_virtqueue *rvq, struct sock *sk)
 }
 
 static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk,
-				      bool *busyloop_intr)
+				      bool *busyloop_intr, unsigned int count)
 {
 	struct vhost_net_virtqueue *rnvq = &net->vqs[VHOST_NET_VQ_RX];
 	struct vhost_net_virtqueue *tnvq = &net->vqs[VHOST_NET_VQ_TX];
@@ -1001,7 +1019,7 @@ static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk,
 
 	if (!len && rvq->busyloop_timeout) {
 		/* Flush batched heads first */
-		vhost_net_signal_used(rnvq);
+		vhost_net_signal_used(rnvq, count);
 		/* Both tx vq and rx socket were polled here */
 		vhost_net_busy_poll(net, rvq, tvq, busyloop_intr, true);
 
@@ -1013,22 +1031,25 @@ static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk,
 
 /* This is a multi-buffer version of vhost_get_desc, that works if
  *	vq has read descriptors only.
- * @vq		- the relevant virtqueue
+ * @nvq		- the relevant vhost_net virtqueue
  * @datalen	- data length we'll be reading
  * @iovcount	- returned count of io vectors we fill
  * @log		- vhost log
  * @log_num	- log offset
  * @quota	- headcount quota, 1 for big buffer
  *	returns number of buffer heads allocated, negative on error
  */
-static int get_rx_bufs(struct vhost_virtqueue *vq,
+static int get_rx_bufs(struct vhost_net_virtqueue *nvq,
 		       struct vring_used_elem *heads,
+		       u16 *nheads,
 		       int datalen,
 		       unsigned *iovcount,
 		       struct vhost_log *log,
 		       unsigned *log_num,
 		       unsigned int quota)
 {
+	struct vhost_virtqueue *vq = &nvq->vq;
+	bool in_order = vhost_has_feature(vq, VIRTIO_F_IN_ORDER);
 	unsigned int out, in;
 	int seg = 0;
 	int headcount = 0;
@@ -1065,14 +1086,16 @@ static int get_rx_bufs(struct vhost_virtqueue *vq,
 			nlogs += *log_num;
 			log += *log_num;
 		}
-		heads[headcount].id = cpu_to_vhost32(vq, d);
 		len = iov_length(vq->iov + seg, in);
-		heads[headcount].len = cpu_to_vhost32(vq, len);
-		datalen -= len;
+		if (!in_order) {
+			heads[headcount].id = cpu_to_vhost32(vq, d);
+			heads[headcount].len = cpu_to_vhost32(vq, len);
+		}
 		++headcount;
+		datalen -= len;
 		seg += in;
 	}
-	heads[headcount - 1].len = cpu_to_vhost32(vq, len + datalen);
+
 	*iovcount = seg;
 	if (unlikely(log))
 		*log_num = nlogs;
@@ -1082,6 +1105,15 @@ static int get_rx_bufs(struct vhost_virtqueue *vq,
 		r = UIO_MAXIOV + 1;
 		goto err;
 	}
+
+	if (!in_order)
+		heads[headcount - 1].len = cpu_to_vhost32(vq, len + datalen);
+	else {
+		heads[0].len = cpu_to_vhost32(vq, len + datalen);
+		heads[0].id = cpu_to_vhost32(vq, d);
+		nheads[0] = headcount;
+	}
+
 	return headcount;
err:
 	vhost_discard_vq_desc(vq, headcount);
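The len + datalen fixup is easy to misread: after the loop, datalen has gone non-positive by however much the final buffer over-covers the packet, so adding it back trims the last entry's used length to the bytes actually written; in-order mode applies the same arithmetic to the single aggregate heads[0] entry. A small numeric sketch (buffer sizes are illustrative):

#include <stdio.h>

int main(void)
{
	/* Suppose a 5000-byte packet lands in buffers of 4096 bytes each.
	 * The loop pulls two chains; the second is only partly used. */
	int datalen = 5000, len = 0, headcount = 0;

	while (datalen > 0) {
		len = 4096;		/* iov_length() of this chain */
		datalen -= len;
		++headcount;
	}
	/* datalen is now -3192; the last entry's used length is trimmed
	 * to the bytes actually consumed: 4096 + (-3192) = 904. */
	printf("heads=%d, last used len=%d\n", headcount, len + datalen);
	return 0;
}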
@@ -1094,6 +1126,8 @@ static void handle_rx(struct vhost_net *net)
 {
 	struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_RX];
 	struct vhost_virtqueue *vq = &nvq->vq;
+	bool in_order = vhost_has_feature(vq, VIRTIO_F_IN_ORDER);
+	unsigned int count = 0;
 	unsigned in, log;
 	struct vhost_log *vq_log;
 	struct msghdr msg = {
@@ -1141,12 +1175,13 @@ static void handle_rx(struct vhost_net *net)
 
 	do {
 		sock_len = vhost_net_rx_peek_head_len(net, sock->sk,
-						      &busyloop_intr);
+						      &busyloop_intr, count);
 		if (!sock_len)
 			break;
 		sock_len += sock_hlen;
 		vhost_len = sock_len + vhost_hlen;
-		headcount = get_rx_bufs(vq, vq->heads + nvq->done_idx,
+		headcount = get_rx_bufs(nvq, vq->heads + count,
+					vq->nheads + count,
 					vhost_len, &in, vq_log, &log,
 					likely(mergeable) ? UIO_MAXIOV : 1);
 		/* On error, stop handling until the next kick. */
@@ -1222,8 +1257,11 @@ static void handle_rx(struct vhost_net *net)
 			goto out;
 		}
 		nvq->done_idx += headcount;
-		if (nvq->done_idx > VHOST_NET_BATCH)
-			vhost_net_signal_used(nvq);
+		count += in_order ? 1 : headcount;
+		if (nvq->done_idx > VHOST_NET_BATCH) {
+			vhost_net_signal_used(nvq, count);
+			count = 0;
+		}
 		if (unlikely(vq_log))
 			vhost_log_write(vq, vq_log, log, vhost_len,
 					vq->iov, in);
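Note the split bookkeeping in handle_rx(): done_idx still advances once per descriptor chain (it drives the VHOST_NET_BATCH flush threshold), while count advances once per used element, per packet with VIRTIO_F_IN_ORDER and per chain otherwise. A toy illustration with two packets of made-up chain counts:

#include <stdio.h>

int main(void)
{
	int headcounts[2] = { 3, 2 };	/* made-up chains per packet */
	int done_idx = 0, count_in_order = 0, count_legacy = 0;

	for (int i = 0; i < 2; i++) {
		done_idx += headcounts[i];	/* always per chain */
		count_in_order += 1;		/* one used entry per packet */
		count_legacy += headcounts[i];	/* one used entry per chain */
	}
	printf("done_idx=%d, count: in-order=%d, otherwise=%d\n",
	       done_idx, count_in_order, count_legacy);
	return 0;
}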
@@ -1235,7 +1273,7 @@ static void handle_rx(struct vhost_net *net)
 	else if (!sock_len)
 		vhost_net_enable_vq(net, vq);
 out:
-	vhost_net_signal_used(nvq);
+	vhost_net_signal_used(nvq, count);
 	mutex_unlock(&vq->mutex);
 }
 