Skip to content

Commit 581512a

Browse files
Arseniy Krasnov authored and Paolo Abeni committed
vsock/virtio: MSG_ZEROCOPY flag support
This adds handling of the MSG_ZEROCOPY flag on the transmission path: 1) If this flag is set and zerocopy transmission is possible (enabled in socket options and the transport allows zerocopy), then a non-linear skb will be created and filled with the pages of the user's buffer. Pages of the user's buffer are locked in memory by 'get_user_pages()'. 2) Replaces the way skb ownership is set: instead of 'skb_set_owner_sk_safe()' it calls 'skb_set_owner_w()'. The reason for this change is that '__zerocopy_sg_from_iter()' increments 'sk_wmem_alloc' of the socket, so to decrease this field correctly, a proper skb destructor is needed: 'sock_wfree()'. This destructor is set by 'skb_set_owner_w()'. 3) Adds a new callback to 'struct virtio_transport': 'can_msgzerocopy'. If this callback is set, then the transport needs an extra check to be able to send the provided number of buffers in zerocopy mode. Currently, the only transport that needs this callback set is virtio, because this transport adds new buffers to the virtio queue and we need to check that the number of these buffers does not exceed the size of the queue (this is required by the virtio spec). The vhost and loopback transports don't need this check. Signed-off-by: Arseniy Krasnov <avkrasnov@salutedevices.com> Reviewed-by: Stefano Garzarella <sgarzare@redhat.com> Acked-by: Michael S. Tsirkin <mst@redhat.com> Signed-off-by: Paolo Abeni <pabeni@redhat.com>
1 parent 4b0bf10 commit 581512a

4 files changed

Lines changed: 241 additions & 62 deletions

File tree

include/linux/virtio_vsock.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,15 @@ struct virtio_transport {
160160

161161
/* Takes ownership of the packet */
162162
int (*send_pkt)(struct sk_buff *skb);
163+
164+
/* Used in MSG_ZEROCOPY mode. Checks, that provided data
165+
* (number of buffers) could be transmitted with zerocopy
166+
* mode. If this callback is not implemented for the current
167+
* transport - this means that this transport doesn't need
168+
* extra checks and can perform zerocopy transmission by
169+
* default.
170+
*/
171+
bool (*can_msgzerocopy)(int bufs_num);
163172
};
164173

165174
ssize_t

include/trace/events/vsock_virtio_transport_common.h

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -43,15 +43,17 @@ TRACE_EVENT(virtio_transport_alloc_pkt,
4343
__u32 len,
4444
__u16 type,
4545
__u16 op,
46-
__u32 flags
46+
__u32 flags,
47+
bool zcopy
4748
),
4849
TP_ARGS(
4950
src_cid, src_port,
5051
dst_cid, dst_port,
5152
len,
5253
type,
5354
op,
54-
flags
55+
flags,
56+
zcopy
5557
),
5658
TP_STRUCT__entry(
5759
__field(__u32, src_cid)
@@ -62,6 +64,7 @@ TRACE_EVENT(virtio_transport_alloc_pkt,
6264
__field(__u16, type)
6365
__field(__u16, op)
6466
__field(__u32, flags)
67+
__field(bool, zcopy)
6568
),
6669
TP_fast_assign(
6770
__entry->src_cid = src_cid;
@@ -72,14 +75,15 @@ TRACE_EVENT(virtio_transport_alloc_pkt,
7275
__entry->type = type;
7376
__entry->op = op;
7477
__entry->flags = flags;
78+
__entry->zcopy = zcopy;
7579
),
76-
TP_printk("%u:%u -> %u:%u len=%u type=%s op=%s flags=%#x",
80+
TP_printk("%u:%u -> %u:%u len=%u type=%s op=%s flags=%#x zcopy=%s",
7781
__entry->src_cid, __entry->src_port,
7882
__entry->dst_cid, __entry->dst_port,
7983
__entry->len,
8084
show_type(__entry->type),
8185
show_op(__entry->op),
82-
__entry->flags)
86+
__entry->flags, __entry->zcopy ? "true" : "false")
8387
);
8488

8589
TRACE_EVENT(virtio_transport_recv_pkt,

net/vmw_vsock/virtio_transport.c

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -455,6 +455,37 @@ static void virtio_vsock_rx_done(struct virtqueue *vq)
455455
queue_work(virtio_vsock_workqueue, &vsock->rx_work);
456456
}
457457

458+
static bool virtio_transport_can_msgzerocopy(int bufs_num)
459+
{
460+
struct virtio_vsock *vsock;
461+
bool res = false;
462+
463+
rcu_read_lock();
464+
465+
vsock = rcu_dereference(the_virtio_vsock);
466+
if (vsock) {
467+
struct virtqueue *vq = vsock->vqs[VSOCK_VQ_TX];
468+
469+
/* Check that tx queue is large enough to keep whole
470+
* data to send. This is needed, because when there is
471+
* not enough free space in the queue, current skb to
472+
* send will be reinserted to the head of tx list of
473+
* the socket to retry transmission later, so if skb
474+
* is bigger than whole queue, it will be reinserted
475+
* again and again, thus blocking other skbs to be sent.
476+
* Each page of the user provided buffer will be added
477+
* as a single buffer to the tx virtqueue, so compare
478+
* number of pages against maximum capacity of the queue.
479+
*/
480+
if (bufs_num <= vq->num_max)
481+
res = true;
482+
}
483+
484+
rcu_read_unlock();
485+
486+
return res;
487+
}
488+
458489
static bool virtio_transport_seqpacket_allow(u32 remote_cid);
459490

460491
static struct virtio_transport virtio_transport = {
@@ -504,6 +535,7 @@ static struct virtio_transport virtio_transport = {
504535
},
505536

506537
.send_pkt = virtio_transport_send_pkt,
538+
.can_msgzerocopy = virtio_transport_can_msgzerocopy,
507539
};
508540

509541
static bool virtio_transport_seqpacket_allow(u32 remote_cid)

0 commit comments

Comments
 (0)