Skip to content

Commit 0345552

Browse files
edumazet authored and kuba-moo committed
net_sched: limit try_bulk_dequeue_skb() batches
After commit 100dfa7 ("inet: dev_queue_xmit() llist adoption") I started seeing many qdisc requeues on IDPF under high TX workload. $ tc -s qd sh dev eth1 handle 1: ; sleep 1; tc -s qd sh dev eth1 handle 1: qdisc mq 1: root Sent 43534617319319 bytes 268186451819 pkt (dropped 0, overlimits 0 requeues 3532840114) backlog 1056Kb 6675p requeues 3532840114 qdisc mq 1: root Sent 43554665866695 bytes 268309964788 pkt (dropped 0, overlimits 0 requeues 3537737653) backlog 781164b 4822p requeues 3537737653 This is caused by try_bulk_dequeue_skb() being only limited by BQL budget. perf record -C120-239 -e qdisc:qdisc_dequeue sleep 1 ; perf script ... netperf 75332 [146] 2711.138269: qdisc:qdisc_dequeue: dequeue ifindex=5 qdisc handle=0x80150000 parent=0x10013 txq_state=0x0 packets=1292 skbaddr=0xff378005a1e9f200 netperf 75332 [146] 2711.138953: qdisc:qdisc_dequeue: dequeue ifindex=5 qdisc handle=0x80150000 parent=0x10013 txq_state=0x0 packets=1213 skbaddr=0xff378004d607a500 netperf 75330 [144] 2711.139631: qdisc:qdisc_dequeue: dequeue ifindex=5 qdisc handle=0x80150000 parent=0x10013 txq_state=0x0 packets=1233 skbaddr=0xff3780046be20100 netperf 75333 [147] 2711.140356: qdisc:qdisc_dequeue: dequeue ifindex=5 qdisc handle=0x80150000 parent=0x10013 txq_state=0x0 packets=1093 skbaddr=0xff37800514845b00 netperf 75337 [151] 2711.141037: qdisc:qdisc_dequeue: dequeue ifindex=5 qdisc handle=0x80150000 parent=0x10013 txq_state=0x0 packets=1353 skbaddr=0xff37800460753300 netperf 75337 [151] 2711.141877: qdisc:qdisc_dequeue: dequeue ifindex=5 qdisc handle=0x80150000 parent=0x10013 txq_state=0x0 packets=1367 skbaddr=0xff378004e72c7b00 netperf 75330 [144] 2711.142643: qdisc:qdisc_dequeue: dequeue ifindex=5 qdisc handle=0x80150000 parent=0x10013 txq_state=0x0 packets=1202 skbaddr=0xff3780045bd60000 ... This is bad because : 1) Large batches hold one victim cpu for a very long time. 2) Driver often hit their own TX ring limit (all slots are used). 
3) We call dev_requeue_skb() 4) Requeues are using a FIFO (q->gso_skb), breaking the qdisc's ability to implement FQ or priority scheduling. 5) dequeue_skb() gets packets from q->gso_skb one skb at a time with no xmit_more support. This is causing many spinlock games between the qdisc and the device driver. Requeues were supposed to be very rare, let's keep them this way. Limit batch sizes to /proc/sys/net/core/dev_weight (default 64) as __qdisc_run() was designed to use. Fixes: 5772e9a ("qdisc: bulk dequeue support for qdiscs with TCQ_F_ONETXQUEUE") Signed-off-by: Eric Dumazet <edumazet@google.com> Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com> Acked-by: Jesper Dangaard Brouer <hawk@kernel.org> Link: https://patch.msgid.link/20251109161215.2574081-1-edumazet@google.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
1 parent 7a6fa4f commit 0345552

1 file changed

Lines changed: 10 additions & 7 deletions

File tree

net/sched/sch_generic.c

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -180,9 +180,10 @@ static inline void dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
180180
static void try_bulk_dequeue_skb(struct Qdisc *q,
181181
struct sk_buff *skb,
182182
const struct netdev_queue *txq,
183-
int *packets)
183+
int *packets, int budget)
184184
{
185185
int bytelimit = qdisc_avail_bulklimit(txq) - skb->len;
186+
int cnt = 0;
186187

187188
while (bytelimit > 0) {
188189
struct sk_buff *nskb = q->dequeue(q);
@@ -193,8 +194,10 @@ static void try_bulk_dequeue_skb(struct Qdisc *q,
193194
bytelimit -= nskb->len; /* covers GSO len */
194195
skb->next = nskb;
195196
skb = nskb;
196-
(*packets)++; /* GSO counts as one pkt */
197+
if (++cnt >= budget)
198+
break;
197199
}
200+
(*packets) += cnt;
198201
skb_mark_not_on_list(skb);
199202
}
200203

@@ -228,7 +231,7 @@ static void try_bulk_dequeue_skb_slow(struct Qdisc *q,
228231
* A requeued skb (via q->gso_skb) can also be a SKB list.
229232
*/
230233
static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate,
231-
int *packets)
234+
int *packets, int budget)
232235
{
233236
const struct netdev_queue *txq = q->dev_queue;
234237
struct sk_buff *skb = NULL;
@@ -295,7 +298,7 @@ static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate,
295298
if (skb) {
296299
bulk:
297300
if (qdisc_may_bulk(q))
298-
try_bulk_dequeue_skb(q, skb, txq, packets);
301+
try_bulk_dequeue_skb(q, skb, txq, packets, budget);
299302
else
300303
try_bulk_dequeue_skb_slow(q, skb, packets);
301304
}
@@ -387,7 +390,7 @@ bool sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
387390
* >0 - queue is not empty.
388391
*
389392
*/
390-
static inline bool qdisc_restart(struct Qdisc *q, int *packets)
393+
static inline bool qdisc_restart(struct Qdisc *q, int *packets, int budget)
391394
{
392395
spinlock_t *root_lock = NULL;
393396
struct netdev_queue *txq;
@@ -396,7 +399,7 @@ static inline bool qdisc_restart(struct Qdisc *q, int *packets)
396399
bool validate;
397400

398401
/* Dequeue packet */
399-
skb = dequeue_skb(q, &validate, packets);
402+
skb = dequeue_skb(q, &validate, packets, budget);
400403
if (unlikely(!skb))
401404
return false;
402405

@@ -414,7 +417,7 @@ void __qdisc_run(struct Qdisc *q)
414417
int quota = READ_ONCE(net_hotdata.dev_tx_weight);
415418
int packets;
416419

417-
while (qdisc_restart(q, &packets)) {
420+
while (qdisc_restart(q, &packets, quota)) {
418421
quota -= packets;
419422
if (quota <= 0) {
420423
if (q->flags & TCQ_F_NOLOCK)

0 commit comments

Comments
 (0)