Skip to content

Commit 9a5e077

Browse files
shailend-g authored and davem330 committed
gve: Avoid rescheduling napi if on wrong cpu
In order to make possible the implementation of per-queue ndo hooks, gve_turnup was changed in a previous patch to account for queues already having some unprocessed descriptors: it does a one-off napi_schedule to handle them. If conditions of consistent high traffic persist in the immediate aftermath of this, the poll routine for a queue can be "stuck" on the cpu on which the ndo hooks ran, instead of the cpu its irq has affinity with. This situation is exacerbated by the fact that the ndo hooks for all the queues are invoked on the same cpu, potentially causing all the napi poll routines to be residing on the same cpu. A self correcting mechanism in the poll method itself solves this problem. Tested-by: Mina Almasry <almasrymina@google.com> Reviewed-by: Praveen Kaligineedi <pkaligineedi@google.com> Reviewed-by: Harshitha Ramamurthy <hramamurthy@google.com> Signed-off-by: Shailend Chand <shailend@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
1 parent 864616d commit 9a5e077

2 files changed

Lines changed: 32 additions & 2 deletions

File tree

drivers/net/ethernet/google/gve/gve.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -610,6 +610,7 @@ struct gve_notify_block {
610610
struct gve_priv *priv;
611611
struct gve_tx_ring *tx; /* tx rings on this block */
612612
struct gve_rx_ring *rx; /* rx rings on this block */
613+
u32 irq;
613614
};
614615

615616
/* Tracks allowed and current queue settings */

drivers/net/ethernet/google/gve/gve_main.c

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include <linux/etherdevice.h>
1010
#include <linux/filter.h>
1111
#include <linux/interrupt.h>
12+
#include <linux/irq.h>
1213
#include <linux/module.h>
1314
#include <linux/pci.h>
1415
#include <linux/sched.h>
@@ -253,6 +254,18 @@ static irqreturn_t gve_intr_dqo(int irq, void *arg)
253254
return IRQ_HANDLED;
254255
}
255256

257+
static int gve_is_napi_on_home_cpu(struct gve_priv *priv, u32 irq)
258+
{
259+
int cpu_curr = smp_processor_id();
260+
const struct cpumask *aff_mask;
261+
262+
aff_mask = irq_get_effective_affinity_mask(irq);
263+
if (unlikely(!aff_mask))
264+
return 1;
265+
266+
return cpumask_test_cpu(cpu_curr, aff_mask);
267+
}
268+
256269
int gve_napi_poll(struct napi_struct *napi, int budget)
257270
{
258271
struct gve_notify_block *block;
@@ -322,8 +335,21 @@ int gve_napi_poll_dqo(struct napi_struct *napi, int budget)
322335
reschedule |= work_done == budget;
323336
}
324337

325-
if (reschedule)
326-
return budget;
338+
if (reschedule) {
339+
/* Reschedule by returning budget only if already on the correct
340+
* cpu.
341+
*/
342+
if (likely(gve_is_napi_on_home_cpu(priv, block->irq)))
343+
return budget;
344+
345+
/* If not on the cpu with which this queue's irq has affinity
346+
* with, we avoid rescheduling napi and arm the irq instead so
347+
* that napi gets rescheduled back eventually onto the right
348+
* cpu.
349+
*/
350+
if (work_done == budget)
351+
work_done--;
352+
}
327353

328354
if (likely(napi_complete_done(napi, work_done))) {
329355
/* Enable interrupts again.
@@ -428,6 +454,7 @@ static int gve_alloc_notify_blocks(struct gve_priv *priv)
428454
"Failed to receive msix vector %d\n", i);
429455
goto abort_with_some_ntfy_blocks;
430456
}
457+
block->irq = priv->msix_vectors[msix_idx].vector;
431458
irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
432459
get_cpu_mask(i % active_cpus));
433460
block->irq_db_index = &priv->irq_db_indices[i].index;
@@ -441,6 +468,7 @@ static int gve_alloc_notify_blocks(struct gve_priv *priv)
441468
irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
442469
NULL);
443470
free_irq(priv->msix_vectors[msix_idx].vector, block);
471+
block->irq = 0;
444472
}
445473
kvfree(priv->ntfy_blocks);
446474
priv->ntfy_blocks = NULL;
@@ -474,6 +502,7 @@ static void gve_free_notify_blocks(struct gve_priv *priv)
474502
irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
475503
NULL);
476504
free_irq(priv->msix_vectors[msix_idx].vector, block);
505+
block->irq = 0;
477506
}
478507
free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
479508
kvfree(priv->ntfy_blocks);

0 commit comments

Comments
 (0)