Skip to content

Commit 32d53c0

Browse files
alobakin authored and kuba-moo committed
ice: fix 'scheduling while atomic' on aux critical err interrupt
There's a kernel BUG splat on processing aux critical error
interrupts in ice_misc_intr():

[ 2100.917085] BUG: scheduling while atomic: swapper/15/0/0x00010000
...
[ 2101.060770] Call Trace:
[ 2101.063229]  <IRQ>
[ 2101.065252]  dump_stack+0x41/0x60
[ 2101.068587]  __schedule_bug.cold.100+0x4c/0x58
[ 2101.073060]  __schedule+0x6a4/0x830
[ 2101.076570]  schedule+0x35/0xa0
[ 2101.079727]  schedule_preempt_disabled+0xa/0x10
[ 2101.084284]  __mutex_lock.isra.7+0x310/0x420
[ 2101.088580]  ? ice_misc_intr+0x201/0x2e0 [ice]
[ 2101.093078]  ice_send_event_to_aux+0x25/0x70 [ice]
[ 2101.097921]  ice_misc_intr+0x220/0x2e0 [ice]
[ 2101.102232]  __handle_irq_event_percpu+0x40/0x180
[ 2101.106965]  handle_irq_event_percpu+0x30/0x80
[ 2101.111434]  handle_irq_event+0x36/0x53
[ 2101.115292]  handle_edge_irq+0x82/0x190
[ 2101.119148]  handle_irq+0x1c/0x30
[ 2101.122480]  do_IRQ+0x49/0xd0
[ 2101.125465]  common_interrupt+0xf/0xf
[ 2101.129146]  </IRQ>
...

As Andrew correctly mentioned previously[0], the following call
ladder happens:

ice_misc_intr() <- hardirq
  ice_send_event_to_aux()
    device_lock()
      mutex_lock()
        might_sleep()
          might_resched() <- oops

Add a new PF state bit which indicates that an aux critical error
occurred and serve it in ice_service_task() in process context.
The new ice_pf::oicr_err_reg is read-write in both hardirq and
process contexts, but only 3 bits of non-critical data probably
aren't worth explicit synchronizing (and they're even in the same
byte [31:24]).

[0] https://lore.kernel.org/all/YeSRUVmrdmlUXHDn@lunn.ch

Fixes: 348048e ("ice: Implement iidc operations")
Signed-off-by: Alexander Lobakin <alexandr.lobakin@intel.com>
Tested-by: Michal Kubiak <michal.kubiak@intel.com>
Acked-by: Tony Nguyen <anthony.l.nguyen@intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
1 parent 6a7d8cf commit 32d53c0

2 files changed

Lines changed: 17 additions & 10 deletions

File tree

drivers/net/ethernet/intel/ice/ice.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -290,6 +290,7 @@ enum ice_pf_state {
290290
ICE_LINK_DEFAULT_OVERRIDE_PENDING,
291291
ICE_PHY_INIT_COMPLETE,
292292
ICE_FD_VF_FLUSH_CTX, /* set at FD Rx IRQ or timeout */
293+
ICE_AUX_ERR_PENDING,
293294
ICE_STATE_NBITS /* must be last */
294295
};
295296

@@ -559,6 +560,7 @@ struct ice_pf {
559560
wait_queue_head_t reset_wait_queue;
560561

561562
u32 hw_csum_rx_error;
563+
u32 oicr_err_reg;
562564
u16 oicr_idx; /* Other interrupt cause MSIX vector index */
563565
u16 num_avail_sw_msix; /* remaining MSIX SW vectors left unclaimed */
564566
u16 max_pf_txqs; /* Total Tx queues PF wide */

drivers/net/ethernet/intel/ice/ice_main.c

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2255,6 +2255,19 @@ static void ice_service_task(struct work_struct *work)
22552255
return;
22562256
}
22572257

2258+
if (test_and_clear_bit(ICE_AUX_ERR_PENDING, pf->state)) {
2259+
struct iidc_event *event;
2260+
2261+
event = kzalloc(sizeof(*event), GFP_KERNEL);
2262+
if (event) {
2263+
set_bit(IIDC_EVENT_CRIT_ERR, event->type);
2264+
/* report the entire OICR value to AUX driver */
2265+
swap(event->reg, pf->oicr_err_reg);
2266+
ice_send_event_to_aux(pf, event);
2267+
kfree(event);
2268+
}
2269+
}
2270+
22582271
if (test_bit(ICE_FLAG_PLUG_AUX_DEV, pf->flags)) {
22592272
/* Plug aux device per request */
22602273
ice_plug_aux_dev(pf);
@@ -3041,17 +3054,9 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data)
30413054

30423055
#define ICE_AUX_CRIT_ERR (PFINT_OICR_PE_CRITERR_M | PFINT_OICR_HMC_ERR_M | PFINT_OICR_PE_PUSH_M)
30433056
if (oicr & ICE_AUX_CRIT_ERR) {
3044-
struct iidc_event *event;
3045-
3057+
pf->oicr_err_reg |= oicr;
3058+
set_bit(ICE_AUX_ERR_PENDING, pf->state);
30463059
ena_mask &= ~ICE_AUX_CRIT_ERR;
3047-
event = kzalloc(sizeof(*event), GFP_ATOMIC);
3048-
if (event) {
3049-
set_bit(IIDC_EVENT_CRIT_ERR, event->type);
3050-
/* report the entire OICR value to AUX driver */
3051-
event->reg = oicr;
3052-
ice_send_event_to_aux(pf, event);
3053-
kfree(event);
3054-
}
30553060
}
30563061

30573062
/* Report any remaining unexpected interrupts */

0 commit comments

Comments
 (0)