Skip to content

Commit 2aeeef9

Browse files
Jianbo Liu authored and kuba-moo committed
bonding: change ipsec_lock from spin lock to mutex
In the cited commit, bond->ipsec_lock is added to protect ipsec_list, hence xdo_dev_state_add and xdo_dev_state_delete are called inside this lock. As ipsec_lock is a spin lock and such xfrmdev ops may sleep, "scheduling while atomic" will be triggered when changing bond's active slave.

[ 101.055189] BUG: scheduling while atomic: bash/902/0x00000200
[ 101.055726] Modules linked in:
[ 101.058211] CPU: 3 PID: 902 Comm: bash Not tainted 6.9.0-rc4+ #1
[ 101.058760] Hardware name:
[ 101.059434] Call Trace:
[ 101.059436] <TASK>
[ 101.060873] dump_stack_lvl+0x51/0x60
[ 101.061275] __schedule_bug+0x4e/0x60
[ 101.061682] __schedule+0x612/0x7c0
[ 101.062078] ? __mod_timer+0x25c/0x370
[ 101.062486] schedule+0x25/0xd0
[ 101.062845] schedule_timeout+0x77/0xf0
[ 101.063265] ? asm_common_interrupt+0x22/0x40
[ 101.063724] ? __bpf_trace_itimer_state+0x10/0x10
[ 101.064215] __wait_for_common+0x87/0x190
[ 101.064648] ? usleep_range_state+0x90/0x90
[ 101.065091] cmd_exec+0x437/0xb20 [mlx5_core]
[ 101.065569] mlx5_cmd_do+0x1e/0x40 [mlx5_core]
[ 101.066051] mlx5_cmd_exec+0x18/0x30 [mlx5_core]
[ 101.066552] mlx5_crypto_create_dek_key+0xea/0x120 [mlx5_core]
[ 101.067163] ? bonding_sysfs_store_option+0x4d/0x80 [bonding]
[ 101.067738] ? kmalloc_trace+0x4d/0x350
[ 101.068156] mlx5_ipsec_create_sa_ctx+0x33/0x100 [mlx5_core]
[ 101.068747] mlx5e_xfrm_add_state+0x47b/0xaa0 [mlx5_core]
[ 101.069312] bond_change_active_slave+0x392/0x900 [bonding]
[ 101.069868] bond_option_active_slave_set+0x1c2/0x240 [bonding]
[ 101.070454] __bond_opt_set+0xa6/0x430 [bonding]
[ 101.070935] __bond_opt_set_notify+0x2f/0x90 [bonding]
[ 101.071453] bond_opt_tryset_rtnl+0x72/0xb0 [bonding]
[ 101.071965] bonding_sysfs_store_option+0x4d/0x80 [bonding]
[ 101.072567] kernfs_fop_write_iter+0x10c/0x1a0
[ 101.073033] vfs_write+0x2d8/0x400
[ 101.073416] ? alloc_fd+0x48/0x180
[ 101.073798] ksys_write+0x5f/0xe0
[ 101.074175] do_syscall_64+0x52/0x110
[ 101.074576] entry_SYSCALL_64_after_hwframe+0x4b/0x53

bond_ipsec_add_sa_all and bond_ipsec_del_sa_all are only called from bond_change_active_slave, which requires holding the RTNL lock, and bond_ipsec_add_sa and bond_ipsec_del_sa are the xfrm state xdo_dev_state_add and xdo_dev_state_delete callbacks, which are called in user context. So ipsec_lock doesn't have to be a spin lock; change it to a mutex, which resolves the above issue.

Fixes: 9a56055 ("bonding: Add struct bond_ipesc to manage SA")
Signed-off-by: Jianbo Liu <jianbol@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Reviewed-by: Hangbin Liu <liuhangbin@gmail.com>
Acked-by: Jay Vosburgh <jv@jvosburgh.net>
Link: https://patch.msgid.link/20240823031056.110999-4-jianbol@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
1 parent 907ed83 commit 2aeeef9

2 files changed

Lines changed: 44 additions & 37 deletions

File tree

drivers/net/bonding/bond_main.c

Lines changed: 43 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -428,6 +428,7 @@ static int bond_ipsec_add_sa(struct xfrm_state *xs,
428428
{
429429
struct net_device *bond_dev = xs->xso.dev;
430430
struct net_device *real_dev;
431+
netdevice_tracker tracker;
431432
struct bond_ipsec *ipsec;
432433
struct bonding *bond;
433434
struct slave *slave;
@@ -439,38 +440,41 @@ static int bond_ipsec_add_sa(struct xfrm_state *xs,
439440
rcu_read_lock();
440441
bond = netdev_priv(bond_dev);
441442
slave = rcu_dereference(bond->curr_active_slave);
442-
if (!slave) {
443-
rcu_read_unlock();
444-
return -ENODEV;
443+
real_dev = slave ? slave->dev : NULL;
444+
netdev_hold(real_dev, &tracker, GFP_ATOMIC);
445+
rcu_read_unlock();
446+
if (!real_dev) {
447+
err = -ENODEV;
448+
goto out;
445449
}
446450

447-
real_dev = slave->dev;
448451
if (!real_dev->xfrmdev_ops ||
449452
!real_dev->xfrmdev_ops->xdo_dev_state_add ||
450453
netif_is_bond_master(real_dev)) {
451454
NL_SET_ERR_MSG_MOD(extack, "Slave does not support ipsec offload");
452-
rcu_read_unlock();
453-
return -EINVAL;
455+
err = -EINVAL;
456+
goto out;
454457
}
455458

456-
ipsec = kmalloc(sizeof(*ipsec), GFP_ATOMIC);
459+
ipsec = kmalloc(sizeof(*ipsec), GFP_KERNEL);
457460
if (!ipsec) {
458-
rcu_read_unlock();
459-
return -ENOMEM;
461+
err = -ENOMEM;
462+
goto out;
460463
}
461464

462465
xs->xso.real_dev = real_dev;
463466
err = real_dev->xfrmdev_ops->xdo_dev_state_add(xs, extack);
464467
if (!err) {
465468
ipsec->xs = xs;
466469
INIT_LIST_HEAD(&ipsec->list);
467-
spin_lock_bh(&bond->ipsec_lock);
470+
mutex_lock(&bond->ipsec_lock);
468471
list_add(&ipsec->list, &bond->ipsec_list);
469-
spin_unlock_bh(&bond->ipsec_lock);
472+
mutex_unlock(&bond->ipsec_lock);
470473
} else {
471474
kfree(ipsec);
472475
}
473-
rcu_read_unlock();
476+
out:
477+
netdev_put(real_dev, &tracker);
474478
return err;
475479
}
476480

@@ -481,35 +485,35 @@ static void bond_ipsec_add_sa_all(struct bonding *bond)
481485
struct bond_ipsec *ipsec;
482486
struct slave *slave;
483487

484-
rcu_read_lock();
485-
slave = rcu_dereference(bond->curr_active_slave);
486-
if (!slave)
487-
goto out;
488+
slave = rtnl_dereference(bond->curr_active_slave);
489+
real_dev = slave ? slave->dev : NULL;
490+
if (!real_dev)
491+
return;
488492

489-
real_dev = slave->dev;
493+
mutex_lock(&bond->ipsec_lock);
490494
if (!real_dev->xfrmdev_ops ||
491495
!real_dev->xfrmdev_ops->xdo_dev_state_add ||
492496
netif_is_bond_master(real_dev)) {
493-
spin_lock_bh(&bond->ipsec_lock);
494497
if (!list_empty(&bond->ipsec_list))
495498
slave_warn(bond_dev, real_dev,
496499
"%s: no slave xdo_dev_state_add\n",
497500
__func__);
498-
spin_unlock_bh(&bond->ipsec_lock);
499501
goto out;
500502
}
501503

502-
spin_lock_bh(&bond->ipsec_lock);
503504
list_for_each_entry(ipsec, &bond->ipsec_list, list) {
505+
/* If new state is added before ipsec_lock acquired */
506+
if (ipsec->xs->xso.real_dev == real_dev)
507+
continue;
508+
504509
ipsec->xs->xso.real_dev = real_dev;
505510
if (real_dev->xfrmdev_ops->xdo_dev_state_add(ipsec->xs, NULL)) {
506511
slave_warn(bond_dev, real_dev, "%s: failed to add SA\n", __func__);
507512
ipsec->xs->xso.real_dev = NULL;
508513
}
509514
}
510-
spin_unlock_bh(&bond->ipsec_lock);
511515
out:
512-
rcu_read_unlock();
516+
mutex_unlock(&bond->ipsec_lock);
513517
}
514518

515519
/**
@@ -520,6 +524,7 @@ static void bond_ipsec_del_sa(struct xfrm_state *xs)
520524
{
521525
struct net_device *bond_dev = xs->xso.dev;
522526
struct net_device *real_dev;
527+
netdevice_tracker tracker;
523528
struct bond_ipsec *ipsec;
524529
struct bonding *bond;
525530
struct slave *slave;
@@ -530,14 +535,16 @@ static void bond_ipsec_del_sa(struct xfrm_state *xs)
530535
rcu_read_lock();
531536
bond = netdev_priv(bond_dev);
532537
slave = rcu_dereference(bond->curr_active_slave);
538+
real_dev = slave ? slave->dev : NULL;
539+
netdev_hold(real_dev, &tracker, GFP_ATOMIC);
540+
rcu_read_unlock();
533541

534542
if (!slave)
535543
goto out;
536544

537545
if (!xs->xso.real_dev)
538546
goto out;
539547

540-
real_dev = slave->dev;
541548
WARN_ON(xs->xso.real_dev != real_dev);
542549

543550
if (!real_dev->xfrmdev_ops ||
@@ -549,16 +556,16 @@ static void bond_ipsec_del_sa(struct xfrm_state *xs)
549556

550557
real_dev->xfrmdev_ops->xdo_dev_state_delete(xs);
551558
out:
552-
spin_lock_bh(&bond->ipsec_lock);
559+
netdev_put(real_dev, &tracker);
560+
mutex_lock(&bond->ipsec_lock);
553561
list_for_each_entry(ipsec, &bond->ipsec_list, list) {
554562
if (ipsec->xs == xs) {
555563
list_del(&ipsec->list);
556564
kfree(ipsec);
557565
break;
558566
}
559567
}
560-
spin_unlock_bh(&bond->ipsec_lock);
561-
rcu_read_unlock();
568+
mutex_unlock(&bond->ipsec_lock);
562569
}
563570

564571
static void bond_ipsec_del_sa_all(struct bonding *bond)
@@ -568,15 +575,12 @@ static void bond_ipsec_del_sa_all(struct bonding *bond)
568575
struct bond_ipsec *ipsec;
569576
struct slave *slave;
570577

571-
rcu_read_lock();
572-
slave = rcu_dereference(bond->curr_active_slave);
573-
if (!slave) {
574-
rcu_read_unlock();
578+
slave = rtnl_dereference(bond->curr_active_slave);
579+
real_dev = slave ? slave->dev : NULL;
580+
if (!real_dev)
575581
return;
576-
}
577582

578-
real_dev = slave->dev;
579-
spin_lock_bh(&bond->ipsec_lock);
583+
mutex_lock(&bond->ipsec_lock);
580584
list_for_each_entry(ipsec, &bond->ipsec_list, list) {
581585
if (!ipsec->xs->xso.real_dev)
582586
continue;
@@ -593,8 +597,7 @@ static void bond_ipsec_del_sa_all(struct bonding *bond)
593597
real_dev->xfrmdev_ops->xdo_dev_state_free(ipsec->xs);
594598
}
595599
}
596-
spin_unlock_bh(&bond->ipsec_lock);
597-
rcu_read_unlock();
600+
mutex_unlock(&bond->ipsec_lock);
598601
}
599602

600603
static void bond_ipsec_free_sa(struct xfrm_state *xs)
@@ -5921,7 +5924,7 @@ void bond_setup(struct net_device *bond_dev)
59215924
/* set up xfrm device ops (only supported in active-backup right now) */
59225925
bond_dev->xfrmdev_ops = &bond_xfrmdev_ops;
59235926
INIT_LIST_HEAD(&bond->ipsec_list);
5924-
spin_lock_init(&bond->ipsec_lock);
5927+
mutex_init(&bond->ipsec_lock);
59255928
#endif /* CONFIG_XFRM_OFFLOAD */
59265929

59275930
/* don't acquire bond device's netif_tx_lock when transmitting */
@@ -5970,6 +5973,10 @@ static void bond_uninit(struct net_device *bond_dev)
59705973
__bond_release_one(bond_dev, slave->dev, true, true);
59715974
netdev_info(bond_dev, "Released all slaves\n");
59725975

5976+
#ifdef CONFIG_XFRM_OFFLOAD
5977+
mutex_destroy(&bond->ipsec_lock);
5978+
#endif /* CONFIG_XFRM_OFFLOAD */
5979+
59735980
bond_set_slave_arr(bond, NULL, NULL);
59745981

59755982
list_del_rcu(&bond->bond_list);

include/net/bonding.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -260,7 +260,7 @@ struct bonding {
260260
#ifdef CONFIG_XFRM_OFFLOAD
261261
struct list_head ipsec_list;
262262
/* protecting ipsec_list */
263-
spinlock_t ipsec_lock;
263+
struct mutex ipsec_lock;
264264
#endif /* CONFIG_XFRM_OFFLOAD */
265265
struct bpf_prog *xdp_prog;
266266
};

0 commit comments

Comments
 (0)