Skip to content

Commit bf2ba43

Browse files
Luis Henriques authored and idryomov committed
ceph: reduce contention in ceph_check_delayed_caps()
Function ceph_check_delayed_caps() is called from the mdsc->delayed_work workqueue and it can be kept looping for quite some time if caps keep being added back to the mdsc->cap_delay_list. This may result in the watchdog tainting the kernel with the softlockup flag.

This patch breaks this loop if the caps have been added recently (i.e. during the loop execution). Any new caps added to the list will be handled in the next run.

Also, allow schedule_delayed() callers to explicitly set the delay value instead of defaulting to 5s, so we can ensure that it runs soon afterward if it looks like there is more work.

Cc: stable@vger.kernel.org
URL: https://tracker.ceph.com/issues/46284
Signed-off-by: Luis Henriques <lhenriques@suse.de>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
1 parent c500bee commit bf2ba43

3 files changed

Lines changed: 33 additions & 11 deletions

File tree

fs/ceph/caps.c

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4150,18 +4150,31 @@ void ceph_handle_caps(struct ceph_mds_session *session,
41504150

41514151
/*
41524152
* Delayed work handler to process end of delayed cap release LRU list.
4153+
*
4154+
* If new caps are added to the list while processing it, these won't get
4155+
* processed in this run. In this case, the ci->i_hold_caps_max will be
4156+
* returned so that the work can be scheduled accordingly.
41534157
*/
4154-
void ceph_check_delayed_caps(struct ceph_mds_client *mdsc)
4158+
unsigned long ceph_check_delayed_caps(struct ceph_mds_client *mdsc)
41554159
{
41564160
struct inode *inode;
41574161
struct ceph_inode_info *ci;
4162+
struct ceph_mount_options *opt = mdsc->fsc->mount_options;
4163+
unsigned long delay_max = opt->caps_wanted_delay_max * HZ;
4164+
unsigned long loop_start = jiffies;
4165+
unsigned long delay = 0;
41584166

41594167
dout("check_delayed_caps\n");
41604168
spin_lock(&mdsc->cap_delay_lock);
41614169
while (!list_empty(&mdsc->cap_delay_list)) {
41624170
ci = list_first_entry(&mdsc->cap_delay_list,
41634171
struct ceph_inode_info,
41644172
i_cap_delay_list);
4173+
if (time_before(loop_start, ci->i_hold_caps_max - delay_max)) {
4174+
dout("%s caps added recently. Exiting loop", __func__);
4175+
delay = ci->i_hold_caps_max;
4176+
break;
4177+
}
41654178
if ((ci->i_ceph_flags & CEPH_I_FLUSH) == 0 &&
41664179
time_before(jiffies, ci->i_hold_caps_max))
41674180
break;
@@ -4177,6 +4190,8 @@ void ceph_check_delayed_caps(struct ceph_mds_client *mdsc)
41774190
}
41784191
}
41794192
spin_unlock(&mdsc->cap_delay_lock);
4193+
4194+
return delay;
41804195
}
41814196

41824197
/*

fs/ceph/mds_client.c

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4490,22 +4490,29 @@ void inc_session_sequence(struct ceph_mds_session *s)
44904490
}
44914491

44924492
/*
4493-
* delayed work -- periodically trim expired leases, renew caps with mds
4493+
* delayed work -- periodically trim expired leases, renew caps with mds. If
4494+
* the @delay parameter is set to 0 or if it's more than 5 secs, the default
4495+
* workqueue delay value of 5 secs will be used.
44944496
*/
4495-
static void schedule_delayed(struct ceph_mds_client *mdsc)
4497+
static void schedule_delayed(struct ceph_mds_client *mdsc, unsigned long delay)
44964498
{
4497-
int delay = 5;
4498-
unsigned hz = round_jiffies_relative(HZ * delay);
4499-
schedule_delayed_work(&mdsc->delayed_work, hz);
4499+
unsigned long max_delay = HZ * 5;
4500+
4501+
/* 5 secs default delay */
4502+
if (!delay || (delay > max_delay))
4503+
delay = max_delay;
4504+
schedule_delayed_work(&mdsc->delayed_work,
4505+
round_jiffies_relative(delay));
45004506
}
45014507

45024508
static void delayed_work(struct work_struct *work)
45034509
{
4504-
int i;
45054510
struct ceph_mds_client *mdsc =
45064511
container_of(work, struct ceph_mds_client, delayed_work.work);
4512+
unsigned long delay;
45074513
int renew_interval;
45084514
int renew_caps;
4515+
int i;
45094516

45104517
dout("mdsc delayed_work\n");
45114518

@@ -4545,15 +4552,15 @@ static void delayed_work(struct work_struct *work)
45454552
}
45464553
mutex_unlock(&mdsc->mutex);
45474554

4548-
ceph_check_delayed_caps(mdsc);
4555+
delay = ceph_check_delayed_caps(mdsc);
45494556

45504557
ceph_queue_cap_reclaim_work(mdsc);
45514558

45524559
ceph_trim_snapid_map(mdsc);
45534560

45544561
maybe_recover_session(mdsc);
45554562

4556-
schedule_delayed(mdsc);
4563+
schedule_delayed(mdsc, delay);
45574564
}
45584565

45594566
int ceph_mdsc_init(struct ceph_fs_client *fsc)
@@ -5030,7 +5037,7 @@ void ceph_mdsc_handle_mdsmap(struct ceph_mds_client *mdsc, struct ceph_msg *msg)
50305037
mdsc->mdsmap->m_epoch);
50315038

50325039
mutex_unlock(&mdsc->mutex);
5033-
schedule_delayed(mdsc);
5040+
schedule_delayed(mdsc, 0);
50345041
return;
50355042

50365043
bad_unlock:

fs/ceph/super.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1167,7 +1167,7 @@ extern void ceph_flush_snaps(struct ceph_inode_info *ci,
11671167
extern bool __ceph_should_report_size(struct ceph_inode_info *ci);
11681168
extern void ceph_check_caps(struct ceph_inode_info *ci, int flags,
11691169
struct ceph_mds_session *session);
1170-
extern void ceph_check_delayed_caps(struct ceph_mds_client *mdsc);
1170+
extern unsigned long ceph_check_delayed_caps(struct ceph_mds_client *mdsc);
11711171
extern void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc);
11721172
extern int ceph_drop_caps_for_unlink(struct inode *inode);
11731173
extern int ceph_encode_inode_release(void **p, struct inode *inode,

0 commit comments

Comments
 (0)