Skip to content

Commit 103b087

Browse files
Waiman-Longhtejun
authored and committed
cgroup/cpuset: Fail if isolated and nohz_full don't leave any housekeeping
Currently the user can set up isolated cpus via cpuset and nohz_full in such a way that leaves no housekeeping CPU (i.e. no CPU that is neither domain isolated nor nohz full). This can be a problem for other subsystems (e.g. the timer wheel imgration). Prevent this configuration by blocking any assignation that would cause the union of domain isolated cpus and nohz_full to covers all CPUs. [longman: Remove isolated_cpus_should_update() and rewrite the checking in update_prstate() and update_parent_effective_cpumask()] Originally-by: Gabriele Monaco <gmonaco@redhat.com> Signed-off-by: Waiman Long <longman@redhat.com> Reviewed-by: Chen Ridong <chenridong@huawei.com> Signed-off-by: Tejun Heo <tj@kernel.org>
1 parent 55939cf commit 103b087

1 file changed

Lines changed: 73 additions & 1 deletion

File tree

kernel/cgroup/cpuset.c

Lines changed: 73 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1393,6 +1393,45 @@ static bool partition_xcpus_del(int old_prs, struct cpuset *parent,
13931393
return isolcpus_updated;
13941394
}
13951395

1396+
/*
1397+
* isolated_cpus_can_update - check for isolated & nohz_full conflicts
1398+
* @add_cpus: cpu mask for cpus that are going to be isolated
1399+
* @del_cpus: cpu mask for cpus that are no longer isolated, can be NULL
1400+
* Return: false if there is conflict, true otherwise
1401+
*
1402+
* If nohz_full is enabled and we have isolated CPUs, their combination must
1403+
* still leave housekeeping CPUs.
1404+
*
1405+
* TBD: Should consider merging this function into
1406+
* prstate_housekeeping_conflict().
1407+
*/
1408+
static bool isolated_cpus_can_update(struct cpumask *add_cpus,
1409+
struct cpumask *del_cpus)
1410+
{
1411+
cpumask_var_t full_hk_cpus;
1412+
int res = true;
1413+
1414+
if (!housekeeping_enabled(HK_TYPE_KERNEL_NOISE))
1415+
return true;
1416+
1417+
if (del_cpus && cpumask_weight_and(del_cpus,
1418+
housekeeping_cpumask(HK_TYPE_KERNEL_NOISE)))
1419+
return true;
1420+
1421+
if (!alloc_cpumask_var(&full_hk_cpus, GFP_KERNEL))
1422+
return false;
1423+
1424+
cpumask_and(full_hk_cpus, housekeeping_cpumask(HK_TYPE_KERNEL_NOISE),
1425+
housekeeping_cpumask(HK_TYPE_DOMAIN));
1426+
cpumask_andnot(full_hk_cpus, full_hk_cpus, isolated_cpus);
1427+
cpumask_and(full_hk_cpus, full_hk_cpus, cpu_active_mask);
1428+
if (!cpumask_weight_andnot(full_hk_cpus, add_cpus))
1429+
res = false;
1430+
1431+
free_cpumask_var(full_hk_cpus);
1432+
return res;
1433+
}
1434+
13961435
static void update_isolation_cpumasks(bool isolcpus_updated)
13971436
{
13981437
int ret;
@@ -1551,6 +1590,9 @@ static int remote_partition_enable(struct cpuset *cs, int new_prs,
15511590
if (!cpumask_intersects(tmp->new_cpus, cpu_active_mask) ||
15521591
cpumask_subset(top_cpuset.effective_cpus, tmp->new_cpus))
15531592
return PERR_INVCPUS;
1593+
if ((new_prs == PRS_ISOLATED) &&
1594+
!isolated_cpus_can_update(tmp->new_cpus, NULL))
1595+
return PERR_HKEEPING;
15541596

15551597
spin_lock_irq(&callback_lock);
15561598
isolcpus_updated = partition_xcpus_add(new_prs, NULL, tmp->new_cpus);
@@ -1650,6 +1692,9 @@ static void remote_cpus_update(struct cpuset *cs, struct cpumask *xcpus,
16501692
else if (cpumask_intersects(tmp->addmask, subpartitions_cpus) ||
16511693
cpumask_subset(top_cpuset.effective_cpus, tmp->addmask))
16521694
cs->prs_err = PERR_NOCPUS;
1695+
else if ((prs == PRS_ISOLATED) &&
1696+
!isolated_cpus_can_update(tmp->addmask, tmp->delmask))
1697+
cs->prs_err = PERR_HKEEPING;
16531698
if (cs->prs_err)
16541699
goto invalidate;
16551700
}
@@ -1750,6 +1795,7 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd,
17501795
int part_error = PERR_NONE; /* Partition error? */
17511796
int isolcpus_updated = 0;
17521797
struct cpumask *xcpus = user_xcpus(cs);
1798+
int parent_prs = parent->partition_root_state;
17531799
bool nocpu;
17541800

17551801
lockdep_assert_held(&cpuset_mutex);
@@ -1813,6 +1859,10 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd,
18131859
if (prstate_housekeeping_conflict(new_prs, xcpus))
18141860
return PERR_HKEEPING;
18151861

1862+
if ((new_prs == PRS_ISOLATED) && (new_prs != parent_prs) &&
1863+
!isolated_cpus_can_update(xcpus, NULL))
1864+
return PERR_HKEEPING;
1865+
18161866
if (tasks_nocpu_error(parent, cs, xcpus))
18171867
return PERR_NOCPUS;
18181868

@@ -1866,6 +1916,7 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd,
18661916
*
18671917
* For invalid partition:
18681918
* delmask = newmask & parent->effective_xcpus
1919+
* The partition may become valid soon.
18691920
*/
18701921
if (is_partition_invalid(cs)) {
18711922
adding = false;
@@ -1880,6 +1931,23 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd,
18801931
deleting = cpumask_and(tmp->delmask, tmp->delmask,
18811932
parent->effective_xcpus);
18821933
}
1934+
1935+
/*
1936+
* TBD: Invalidate a currently valid child root partition may
1937+
* still break isolated_cpus_can_update() rule if parent is an
1938+
* isolated partition.
1939+
*/
1940+
if (is_partition_valid(cs) && (old_prs != parent_prs)) {
1941+
if ((parent_prs == PRS_ROOT) &&
1942+
/* Adding to parent means removing isolated CPUs */
1943+
!isolated_cpus_can_update(tmp->delmask, tmp->addmask))
1944+
part_error = PERR_HKEEPING;
1945+
if ((parent_prs == PRS_ISOLATED) &&
1946+
/* Adding to parent means adding isolated CPUs */
1947+
!isolated_cpus_can_update(tmp->addmask, tmp->delmask))
1948+
part_error = PERR_HKEEPING;
1949+
}
1950+
18831951
/*
18841952
* The new CPUs to be removed from parent's effective CPUs
18851953
* must be present.
@@ -2994,7 +3062,11 @@ static int update_prstate(struct cpuset *cs, int new_prs)
29943062
* A change in load balance state only, no change in cpumasks.
29953063
* Need to update isolated_cpus.
29963064
*/
2997-
isolcpus_updated = true;
3065+
if ((new_prs == PRS_ISOLATED) &&
3066+
!isolated_cpus_can_update(cs->effective_xcpus, NULL))
3067+
err = PERR_HKEEPING;
3068+
else
3069+
isolcpus_updated = true;
29983070
} else {
29993071
/*
30003072
* Switching back to member is always allowed even if it

0 commit comments

Comments
 (0)