Skip to content

Commit 16a23f3

Browse files
committed
Merge branches 'pm-em' and 'pm-cpuidle'
Merge Energy Model support updates and cpuidle updates for 5.19-rc1: - Update the Energy Model support code to allow the Energy Model to be artificial, which means that the power values may not be on a uniform scale with other devices providing power information, and update the cpufreq_cooling and devfreq_cooling thermal drivers to support artificial Energy Models (Lukasz Luba). - Make DTPM check the Energy Model type (Lukasz Luba). - Fix policy counter decrementation in cpufreq if Energy Model is in use (Pierre Gondois). - Add AlderLake processor support to the intel_idle driver (Zhang Rui). - Fix regression leading to no genpd governor in the PSCI cpuidle driver and fix the riscv-sbi cpuidle driver to allow a genpd governor to be used (Ulf Hansson). * pm-em: PM: EM: Decrement policy counter powercap: DTPM: Check for Energy Model type thermal: cooling: Check Energy Model type in cpufreq_cooling and devfreq_cooling Documentation: EM: Add artificial EM registration description PM: EM: Remove old debugfs files and print all 'flags' PM: EM: Change the order of arguments in the .active_power() callback PM: EM: Use the new .get_cost() callback while registering EM PM: EM: Add artificial EM flag PM: EM: Add .get_cost() callback * pm-cpuidle: cpuidle: riscv-sbi: Fix code to allow a genpd governor to be used cpuidle: psci: Fix regression leading to no genpd governor intel_idle: Add AlderLake support
3 parents 95f2ce5 + c9d8923 + a6653fb commit 16a23f3

12 files changed

Lines changed: 240 additions & 51 deletions

File tree

Documentation/power/energy-model.rst

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,26 @@ allows a platform to register EM power values which are reflecting total power
123123
(static + dynamic). These power values might be coming directly from
124124
experiments and measurements.
125125

126+
Registration of 'artificial' EM
127+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
128+
129+
There is an option to provide a custom callback for drivers missing detailed
130+
knowledge about power value for each performance state. The callback
131+
.get_cost() is optional and provides the 'cost' values used by the EAS.
132+
This is useful for platforms that only provide information on relative
133+
efficiency between CPU types, where one could use the information to
134+
create an abstract power model. But even an abstract power model can
135+
sometimes be hard to fit in, given the input power value size restrictions.
136+
The .get_cost() allows to provide the 'cost' values which reflect the
137+
efficiency of the CPUs. This would allow to provide EAS information which
138+
has different relation than what would be forced by the EM internal
139+
formulas calculating 'cost' values. To register an EM for such platform, the
140+
driver must set the flag 'milliwatts' to 0, provide .get_power() callback
141+
and provide .get_cost() callback. The EM framework would handle such platform
142+
properly during registration. A flag EM_PERF_DOMAIN_ARTIFICIAL is set for such
143+
platform. Special care should be taken by other frameworks which are using EM
144+
to test and treat this flag properly.
145+
126146
Registration of 'simple' EM
127147
~~~~~~~~~~~~~~~~~~~~~~~~~~~
128148

@@ -181,8 +201,8 @@ EM framework::
181201

182202
-> drivers/cpufreq/foo_cpufreq.c
183203

184-
01 static int est_power(unsigned long *mW, unsigned long *KHz,
185-
02 struct device *dev)
204+
01 static int est_power(struct device *dev, unsigned long *mW,
205+
02 unsigned long *KHz)
186206
03 {
187207
04 long freq, power;
188208
05

drivers/cpufreq/mediatek-cpufreq-hw.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,8 +51,8 @@ static const u16 cpufreq_mtk_offsets[REG_ARRAY_SIZE] = {
5151
};
5252

5353
static int __maybe_unused
54-
mtk_cpufreq_get_cpu_power(unsigned long *mW,
55-
unsigned long *KHz, struct device *cpu_dev)
54+
mtk_cpufreq_get_cpu_power(struct device *cpu_dev, unsigned long *mW,
55+
unsigned long *KHz)
5656
{
5757
struct mtk_cpufreq_data *data;
5858
struct cpufreq_policy *policy;

drivers/cpufreq/scmi-cpufreq.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -96,8 +96,8 @@ scmi_get_sharing_cpus(struct device *cpu_dev, struct cpumask *cpumask)
9696
}
9797

9898
static int __maybe_unused
99-
scmi_get_cpu_power(unsigned long *power, unsigned long *KHz,
100-
struct device *cpu_dev)
99+
scmi_get_cpu_power(struct device *cpu_dev, unsigned long *power,
100+
unsigned long *KHz)
101101
{
102102
unsigned long Hz;
103103
int ret, domain;

drivers/cpuidle/cpuidle-psci-domain.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ static int psci_pd_init(struct device_node *np, bool use_osi)
5252
struct generic_pm_domain *pd;
5353
struct psci_pd_provider *pd_provider;
5454
struct dev_power_governor *pd_gov;
55-
int ret = -ENOMEM, state_count = 0;
55+
int ret = -ENOMEM;
5656

5757
pd = dt_idle_pd_alloc(np, psci_dt_parse_state_node);
5858
if (!pd)
@@ -71,7 +71,7 @@ static int psci_pd_init(struct device_node *np, bool use_osi)
7171
pd->flags |= GENPD_FLAG_ALWAYS_ON;
7272

7373
/* Use governor for CPU PM domains if it has some states to manage. */
74-
pd_gov = state_count > 0 ? &pm_domain_cpu_gov : NULL;
74+
pd_gov = pd->states ? &pm_domain_cpu_gov : NULL;
7575

7676
ret = pm_genpd_init(pd, pd_gov, false);
7777
if (ret)

drivers/cpuidle/cpuidle-riscv-sbi.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -414,7 +414,7 @@ static int sbi_pd_init(struct device_node *np)
414414
struct generic_pm_domain *pd;
415415
struct sbi_pd_provider *pd_provider;
416416
struct dev_power_governor *pd_gov;
417-
int ret = -ENOMEM, state_count = 0;
417+
int ret = -ENOMEM;
418418

419419
pd = dt_idle_pd_alloc(np, sbi_dt_parse_state_node);
420420
if (!pd)
@@ -433,7 +433,7 @@ static int sbi_pd_init(struct device_node *np)
433433
pd->flags |= GENPD_FLAG_ALWAYS_ON;
434434

435435
/* Use governor for CPU PM domains if it has some states to manage. */
436-
pd_gov = state_count > 0 ? &pm_domain_cpu_gov : NULL;
436+
pd_gov = pd->states ? &pm_domain_cpu_gov : NULL;
437437

438438
ret = pm_genpd_init(pd, pd_gov, false);
439439
if (ret)

drivers/idle/intel_idle.c

Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -764,6 +764,106 @@ static struct cpuidle_state icx_cstates[] __initdata = {
764764
.enter = NULL }
765765
};
766766

767+
/*
768+
* On AlderLake C1 has to be disabled if C1E is enabled, and vice versa.
769+
* C1E is enabled only if "C1E promotion" bit is set in MSR_IA32_POWER_CTL.
770+
* But in this case there is effectively no C1, because C1 requests are
771+
* promoted to C1E. If the "C1E promotion" bit is cleared, then both C1
772+
* and C1E requests end up with C1, so there is effectively no C1E.
773+
*
774+
* By default we enable C1E and disable C1 by marking it with
775+
* 'CPUIDLE_FLAG_UNUSABLE'.
776+
*/
777+
static struct cpuidle_state adl_cstates[] __initdata = {
778+
{
779+
.name = "C1",
780+
.desc = "MWAIT 0x00",
781+
.flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_UNUSABLE,
782+
.exit_latency = 1,
783+
.target_residency = 1,
784+
.enter = &intel_idle,
785+
.enter_s2idle = intel_idle_s2idle, },
786+
{
787+
.name = "C1E",
788+
.desc = "MWAIT 0x01",
789+
.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
790+
.exit_latency = 2,
791+
.target_residency = 4,
792+
.enter = &intel_idle,
793+
.enter_s2idle = intel_idle_s2idle, },
794+
{
795+
.name = "C6",
796+
.desc = "MWAIT 0x20",
797+
.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
798+
.exit_latency = 220,
799+
.target_residency = 600,
800+
.enter = &intel_idle,
801+
.enter_s2idle = intel_idle_s2idle, },
802+
{
803+
.name = "C8",
804+
.desc = "MWAIT 0x40",
805+
.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
806+
.exit_latency = 280,
807+
.target_residency = 800,
808+
.enter = &intel_idle,
809+
.enter_s2idle = intel_idle_s2idle, },
810+
{
811+
.name = "C10",
812+
.desc = "MWAIT 0x60",
813+
.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
814+
.exit_latency = 680,
815+
.target_residency = 2000,
816+
.enter = &intel_idle,
817+
.enter_s2idle = intel_idle_s2idle, },
818+
{
819+
.enter = NULL }
820+
};
821+
822+
static struct cpuidle_state adl_l_cstates[] __initdata = {
823+
{
824+
.name = "C1",
825+
.desc = "MWAIT 0x00",
826+
.flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_UNUSABLE,
827+
.exit_latency = 1,
828+
.target_residency = 1,
829+
.enter = &intel_idle,
830+
.enter_s2idle = intel_idle_s2idle, },
831+
{
832+
.name = "C1E",
833+
.desc = "MWAIT 0x01",
834+
.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
835+
.exit_latency = 2,
836+
.target_residency = 4,
837+
.enter = &intel_idle,
838+
.enter_s2idle = intel_idle_s2idle, },
839+
{
840+
.name = "C6",
841+
.desc = "MWAIT 0x20",
842+
.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
843+
.exit_latency = 170,
844+
.target_residency = 500,
845+
.enter = &intel_idle,
846+
.enter_s2idle = intel_idle_s2idle, },
847+
{
848+
.name = "C8",
849+
.desc = "MWAIT 0x40",
850+
.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
851+
.exit_latency = 200,
852+
.target_residency = 600,
853+
.enter = &intel_idle,
854+
.enter_s2idle = intel_idle_s2idle, },
855+
{
856+
.name = "C10",
857+
.desc = "MWAIT 0x60",
858+
.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
859+
.exit_latency = 230,
860+
.target_residency = 700,
861+
.enter = &intel_idle,
862+
.enter_s2idle = intel_idle_s2idle, },
863+
{
864+
.enter = NULL }
865+
};
866+
767867
/*
768868
* On Sapphire Rapids Xeon C1 has to be disabled if C1E is enabled, and vice
769869
* versa. On SPR C1E is enabled only if "C1E promotion" bit is set in
@@ -1147,6 +1247,14 @@ static const struct idle_cpu idle_cpu_icx __initconst = {
11471247
.use_acpi = true,
11481248
};
11491249

1250+
static const struct idle_cpu idle_cpu_adl __initconst = {
1251+
.state_table = adl_cstates,
1252+
};
1253+
1254+
static const struct idle_cpu idle_cpu_adl_l __initconst = {
1255+
.state_table = adl_l_cstates,
1256+
};
1257+
11501258
static const struct idle_cpu idle_cpu_spr __initconst = {
11511259
.state_table = spr_cstates,
11521260
.disable_promotion_to_c1e = true,
@@ -1215,6 +1323,8 @@ static const struct x86_cpu_id intel_idle_ids[] __initconst = {
12151323
X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, &idle_cpu_skx),
12161324
X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &idle_cpu_icx),
12171325
X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, &idle_cpu_icx),
1326+
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, &idle_cpu_adl),
1327+
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &idle_cpu_adl_l),
12181328
X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &idle_cpu_spr),
12191329
X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &idle_cpu_knl),
12201330
X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &idle_cpu_knl),
@@ -1573,6 +1683,25 @@ static void __init skx_idle_state_table_update(void)
15731683
}
15741684
}
15751685

1686+
/**
1687+
* adl_idle_state_table_update - Adjust AlderLake idle states table.
1688+
*/
1689+
static void __init adl_idle_state_table_update(void)
1690+
{
1691+
/* Check if user prefers C1 over C1E. */
1692+
if (preferred_states_mask & BIT(1) && !(preferred_states_mask & BIT(2))) {
1693+
cpuidle_state_table[0].flags &= ~CPUIDLE_FLAG_UNUSABLE;
1694+
cpuidle_state_table[1].flags |= CPUIDLE_FLAG_UNUSABLE;
1695+
1696+
/* Disable C1E by clearing the "C1E promotion" bit. */
1697+
c1e_promotion = C1E_PROMOTION_DISABLE;
1698+
return;
1699+
}
1700+
1701+
/* Make sure C1E is enabled by default */
1702+
c1e_promotion = C1E_PROMOTION_ENABLE;
1703+
}
1704+
15761705
/**
15771706
* spr_idle_state_table_update - Adjust Sapphire Rapids idle states table.
15781707
*/
@@ -1642,6 +1771,10 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
16421771
case INTEL_FAM6_SAPPHIRERAPIDS_X:
16431772
spr_idle_state_table_update();
16441773
break;
1774+
case INTEL_FAM6_ALDERLAKE:
1775+
case INTEL_FAM6_ALDERLAKE_L:
1776+
adl_idle_state_table_update();
1777+
break;
16451778
}
16461779

16471780
for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) {

drivers/opp/of.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1448,7 +1448,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_get_of_node);
14481448
* Returns 0 on success or a proper -EINVAL value in case of error.
14491449
*/
14501450
static int __maybe_unused
1451-
_get_dt_power(unsigned long *mW, unsigned long *kHz, struct device *dev)
1451+
_get_dt_power(struct device *dev, unsigned long *mW, unsigned long *kHz)
14521452
{
14531453
struct dev_pm_opp *opp;
14541454
unsigned long opp_freq, opp_power;
@@ -1482,8 +1482,8 @@ _get_dt_power(unsigned long *mW, unsigned long *kHz, struct device *dev)
14821482
* Returns -EINVAL if the power calculation failed because of missing
14831483
* parameters, 0 otherwise.
14841484
*/
1485-
static int __maybe_unused _get_power(unsigned long *mW, unsigned long *kHz,
1486-
struct device *dev)
1485+
static int __maybe_unused _get_power(struct device *dev, unsigned long *mW,
1486+
unsigned long *kHz)
14871487
{
14881488
struct dev_pm_opp *opp;
14891489
struct device_node *np;

drivers/powercap/dtpm_cpu.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -211,7 +211,7 @@ static int __dtpm_cpu_setup(int cpu, struct dtpm *parent)
211211
return 0;
212212

213213
pd = em_cpu_get(cpu);
214-
if (!pd)
214+
if (!pd || em_is_artificial(pd))
215215
return -EINVAL;
216216

217217
dtpm_cpu = kzalloc(sizeof(*dtpm_cpu), GFP_KERNEL);

drivers/thermal/cpufreq_cooling.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -328,7 +328,7 @@ static inline bool em_is_sane(struct cpufreq_cooling_device *cpufreq_cdev,
328328
struct cpufreq_policy *policy;
329329
unsigned int nr_levels;
330330

331-
if (!em)
331+
if (!em || em_is_artificial(em))
332332
return false;
333333

334334
policy = cpufreq_cdev->policy;

drivers/thermal/devfreq_cooling.c

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -358,6 +358,7 @@ of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df,
358358
struct thermal_cooling_device *cdev;
359359
struct device *dev = df->dev.parent;
360360
struct devfreq_cooling_device *dfc;
361+
struct em_perf_domain *em;
361362
char *name;
362363
int err, num_opps;
363364

@@ -367,8 +368,9 @@ of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df,
367368

368369
dfc->devfreq = df;
369370

370-
dfc->em_pd = em_pd_get(dev);
371-
if (dfc->em_pd) {
371+
em = em_pd_get(dev);
372+
if (em && !em_is_artificial(em)) {
373+
dfc->em_pd = em;
372374
devfreq_cooling_ops.get_requested_power =
373375
devfreq_cooling_get_requested_power;
374376
devfreq_cooling_ops.state2power = devfreq_cooling_state2power;
@@ -379,7 +381,7 @@ of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df,
379381
num_opps = em_pd_nr_perf_states(dfc->em_pd);
380382
} else {
381383
/* Backward compatibility for drivers which do not use IPA */
382-
dev_dbg(dev, "missing EM for cooling device\n");
384+
dev_dbg(dev, "missing proper EM for cooling device\n");
383385

384386
num_opps = dev_pm_opp_get_opp_count(dev);
385387

0 commit comments

Comments
 (0)