Skip to content

Commit dfad78e

Browse files
committed
Merge branches 'pm-sleep', 'pm-domains' and 'pm-docs'
Merge changes related to system sleep, PM domains changes and power management documentation changes for 5.18-rc1: - Fix load_image_and_restore() error path (Ye Bin). - Fix typos in comments in the system wakeup handling code (Tom Rix). - Clean up non-kernel-doc comments in hibernation code (Jiapeng Chong). - Fix __setup handler error handling in system-wide suspend and hibernation core code (Randy Dunlap). - Add device name to suspend_report_result() (Youngjin Jang). - Make virtual guests honour ACPI S4 hardware signature by default (David Woodhouse). - Block power off of a parent PM domain unless child is in deepest state (Ulf Hansson). - Use dev_err_probe() to simplify error handling for generic PM domains (Ahmad Fatoum). - Fix sleep-in-atomic bug caused by genpd_debug_remove() (Shawn Guo). - Document Intel uncore frequency scaling (Srinivas Pandruvada). * pm-sleep: PM: hibernate: Honour ACPI hardware signature by default for virtual guests PM: sleep: Add device name to suspend_report_result() PM: suspend: fix return value of __setup handler PM: hibernate: fix __setup handler error handling PM: hibernate: Clean up non-kernel-doc comments PM: sleep: wakeup: Fix typos in comments PM: hibernate: fix load_image_and_restore() error path * pm-domains: PM: domains: Fix sleep-in-atomic bug caused by genpd_debug_remove() PM: domains: use dev_err_probe() to simplify error handling PM: domains: Prevent power off for parent unless child is in deepest state * pm-docs: Documentation: admin-guide: pm: Document uncore frequency scaling
4 parents 86c17c4 + f6c46b1 + f6bfe8b + a644161 commit dfad78e

16 files changed

Lines changed: 146 additions & 59 deletions

File tree

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
.. SPDX-License-Identifier: GPL-2.0
2+
.. include:: <isonum.txt>
3+
4+
==============================
5+
Intel Uncore Frequency Scaling
6+
==============================
7+
8+
:Copyright: |copy| 2022 Intel Corporation
9+
10+
:Author: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
11+
12+
Introduction
13+
------------
14+
15+
The uncore can consume a significant amount of power in Intel's Xeon servers based
16+
on the workload characteristics. To optimize the total power and improve overall
17+
performance, SoCs have internal algorithms for scaling uncore frequency. These
18+
algorithms monitor workload usage of uncore and set a desirable frequency.
19+
20+
It is possible that users have different expectations of uncore performance and
21+
want to have control over it. The objective is similar to allowing users to set
22+
the scaling min/max frequencies via cpufreq sysfs to improve CPU performance.
23+
Users may have some latency sensitive workloads where they do not want any
24+
change to uncore frequency. Also, users may have workloads which require
25+
different core and uncore performance at distinct phases and they may want to
26+
use both cpufreq and the uncore scaling interface to distribute power and
27+
improve overall performance.
28+
29+
Sysfs Interface
30+
---------------
31+
32+
To control uncore frequency, a sysfs interface is provided in the directory:
33+
`/sys/devices/system/cpu/intel_uncore_frequency/`.
34+
35+
There is one directory for each package and die combination as the scope of
36+
uncore scaling control is per die in multiple die/package SoCs or per
37+
package for single die per package SoCs. The name represents the
38+
scope of control. For example: 'package_00_die_00' is for package id 0 and
39+
die 0.
40+
41+
Each package_*_die_* contains the following attributes:
42+
43+
``initial_max_freq_khz``
44+
Out of reset, this attribute represents the maximum possible frequency.
45+
This is a read-only attribute. If users adjust max_freq_khz,
46+
they can always go back to maximum using the value from this attribute.
47+
48+
``initial_min_freq_khz``
49+
Out of reset, this attribute represents the minimum possible frequency.
50+
This is a read-only attribute. If users adjust min_freq_khz,
51+
they can always go back to minimum using the value from this attribute.
52+
53+
``max_freq_khz``
54+
This attribute is used to set the maximum uncore frequency.
55+
56+
``min_freq_khz``
57+
This attribute is used to set the minimum uncore frequency.
58+
59+
``current_freq_khz``
60+
This attribute is used to get the current uncore frequency.

Documentation/admin-guide/pm/working-state.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,3 +15,4 @@ Working-State Power Management
1515
cpufreq_drivers
1616
intel_epb
1717
intel-speed-select
18+
intel_uncore_frequency_scaling

arch/x86/kernel/acpi/sleep.c

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include <asm/desc.h>
1616
#include <asm/cacheflush.h>
1717
#include <asm/realmode.h>
18+
#include <asm/hypervisor.h>
1819

1920
#include <linux/ftrace.h>
2021
#include "../../realmode/rm/wakeup.h"
@@ -140,9 +141,9 @@ static int __init acpi_sleep_setup(char *str)
140141
acpi_realmode_flags |= 4;
141142
#ifdef CONFIG_HIBERNATION
142143
if (strncmp(str, "s4_hwsig", 8) == 0)
143-
acpi_check_s4_hw_signature(1);
144+
acpi_check_s4_hw_signature = 1;
144145
if (strncmp(str, "s4_nohwsig", 10) == 0)
145-
acpi_check_s4_hw_signature(0);
146+
acpi_check_s4_hw_signature = 0;
146147
#endif
147148
if (strncmp(str, "nonvs", 5) == 0)
148149
acpi_nvs_nosave();
@@ -160,3 +161,21 @@ static int __init acpi_sleep_setup(char *str)
160161
}
161162

162163
__setup("acpi_sleep=", acpi_sleep_setup);
164+
165+
#if defined(CONFIG_HIBERNATION) && defined(CONFIG_HYPERVISOR_GUEST)
166+
static int __init init_s4_sigcheck(void)
167+
{
168+
/*
169+
* If running on a hypervisor, honour the ACPI specification
170+
* by default and trigger a clean reboot when the hardware
171+
* signature in FACS is changed after hibernation.
172+
*/
173+
if (acpi_check_s4_hw_signature == -1 &&
174+
!hypervisor_is_type(X86_HYPER_NATIVE))
175+
acpi_check_s4_hw_signature = 1;
176+
177+
return 0;
178+
}
179+
/* This must happen before acpi_init() which is a subsys initcall */
180+
arch_initcall(init_s4_sigcheck);
181+
#endif

drivers/acpi/sleep.c

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -869,12 +869,7 @@ static inline void acpi_sleep_syscore_init(void) {}
869869
#ifdef CONFIG_HIBERNATION
870870
static unsigned long s4_hardware_signature;
871871
static struct acpi_table_facs *facs;
872-
static int sigcheck = -1; /* Default behaviour is just to warn */
873-
874-
void __init acpi_check_s4_hw_signature(int check)
875-
{
876-
sigcheck = check;
877-
}
872+
int acpi_check_s4_hw_signature = -1; /* Default behaviour is just to warn */
878873

879874
static int acpi_hibernation_begin(pm_message_t stage)
880875
{
@@ -999,7 +994,7 @@ static void acpi_sleep_hibernate_setup(void)
999994
hibernation_set_ops(old_suspend_ordering ?
1000995
&acpi_hibernation_ops_old : &acpi_hibernation_ops);
1001996
sleep_states[ACPI_STATE_S4] = 1;
1002-
if (!sigcheck)
997+
if (!acpi_check_s4_hw_signature)
1003998
return;
1004999

10051000
acpi_get_table(ACPI_SIG_FACS, 1, (struct acpi_table_header **)&facs);
@@ -1011,7 +1006,7 @@ static void acpi_sleep_hibernate_setup(void)
10111006
*/
10121007
s4_hardware_signature = facs->hardware_signature;
10131008

1014-
if (sigcheck > 0) {
1009+
if (acpi_check_s4_hw_signature > 0) {
10151010
/*
10161011
* If we're actually obeying the ACPI specification
10171012
* then the signature is written out as part of the

drivers/base/power/domain.c

Lines changed: 26 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -636,6 +636,18 @@ static int genpd_power_off(struct generic_pm_domain *genpd, bool one_dev_on,
636636
atomic_read(&genpd->sd_count) > 0)
637637
return -EBUSY;
638638

639+
/*
640+
* The children must be in their deepest (powered-off) states to allow
641+
* the parent to be powered off. Note that there's no need for
642+
* additional locking, as powering on a child requires the parent's
643+
* lock to be acquired first.
644+
*/
645+
list_for_each_entry(link, &genpd->parent_links, parent_node) {
646+
struct generic_pm_domain *child = link->child;
647+
if (child->state_idx < child->state_count - 1)
648+
return -EBUSY;
649+
}
650+
639651
list_for_each_entry(pdd, &genpd->dev_list, list_node) {
640652
enum pm_qos_flags_status stat;
641653

@@ -1073,6 +1085,13 @@ static void genpd_sync_power_off(struct generic_pm_domain *genpd, bool use_lock,
10731085
|| atomic_read(&genpd->sd_count) > 0)
10741086
return;
10751087

1088+
/* Check that the children are in their deepest (powered-off) state. */
1089+
list_for_each_entry(link, &genpd->parent_links, parent_node) {
1090+
struct generic_pm_domain *child = link->child;
1091+
if (child->state_idx < child->state_count - 1)
1092+
return;
1093+
}
1094+
10761095
/* Choose the deepest state when suspending */
10771096
genpd->state_idx = genpd->state_count - 1;
10781097
if (_genpd_power_off(genpd, false))
@@ -2058,9 +2077,9 @@ static int genpd_remove(struct generic_pm_domain *genpd)
20582077
kfree(link);
20592078
}
20602079

2061-
genpd_debug_remove(genpd);
20622080
list_del(&genpd->gpd_list_node);
20632081
genpd_unlock(genpd);
2082+
genpd_debug_remove(genpd);
20642083
cancel_work_sync(&genpd->power_off_work);
20652084
if (genpd_is_cpu_domain(genpd))
20662085
free_cpumask_var(genpd->cpus);
@@ -2248,12 +2267,8 @@ int of_genpd_add_provider_simple(struct device_node *np,
22482267
/* Parse genpd OPP table */
22492268
if (genpd->set_performance_state) {
22502269
ret = dev_pm_opp_of_add_table(&genpd->dev);
2251-
if (ret) {
2252-
if (ret != -EPROBE_DEFER)
2253-
dev_err(&genpd->dev, "Failed to add OPP table: %d\n",
2254-
ret);
2255-
return ret;
2256-
}
2270+
if (ret)
2271+
return dev_err_probe(&genpd->dev, ret, "Failed to add OPP table\n");
22572272

22582273
/*
22592274
* Save table for faster processing while setting performance
@@ -2312,9 +2327,8 @@ int of_genpd_add_provider_onecell(struct device_node *np,
23122327
if (genpd->set_performance_state) {
23132328
ret = dev_pm_opp_of_add_table_indexed(&genpd->dev, i);
23142329
if (ret) {
2315-
if (ret != -EPROBE_DEFER)
2316-
dev_err(&genpd->dev, "Failed to add OPP table for index %d: %d\n",
2317-
i, ret);
2330+
dev_err_probe(&genpd->dev, ret,
2331+
"Failed to add OPP table for index %d\n", i);
23182332
goto error;
23192333
}
23202334

@@ -2672,12 +2686,8 @@ static int __genpd_dev_pm_attach(struct device *dev, struct device *base_dev,
26722686
ret = genpd_add_device(pd, dev, base_dev);
26732687
mutex_unlock(&gpd_list_lock);
26742688

2675-
if (ret < 0) {
2676-
if (ret != -EPROBE_DEFER)
2677-
dev_err(dev, "failed to add to PM domain %s: %d",
2678-
pd->name, ret);
2679-
return ret;
2680-
}
2689+
if (ret < 0)
2690+
return dev_err_probe(dev, ret, "failed to add to PM domain %s\n", pd->name);
26812691

26822692
dev->pm_domain->detach = genpd_dev_pm_detach;
26832693
dev->pm_domain->sync = genpd_dev_pm_sync;

drivers/base/power/main.c

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -485,7 +485,7 @@ static int dpm_run_callback(pm_callback_t cb, struct device *dev,
485485
trace_device_pm_callback_start(dev, info, state.event);
486486
error = cb(dev);
487487
trace_device_pm_callback_end(dev, error);
488-
suspend_report_result(cb, error);
488+
suspend_report_result(dev, cb, error);
489489

490490
initcall_debug_report(dev, calltime, cb, error);
491491

@@ -1568,7 +1568,7 @@ static int legacy_suspend(struct device *dev, pm_message_t state,
15681568
trace_device_pm_callback_start(dev, info, state.event);
15691569
error = cb(dev, state);
15701570
trace_device_pm_callback_end(dev, error);
1571-
suspend_report_result(cb, error);
1571+
suspend_report_result(dev, cb, error);
15721572

15731573
initcall_debug_report(dev, calltime, cb, error);
15741574

@@ -1855,7 +1855,7 @@ static int device_prepare(struct device *dev, pm_message_t state)
18551855
device_unlock(dev);
18561856

18571857
if (ret < 0) {
1858-
suspend_report_result(callback, ret);
1858+
suspend_report_result(dev, callback, ret);
18591859
pm_runtime_put(dev);
18601860
return ret;
18611861
}
@@ -1960,10 +1960,10 @@ int dpm_suspend_start(pm_message_t state)
19601960
}
19611961
EXPORT_SYMBOL_GPL(dpm_suspend_start);
19621962

1963-
void __suspend_report_result(const char *function, void *fn, int ret)
1963+
void __suspend_report_result(const char *function, struct device *dev, void *fn, int ret)
19641964
{
19651965
if (ret)
1966-
pr_err("%s(): %pS returns %d\n", function, fn, ret);
1966+
dev_err(dev, "%s(): %pS returns %d\n", function, fn, ret);
19671967
}
19681968
EXPORT_SYMBOL_GPL(__suspend_report_result);
19691969

drivers/base/power/wakeirq.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -289,7 +289,7 @@ EXPORT_SYMBOL_GPL(dev_pm_disable_wake_irq);
289289
*
290290
* Enables wakeirq conditionally. We need to enable wake-up interrupt
291291
* lazily on the first rpm_suspend(). This is needed as the consumer device
292-
* starts in RPM_SUSPENDED state, and the the first pm_runtime_get() would
292+
* starts in RPM_SUSPENDED state, and the first pm_runtime_get() would
293293
* otherwise try to disable already disabled wakeirq. The wake-up interrupt
294294
* starts disabled with IRQ_NOAUTOEN set.
295295
*

drivers/base/power/wakeup.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -587,7 +587,7 @@ static bool wakeup_source_not_registered(struct wakeup_source *ws)
587587
* @ws: Wakeup source to handle.
588588
*
589589
* Update the @ws' statistics and, if @ws has just been activated, notify the PM
590-
* core of the event by incrementing the counter of of wakeup events being
590+
* core of the event by incrementing the counter of the wakeup events being
591591
* processed.
592592
*/
593593
static void wakeup_source_activate(struct wakeup_source *ws)
@@ -733,7 +733,7 @@ static void wakeup_source_deactivate(struct wakeup_source *ws)
733733

734734
/*
735735
* Increment the counter of registered wakeup events and decrement the
736-
* couter of wakeup events in progress simultaneously.
736+
* counter of wakeup events in progress simultaneously.
737737
*/
738738
cec = atomic_add_return(MAX_IN_PROGRESS, &combined_event_count);
739739
trace_wakeup_source_deactivate(ws->name, cec);

drivers/pci/pci-driver.c

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -596,7 +596,7 @@ static int pci_legacy_suspend(struct device *dev, pm_message_t state)
596596
int error;
597597

598598
error = drv->suspend(pci_dev, state);
599-
suspend_report_result(drv->suspend, error);
599+
suspend_report_result(dev, drv->suspend, error);
600600
if (error)
601601
return error;
602602

@@ -775,7 +775,7 @@ static int pci_pm_suspend(struct device *dev)
775775
int error;
776776

777777
error = pm->suspend(dev);
778-
suspend_report_result(pm->suspend, error);
778+
suspend_report_result(dev, pm->suspend, error);
779779
if (error)
780780
return error;
781781

@@ -821,7 +821,7 @@ static int pci_pm_suspend_noirq(struct device *dev)
821821
int error;
822822

823823
error = pm->suspend_noirq(dev);
824-
suspend_report_result(pm->suspend_noirq, error);
824+
suspend_report_result(dev, pm->suspend_noirq, error);
825825
if (error)
826826
return error;
827827

@@ -1010,7 +1010,7 @@ static int pci_pm_freeze(struct device *dev)
10101010
int error;
10111011

10121012
error = pm->freeze(dev);
1013-
suspend_report_result(pm->freeze, error);
1013+
suspend_report_result(dev, pm->freeze, error);
10141014
if (error)
10151015
return error;
10161016
}
@@ -1030,7 +1030,7 @@ static int pci_pm_freeze_noirq(struct device *dev)
10301030
int error;
10311031

10321032
error = pm->freeze_noirq(dev);
1033-
suspend_report_result(pm->freeze_noirq, error);
1033+
suspend_report_result(dev, pm->freeze_noirq, error);
10341034
if (error)
10351035
return error;
10361036
}
@@ -1116,7 +1116,7 @@ static int pci_pm_poweroff(struct device *dev)
11161116
int error;
11171117

11181118
error = pm->poweroff(dev);
1119-
suspend_report_result(pm->poweroff, error);
1119+
suspend_report_result(dev, pm->poweroff, error);
11201120
if (error)
11211121
return error;
11221122
}
@@ -1154,7 +1154,7 @@ static int pci_pm_poweroff_noirq(struct device *dev)
11541154
int error;
11551155

11561156
error = pm->poweroff_noirq(dev);
1157-
suspend_report_result(pm->poweroff_noirq, error);
1157+
suspend_report_result(dev, pm->poweroff_noirq, error);
11581158
if (error)
11591159
return error;
11601160
}

drivers/pnp/driver.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -171,7 +171,7 @@ static int __pnp_bus_suspend(struct device *dev, pm_message_t state)
171171

172172
if (pnp_drv->driver.pm && pnp_drv->driver.pm->suspend) {
173173
error = pnp_drv->driver.pm->suspend(dev);
174-
suspend_report_result(pnp_drv->driver.pm->suspend, error);
174+
suspend_report_result(dev, pnp_drv->driver.pm->suspend, error);
175175
if (error)
176176
return error;
177177
}

0 commit comments

Comments
 (0)