Skip to content

Commit a60a556

Browse files
Sumit Gupta authored and vireshk committed
cpufreq: tegra194: use refclk delta based loop instead of udelay
Use a reference-clock-count-based loop instead of "udelay()" for sampling the counters, to improve the accuracy of the re-generated CPU frequency. "udelay()" internally calls "WFE", which stops the counters and results in a bigger delta between the last set frequency and the value re-generated from the counters. The counter sampling window used in the loop is the minimum number of reference clock cycles which is known to give a stable value of CPU frequency. The change also helps to reduce the sampling window from "500us" to "<50us".

Suggested-by: Antti Miettinen <amiettinen@nvidia.com>
Signed-off-by: Sumit Gupta <sumitg@nvidia.com>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
1 parent 6b121b4 commit a60a556

1 file changed

Lines changed: 55 additions & 17 deletions

File tree

drivers/cpufreq/tegra194-cpufreq.c

Lines changed: 55 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,6 @@
55

66
#include <linux/cpu.h>
77
#include <linux/cpufreq.h>
8-
#include <linux/delay.h>
98
#include <linux/dma-mapping.h>
109
#include <linux/module.h>
1110
#include <linux/of.h>
@@ -21,10 +20,11 @@
2120

2221
#define KHZ 1000
2322
#define REF_CLK_MHZ 408 /* 408 MHz */
24-
#define US_DELAY 500
2523
#define CPUFREQ_TBL_STEP_HZ (50 * KHZ * KHZ)
2624
#define MAX_CNT ~0U
2725

26+
#define MAX_DELTA_KHZ 115200
27+
2828
#define NDIV_MASK 0x1FF
2929

3030
#define CORE_OFFSET(cpu) (cpu * 8)
@@ -68,6 +68,7 @@ struct tegra_cpufreq_soc {
6868
int maxcpus_per_cluster;
6969
unsigned int num_clusters;
7070
phys_addr_t actmon_cntr_base;
71+
u32 refclk_delta_min;
7172
};
7273

7374
struct tegra194_cpufreq_data {
@@ -149,6 +150,8 @@ static void tegra234_read_counters(struct tegra_cpu_ctr *c)
149150
{
150151
struct tegra194_cpufreq_data *data = cpufreq_get_driver_data();
151152
void __iomem *actmon_reg;
153+
u32 delta_refcnt;
154+
int cnt = 0;
152155
u64 val;
153156

154157
actmon_reg = CORE_ACTMON_CNTR_REG(data, data->cpu_data[c->cpu].clusterid,
@@ -157,10 +160,25 @@ static void tegra234_read_counters(struct tegra_cpu_ctr *c)
157160
val = readq(actmon_reg);
158161
c->last_refclk_cnt = upper_32_bits(val);
159162
c->last_coreclk_cnt = lower_32_bits(val);
160-
udelay(US_DELAY);
161-
val = readq(actmon_reg);
162-
c->refclk_cnt = upper_32_bits(val);
163-
c->coreclk_cnt = lower_32_bits(val);
163+
164+
/*
165+
* The sampling window is based on the minimum number of reference
166+
* clock cycles which is known to give a stable value of CPU frequency.
167+
*/
168+
do {
169+
val = readq(actmon_reg);
170+
c->refclk_cnt = upper_32_bits(val);
171+
c->coreclk_cnt = lower_32_bits(val);
172+
if (c->refclk_cnt < c->last_refclk_cnt)
173+
delta_refcnt = c->refclk_cnt + (MAX_CNT - c->last_refclk_cnt);
174+
else
175+
delta_refcnt = c->refclk_cnt - c->last_refclk_cnt;
176+
if (++cnt >= 0xFFFF) {
177+
pr_warn("cpufreq: problem with refclk on cpu:%d, delta_refcnt:%u, cnt:%d\n",
178+
c->cpu, delta_refcnt, cnt);
179+
break;
180+
}
181+
} while (delta_refcnt < data->soc->refclk_delta_min);
164182
}
165183

166184
static struct tegra_cpufreq_ops tegra234_cpufreq_ops = {
@@ -175,13 +193,15 @@ static const struct tegra_cpufreq_soc tegra234_cpufreq_soc = {
175193
.actmon_cntr_base = 0x9000,
176194
.maxcpus_per_cluster = 4,
177195
.num_clusters = 3,
196+
.refclk_delta_min = 16000,
178197
};
179198

180199
static const struct tegra_cpufreq_soc tegra239_cpufreq_soc = {
181200
.ops = &tegra234_cpufreq_ops,
182201
.actmon_cntr_base = 0x4000,
183202
.maxcpus_per_cluster = 8,
184203
.num_clusters = 1,
204+
.refclk_delta_min = 16000,
185205
};
186206

187207
static void tegra194_get_cpu_cluster_id(u32 cpu, u32 *cpuid, u32 *clusterid)
@@ -222,15 +242,33 @@ static inline u32 map_ndiv_to_freq(struct mrq_cpu_ndiv_limits_response
222242

223243
static void tegra194_read_counters(struct tegra_cpu_ctr *c)
224244
{
245+
struct tegra194_cpufreq_data *data = cpufreq_get_driver_data();
246+
u32 delta_refcnt;
247+
int cnt = 0;
225248
u64 val;
226249

227250
val = read_freq_feedback();
228251
c->last_refclk_cnt = lower_32_bits(val);
229252
c->last_coreclk_cnt = upper_32_bits(val);
230-
udelay(US_DELAY);
231-
val = read_freq_feedback();
232-
c->refclk_cnt = lower_32_bits(val);
233-
c->coreclk_cnt = upper_32_bits(val);
253+
254+
/*
255+
* The sampling window is based on the minimum number of reference
256+
* clock cycles which is known to give a stable value of CPU frequency.
257+
*/
258+
do {
259+
val = read_freq_feedback();
260+
c->refclk_cnt = lower_32_bits(val);
261+
c->coreclk_cnt = upper_32_bits(val);
262+
if (c->refclk_cnt < c->last_refclk_cnt)
263+
delta_refcnt = c->refclk_cnt + (MAX_CNT - c->last_refclk_cnt);
264+
else
265+
delta_refcnt = c->refclk_cnt - c->last_refclk_cnt;
266+
if (++cnt >= 0xFFFF) {
267+
pr_warn("cpufreq: problem with refclk on cpu:%d, delta_refcnt:%u, cnt:%d\n",
268+
c->cpu, delta_refcnt, cnt);
269+
break;
270+
}
271+
} while (delta_refcnt < data->soc->refclk_delta_min);
234272
}
235273

236274
static void tegra_read_counters(struct work_struct *work)
@@ -288,9 +326,8 @@ static unsigned int tegra194_calculate_speed(u32 cpu)
288326
u32 rate_mhz;
289327

290328
/*
291-
* udelay() is required to reconstruct cpu frequency over an
292-
* observation window. Using workqueue to call udelay() with
293-
* interrupts enabled.
329+
* Reconstruct cpu frequency over an observation/sampling window.
330+
* Using workqueue to keep interrupts enabled during the interval.
294331
*/
295332
read_counters_work.c.cpu = cpu;
296333
INIT_WORK_ONSTACK(&read_counters_work.work, tegra_read_counters);
@@ -372,9 +409,9 @@ static unsigned int tegra194_get_speed(u32 cpu)
372409
if (pos->driver_data != ndiv)
373410
continue;
374411

375-
if (abs(pos->frequency - rate) > 115200) {
376-
pr_warn("cpufreq: cpu%d,cur:%u,set:%u,set ndiv:%llu\n",
377-
cpu, rate, pos->frequency, ndiv);
412+
if (abs(pos->frequency - rate) > MAX_DELTA_KHZ) {
413+
pr_warn("cpufreq: cpu%d,cur:%u,set:%u,delta:%d,set ndiv:%llu\n",
414+
cpu, rate, pos->frequency, abs(rate - pos->frequency), ndiv);
378415
} else {
379416
rate = pos->frequency;
380417
}
@@ -568,6 +605,7 @@ static const struct tegra_cpufreq_soc tegra194_cpufreq_soc = {
568605
.ops = &tegra194_cpufreq_ops,
569606
.maxcpus_per_cluster = 2,
570607
.num_clusters = 4,
608+
.refclk_delta_min = 16000,
571609
};
572610

573611
static void tegra194_cpufreq_free_resources(void)
@@ -684,7 +722,7 @@ static int tegra194_cpufreq_probe(struct platform_device *pdev)
684722

685723
soc = of_device_get_match_data(&pdev->dev);
686724

687-
if (soc->ops && soc->maxcpus_per_cluster && soc->num_clusters) {
725+
if (soc->ops && soc->maxcpus_per_cluster && soc->num_clusters && soc->refclk_delta_min) {
688726
data->soc = soc;
689727
} else {
690728
dev_err(&pdev->dev, "soc data missing\n");

0 commit comments

Comments
 (0)