
Commit f5bfa23

atishp04 authored and palmer-dabbelt committed
RISC-V: Add a perf core library for pmu drivers
Implement a perf core library that can support all the essential perf features in the future. It can also accommodate any type of PMU implementation. Currently, both the SBI-based perf driver and the legacy driver use the library. Most of the common perf functionality is kept in this core library, while each PMU-specific driver implements its own PMU-specific features. For example, the SBI-specific functionality is implemented in the SBI-specific driver.

Reviewed-by: Anup Patel <anup@brainfault.org>
Signed-off-by: Atish Patra <atish.patra@wdc.com>
Signed-off-by: Atish Patra <atishp@rivosinc.com>
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
1 parent c631121 commit f5bfa23

4 files changed

Lines changed: 398 additions & 0 deletions
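To make the library/driver split the commit message describes concrete, here is a minimal sketch of how a PMU driver might sit on top of this core library. The callback names (ctr_get_idx, ctr_start, ctr_stop, ctr_read, event_map) and their signatures are inferred from their call sites in riscv_pmu.c below; struct riscv_pmu itself is defined in include/linux/perf/riscv_pmu.h, which this commit also adds but which is not shown here, and everything prefixed my_ is hypothetical.

#include <linux/errno.h>
#include <linux/perf/riscv_pmu.h>
#include <linux/platform_device.h>

/* Hypothetical driver hooks; signatures inferred from the call sites below. */
static int  my_ctr_get_idx(struct perf_event *event);
static void my_ctr_start(struct perf_event *event, u64 init_val);
static void my_ctr_stop(struct perf_event *event, unsigned long flags);
static u64  my_ctr_read(struct perf_event *event);
static int  my_event_map(struct perf_event *event, u64 *config);

/* Hypothetical probe routine for a driver built on the core library. */
static int my_pmu_probe(struct platform_device *pdev)
{
	struct riscv_pmu *pmu = riscv_pmu_alloc();

	if (!pmu)
		return -ENOMEM;

	/* Wire up the PMU-specific callbacks the core library invokes. */
	pmu->ctr_get_idx = my_ctr_get_idx;	/* claim a free counter */
	pmu->ctr_start   = my_ctr_start;	/* program and start it */
	pmu->ctr_stop    = my_ctr_stop;		/* stop it (optionally reset) */
	pmu->ctr_read    = my_ctr_read;		/* read the raw counter value */
	pmu->event_map   = my_event_map;	/* map a perf event to hardware */

	/* Hand the struct pmu filled in by riscv_pmu_alloc() to the perf core. */
	return perf_pmu_register(&pmu->pmu, "my_pmu", -1);
}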


drivers/perf/Kconfig

Lines changed: 10 additions & 0 deletions

@@ -56,6 +56,16 @@ config ARM_PMU
 	  Say y if you want to use CPU performance monitors on ARM-based
 	  systems.
 
+config RISCV_PMU
+	depends on RISCV
+	bool "RISC-V PMU framework"
+	default y
+	help
+	  Say y if you want to use CPU performance monitors on RISCV-based
+	  systems. This provides the core PMU framework that abstracts common
+	  PMU functionalities in a core library so that different PMU drivers
+	  can reuse it.
+
 config ARM_PMU_ACPI
 	depends on ARM_PMU && ACPI
 	def_bool y
drivers/perf/Makefile

Lines changed: 1 addition & 0 deletions

@@ -10,6 +10,7 @@ obj-$(CONFIG_FSL_IMX8_DDR_PMU) += fsl_imx8_ddr_perf.o
 obj-$(CONFIG_HISI_PMU) += hisilicon/
 obj-$(CONFIG_QCOM_L2_PMU) += qcom_l2_pmu.o
 obj-$(CONFIG_QCOM_L3_PMU) += qcom_l3_pmu.o
+obj-$(CONFIG_RISCV_PMU) += riscv_pmu.o
 obj-$(CONFIG_THUNDERX2_PMU) += thunderx2_pmu.o
 obj-$(CONFIG_XGENE_PMU) += xgene_pmu.o
 obj-$(CONFIG_ARM_SPE_PMU) += arm_spe_pmu.o

drivers/perf/riscv_pmu.c

Lines changed: 322 additions & 0 deletions

@@ -0,0 +1,322 @@ (new file)

// SPDX-License-Identifier: GPL-2.0
/*
 * RISC-V performance counter support.
 *
 * Copyright (C) 2021 Western Digital Corporation or its affiliates.
 *
 * This implementation is based on old RISC-V perf and ARM perf event code
 * which are in turn based on sparc64 and x86 code.
 */

#include <linux/cpumask.h>
#include <linux/irq.h>
#include <linux/irqdesc.h>
#include <linux/perf/riscv_pmu.h>
#include <linux/printk.h>
#include <linux/smp.h>

static unsigned long csr_read_num(int csr_num)
{
#define switchcase_csr_read(__csr_num, __val)		{\
	case __csr_num:					\
		__val = csr_read(__csr_num);		\
		break; }
#define switchcase_csr_read_2(__csr_num, __val)		{\
	switchcase_csr_read(__csr_num + 0, __val)	\
	switchcase_csr_read(__csr_num + 1, __val)}
#define switchcase_csr_read_4(__csr_num, __val)		{\
	switchcase_csr_read_2(__csr_num + 0, __val)	\
	switchcase_csr_read_2(__csr_num + 2, __val)}
#define switchcase_csr_read_8(__csr_num, __val)		{\
	switchcase_csr_read_4(__csr_num + 0, __val)	\
	switchcase_csr_read_4(__csr_num + 4, __val)}
#define switchcase_csr_read_16(__csr_num, __val)	{\
	switchcase_csr_read_8(__csr_num + 0, __val)	\
	switchcase_csr_read_8(__csr_num + 8, __val)}
#define switchcase_csr_read_32(__csr_num, __val)	{\
	switchcase_csr_read_16(__csr_num + 0, __val)	\
	switchcase_csr_read_16(__csr_num + 16, __val)}

	unsigned long ret = 0;

	switch (csr_num) {
	switchcase_csr_read_32(CSR_CYCLE, ret)
	switchcase_csr_read_32(CSR_CYCLEH, ret)
	default:
		break;
	}

	return ret;
#undef switchcase_csr_read_32
#undef switchcase_csr_read_16
#undef switchcase_csr_read_8
#undef switchcase_csr_read_4
#undef switchcase_csr_read_2
#undef switchcase_csr_read
}

/*
 * Read the CSR of a corresponding counter.
 */
unsigned long riscv_pmu_ctr_read_csr(unsigned long csr)
{
	if (csr < CSR_CYCLE || csr > CSR_HPMCOUNTER31H ||
	    (csr > CSR_HPMCOUNTER31 && csr < CSR_CYCLEH)) {
		pr_err("Invalid performance counter csr %lx\n", csr);
		return -EINVAL;
	}

	return csr_read_num(csr);
}

u64 riscv_pmu_ctr_get_width_mask(struct perf_event *event)
{
	int cwidth;
	struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
	struct hw_perf_event *hwc = &event->hw;

	if (!rvpmu->ctr_get_width)
	/*
	 * If the pmu driver doesn't support counter width, set it to the
	 * default maximum allowed by the specification.
	 */
		cwidth = 63;
	else {
		if (hwc->idx == -1)
			/* Handle init case where idx is not initialized yet */
			cwidth = rvpmu->ctr_get_width(0);
		else
			cwidth = rvpmu->ctr_get_width(hwc->idx);
	}

	return GENMASK_ULL(cwidth, 0);
}

u64 riscv_pmu_event_update(struct perf_event *event)
{
	struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
	struct hw_perf_event *hwc = &event->hw;
	u64 prev_raw_count, new_raw_count;
	unsigned long cmask;
	u64 oldval, delta;

	if (!rvpmu->ctr_read)
		return 0;

	cmask = riscv_pmu_ctr_get_width_mask(event);

	do {
		prev_raw_count = local64_read(&hwc->prev_count);
		new_raw_count = rvpmu->ctr_read(event);
		oldval = local64_cmpxchg(&hwc->prev_count, prev_raw_count,
					 new_raw_count);
	} while (oldval != prev_raw_count);

	delta = (new_raw_count - prev_raw_count) & cmask;
	local64_add(delta, &event->count);
	local64_sub(delta, &hwc->period_left);

	return delta;
}

static void riscv_pmu_stop(struct perf_event *event, int flags)
{
	struct hw_perf_event *hwc = &event->hw;
	struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);

	WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);

	if (!(hwc->state & PERF_HES_STOPPED)) {
		if (rvpmu->ctr_stop) {
			rvpmu->ctr_stop(event, 0);
			hwc->state |= PERF_HES_STOPPED;
		}
		riscv_pmu_event_update(event);
		hwc->state |= PERF_HES_UPTODATE;
	}
}

int riscv_pmu_event_set_period(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	s64 left = local64_read(&hwc->period_left);
	s64 period = hwc->sample_period;
	int overflow = 0;
	uint64_t max_period = riscv_pmu_ctr_get_width_mask(event);

	if (unlikely(left <= -period)) {
		left = period;
		local64_set(&hwc->period_left, left);
		hwc->last_period = period;
		overflow = 1;
	}

	if (unlikely(left <= 0)) {
		left += period;
		local64_set(&hwc->period_left, left);
		hwc->last_period = period;
		overflow = 1;
	}

	/*
	 * Limit the maximum period to prevent the counter value
	 * from overtaking the one we are about to program. In
	 * effect we are reducing max_period to account for
	 * interrupt latency (and we are being very conservative).
	 */
	if (left > (max_period >> 1))
		left = (max_period >> 1);

	local64_set(&hwc->prev_count, (u64)-left);
	perf_event_update_userpage(event);

	return overflow;
}

static void riscv_pmu_start(struct perf_event *event, int flags)
{
	struct hw_perf_event *hwc = &event->hw;
	struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
	uint64_t max_period = riscv_pmu_ctr_get_width_mask(event);
	u64 init_val;

	if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
		return;

	if (flags & PERF_EF_RELOAD)
		WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));

	hwc->state = 0;
	riscv_pmu_event_set_period(event);
	init_val = local64_read(&hwc->prev_count) & max_period;
	rvpmu->ctr_start(event, init_val);
	perf_event_update_userpage(event);
}

static int riscv_pmu_add(struct perf_event *event, int flags)
{
	struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
	struct cpu_hw_events *cpuc = this_cpu_ptr(rvpmu->hw_events);
	struct hw_perf_event *hwc = &event->hw;
	int idx;

	idx = rvpmu->ctr_get_idx(event);
	if (idx < 0)
		return idx;

	hwc->idx = idx;
	cpuc->events[idx] = event;
	cpuc->n_events++;
	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
	if (flags & PERF_EF_START)
		riscv_pmu_start(event, PERF_EF_RELOAD);

	/* Propagate our changes to the userspace mapping. */
	perf_event_update_userpage(event);

	return 0;
}

static void riscv_pmu_del(struct perf_event *event, int flags)
{
	struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
	struct cpu_hw_events *cpuc = this_cpu_ptr(rvpmu->hw_events);
	struct hw_perf_event *hwc = &event->hw;

	riscv_pmu_stop(event, PERF_EF_UPDATE);
	cpuc->events[hwc->idx] = NULL;
	/* The firmware needs to reset the counter mapping */
	if (rvpmu->ctr_stop)
		rvpmu->ctr_stop(event, RISCV_PMU_STOP_FLAG_RESET);
	cpuc->n_events--;
	if (rvpmu->ctr_clear_idx)
		rvpmu->ctr_clear_idx(event);
	perf_event_update_userpage(event);
	hwc->idx = -1;
}

static void riscv_pmu_read(struct perf_event *event)
{
	riscv_pmu_event_update(event);
}

static int riscv_pmu_event_init(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
	int mapped_event;
	u64 event_config = 0;
	uint64_t cmask;

	hwc->flags = 0;
	mapped_event = rvpmu->event_map(event, &event_config);
	if (mapped_event < 0) {
		pr_debug("event %x:%llx not supported\n", event->attr.type,
			 event->attr.config);
		return mapped_event;
	}

	/*
	 * idx is set to -1 because the index of a general event should not be
	 * decided until binding to some counter in pmu->add().
	 * config will contain the information about the counter CSR,
	 * and idx will contain the counter index.
	 */
	hwc->config = event_config;
	hwc->idx = -1;
	hwc->event_base = mapped_event;

	if (!is_sampling_event(event)) {
		/*
		 * For non-sampling runs, limit the sample_period to half
		 * of the counter width. That way, the new counter value
		 * is far less likely to overtake the previous one unless
		 * you have some serious IRQ latency issues.
		 */
		cmask = riscv_pmu_ctr_get_width_mask(event);
		hwc->sample_period = cmask >> 1;
		hwc->last_period = hwc->sample_period;
		local64_set(&hwc->period_left, hwc->sample_period);
	}

	return 0;
}

struct riscv_pmu *riscv_pmu_alloc(void)
{
	struct riscv_pmu *pmu;
	int cpuid, i;
	struct cpu_hw_events *cpuc;

	pmu = kzalloc(sizeof(*pmu), GFP_KERNEL);
	if (!pmu)
		goto out;

	pmu->hw_events = alloc_percpu_gfp(struct cpu_hw_events, GFP_KERNEL);
	if (!pmu->hw_events) {
		pr_info("failed to allocate per-cpu PMU data.\n");
		goto out_free_pmu;
	}

	for_each_possible_cpu(cpuid) {
		cpuc = per_cpu_ptr(pmu->hw_events, cpuid);
		cpuc->n_events = 0;
		for (i = 0; i < RISCV_MAX_COUNTERS; i++)
			cpuc->events[i] = NULL;
	}
	pmu->pmu = (struct pmu) {
		.event_init	= riscv_pmu_event_init,
		.add		= riscv_pmu_add,
		.del		= riscv_pmu_del,
		.start		= riscv_pmu_start,
		.stop		= riscv_pmu_stop,
		.read		= riscv_pmu_read,
	};

	return pmu;

out_free_pmu:
	kfree(pmu);
out:
	return NULL;
}
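Two bits of modular arithmetic above are worth sanity-checking in isolation: riscv_pmu_event_update() masks the raw difference so a counter that wrapped between two reads still yields the right delta, and riscv_pmu_event_set_period() programs the counter to (u64)-left so it reaches the overflow point after exactly "left" increments. A standalone userspace sketch follows (plain C, not kernel code; the 48-bit counter width is an arbitrary assumption for illustration):

#include <assert.h>
#include <stdint.h>

int main(void)
{
	/* Pretend ctr_get_width() reported cwidth = 47, i.e. a 48-bit counter. */
	uint64_t cmask = (1ULL << 48) - 1;		/* GENMASK_ULL(47, 0) */

	/* Wrap-safe delta, as in riscv_pmu_event_update(): the counter
	 * rolled over between the two reads, yet the masked difference
	 * still counts the ticks that actually happened. */
	uint64_t prev_raw = 0x0000FFFFFFFFFFFEULL;	/* near the 48-bit top */
	uint64_t new_raw  = 0x0000000000000001ULL;	/* after wrap-around */
	assert(((new_raw - prev_raw) & cmask) == 3);	/* ...FFFE -> FFFF -> 0 -> 1 */

	/* Period programming, as in riscv_pmu_event_set_period() and
	 * riscv_pmu_start(): starting the counter at -left (masked to the
	 * counter width) means it overflows after exactly "left" increments. */
	uint64_t left = 1000;
	uint64_t init_val = (uint64_t)-left & cmask;
	assert(((init_val + left) & cmask) == 0);

	return 0;
}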
