Skip to content

Commit 816c267

Browse files
James-A-Clark authored and willdeacon committed
arm64: perf: Add support for event counting threshold
FEAT_PMUv3_TH (Armv8.8) permits a PMU counter to increment only on events whose count meets a specified threshold condition. For example if PMEVTYPERn.TC (Threshold Control) is set to 0b101 (Greater than or equal, count), and the threshold is set to 2, then the PMU counter will now only increment by 1 when an event would have previously incremented the PMU counter by 2 or more on a single processor cycle.

Three new Perf event config fields, 'threshold', 'threshold_compare' and 'threshold_count' have been added to control the feature. threshold_compare maps to the upper two bits of PMEVTYPERn.TC and threshold_count maps to the first bit of TC. These separate attributes have been picked rather than enumerating all the possible combinations of the TC field as in the Arm ARM. The attributes would be used on a Perf command line like this:

  $ perf stat -e stall_slot/threshold=2,threshold_compare=2/

A new capability for reading out the maximum supported threshold value has also been added:

  $ cat /sys/bus/event_source/devices/armv8_pmuv3/caps/threshold_max
  0x000000ff

If a threshold higher than threshold_max is provided, then an error is generated. If FEAT_PMUv3_TH isn't implemented or a 32 bit kernel is running, then threshold_max reads zero, and attempting to set a threshold value will also result in an error.

The threshold is per PMU counter, and there are potentially different threshold_max values per PMU type on heterogeneous systems.

Bits higher than 32 now need to be written into PMEVTYPER, so armv8pmu_write_evtype() has to be updated to take an unsigned long value rather than u32 which gives the correct behavior on both aarch32 and 64.

Signed-off-by: James Clark <james.clark@arm.com>
Link: https://lore.kernel.org/r/20231211161331.1277825-11-james.clark@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
1 parent 186c91a commit 816c267

2 files changed

Lines changed: 79 additions & 1 deletion

File tree

drivers/perf/arm_pmuv3.c

Lines changed: 78 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -309,10 +309,22 @@ static const struct attribute_group armv8_pmuv3_events_attr_group = {
309309
#define ATTR_CFG_FLD_rdpmc_CFG config1
310310
#define ATTR_CFG_FLD_rdpmc_LO 1
311311
#define ATTR_CFG_FLD_rdpmc_HI 1
312+
#define ATTR_CFG_FLD_threshold_count_CFG config1 /* PMEVTYPER.TC[0] */
313+
#define ATTR_CFG_FLD_threshold_count_LO 2
314+
#define ATTR_CFG_FLD_threshold_count_HI 2
315+
#define ATTR_CFG_FLD_threshold_compare_CFG config1 /* PMEVTYPER.TC[2:1] */
316+
#define ATTR_CFG_FLD_threshold_compare_LO 3
317+
#define ATTR_CFG_FLD_threshold_compare_HI 4
318+
#define ATTR_CFG_FLD_threshold_CFG config1 /* PMEVTYPER.TH */
319+
#define ATTR_CFG_FLD_threshold_LO 5
320+
#define ATTR_CFG_FLD_threshold_HI 16
312321

313322
GEN_PMU_FORMAT_ATTR(event);
314323
GEN_PMU_FORMAT_ATTR(long);
315324
GEN_PMU_FORMAT_ATTR(rdpmc);
325+
GEN_PMU_FORMAT_ATTR(threshold_count);
326+
GEN_PMU_FORMAT_ATTR(threshold_compare);
327+
GEN_PMU_FORMAT_ATTR(threshold);
316328

317329
static int sysctl_perf_user_access __read_mostly;
318330

@@ -326,10 +338,27 @@ static bool armv8pmu_event_want_user_access(struct perf_event *event)
326338
return ATTR_CFG_GET_FLD(&event->attr, rdpmc);
327339
}
328340

341+
static u8 armv8pmu_event_threshold_control(struct perf_event_attr *attr)
342+
{
343+
u8 th_compare = ATTR_CFG_GET_FLD(attr, threshold_compare);
344+
u8 th_count = ATTR_CFG_GET_FLD(attr, threshold_count);
345+
346+
/*
347+
* The count bit is always the bottom bit of the full control field, and
348+
* the comparison is the upper two bits, but it's not explicitly
349+
* labelled in the Arm ARM. For the Perf interface we split it into two
350+
* fields, so reconstruct it here.
351+
*/
352+
return (th_compare << 1) | th_count;
353+
}
354+
329355
/*
 * NULL-terminated list of the PMU's format attributes. The threshold,
 * threshold_compare and threshold_count entries are the event config
 * fields that control event counting thresholds.
 */
static struct attribute *armv8_pmuv3_format_attrs[] = {
330356
&format_attr_event.attr,
331357
&format_attr_long.attr,
332358
&format_attr_rdpmc.attr,
359+
&format_attr_threshold.attr,
360+
&format_attr_threshold_compare.attr,
361+
&format_attr_threshold_count.attr,
333362
NULL,
334363
};
335364

@@ -379,10 +408,38 @@ static ssize_t bus_width_show(struct device *dev, struct device_attribute *attr,
379408

380409
static DEVICE_ATTR_RO(bus_width);
381410

411+
static u32 threshold_max(struct arm_pmu *cpu_pmu)
412+
{
413+
/*
414+
* PMMIR.THWIDTH is readable and non-zero on aarch32, but it would be
415+
* impossible to write the threshold in the upper 32 bits of PMEVTYPER.
416+
*/
417+
if (IS_ENABLED(CONFIG_ARM))
418+
return 0;
419+
420+
/*
421+
* The largest value that can be written to PMEVTYPER<n>_EL0.TH is
422+
* (2 ^ PMMIR.THWIDTH) - 1.
423+
*/
424+
return (1 << FIELD_GET(ARMV8_PMU_THWIDTH, cpu_pmu->reg_pmmir)) - 1;
425+
}
426+
427+
static ssize_t threshold_max_show(struct device *dev,
428+
struct device_attribute *attr, char *page)
429+
{
430+
struct pmu *pmu = dev_get_drvdata(dev);
431+
struct arm_pmu *cpu_pmu = container_of(pmu, struct arm_pmu, pmu);
432+
433+
return sysfs_emit(page, "0x%08x\n", threshold_max(cpu_pmu));
434+
}
435+
436+
static DEVICE_ATTR_RO(threshold_max);
437+
382438
/*
 * NULL-terminated list of the PMU's capability attributes. threshold_max
 * reports the maximum supported event threshold value.
 */
static struct attribute *armv8_pmuv3_caps_attrs[] = {
383439
&dev_attr_slots.attr,
384440
&dev_attr_bus_slots.attr,
385441
&dev_attr_bus_width.attr,
442+
&dev_attr_threshold_max.attr,
386443
NULL,
387444
};
388445

@@ -566,7 +623,7 @@ static void armv8pmu_write_counter(struct perf_event *event, u64 value)
566623
armv8pmu_write_hw_counter(event, value);
567624
}
568625

569-
static void armv8pmu_write_evtype(int idx, u32 val)
626+
static void armv8pmu_write_evtype(int idx, unsigned long val)
570627
{
571628
u32 counter = ARMV8_IDX_TO_COUNTER(idx);
572629
unsigned long mask = ARMV8_PMU_EVTYPE_EVENT |
@@ -935,6 +992,10 @@ static int armv8pmu_set_event_filter(struct hw_perf_event *event,
935992
struct perf_event_attr *attr)
936993
{
937994
unsigned long config_base = 0;
995+
struct perf_event *perf_event = container_of(attr, struct perf_event,
996+
attr);
997+
struct arm_pmu *cpu_pmu = to_arm_pmu(perf_event->pmu);
998+
u32 th;
938999

9391000
if (attr->exclude_idle) {
9401001
pr_debug("ARM performance counters do not support mode exclusion\n");
@@ -968,6 +1029,22 @@ static int armv8pmu_set_event_filter(struct hw_perf_event *event,
9681029
if (attr->exclude_user)
9691030
config_base |= ARMV8_PMU_EXCLUDE_EL0;
9701031

1032+
/*
1033+
* If FEAT_PMUv3_TH isn't implemented, then THWIDTH (threshold_max) will
1034+
* be 0 and will also trigger this check, preventing it from being used.
1035+
*/
1036+
th = ATTR_CFG_GET_FLD(attr, threshold);
1037+
if (th > threshold_max(cpu_pmu)) {
1038+
pr_debug("PMU event threshold exceeds max value\n");
1039+
return -EINVAL;
1040+
}
1041+
1042+
if (IS_ENABLED(CONFIG_ARM64) && th) {
1043+
config_base |= FIELD_PREP(ARMV8_PMU_EVTYPE_TH, th);
1044+
config_base |= FIELD_PREP(ARMV8_PMU_EVTYPE_TC,
1045+
armv8pmu_event_threshold_control(attr));
1046+
}
1047+
9711048
/*
9721049
* Install the filter into config_base as this is used to
9731050
* construct the event type.

include/linux/perf/arm_pmuv3.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -262,6 +262,7 @@
262262
#define ARMV8_PMU_SLOTS GENMASK(7, 0)
263263
#define ARMV8_PMU_BUS_SLOTS GENMASK(15, 8)
264264
#define ARMV8_PMU_BUS_WIDTH GENMASK(19, 16)
265+
#define ARMV8_PMU_THWIDTH GENMASK(23, 20)
265266

266267
/*
267268
* This code is really good

0 commit comments

Comments
 (0)