
Commit 3374491

Kan Liang authored and Peter Zijlstra committed
perf/x86/intel: Support branch counters logging
The branch counters logging (a.k.a. LBR event logging) feature introduces a per-counter indication of precise event occurrences in LBRs. It provides a means to attribute exposed retirement latency to combinations of events across a block of instructions, and to attribute Timed LBR latencies to events.

The feature is first introduced on SRF/GRR as an enhancement of the arch LBR. It adds new fields in the LBR_INFO MSRs that log the occurrences of events on the GP counters. The information is laid out in counter order.

The design proposed in this patch requires that the events to be logged are in a group with the event that owns the LBRs. If there is more than one LBR group, only the counter-logging information from the current (overflowed) group is stored for the perf tool; otherwise the perf tool cannot know which other groups are scheduled, and when, especially once multiplexing kicks in. The user can ensure the maximum number of counters that support LBR info (4 for now) is used by making the group large enough.

The HW only logs events in counter order, which may differ from the enabled order that the perf tool understands. When parsing the information of each branch entry, convert the counter order to the enabled order, and store the enabled order in the extension space.

Unconditionally reset the LBRs for an LBR event group when it is deleted. The logged counter information is only valid for the current LBR group. If another LBR group is scheduled later, the information from the stale LBRs would otherwise be wrongly interpreted.

Add a sanity check in intel_pmu_hw_config(). Disable the feature if other counter filters (inv, cmask, edge, in_tx) are set or if the LBR call stack mode is enabled. (For the LBR call stack mode, we cannot simply flush the LBR, since that would break the call stack. Also, there is no obvious usage with the call stack mode for now.)

Applying only PERF_SAMPLE_BRANCH_COUNTERS doesn't require any branch stack setup.

Expose the maximum number of supported counters and the counter width in sysfs, so the perf tool can use the information to parse the logged counters in each branch.

Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20231025201626.3000228-5-kan.liang@linux.intel.com
1 parent 318c498 · commit 3374491

8 files changed: 207 additions & 8 deletions
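Nothing in the commit itself documents the user-space call sequence, but the intel_pmu_hw_config() rules below imply one: a sampling group leader that owns the LBRs and requests the new COUNTERS branch sample bit, plus grouped siblings whose occurrences get logged. The sketch below is illustrative only; the raw event encodings are hypothetical placeholders, and the bit value for PERF_SAMPLE_BRANCH_COUNTERS is an assumption based on the uapi side of this patch series, not something defined in this commit.

/*
 * Illustrative sketch only, not part of this commit: open a
 * two-event branch counters group. The raw event encodings are
 * hypothetical, and BRANCH_SAMPLE_COUNTERS mirrors the assumed
 * PERF_SAMPLE_BRANCH_COUNTERS bit from the uapi patch.
 */
#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <unistd.h>

#define BRANCH_SAMPLE_COUNTERS  (1ULL << 19)    /* assumed uapi bit */

static int open_branch_counters_group(void)
{
        struct perf_event_attr leader = {0}, sibling = {0};
        int lfd, sfd;

        /* Leader: a sampling LBR event; LBRs are reset when it is deleted. */
        leader.size = sizeof(leader);
        leader.type = PERF_TYPE_RAW;
        leader.config = 0xc4;                   /* hypothetical encoding */
        leader.sample_period = 100003;
        leader.sample_type = PERF_SAMPLE_BRANCH_STACK;
        leader.branch_sample_type = PERF_SAMPLE_BRANCH_ANY |
                                    BRANCH_SAMPLE_COUNTERS;

        lfd = syscall(__NR_perf_event_open, &leader, 0, -1, -1, 0);
        if (lfd < 0)
                return -1;

        /* Sibling: its per-branch occurrences land in the LBR_INFO fields. */
        sibling.size = sizeof(sibling);
        sibling.type = PERF_TYPE_RAW;
        sibling.config = 0xc5;                  /* hypothetical encoding */
        sibling.branch_sample_type = BRANCH_SAMPLE_COUNTERS;

        sfd = syscall(__NR_perf_event_open, &sibling, 0, -1, lfd, 0);
        return sfd < 0 ? -1 : lfd;
}

Per the hw_config() check below, such a group is rejected if any member uses counter filters (inv, cmask, edge, in_tx), if call stack mode is requested, or if the leader is not itself an LBR event.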


arch/x86/events/intel/core.c

Lines changed: 97 additions & 6 deletions
@@ -2792,6 +2792,7 @@ static void intel_pmu_enable_fixed(struct perf_event *event)
 
 static void intel_pmu_enable_event(struct perf_event *event)
 {
+	u64 enable_mask = ARCH_PERFMON_EVENTSEL_ENABLE;
 	struct hw_perf_event *hwc = &event->hw;
 	int idx = hwc->idx;
 
@@ -2800,8 +2801,10 @@ static void intel_pmu_enable_event(struct perf_event *event)
 
 	switch (idx) {
 	case 0 ... INTEL_PMC_IDX_FIXED - 1:
+		if (branch_sample_counters(event))
+			enable_mask |= ARCH_PERFMON_EVENTSEL_BR_CNTR;
 		intel_set_masks(event, idx);
-		__x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
+		__x86_pmu_enable_event(hwc, enable_mask);
 		break;
 	case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS - 1:
 	case INTEL_PMC_IDX_METRIC_BASE ... INTEL_PMC_IDX_METRIC_END:
@@ -3052,7 +3055,7 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
 		perf_sample_data_init(&data, 0, event->hw.last_period);
 
 		if (has_branch_stack(event))
-			perf_sample_save_brstack(&data, event, &cpuc->lbr_stack, NULL);
+			intel_pmu_lbr_save_brstack(&data, cpuc, event);
 
 		if (perf_event_overflow(event, &data, regs))
 			x86_pmu_stop(event, 0);
@@ -3617,6 +3620,13 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
 	if (cpuc->excl_cntrs)
 		return intel_get_excl_constraints(cpuc, event, idx, c2);
 
+	/* Not all counters support the branch counter feature. */
+	if (branch_sample_counters(event)) {
+		c2 = dyn_constraint(cpuc, c2, idx);
+		c2->idxmsk64 &= x86_pmu.lbr_counters;
+		c2->weight = hweight64(c2->idxmsk64);
+	}
+
 	return c2;
 }
 
@@ -3905,6 +3915,58 @@ static int intel_pmu_hw_config(struct perf_event *event)
 	if (needs_branch_stack(event) && is_sampling_event(event))
 		event->hw.flags |= PERF_X86_EVENT_NEEDS_BRANCH_STACK;
 
+	if (branch_sample_counters(event)) {
+		struct perf_event *leader, *sibling;
+		int num = 0;
+
+		if (!(x86_pmu.flags & PMU_FL_BR_CNTR) ||
+		    (event->attr.config & ~INTEL_ARCH_EVENT_MASK))
+			return -EINVAL;
+
+		/*
+		 * The branch counter logging is not supported in the call stack
+		 * mode yet, since we cannot simply flush the LBR during e.g.,
+		 * multiplexing. Also, there is no obvious usage with the call
+		 * stack mode. Simply forbid it for now.
+		 *
+		 * If any events in the group enable the branch counter logging
+		 * feature, the group is treated as a branch counter logging
+		 * group, which requires the extra space to store the counters.
+		 */
+		leader = event->group_leader;
+		if (branch_sample_call_stack(leader))
+			return -EINVAL;
+		if (branch_sample_counters(leader))
+			num++;
+		leader->hw.flags |= PERF_X86_EVENT_BRANCH_COUNTERS;
+
+		for_each_sibling_event(sibling, leader) {
+			if (branch_sample_call_stack(sibling))
+				return -EINVAL;
+			if (branch_sample_counters(sibling))
+				num++;
+		}
+
+		if (num > fls(x86_pmu.lbr_counters))
+			return -EINVAL;
+		/*
+		 * Only applying the PERF_SAMPLE_BRANCH_COUNTERS doesn't
+		 * require any branch stack setup.
+		 * Clear the bit to avoid unnecessary branch stack setup.
+		 */
+		if (0 == (event->attr.branch_sample_type &
+			  ~(PERF_SAMPLE_BRANCH_PLM_ALL |
+			    PERF_SAMPLE_BRANCH_COUNTERS)))
+			event->hw.flags &= ~PERF_X86_EVENT_NEEDS_BRANCH_STACK;
+
+		/*
+		 * Force the leader to be an LBR event, so the LBRs can be
+		 * reset with the leader event. See intel_pmu_lbr_del() for
+		 * details.
+		 */
+		if (!intel_pmu_needs_branch_stack(leader))
+			return -EINVAL;
+	}
+
 	if (intel_pmu_needs_branch_stack(event)) {
 		ret = intel_pmu_setup_lbr_filter(event);
 		if (ret)
@@ -4383,8 +4445,13 @@ cmt_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
 	 */
 	if (event->attr.precise_ip == 3) {
 		/* Force instruction:ppp on PMC0, 1 and Fixed counter 0 */
-		if (constraint_match(&fixed0_constraint, event->hw.config))
-			return &fixed0_counter0_1_constraint;
+		if (constraint_match(&fixed0_constraint, event->hw.config)) {
+			/* The fixed counter 0 doesn't support LBR event logging. */
+			if (branch_sample_counters(event))
+				return &counter0_1_constraint;
+			else
+				return &fixed0_counter0_1_constraint;
+		}
 
 		switch (c->idxmsk64 & 0x3ull) {
 		case 0x1:
@@ -4563,7 +4630,7 @@ int intel_cpuc_prepare(struct cpu_hw_events *cpuc, int cpu)
 		goto err;
 	}
 
-	if (x86_pmu.flags & (PMU_FL_EXCL_CNTRS | PMU_FL_TFA)) {
+	if (x86_pmu.flags & (PMU_FL_EXCL_CNTRS | PMU_FL_TFA | PMU_FL_BR_CNTR)) {
 		size_t sz = X86_PMC_IDX_MAX * sizeof(struct event_constraint);
 
 		cpuc->constraint_list = kzalloc_node(sz, GFP_KERNEL, cpu_to_node(cpu));
@@ -5535,15 +5602,39 @@ static ssize_t branches_show(struct device *cdev,
 
 static DEVICE_ATTR_RO(branches);
 
+static ssize_t branch_counter_nr_show(struct device *cdev,
+				      struct device_attribute *attr,
+				      char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "%d\n", fls(x86_pmu.lbr_counters));
+}
+
+static DEVICE_ATTR_RO(branch_counter_nr);
+
+static ssize_t branch_counter_width_show(struct device *cdev,
+					 struct device_attribute *attr,
+					 char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "%d\n", LBR_INFO_BR_CNTR_BITS);
+}
+
+static DEVICE_ATTR_RO(branch_counter_width);
+
 static struct attribute *lbr_attrs[] = {
 	&dev_attr_branches.attr,
+	&dev_attr_branch_counter_nr.attr,
+	&dev_attr_branch_counter_width.attr,
 	NULL
 };
 
 static umode_t
 lbr_is_visible(struct kobject *kobj, struct attribute *attr, int i)
 {
-	return x86_pmu.lbr_nr ? attr->mode : 0;
+	/* branches */
+	if (i == 0)
+		return x86_pmu.lbr_nr ? attr->mode : 0;
+
+	return (x86_pmu.flags & PMU_FL_BR_CNTR) ? attr->mode : 0;
}
 
 static char pmu_name_str[30];
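On the tool side, the two new read-only attributes are enough to size and slice the per-branch counter data. A minimal reader sketch follows; the caps/ location is an assumption based on where the lbr_attrs group is conventionally registered, and the PMU directory name may differ (e.g. cpu_core or cpu_atom on hybrid parts).

/*
 * Minimal sketch: read the new LBR capability attributes. The
 * sysfs path is an assumption, not spelled out in this commit.
 */
#include <stdio.h>

static int read_cap(const char *name)
{
        char path[256];
        FILE *f;
        int val = -1;

        snprintf(path, sizeof(path),
                 "/sys/bus/event_source/devices/cpu/caps/%s", name);
        f = fopen(path, "r");
        if (!f)
                return -1;
        if (fscanf(f, "%d", &val) != 1)
                val = -1;
        fclose(f);
        return val;
}

int main(void)
{
        printf("branch_counter_nr:    %d\n", read_cap("branch_counter_nr"));
        printf("branch_counter_width: %d\n", read_cap("branch_counter_width"));
        return 0;
}

With lbr_counters = 0xf and LBR_INFO_BR_CNTR_BITS = 2, the files read back 4 and 2 respectively, matching the "4 by now" limit in the commit message.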

arch/x86/events/intel/ds.c

Lines changed: 1 addition & 1 deletion
@@ -1912,7 +1912,7 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
 
 	if (has_branch_stack(event)) {
 		intel_pmu_store_pebs_lbrs(lbr);
-		perf_sample_save_brstack(data, event, &cpuc->lbr_stack, NULL);
+		intel_pmu_lbr_save_brstack(data, cpuc, event);
 	}
 }
 

arch/x86/events/intel/lbr.c

Lines changed: 84 additions & 1 deletion
@@ -676,6 +676,25 @@ void intel_pmu_lbr_del(struct perf_event *event)
 	WARN_ON_ONCE(cpuc->lbr_users < 0);
 	WARN_ON_ONCE(cpuc->lbr_pebs_users < 0);
 	perf_sched_cb_dec(event->pmu);
+
+	/*
+	 * The logged occurrences information is only valid for the
+	 * current LBR group. If another LBR group is scheduled in
+	 * later, the information from the stale LBRs will be wrongly
+	 * interpreted. Reset the LBRs here.
+	 *
+	 * Only clear once for a branch counter group with the leader
+	 * event. Because
+	 * - Cannot simply reset the LBRs with the !cpuc->lbr_users.
+	 *   Because it's possible that the last LBR user is not in a
+	 *   branch counter group, e.g., a branch_counters group +
+	 *   several normal LBR events.
+	 * - The LBR reset can be done with any one of the events in a
+	 *   branch counter group, since they are always scheduled together.
+	 *   It's easy to force the leader event to be an LBR event.
+	 */
+	if (is_branch_counters_group(event) && event == event->group_leader)
+		intel_pmu_lbr_reset();
 }
 
 static inline bool vlbr_exclude_host(void)
@@ -866,6 +885,8 @@ static __always_inline u16 get_lbr_cycles(u64 info)
 	return cycles;
 }
 
+static_assert((64 - PERF_BRANCH_ENTRY_INFO_BITS_MAX) > LBR_INFO_BR_CNTR_NUM * LBR_INFO_BR_CNTR_BITS);
+
 static void intel_pmu_store_lbr(struct cpu_hw_events *cpuc,
 				struct lbr_entry *entries)
 {
@@ -898,11 +919,67 @@ static void intel_pmu_store_lbr(struct cpu_hw_events *cpuc,
 		e->abort = !!(info & LBR_INFO_ABORT);
 		e->cycles = get_lbr_cycles(info);
 		e->type = get_lbr_br_type(info);
+
+		/*
+		 * Leverage the reserved field of cpuc->lbr_entries[i] to
+		 * temporarily store the branch counters information.
+		 * The later code will decide what content can be disclosed
+		 * to the perf tool. Please see intel_pmu_lbr_counters_reorder().
+		 */
+		e->reserved = (info >> LBR_INFO_BR_CNTR_OFFSET) & LBR_INFO_BR_CNTR_FULL_MASK;
 	}
 
 	cpuc->lbr_stack.nr = i;
 }
 
+/*
+ * The enabled order may be different from the counter order.
+ * Update the lbr_counters with the enabled order.
+ */
+static void intel_pmu_lbr_counters_reorder(struct cpu_hw_events *cpuc,
+					   struct perf_event *event)
+{
+	int i, j, pos = 0, order[X86_PMC_IDX_MAX];
+	struct perf_event *leader, *sibling;
+	u64 src, dst, cnt;
+
+	leader = event->group_leader;
+	if (branch_sample_counters(leader))
+		order[pos++] = leader->hw.idx;
+
+	for_each_sibling_event(sibling, leader) {
+		if (!branch_sample_counters(sibling))
+			continue;
+		order[pos++] = sibling->hw.idx;
+	}
+
+	WARN_ON_ONCE(!pos);
+
+	for (i = 0; i < cpuc->lbr_stack.nr; i++) {
+		src = cpuc->lbr_entries[i].reserved;
+		dst = 0;
+		for (j = 0; j < pos; j++) {
+			cnt = (src >> (order[j] * LBR_INFO_BR_CNTR_BITS)) & LBR_INFO_BR_CNTR_MASK;
+			dst |= cnt << j * LBR_INFO_BR_CNTR_BITS;
+		}
+		cpuc->lbr_counters[i] = dst;
+		cpuc->lbr_entries[i].reserved = 0;
+	}
+}
+
+void intel_pmu_lbr_save_brstack(struct perf_sample_data *data,
+				struct cpu_hw_events *cpuc,
+				struct perf_event *event)
+{
+	if (is_branch_counters_group(event)) {
+		intel_pmu_lbr_counters_reorder(cpuc, event);
+		perf_sample_save_brstack(data, event, &cpuc->lbr_stack, cpuc->lbr_counters);
+		return;
+	}
+
+	perf_sample_save_brstack(data, event, &cpuc->lbr_stack, NULL);
+}
+
 static void intel_pmu_arch_lbr_read(struct cpu_hw_events *cpuc)
 {
 	intel_pmu_store_lbr(cpuc, NULL);
@@ -1173,8 +1250,10 @@ intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
 	for (i = 0; i < cpuc->lbr_stack.nr; ) {
 		if (!cpuc->lbr_entries[i].from) {
 			j = i;
-			while (++j < cpuc->lbr_stack.nr)
+			while (++j < cpuc->lbr_stack.nr) {
 				cpuc->lbr_entries[j-1] = cpuc->lbr_entries[j];
+				cpuc->lbr_counters[j-1] = cpuc->lbr_counters[j];
+			}
 			cpuc->lbr_stack.nr--;
 			if (!cpuc->lbr_entries[i].from)
 				continue;
@@ -1525,8 +1604,12 @@ void __init intel_pmu_arch_lbr_init(void)
 	x86_pmu.lbr_mispred = ecx.split.lbr_mispred;
 	x86_pmu.lbr_timed_lbr = ecx.split.lbr_timed_lbr;
 	x86_pmu.lbr_br_type = ecx.split.lbr_br_type;
+	x86_pmu.lbr_counters = ecx.split.lbr_counters;
 	x86_pmu.lbr_nr = lbr_nr;
 
+	if (!!x86_pmu.lbr_counters)
+		x86_pmu.flags |= PMU_FL_BR_CNTR;
+
 	if (x86_pmu.lbr_mispred)
 		static_branch_enable(&x86_lbr_mispred);
 	if (x86_pmu.lbr_timed_lbr)
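The reorder above is pure bit shuffling: the hardware packs one 2-bit field per GP counter in counter order, and intel_pmu_lbr_counters_reorder() repacks the fields in the group's enabled order so the perf tool can match them to events. Below is a standalone model of the same transform; the counter assignments (leader on GP counter 2, sibling on GP counter 0) are hypothetical.

/*
 * Standalone model of the counter-order to enabled-order repack
 * performed by intel_pmu_lbr_counters_reorder(). Counter indices
 * are hypothetical.
 */
#include <stdint.h>
#include <stdio.h>

#define BR_CNTR_BITS    2
#define BR_CNTR_MASK    ((1ULL << BR_CNTR_BITS) - 1)

static uint64_t reorder(uint64_t src, const int *order, int pos)
{
        uint64_t dst = 0, cnt;
        int j;

        for (j = 0; j < pos; j++) {
                /* Pick the field of the j-th enabled event's counter... */
                cnt = (src >> (order[j] * BR_CNTR_BITS)) & BR_CNTR_MASK;
                /* ...and store it at slot j of the enabled order. */
                dst |= cnt << (j * BR_CNTR_BITS);
        }
        return dst;
}

int main(void)
{
        /* Counter-order fields: cntr0 = 1 occurrence, cntr2 = 3. */
        uint64_t src = (3ULL << (2 * BR_CNTR_BITS)) | 1ULL;
        int order[] = { 2, 0 }; /* enabled order: leader=cntr2, sibling=cntr0 */

        /* Prints 0x7: slot 0 (leader) = 3, slot 1 (sibling) = 1. */
        printf("0x%llx\n", (unsigned long long)reorder(src, order, 2));
        return 0;
}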

arch/x86/events/perf_event.h

Lines changed: 12 additions & 0 deletions
@@ -110,6 +110,11 @@ static inline bool is_topdown_event(struct perf_event *event)
 	return is_metric_event(event) || is_slots_event(event);
 }
 
+static inline bool is_branch_counters_group(struct perf_event *event)
+{
+	return event->group_leader->hw.flags & PERF_X86_EVENT_BRANCH_COUNTERS;
+}
+
 struct amd_nb {
 	int nb_id;  /* NorthBridge id */
 	int refcnt; /* reference count */
@@ -283,6 +288,7 @@ struct cpu_hw_events {
 	int				lbr_pebs_users;
 	struct perf_branch_stack	lbr_stack;
 	struct perf_branch_entry	lbr_entries[MAX_LBR_ENTRIES];
+	u64				lbr_counters[MAX_LBR_ENTRIES]; /* branch stack extra */
 	union {
 		struct er_account		*lbr_sel;
 		struct er_account		*lbr_ctl;
@@ -888,6 +894,7 @@ struct x86_pmu {
 	unsigned int	lbr_mispred:1;
 	unsigned int	lbr_timed_lbr:1;
 	unsigned int	lbr_br_type:1;
+	unsigned int	lbr_counters:4;
 
 	void		(*lbr_reset)(void);
 	void		(*lbr_read)(struct cpu_hw_events *cpuc);
@@ -1012,6 +1019,7 @@ do {									\
 #define PMU_FL_INSTR_LATENCY	0x80 /* Support Instruction Latency in PEBS Memory Info Record */
 #define PMU_FL_MEM_LOADS_AUX	0x100 /* Require an auxiliary event for the complete memory info */
 #define PMU_FL_RETIRE_LATENCY	0x200 /* Support Retire Latency in PEBS */
+#define PMU_FL_BR_CNTR		0x400 /* Support branch counter logging */
 
 #define EVENT_VAR(_id)  event_attr_##_id
 #define EVENT_PTR(_id) &event_attr_##_id.attr.attr
@@ -1552,6 +1560,10 @@ void intel_pmu_store_pebs_lbrs(struct lbr_entry *lbr);
 
 void intel_ds_init(void);
 
+void intel_pmu_lbr_save_brstack(struct perf_sample_data *data,
+				struct cpu_hw_events *cpuc,
+				struct perf_event *event);
+
 void intel_pmu_lbr_swap_task_ctx(struct perf_event_pmu_context *prev_epc,
 				 struct perf_event_pmu_context *next_epc);
 

arch/x86/events/perf_event_flags.h

Lines changed: 1 addition & 0 deletions
@@ -21,3 +21,4 @@ PERF_ARCH(PEBS_STLAT, 0x08000) /* st+stlat data address sampling */
 PERF_ARCH(AMD_BRS,		0x10000) /* AMD Branch Sampling */
 PERF_ARCH(PEBS_LAT_HYBRID,	0x20000) /* ld and st lat for hybrid */
 PERF_ARCH(NEEDS_BRANCH_STACK,	0x40000) /* require branch stack setup */
+PERF_ARCH(BRANCH_COUNTERS,	0x80000) /* logs the counters in the extra space of each branch */

arch/x86/include/asm/msr-index.h

Lines changed: 5 additions & 0 deletions
@@ -236,6 +236,11 @@
 #define LBR_INFO_CYCLES			0xffff
 #define LBR_INFO_BR_TYPE_OFFSET		56
 #define LBR_INFO_BR_TYPE		(0xfull << LBR_INFO_BR_TYPE_OFFSET)
+#define LBR_INFO_BR_CNTR_OFFSET		32
+#define LBR_INFO_BR_CNTR_NUM		4
+#define LBR_INFO_BR_CNTR_BITS		2
+#define LBR_INFO_BR_CNTR_MASK		GENMASK_ULL(LBR_INFO_BR_CNTR_BITS - 1, 0)
+#define LBR_INFO_BR_CNTR_FULL_MASK	GENMASK_ULL(LBR_INFO_BR_CNTR_NUM * LBR_INFO_BR_CNTR_BITS - 1, 0)
 
 #define MSR_ARCH_LBR_CTL		0x000014ce
 #define ARCH_LBR_CTL_LBREN		BIT(0)
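Per these definitions, the logged counts occupy bits 39:32 of each LBR_INFO MSR: four 2-bit fields, one per GP counter, in counter order. A small decoding sketch over a hypothetical raw value:

/*
 * Decode the per-counter fields from a raw LBR_INFO value using
 * the layout above. The sample value is hypothetical.
 */
#include <stdint.h>
#include <stdio.h>

#define LBR_INFO_BR_CNTR_OFFSET 32
#define LBR_INFO_BR_CNTR_NUM    4
#define LBR_INFO_BR_CNTR_BITS   2
#define LBR_INFO_BR_CNTR_MASK   ((1ULL << LBR_INFO_BR_CNTR_BITS) - 1)

int main(void)
{
        uint64_t info = 0x2d00000000ULL;        /* hypothetical LBR_INFO */
        int i;

        /* Prints cntr0: 1, cntr1: 3, cntr2: 2, cntr3: 0. */
        for (i = 0; i < LBR_INFO_BR_CNTR_NUM; i++) {
                uint64_t cnt = (info >> (LBR_INFO_BR_CNTR_OFFSET +
                                         i * LBR_INFO_BR_CNTR_BITS)) &
                               LBR_INFO_BR_CNTR_MASK;
                printf("cntr%d: %llu\n", i, (unsigned long long)cnt);
        }
        return 0;
}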

arch/x86/include/asm/perf_event.h

Lines changed: 4 additions & 0 deletions
@@ -31,6 +31,7 @@
 #define ARCH_PERFMON_EVENTSEL_ENABLE	(1ULL << 22)
 #define ARCH_PERFMON_EVENTSEL_INV	(1ULL << 23)
 #define ARCH_PERFMON_EVENTSEL_CMASK	0xFF000000ULL
+#define ARCH_PERFMON_EVENTSEL_BR_CNTR	(1ULL << 35)
 
 #define INTEL_FIXED_BITS_MASK		0xFULL
 #define INTEL_FIXED_BITS_STRIDE		4
@@ -223,6 +224,9 @@ union cpuid28_ecx {
 		unsigned int	lbr_timed_lbr:1;
 		/* Branch Type Field Supported */
 		unsigned int	lbr_br_type:1;
+		unsigned int	reserved:13;
+		/* Branch counters (Event Logging) Supported */
+		unsigned int	lbr_counters:4;
 	} split;
 	unsigned int	full;
};
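Given the bit-field layout above, lbr_counters sits in bits 19:16 of CPUID.(EAX=0x1C):ECX. The kernel treats it as a mask of the GP counters that support event logging (c2->idxmsk64 &= x86_pmu.lbr_counters; fls() of it gives the count). A user-space probe sketch, assuming only the compiler's standard cpuid helper:

/*
 * Probe CPUID.(EAX=0x1C):ECX[19:16] for the branch counters
 * capability. Assumes arch LBRs are enumerated at all; a real
 * probe would first check CPUID.0x7:EDX for arch LBR support.
 */
#include <cpuid.h>
#include <stdio.h>

int main(void)
{
        unsigned int eax, ebx, ecx, edx;

        if (!__get_cpuid_count(0x1c, 0, &eax, &ebx, &ecx, &edx))
                return 1;

        /* Mask of supported GP counters; 0xf means counters 0-3. */
        printf("lbr_counters mask: 0x%x\n", (ecx >> 16) & 0xf);
        return 0;
}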

include/uapi/linux/perf_event.h

Lines changed: 3 additions & 0 deletions
@@ -1437,6 +1437,9 @@ struct perf_branch_entry {
 		reserved:31;
 };
 
+/* Size of used info bits in struct perf_branch_entry */
+#define PERF_BRANCH_ENTRY_INFO_BITS_MAX	33
+
 union perf_sample_weight {
 	__u64	full;
 #if defined(__LITTLE_ENDIAN_BITFIELD)
