Skip to content

Commit 7e6df96

Browse files
aeglbp3tk0v
authored and committed
x86/resctrl: Find and enable usable telemetry events
Every event group has a private copy of the data of all telemetry event aggregators (aka "telemetry regions") tracking its feature type. Included may be regions that have the same feature type but track a different GUID from the event group's.

Traverse the event group's telemetry region data and mark all regions that are not usable by the event group as unusable by clearing those regions' MMIO addresses. A region is considered unusable if:
1) Its GUID does not match the GUID of the event group.
2) Its package ID is invalid.
3) The enumerated size of the MMIO region does not match the expected value from the XML description file.

Hereafter any telemetry region with an MMIO address is considered valid for the event group it is associated with.

Enable all the event group's events as long as there is at least one usable region from where data for its events can be read. Enabling of an event can fail if the same event has already been enabled as part of another event group. It should never happen that the same event is described by different GUIDs supported by the same system, so just WARN (via resctrl_enable_mon_event()) and skip the event.

Note that it is architecturally possible that some telemetry events are only supported by a subset of the packages in the system. It is not expected that systems will ever do this. If they do, the user will see event files in resctrl that always return "Unavailable".

Signed-off-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Reviewed-by: Reinette Chatre <reinette.chatre@intel.com>
Link: https://lore.kernel.org/20251217172121.12030-1-tony.luck@intel.com
1 parent 8ccb1f8 commit 7e6df96

3 files changed

Lines changed: 68 additions & 7 deletions

File tree

arch/x86/kernel/cpu/resctrl/intel_aet.c

Lines changed: 61 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,11 @@
1616
#include <linux/init.h>
1717
#include <linux/intel_pmt_features.h>
1818
#include <linux/intel_vsec.h>
19+
#include <linux/printk.h>
1920
#include <linux/resctrl.h>
2021
#include <linux/resctrl_types.h>
2122
#include <linux/stddef.h>
23+
#include <linux/topology.h>
2224
#include <linux/types.h>
2325

2426
#include "internal.h"
@@ -110,12 +112,69 @@ static struct event_group *known_event_groups[] = {
110112
_peg < &known_event_groups[ARRAY_SIZE(known_event_groups)]; \
111113
_peg++)
112114

113-
/* Stub for now */
114-
static bool enable_events(struct event_group *e, struct pmt_feature_group *p)
115+
static bool skip_telem_region(struct telemetry_region *tr, struct event_group *e)
115116
{
117+
if (tr->guid != e->guid)
118+
return true;
119+
if (tr->plat_info.package_id >= topology_max_packages()) {
120+
pr_warn("Bad package %u in guid 0x%x\n", tr->plat_info.package_id,
121+
tr->guid);
122+
return true;
123+
}
124+
if (tr->size != e->mmio_size) {
125+
pr_warn("MMIO space wrong size (%zu bytes) for guid 0x%x. Expected %zu bytes.\n",
126+
tr->size, e->guid, e->mmio_size);
127+
return true;
128+
}
129+
116130
return false;
117131
}
118132

133+
static bool group_has_usable_regions(struct event_group *e, struct pmt_feature_group *p)
134+
{
135+
bool usable_regions = false;
136+
137+
for (int i = 0; i < p->count; i++) {
138+
if (skip_telem_region(&p->regions[i], e)) {
139+
/*
140+
* Clear the address field of regions that did not pass the checks in
141+
* skip_telem_region() so they will not be used by intel_aet_read_event().
142+
* This is safe to do because intel_pmt_get_regions_by_feature() allocates
143+
* a new pmt_feature_group structure to return to each caller and only makes
144+
* use of the pmt_feature_group::kref field when intel_pmt_put_feature_group()
145+
* returns the structure.
146+
*/
147+
p->regions[i].addr = NULL;
148+
149+
continue;
150+
}
151+
usable_regions = true;
152+
}
153+
154+
return usable_regions;
155+
}
156+
157+
static bool enable_events(struct event_group *e, struct pmt_feature_group *p)
158+
{
159+
struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_PERF_PKG].r_resctrl;
160+
int skipped_events = 0;
161+
162+
if (!group_has_usable_regions(e, p))
163+
return false;
164+
165+
for (int j = 0; j < e->num_events; j++) {
166+
if (!resctrl_enable_mon_event(e->evts[j].id, true,
167+
e->evts[j].bin_bits, &e->evts[j]))
168+
skipped_events++;
169+
}
170+
if (e->num_events == skipped_events) {
171+
pr_info("No events enabled in %s %s:0x%x\n", r->name, e->pfname, e->guid);
172+
return false;
173+
}
174+
175+
return true;
176+
}
177+
119178
static enum pmt_feature_id lookup_pfid(const char *pfname)
120179
{
121180
if (!strcmp(pfname, "energy"))

fs/resctrl/monitor.c

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -997,25 +997,27 @@ struct mon_evt mon_event_all[QOS_NUM_EVENTS] = {
997997
MON_EVENT(PMT_EVENT_UOPS_RETIRED, "uops_retired", RDT_RESOURCE_PERF_PKG, false),
998998
};
999999

1000-
void resctrl_enable_mon_event(enum resctrl_event_id eventid, bool any_cpu,
1000+
/*
 * resctrl_enable_mon_event() - mark a monitoring event as enabled.
 * @eventid:     index into mon_event_all[]; must lie in
 *               [QOS_FIRST_EVENT, QOS_NUM_EVENTS).
 * @any_cpu:     stored in the event's any_cpu field.
 * @binary_bits: stored in the event's binary_bits field; must not exceed
 *               MAX_BINARY_BITS and must be zero unless the event is marked
 *               is_floating_point.
 * @arch_priv:   opaque pointer stored in the event's arch_priv field.
 *
 * The request is rejected when the arguments are out of range
 * (WARN_ON_ONCE), when the event is already enabled (pr_warn), or when
 * non-zero @binary_bits is given for an event that is not floating point
 * (pr_warn). On rejection mon_event_all[] is left unmodified.
 *
 * This is a diff rendering: the bare "return;" statements shown after a
 * "NNNN-" marker are the removed lines of the previous void version, and
 * the "return false;"/"return true;" statements after a "NNNN+" marker are
 * their replacements.
 *
 * Return: true if the event was enabled, false if the request was rejected.
 */
bool resctrl_enable_mon_event(enum resctrl_event_id eventid, bool any_cpu,
10011001
unsigned int binary_bits, void *arch_priv)
10021002
{
10031003
if (WARN_ON_ONCE(eventid < QOS_FIRST_EVENT || eventid >= QOS_NUM_EVENTS ||
10041004
binary_bits > MAX_BINARY_BITS))
1005-
return;
1005+
return false;
10061006
if (mon_event_all[eventid].enabled) {
10071007
pr_warn("Duplicate enable for event %d\n", eventid);
1008-
return;
1008+
return false;
10091009
}
10101010
if (binary_bits && !mon_event_all[eventid].is_floating_point) {
10111011
pr_warn("Event %d may not be floating point\n", eventid);
1012-
return;
1012+
return false;
10131013
}
10141014

10151015
mon_event_all[eventid].any_cpu = any_cpu;
10161016
mon_event_all[eventid].binary_bits = binary_bits;
10171017
mon_event_all[eventid].arch_priv = arch_priv;
10181018
mon_event_all[eventid].enabled = true;
1019+
1020+
return true;
10191021
}
10201022

10211023
bool resctrl_is_mon_event_enabled(enum resctrl_event_id eventid)

include/linux/resctrl.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -414,7 +414,7 @@ u32 resctrl_arch_get_num_closid(struct rdt_resource *r);
414414
u32 resctrl_arch_system_num_rmid_idx(void);
415415
int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid);
416416

417-
void resctrl_enable_mon_event(enum resctrl_event_id eventid, bool any_cpu,
417+
bool resctrl_enable_mon_event(enum resctrl_event_id eventid, bool any_cpu,
418418
unsigned int binary_bits, void *arch_priv);
419419

420420
bool resctrl_is_mon_event_enabled(enum resctrl_event_id eventid);

0 commit comments

Comments
 (0)