Skip to content

Commit 8a8ca83

Browse files
committed
Merge tag 'perf-core-2020-12-14' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf updates from Thomas Gleixner: "Core: - Better handling of page table leaves on architectures which have non-pagetable aligned huge/large pages. For such architectures a leaf can actually be part of a larger entry. - Prevent a deadlock vs exec_update_mutex Architectures: - The related updates for page size calculation of leaf entries - The usual churn to support new CPUs - Small fixes and improvements all over the place" * tag 'perf-core-2020-12-14' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (24 commits) perf/x86/intel: Add Tremont Topdown support uprobes/x86: Fix fall-through warnings for Clang perf/x86: Fix fall-through warnings for Clang kprobes/x86: Fix fall-through warnings for Clang perf/x86/intel/lbr: Fix the return type of get_lbr_cycles() perf/x86/intel: Fix rtm_abort_event encoding on Ice Lake x86/kprobes: Restore BTF if the single-stepping is cancelled perf: Break deadlock involving exec_update_mutex sparc64/mm: Implement pXX_leaf_size() support powerpc/8xx: Implement pXX_leaf_size() support arm64/mm: Implement pXX_leaf_size() support perf/core: Fix arch_perf_get_page_size() mm: Introduce pXX_leaf_size() mm/gup: Provide gup_get_pte() more generic perf/x86/intel: Add event constraint for CYCLE_ACTIVITY.STALLS_MEM_ANY perf/x86/intel/uncore: Add Rocket Lake support perf/x86/msr: Add Rocket Lake CPU support perf/x86/cstate: Add Rocket Lake CPU support perf/x86/intel: Add Rocket Lake CPU support perf,mm: Handle non-page-table-aligned hugetlbfs ...
2 parents e857b6f + c220804 commit 8a8ca83

21 files changed

Lines changed: 349 additions & 108 deletions

File tree

arch/arm64/include/asm/pgtable.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -415,6 +415,7 @@ static inline int pmd_trans_huge(pmd_t pmd)
415415
#define pmd_dirty(pmd) pte_dirty(pmd_pte(pmd))
416416
#define pmd_young(pmd) pte_young(pmd_pte(pmd))
417417
#define pmd_valid(pmd) pte_valid(pmd_pte(pmd))
418+
#define pmd_cont(pmd) pte_cont(pmd_pte(pmd))
418419
#define pmd_wrprotect(pmd) pte_pmd(pte_wrprotect(pmd_pte(pmd)))
419420
#define pmd_mkold(pmd) pte_pmd(pte_mkold(pmd_pte(pmd)))
420421
#define pmd_mkwrite(pmd) pte_pmd(pte_mkwrite(pmd_pte(pmd)))
@@ -511,6 +512,9 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
511512
PMD_TYPE_SECT)
512513
#define pmd_leaf(pmd) pmd_sect(pmd)
513514

515+
#define pmd_leaf_size(pmd) (pmd_cont(pmd) ? CONT_PMD_SIZE : PMD_SIZE)
516+
#define pte_leaf_size(pte) (pte_cont(pte) ? CONT_PTE_SIZE : PAGE_SIZE)
517+
514518
#if defined(CONFIG_ARM64_64K_PAGES) || CONFIG_PGTABLE_LEVELS < 3
515519
static inline bool pud_sect(pud_t pud) { return false; }
516520
static inline bool pud_table(pud_t pud) { return true; }

arch/powerpc/include/asm/nohash/32/pte-8xx.h

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,29 @@ static inline pte_t pte_mkhuge(pte_t pte)
135135
}
136136

137137
#define pte_mkhuge pte_mkhuge
138+
139+
static inline unsigned long pgd_leaf_size(pgd_t pgd)
140+
{
141+
if (pgd_val(pgd) & _PMD_PAGE_8M)
142+
return SZ_8M;
143+
return SZ_4M;
144+
}
145+
146+
#define pgd_leaf_size pgd_leaf_size
147+
148+
static inline unsigned long pte_leaf_size(pte_t pte)
149+
{
150+
pte_basic_t val = pte_val(pte);
151+
152+
if (val & _PAGE_HUGE)
153+
return SZ_512K;
154+
if (val & _PAGE_SPS)
155+
return SZ_16K;
156+
return SZ_4K;
157+
}
158+
159+
#define pte_leaf_size pte_leaf_size
160+
138161
#endif
139162

140163
#endif /* __KERNEL__ */

arch/powerpc/perf/core-book3s.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2074,6 +2074,9 @@ static struct pmu power_pmu = {
20742074
.sched_task = power_pmu_sched_task,
20752075
};
20762076

2077+
#define PERF_SAMPLE_ADDR_TYPE (PERF_SAMPLE_ADDR | \
2078+
PERF_SAMPLE_PHYS_ADDR | \
2079+
PERF_SAMPLE_DATA_PAGE_SIZE)
20772080
/*
20782081
* A counter has overflowed; update its count and record
20792082
* things if requested. Note that interrupts are hard-disabled
@@ -2129,8 +2132,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
21292132

21302133
perf_sample_data_init(&data, ~0ULL, event->hw.last_period);
21312134

2132-
if (event->attr.sample_type &
2133-
(PERF_SAMPLE_ADDR | PERF_SAMPLE_PHYS_ADDR))
2135+
if (event->attr.sample_type & PERF_SAMPLE_ADDR_TYPE)
21342136
perf_get_data_addr(event, regs, &data.addr);
21352137

21362138
if (event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK) {

arch/sparc/include/asm/pgtable_64.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1121,6 +1121,19 @@ extern unsigned long cmdline_memory_size;
11211121

11221122
asmlinkage void do_sparc64_fault(struct pt_regs *regs);
11231123

1124+
#ifdef CONFIG_HUGETLB_PAGE
1125+
1126+
#define pud_leaf_size pud_leaf_size
1127+
extern unsigned long pud_leaf_size(pud_t pud);
1128+
1129+
#define pmd_leaf_size pmd_leaf_size
1130+
extern unsigned long pmd_leaf_size(pmd_t pmd);
1131+
1132+
#define pte_leaf_size pte_leaf_size
1133+
extern unsigned long pte_leaf_size(pte_t pte);
1134+
1135+
#endif /* CONFIG_HUGETLB_PAGE */
1136+
11241137
#endif /* !(__ASSEMBLY__) */
11251138

11261139
#endif /* !(_SPARC64_PGTABLE_H) */

arch/sparc/mm/hugetlbpage.c

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -247,14 +247,17 @@ static unsigned int sun4u_huge_tte_to_shift(pte_t entry)
247247
return shift;
248248
}
249249

250-
static unsigned int huge_tte_to_shift(pte_t entry)
250+
static unsigned long tte_to_shift(pte_t entry)
251251
{
252-
unsigned long shift;
253-
254252
if (tlb_type == hypervisor)
255-
shift = sun4v_huge_tte_to_shift(entry);
256-
else
257-
shift = sun4u_huge_tte_to_shift(entry);
253+
return sun4v_huge_tte_to_shift(entry);
254+
255+
return sun4u_huge_tte_to_shift(entry);
256+
}
257+
258+
static unsigned int huge_tte_to_shift(pte_t entry)
259+
{
260+
unsigned long shift = tte_to_shift(entry);
258261

259262
if (shift == PAGE_SHIFT)
260263
WARN_ONCE(1, "tto_to_shift: invalid hugepage tte=0x%lx\n",
@@ -272,6 +275,10 @@ static unsigned long huge_tte_to_size(pte_t pte)
272275
return size;
273276
}
274277

278+
unsigned long pud_leaf_size(pud_t pud) { return 1UL << tte_to_shift(*(pte_t *)&pud); }
279+
unsigned long pmd_leaf_size(pmd_t pmd) { return 1UL << tte_to_shift(*(pte_t *)&pmd); }
280+
unsigned long pte_leaf_size(pte_t pte) { return 1UL << tte_to_shift(pte); }
281+
275282
pte_t *huge_pte_alloc(struct mm_struct *mm,
276283
unsigned long addr, unsigned long sz)
277284
{

arch/x86/events/core.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1174,7 +1174,7 @@ static inline void x86_assign_hw_event(struct perf_event *event,
11741174
case INTEL_PMC_IDX_METRIC_BASE ... INTEL_PMC_IDX_METRIC_END:
11751175
/* All the metric events are mapped onto the fixed counter 3. */
11761176
idx = INTEL_PMC_IDX_FIXED_SLOTS;
1177-
/* fall through */
1177+
fallthrough;
11781178
case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS-1:
11791179
hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
11801180
hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0 +

arch/x86/events/intel/core.c

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -257,7 +257,8 @@ static struct event_constraint intel_icl_event_constraints[] = {
257257
INTEL_EVENT_CONSTRAINT_RANGE(0x48, 0x54, 0xf),
258258
INTEL_EVENT_CONSTRAINT_RANGE(0x60, 0x8b, 0xf),
259259
INTEL_UEVENT_CONSTRAINT(0x04a3, 0xff), /* CYCLE_ACTIVITY.STALLS_TOTAL */
260-
INTEL_UEVENT_CONSTRAINT(0x10a3, 0xff), /* CYCLE_ACTIVITY.STALLS_MEM_ANY */
260+
INTEL_UEVENT_CONSTRAINT(0x10a3, 0xff), /* CYCLE_ACTIVITY.CYCLES_MEM_ANY */
261+
INTEL_UEVENT_CONSTRAINT(0x14a3, 0xff), /* CYCLE_ACTIVITY.STALLS_MEM_ANY */
261262
INTEL_EVENT_CONSTRAINT(0xa3, 0xf), /* CYCLE_ACTIVITY.* */
262263
INTEL_EVENT_CONSTRAINT_RANGE(0xa8, 0xb0, 0xf),
263264
INTEL_EVENT_CONSTRAINT_RANGE(0xb7, 0xbd, 0xf),
@@ -1900,6 +1901,19 @@ static __initconst const u64 tnt_hw_cache_extra_regs
19001901
},
19011902
};
19021903

1904+
EVENT_ATTR_STR(topdown-fe-bound, td_fe_bound_tnt, "event=0x71,umask=0x0");
1905+
EVENT_ATTR_STR(topdown-retiring, td_retiring_tnt, "event=0xc2,umask=0x0");
1906+
EVENT_ATTR_STR(topdown-bad-spec, td_bad_spec_tnt, "event=0x73,umask=0x6");
1907+
EVENT_ATTR_STR(topdown-be-bound, td_be_bound_tnt, "event=0x74,umask=0x0");
1908+
1909+
static struct attribute *tnt_events_attrs[] = {
1910+
EVENT_PTR(td_fe_bound_tnt),
1911+
EVENT_PTR(td_retiring_tnt),
1912+
EVENT_PTR(td_bad_spec_tnt),
1913+
EVENT_PTR(td_be_bound_tnt),
1914+
NULL,
1915+
};
1916+
19031917
static struct extra_reg intel_tnt_extra_regs[] __read_mostly = {
19041918
/* must define OFFCORE_RSP_X first, see intel_fixup_er() */
19051919
INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x800ff0ffffff9fffull, RSP_0),
@@ -5173,6 +5187,7 @@ __init int intel_pmu_init(void)
51735187
x86_pmu.lbr_pt_coexist = true;
51745188
x86_pmu.flags |= PMU_FL_HAS_RSP_1;
51755189
x86_pmu.get_event_constraints = tnt_get_event_constraints;
5190+
td_attr = tnt_events_attrs;
51765191
extra_attr = slm_format_attr;
51775192
pr_cont("Tremont events, ");
51785193
name = "Tremont";
@@ -5442,6 +5457,7 @@ __init int intel_pmu_init(void)
54425457
case INTEL_FAM6_ICELAKE:
54435458
case INTEL_FAM6_TIGERLAKE_L:
54445459
case INTEL_FAM6_TIGERLAKE:
5460+
case INTEL_FAM6_ROCKETLAKE:
54455461
x86_pmu.late_ack = true;
54465462
memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids));
54475463
memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
@@ -5464,7 +5480,7 @@ __init int intel_pmu_init(void)
54645480
mem_attr = icl_events_attrs;
54655481
td_attr = icl_td_events_attrs;
54665482
tsx_attr = icl_tsx_events_attrs;
5467-
x86_pmu.rtm_abort_event = X86_CONFIG(.event=0xca, .umask=0x02);
5483+
x86_pmu.rtm_abort_event = X86_CONFIG(.event=0xc9, .umask=0x04);
54685484
x86_pmu.lbr_pt_coexist = true;
54695485
intel_pmu_pebs_data_source_skl(pmem);
54705486
x86_pmu.update_topdown_event = icl_update_topdown_event;

arch/x86/events/intel/cstate.c

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -51,46 +51,46 @@
5151
* perf code: 0x02
5252
* Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,
5353
* SKL,KNL,GLM,CNL,KBL,CML,ICL,TGL,
54-
* TNT
54+
* TNT,RKL
5555
* Scope: Core
5656
* MSR_CORE_C7_RESIDENCY: CORE C7 Residency Counter
5757
* perf code: 0x03
5858
* Available model: SNB,IVB,HSW,BDW,SKL,CNL,KBL,CML,
59-
* ICL,TGL
59+
* ICL,TGL,RKL
6060
* Scope: Core
6161
* MSR_PKG_C2_RESIDENCY: Package C2 Residency Counter.
6262
* perf code: 0x00
6363
* Available model: SNB,IVB,HSW,BDW,SKL,KNL,GLM,CNL,
64-
* KBL,CML,ICL,TGL,TNT
64+
* KBL,CML,ICL,TGL,TNT,RKL
6565
* Scope: Package (physical package)
6666
* MSR_PKG_C3_RESIDENCY: Package C3 Residency Counter.
6767
* perf code: 0x01
6868
* Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,KNL,
69-
* GLM,CNL,KBL,CML,ICL,TGL,TNT
69+
* GLM,CNL,KBL,CML,ICL,TGL,TNT,RKL
7070
* Scope: Package (physical package)
7171
* MSR_PKG_C6_RESIDENCY: Package C6 Residency Counter.
7272
* perf code: 0x02
7373
* Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,
7474
* SKL,KNL,GLM,CNL,KBL,CML,ICL,TGL,
75-
* TNT
75+
* TNT,RKL
7676
* Scope: Package (physical package)
7777
* MSR_PKG_C7_RESIDENCY: Package C7 Residency Counter.
7878
* perf code: 0x03
7979
* Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,CNL,
80-
* KBL,CML,ICL,TGL
80+
* KBL,CML,ICL,TGL,RKL
8181
* Scope: Package (physical package)
8282
* MSR_PKG_C8_RESIDENCY: Package C8 Residency Counter.
8383
* perf code: 0x04
84-
* Available model: HSW ULT,KBL,CNL,CML,ICL,TGL
84+
* Available model: HSW ULT,KBL,CNL,CML,ICL,TGL,RKL
8585
* Scope: Package (physical package)
8686
* MSR_PKG_C9_RESIDENCY: Package C9 Residency Counter.
8787
* perf code: 0x05
88-
* Available model: HSW ULT,KBL,CNL,CML,ICL,TGL
88+
* Available model: HSW ULT,KBL,CNL,CML,ICL,TGL,RKL
8989
* Scope: Package (physical package)
9090
* MSR_PKG_C10_RESIDENCY: Package C10 Residency Counter.
9191
* perf code: 0x06
9292
* Available model: HSW ULT,KBL,GLM,CNL,CML,ICL,TGL,
93-
* TNT
93+
* TNT,RKL
9494
* Scope: Package (physical package)
9595
*
9696
*/
@@ -649,6 +649,7 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = {
649649
X86_MATCH_INTEL_FAM6_MODEL(ICELAKE, &icl_cstates),
650650
X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L, &icl_cstates),
651651
X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE, &icl_cstates),
652+
X86_MATCH_INTEL_FAM6_MODEL(ROCKETLAKE, &icl_cstates),
652653
{ },
653654
};
654655
MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match);

arch/x86/events/intel/ds.c

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -961,7 +961,8 @@ static void adaptive_pebs_record_size_update(void)
961961

962962
#define PERF_PEBS_MEMINFO_TYPE (PERF_SAMPLE_ADDR | PERF_SAMPLE_DATA_SRC | \
963963
PERF_SAMPLE_PHYS_ADDR | PERF_SAMPLE_WEIGHT | \
964-
PERF_SAMPLE_TRANSACTION)
964+
PERF_SAMPLE_TRANSACTION | \
965+
PERF_SAMPLE_DATA_PAGE_SIZE)
965966

966967
static u64 pebs_update_adaptive_cfg(struct perf_event *event)
967968
{
@@ -1337,6 +1338,10 @@ static u64 get_data_src(struct perf_event *event, u64 aux)
13371338
return val;
13381339
}
13391340

1341+
#define PERF_SAMPLE_ADDR_TYPE (PERF_SAMPLE_ADDR | \
1342+
PERF_SAMPLE_PHYS_ADDR | \
1343+
PERF_SAMPLE_DATA_PAGE_SIZE)
1344+
13401345
static void setup_pebs_fixed_sample_data(struct perf_event *event,
13411346
struct pt_regs *iregs, void *__pebs,
13421347
struct perf_sample_data *data,
@@ -1451,7 +1456,7 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event,
14511456
}
14521457

14531458

1454-
if ((sample_type & (PERF_SAMPLE_ADDR | PERF_SAMPLE_PHYS_ADDR)) &&
1459+
if ((sample_type & PERF_SAMPLE_ADDR_TYPE) &&
14551460
x86_pmu.intel_cap.pebs_format >= 1)
14561461
data->addr = pebs->dla;
14571462

@@ -1579,7 +1584,7 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
15791584
if (sample_type & PERF_SAMPLE_DATA_SRC)
15801585
data->data_src.val = get_data_src(event, meminfo->aux);
15811586

1582-
if (sample_type & (PERF_SAMPLE_ADDR | PERF_SAMPLE_PHYS_ADDR))
1587+
if (sample_type & PERF_SAMPLE_ADDR_TYPE)
15831588
data->addr = meminfo->address;
15841589

15851590
if (sample_type & PERF_SAMPLE_TRANSACTION)

arch/x86/events/intel/lbr.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -919,7 +919,7 @@ static __always_inline bool get_lbr_predicted(u64 info)
919919
return !(info & LBR_INFO_MISPRED);
920920
}
921921

922-
static __always_inline bool get_lbr_cycles(u64 info)
922+
static __always_inline u16 get_lbr_cycles(u64 info)
923923
{
924924
if (static_cpu_has(X86_FEATURE_ARCH_LBR) &&
925925
!(x86_pmu.lbr_timed_lbr && info & LBR_INFO_CYC_CNT_VALID))

0 commit comments

Comments
 (0)