Skip to content

Commit a4d1811

Browse files
anakryiko authored and ingomolnar committed
perf/x86/amd: Support capturing LBR from software events
Upstream commit c22ac2a ("perf: Enable branch record for software events") added the ability to capture LBR (Last Branch Records) on Intel CPUs from inside a BPF program at pretty much any arbitrary point. This is an extremely useful capability that makes it possible to figure out otherwise hard-to-debug problems, because LBR is now available based on some application-defined conditions, not just hardware-supported events. 'retsnoop' is one such tool that takes great advantage of this functionality and has proved to be an extremely useful tool in practice: https://github.com/anakryiko/retsnoop Now, AMD Zen4 CPUs got support for similar LBR functionality, but the necessary wiring inside the kernel is not yet set up. This patch seeks to rectify this and follows a similar approach to the original patch for Intel CPUs. We implement an AMD-specific callback set to be called through the perf_snapshot_branch_stack static call. Previous preparatory patches ensured that amd_pmu_core_disable_all() and __amd_pmu_lbr_disable() will be completely inlined and will have no branches, so LBR snapshot contamination will be minimized. This was tested on an AMD Bergamo CPU and worked well when utilized from the aforementioned retsnoop tool. Signed-off-by: Andrii Nakryiko <andrii@kernel.org> Signed-off-by: Ingo Molnar <mingo@kernel.org> Reviewed-by: Sandipan Das <sandipan.das@amd.com> Link: https://lore.kernel.org/r/20240402022118.1046049-4-andrii@kernel.org
1 parent 1eddf18 commit a4d1811

1 file changed

Lines changed: 35 additions & 0 deletions

File tree

arch/x86/events/amd/core.c

Lines changed: 35 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -907,6 +907,37 @@ static int amd_pmu_handle_irq(struct pt_regs *regs)
907907
return amd_pmu_adjust_nmi_window(handled);
908908
}
909909

910+
/*
911+
* AMD-specific callback invoked through perf_snapshot_branch_stack static
912+
* call, defined in include/linux/perf_event.h. See its definition for API
913+
* details. It's up to the caller to provide enough space in *entries* to fit
914+
* all LBR records, otherwise returned result will be truncated to *cnt* entries.
*
* @entries: caller-provided buffer that receives the copied LBR records.
* @cnt: capacity of @entries, counted in struct perf_branch_entry records.
*
* Returns the number of records copied into @entries, which is
* min(@cnt, x86_pmu.lbr_nr).
915+
*/
916+
static int amd_pmu_v2_snapshot_branch_stack(struct perf_branch_entry *entries, unsigned int cnt)
917+
{
918+
struct cpu_hw_events *cpuc;
919+
unsigned long flags;
920+
921+
/*
922+
* The sequence of steps to freeze LBR should be completely inlined
923+
* and contain no branches to minimize contamination of LBR snapshot.
* Keep these three statements first and in this order; anything that
* may branch (per-CPU lookup, read-out, copy) is done only afterwards.
924+
*/
925+
local_irq_save(flags);
926+
amd_pmu_core_disable_all();
927+
__amd_pmu_lbr_disable();
928+
929+
/* Interrupts are off from here until local_irq_restore() below. */
cpuc = this_cpu_ptr(&cpu_hw_events);
930+
931+
/* NOTE(review): presumably refreshes cpuc->lbr_entries, which is copied out below - confirm against its definition */
amd_pmu_lbr_read();
932+
/* Never copy more than x86_pmu.lbr_nr records, even if the caller's buffer is larger. */
cnt = min(cnt, x86_pmu.lbr_nr);
933+
memcpy(entries, cpuc->lbr_entries, sizeof(struct perf_branch_entry) * cnt);
934+
935+
/* NOTE(review): presumably undoes the two disable calls above - confirm against its definition */
amd_pmu_v2_enable_all(0);
936+
local_irq_restore(flags);
937+
938+
/* Possibly clamped count, i.e. the number of records actually written to entries. */
return cnt;
939+
}
940+
910941
static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
911942
{
912943
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
@@ -1443,6 +1474,10 @@ static int __init amd_core_pmu_init(void)
14431474
static_call_update(amd_pmu_branch_reset, amd_pmu_lbr_reset);
14441475
static_call_update(amd_pmu_branch_add, amd_pmu_lbr_add);
14451476
static_call_update(amd_pmu_branch_del, amd_pmu_lbr_del);
1477+
1478+
/* Only support branch_stack snapshot on perfmon v2 */
1479+
if (x86_pmu.handle_irq == amd_pmu_v2_handle_irq)
1480+
static_call_update(perf_snapshot_branch_stack, amd_pmu_v2_snapshot_branch_stack);
14461481
} else if (!amd_brs_init()) {
14471482
/*
14481483
* BRS requires special event constraints and flushing on ctxsw.

0 commit comments

Comments
 (0)