|
| 1 | +// SPDX-License-Identifier: GPL-2.0-or-later |
| 2 | +/* |
| 3 | + * Perf interface to expose Dispatch Trace Log counters. |
| 4 | + * |
| 5 | + * Copyright (C) 2024 Kajol Jain, IBM Corporation |
| 6 | + */ |
| 7 | + |
| 8 | +#ifdef CONFIG_PPC_SPLPAR |
| 9 | +#define pr_fmt(fmt) "vpa_dtl: " fmt |
| 10 | + |
| 11 | +#include <asm/dtl.h> |
| 12 | +#include <linux/perf_event.h> |
| 13 | +#include <asm/plpar_wrappers.h> |
| 14 | + |
| 15 | +#define EVENT(_name, _code) enum{_name = _code} |
| 16 | + |
| 17 | +/* |
| 18 | + * Based on Power Architecture Platform Reference(PAPR) documentation, |
| 19 | + * Table 14.14. Per Virtual Processor Area, below Dispatch Trace Log(DTL) |
| 20 | + * Enable Mask used to get corresponding virtual processor dispatch |
| 21 | + * to preempt traces: |
| 22 | + * DTL_CEDE(0x1): Trace voluntary (OS initiated) virtual |
| 23 | + * processor waits |
| 24 | + * DTL_PREEMPT(0x2): Trace time slice preempts |
| 25 | + * DTL_FAULT(0x4): Trace virtual partition memory page |
 *			     faults.
| 27 | + * DTL_ALL(0x7): Trace all (DTL_CEDE | DTL_PREEMPT | DTL_FAULT) |
| 28 | + * |
| 29 | + * Event codes based on Dispatch Trace Log Enable Mask. |
| 30 | + */ |
/* Anonymous-enum event codes; values match the PAPR DTL enable-mask bits */
EVENT(DTL_CEDE, 0x1);
EVENT(DTL_PREEMPT, 0x2);
EVENT(DTL_FAULT, 0x4);
EVENT(DTL_ALL, 0x7);

/* Sysfs event entries (events/dtl_cede, ...) built from the codes above */
GENERIC_EVENT_ATTR(dtl_cede, DTL_CEDE);
GENERIC_EVENT_ATTR(dtl_preempt, DTL_PREEMPT);
GENERIC_EVENT_ATTR(dtl_fault, DTL_FAULT);
GENERIC_EVENT_ATTR(dtl_all, DTL_ALL);

/* "event" format: the event code occupies config bits 0-7 */
PMU_FORMAT_ATTR(event, "config:0-7");
| 42 | + |
/* NULL-terminated table of the event attributes exported via sysfs */
static struct attribute *events_attr[] = {
	GENERIC_EVENT_PTR(DTL_CEDE),
	GENERIC_EVENT_PTR(DTL_PREEMPT),
	GENERIC_EVENT_PTR(DTL_FAULT),
	GENERIC_EVENT_PTR(DTL_ALL),
	NULL
};
| 50 | + |
| 51 | +static struct attribute_group event_group = { |
| 52 | + .name = "events", |
| 53 | + .attrs = events_attr, |
| 54 | +}; |
| 55 | + |
/* NULL-terminated table backing the "format" sysfs directory */
static struct attribute *format_attrs[] = {
	&format_attr_event.attr,
	NULL,
};

static const struct attribute_group format_group = {
	.name = "format",
	.attrs = format_attrs,
};

/* All attribute groups handed to the pmu core at registration time */
static const struct attribute_group *attr_groups[] = {
	&format_group,
	&event_group,
	NULL,
};
| 71 | + |
/*
 * Per-cpu dispatch trace log state.
 * @buf:      DTL buffer registered with the hypervisor for this cpu
 * @last_idx: last DTL entry index processed (reset to 0 on event add)
 */
struct vpa_dtl {
	struct dtl_entry *buf;
	u64 last_idx;
};

static DEFINE_PER_CPU(struct vpa_dtl, vpa_dtl_cpu);
| 78 | + |
/* variable to capture reference count for the active dtl threads */
static int dtl_global_refc;
/*
 * Serialises dtl_global_refc and the dtl_access_lock hand-off.
 * DEFINE_SPINLOCK is the idiomatic equivalent of the open-coded
 * __SPIN_LOCK_UNLOCKED initializer.
 */
static DEFINE_SPINLOCK(dtl_global_lock);
| 82 | + |
/*
 * Parse the dispatch trace log buffer and emit its entries as perf
 * sample records. Currently a stub: no samples are produced yet.
 */
static void vpa_dtl_dump_sample_data(struct perf_event *event)
{
	/* Intentionally empty for now. */
}
| 91 | + |
| 92 | +/* |
| 93 | + * The VPA Dispatch Trace log counters do not interrupt on overflow. |
| 94 | + * Therefore, the kernel needs to poll the counters to avoid missing |
| 95 | + * an overflow using hrtimer. The timer interval is based on sample_period |
| 96 | + * count provided by user, and minimum interval is 1 millisecond. |
| 97 | + */ |
| 98 | +static enum hrtimer_restart vpa_dtl_hrtimer_handle(struct hrtimer *hrtimer) |
| 99 | +{ |
| 100 | + struct perf_event *event; |
| 101 | + u64 period; |
| 102 | + |
| 103 | + event = container_of(hrtimer, struct perf_event, hw.hrtimer); |
| 104 | + |
| 105 | + if (event->state != PERF_EVENT_STATE_ACTIVE) |
| 106 | + return HRTIMER_NORESTART; |
| 107 | + |
| 108 | + vpa_dtl_dump_sample_data(event); |
| 109 | + period = max_t(u64, NSEC_PER_MSEC, event->hw.sample_period); |
| 110 | + hrtimer_forward_now(hrtimer, ns_to_ktime(period)); |
| 111 | + |
| 112 | + return HRTIMER_RESTART; |
| 113 | +} |
| 114 | + |
| 115 | +static void vpa_dtl_start_hrtimer(struct perf_event *event) |
| 116 | +{ |
| 117 | + u64 period; |
| 118 | + struct hw_perf_event *hwc = &event->hw; |
| 119 | + |
| 120 | + period = max_t(u64, NSEC_PER_MSEC, hwc->sample_period); |
| 121 | + hrtimer_start(&hwc->hrtimer, ns_to_ktime(period), HRTIMER_MODE_REL_PINNED); |
| 122 | +} |
| 123 | + |
| 124 | +static void vpa_dtl_stop_hrtimer(struct perf_event *event) |
| 125 | +{ |
| 126 | + struct hw_perf_event *hwc = &event->hw; |
| 127 | + |
| 128 | + hrtimer_cancel(&hwc->hrtimer); |
| 129 | +} |
| 130 | + |
| 131 | +static void vpa_dtl_reset_global_refc(struct perf_event *event) |
| 132 | +{ |
| 133 | + spin_lock(&dtl_global_lock); |
| 134 | + dtl_global_refc--; |
| 135 | + if (dtl_global_refc <= 0) { |
| 136 | + dtl_global_refc = 0; |
| 137 | + up_write(&dtl_access_lock); |
| 138 | + } |
| 139 | + spin_unlock(&dtl_global_lock); |
| 140 | +} |
| 141 | + |
| 142 | +static int vpa_dtl_mem_alloc(int cpu) |
| 143 | +{ |
| 144 | + struct vpa_dtl *dtl = &per_cpu(vpa_dtl_cpu, cpu); |
| 145 | + struct dtl_entry *buf = NULL; |
| 146 | + |
| 147 | + /* Check for dispatch trace log buffer cache */ |
| 148 | + if (!dtl_cache) |
| 149 | + return -ENOMEM; |
| 150 | + |
| 151 | + buf = kmem_cache_alloc_node(dtl_cache, GFP_KERNEL | GFP_ATOMIC, cpu_to_node(cpu)); |
| 152 | + if (!buf) { |
| 153 | + pr_warn("buffer allocation failed for cpu %d\n", cpu); |
| 154 | + return -ENOMEM; |
| 155 | + } |
| 156 | + dtl->buf = buf; |
| 157 | + return 0; |
| 158 | +} |
| 159 | + |
| 160 | +static int vpa_dtl_event_init(struct perf_event *event) |
| 161 | +{ |
| 162 | + struct hw_perf_event *hwc = &event->hw; |
| 163 | + |
| 164 | + /* test the event attr type for PMU enumeration */ |
| 165 | + if (event->attr.type != event->pmu->type) |
| 166 | + return -ENOENT; |
| 167 | + |
| 168 | + if (!perfmon_capable()) |
| 169 | + return -EACCES; |
| 170 | + |
| 171 | + /* Return if this is a counting event */ |
| 172 | + if (!is_sampling_event(event)) |
| 173 | + return -EOPNOTSUPP; |
| 174 | + |
| 175 | + /* no branch sampling */ |
| 176 | + if (has_branch_stack(event)) |
| 177 | + return -EOPNOTSUPP; |
| 178 | + |
| 179 | + /* Invalid eventcode */ |
| 180 | + switch (event->attr.config) { |
| 181 | + case DTL_LOG_CEDE: |
| 182 | + case DTL_LOG_PREEMPT: |
| 183 | + case DTL_LOG_FAULT: |
| 184 | + case DTL_LOG_ALL: |
| 185 | + break; |
| 186 | + default: |
| 187 | + return -EINVAL; |
| 188 | + } |
| 189 | + |
| 190 | + spin_lock(&dtl_global_lock); |
| 191 | + |
| 192 | + /* |
| 193 | + * To ensure there are no other conflicting dtl users |
| 194 | + * (example: /proc/powerpc/vcpudispatch_stats or debugfs dtl), |
| 195 | + * below code try to take the dtl_access_lock. |
| 196 | + * The dtl_access_lock is a rwlock defined in dtl.h, which is used |
| 197 | + * to unsure there is no conflicting dtl users. |
| 198 | + * Based on below code, vpa_dtl pmu tries to take write access lock |
| 199 | + * and also checks for dtl_global_refc, to make sure that the |
| 200 | + * dtl_access_lock is taken by vpa_dtl pmu interface. |
| 201 | + */ |
| 202 | + if (dtl_global_refc == 0 && !down_write_trylock(&dtl_access_lock)) { |
| 203 | + spin_unlock(&dtl_global_lock); |
| 204 | + return -EBUSY; |
| 205 | + } |
| 206 | + |
| 207 | + /* Allocate dtl buffer memory */ |
| 208 | + if (vpa_dtl_mem_alloc(event->cpu)) { |
| 209 | + spin_unlock(&dtl_global_lock); |
| 210 | + return -ENOMEM; |
| 211 | + } |
| 212 | + |
| 213 | + /* |
| 214 | + * Increment the number of active vpa_dtl pmu threads. The |
| 215 | + * dtl_global_refc is used to keep count of cpu threads that |
| 216 | + * currently capturing dtl data using vpa_dtl pmu interface. |
| 217 | + */ |
| 218 | + dtl_global_refc++; |
| 219 | + |
| 220 | + spin_unlock(&dtl_global_lock); |
| 221 | + |
| 222 | + hrtimer_setup(&hwc->hrtimer, vpa_dtl_hrtimer_handle, CLOCK_MONOTONIC, HRTIMER_MODE_REL); |
| 223 | + |
| 224 | + /* |
| 225 | + * Since hrtimers have a fixed rate, we can do a static freq->period |
| 226 | + * mapping and avoid the whole period adjust feedback stuff. |
| 227 | + */ |
| 228 | + if (event->attr.freq) { |
| 229 | + long freq = event->attr.sample_freq; |
| 230 | + |
| 231 | + event->attr.sample_period = NSEC_PER_SEC / freq; |
| 232 | + hwc->sample_period = event->attr.sample_period; |
| 233 | + local64_set(&hwc->period_left, hwc->sample_period); |
| 234 | + hwc->last_period = hwc->sample_period; |
| 235 | + event->attr.freq = 0; |
| 236 | + } |
| 237 | + |
| 238 | + event->destroy = vpa_dtl_reset_global_refc; |
| 239 | + return 0; |
| 240 | +} |
| 241 | + |
/*
 * Schedule the event onto its cpu: register the per-cpu DTL buffer with
 * the hypervisor, enable logging in the lppaca and start the polling
 * timer. Returns 0 on success or the register_dtl() error code.
 */
static int vpa_dtl_event_add(struct perf_event *event, int flags)
{
	int ret, hwcpu;
	unsigned long addr;
	struct vpa_dtl *dtl = &per_cpu(vpa_dtl_cpu, event->cpu);

	/*
	 * Register our dtl buffer with the hypervisor. The
	 * HV expects the buffer size to be passed in the second
	 * word of the buffer. Refer section '14.11.3.2. H_REGISTER_VPA'
	 * from PAPR for more information.
	 * NOTE(review): the store is big-endian (cpu_to_be32) through a
	 * plain u32 pointer — presumably fine at runtime but likely a
	 * sparse endianness warning; consider __be32.
	 */
	((u32 *)dtl->buf)[1] = cpu_to_be32(DISPATCH_LOG_BYTES);
	dtl->last_idx = 0;

	/* H_REGISTER_VPA takes the hardware cpu id and a real address */
	hwcpu = get_hard_smp_processor_id(event->cpu);
	addr = __pa(dtl->buf);

	ret = register_dtl(hwcpu, addr);
	if (ret) {
		pr_warn("DTL registration for cpu %d (hw %d) failed with %d\n",
			event->cpu, hwcpu, ret);
		return ret;
	}

	/* set our initial buffer indices */
	lppaca_of(event->cpu).dtl_idx = 0;

	/*
	 * Ensure that our updates to the lppaca fields have
	 * occurred before we actually enable the logging
	 */
	smp_wmb();

	/* enable event logging: attr.config is the DTL enable mask */
	lppaca_of(event->cpu).dtl_enable_mask = event->attr.config;

	/* Begin polling; the DTL has no overflow interrupt */
	vpa_dtl_start_hrtimer(event);

	return 0;
}
| 283 | + |
/*
 * Unschedule the event: stop the polling timer, deregister the buffer
 * from the hypervisor, free it and disable logging in the lppaca.
 * NOTE(review): dtl_enable_mask is cleared only after the buffer is
 * unregistered and freed — presumably safe because unregister_dtl()
 * already stops the hypervisor writing; confirm ordering against PAPR.
 */
static void vpa_dtl_event_del(struct perf_event *event, int flags)
{
	int hwcpu = get_hard_smp_processor_id(event->cpu);
	struct vpa_dtl *dtl = &per_cpu(vpa_dtl_cpu, event->cpu);

	vpa_dtl_stop_hrtimer(event);
	unregister_dtl(hwcpu);
	kmem_cache_free(dtl_cache, dtl->buf);
	dtl->buf = NULL;
	lppaca_of(event->cpu).dtl_enable_mask = 0x0;
}
| 295 | + |
/*
 * Intentionally empty: sample data is produced from the hrtimer path
 * via vpa_dtl_dump_sample_data(), not from the pmu ->read() callback,
 * which the perf core still requires to exist.
 */
static void vpa_dtl_event_read(struct perf_event *event)
{
}
| 304 | + |
/*
 * vpa_dtl PMU descriptor: cpu-scoped only (no task context).
 * PERF_PMU_CAP_NO_EXCLUDE: exclusion bits in attr are not supported;
 * PERF_PMU_CAP_EXCLUSIVE: only one active user of this PMU at a time.
 */
static struct pmu vpa_dtl_pmu = {
	.task_ctx_nr = perf_invalid_context,

	.name = "vpa_dtl",
	.attr_groups = attr_groups,
	.event_init = vpa_dtl_event_init,
	.add = vpa_dtl_event_add,
	.del = vpa_dtl_event_del,
	.read = vpa_dtl_event_read,
	.capabilities = PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_EXCLUSIVE,
};
| 316 | + |
| 317 | +static int vpa_dtl_init(void) |
| 318 | +{ |
| 319 | + int r; |
| 320 | + |
| 321 | + if (!firmware_has_feature(FW_FEATURE_SPLPAR)) { |
| 322 | + pr_debug("not a shared virtualized system, not enabling\n"); |
| 323 | + return -ENODEV; |
| 324 | + } |
| 325 | + |
| 326 | + /* This driver is intended only for L1 host. */ |
| 327 | + if (is_kvm_guest()) { |
| 328 | + pr_debug("Only supported for L1 host system\n"); |
| 329 | + return -ENODEV; |
| 330 | + } |
| 331 | + |
| 332 | + r = perf_pmu_register(&vpa_dtl_pmu, vpa_dtl_pmu.name, -1); |
| 333 | + if (r) |
| 334 | + return r; |
| 335 | + |
| 336 | + return 0; |
| 337 | +} |
| 338 | + |
| 339 | +device_initcall(vpa_dtl_init); |
#endif /* CONFIG_PPC_SPLPAR */
0 commit comments