Skip to content

Commit 00cf3d6

Browse files
committed
tracing: Allow synthetic events to pass around stacktraces
Allow a stacktrace from one event to be displayed by the end event of a synthetic event. This is very useful when looking for the longest latency of a sleep or something blocked on I/O.

 # cd /sys/kernel/tracing/
 # echo 's:block_lat pid_t pid; u64 delta; unsigned long[] stack;' > dynamic_events
 # echo 'hist:keys=next_pid:ts=common_timestamp.usecs,st=stacktrace if prev_state == 1||prev_state == 2' > events/sched/sched_switch/trigger
 # echo 'hist:keys=prev_pid:delta=common_timestamp.usecs-$ts,s=$st:onmax($delta).trace(block_lat,prev_pid,$delta,$s)' >> events/sched/sched_switch/trigger

The above creates a "block_lat" synthetic event that takes the stacktrace of when a task schedules out in either the interruptible or uninterruptible states, and on a new per process max $delta (the time it was scheduled out), will print the process id and the stacktrace.

 # echo 1 > events/synthetic/block_lat/enable
 # cat trace
 #           TASK-PID     CPU#  |||||  TIMESTAMP  FUNCTION
 #              | |         |   |||||     |         |
    kworker/u16:0-767     [006] d..4.   560.645045: block_lat: pid=767 delta=66 stack=STACK:
 => __schedule
 => schedule
 => pipe_read
 => vfs_read
 => ksys_read
 => do_syscall_64
 => 0x966000aa
           <idle>-0       [003] d..4.   561.132117: block_lat: pid=0 delta=413787 stack=STACK:
 => __schedule
 => schedule
 => schedule_hrtimeout_range_clock
 => do_sys_poll
 => __x64_sys_poll
 => do_syscall_64
 => 0x966000aa
            <...>-153     [006] d..4.   562.068407: block_lat: pid=153 delta=54 stack=STACK:
 => __schedule
 => schedule
 => io_schedule
 => rq_qos_wait
 => wbt_wait
 => __rq_qos_throttle
 => blk_mq_submit_bio
 => submit_bio_noacct_nocheck
 => ext4_bio_write_page
 => mpage_submit_page
 => mpage_process_page_bufs
 => mpage_prepare_extent_to_map
 => ext4_do_writepages
 => ext4_writepages
 => do_writepages
 => __writeback_single_inode

Link: https://lkml.kernel.org/r/20230117152236.010941267@goodmis.org
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Tom Zanussi <zanussi@kernel.org>
Cc: Ross Zwisler <zwisler@google.com>
Cc: Ching-lin Yu <chinglinyu@google.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
1 parent 288709c commit 00cf3d6

4 files changed

Lines changed: 87 additions & 5 deletions

File tree

kernel/trace/trace.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,10 @@ enum trace_type {
113113
#define MEM_FAIL(condition, fmt, ...) \
114114
DO_ONCE_LITE_IF(condition, pr_err, "ERROR: " fmt, ##__VA_ARGS__)
115115

116+
/* Upper bound on the number of entries scanned and copied from a saved stacktrace. */
#define HIST_STACKTRACE_DEPTH 16
117+
/* Size in bytes of a stacktrace buffer holding HIST_STACKTRACE_DEPTH entries. */
#define HIST_STACKTRACE_SIZE (HIST_STACKTRACE_DEPTH * sizeof(unsigned long))
118+
/* NOTE(review): presumably the number of innermost frames skipped at capture time; no user is visible here - confirm. */
#define HIST_STACKTRACE_SKIP 5
119+
116120
/*
117121
* syscalls are special, and need special handling, this is why
118122
* they are not included in trace_entries.h

kernel/trace/trace_events_hist.c

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -480,10 +480,6 @@ DEFINE_HIST_FIELD_FN(u8);
480480
#define for_each_hist_key_field(i, hist_data) \
481481
for ((i) = (hist_data)->n_vals; (i) < (hist_data)->n_fields; (i)++)
482482

483-
#define HIST_STACKTRACE_DEPTH 16
484-
#define HIST_STACKTRACE_SIZE (HIST_STACKTRACE_DEPTH * sizeof(unsigned long))
485-
#define HIST_STACKTRACE_SKIP 5
486-
487483
#define HITCOUNT_IDX 0
488484
#define HIST_KEY_SIZE_MAX (MAX_FILTER_STR_VAL + HIST_STACKTRACE_SIZE)
489485

@@ -3869,6 +3865,9 @@ static int check_synth_field(struct synth_event *event,
38693865
&& field->is_dynamic)
38703866
return 0;
38713867

3868+
if (strstr(hist_field->type, "long[") && field->is_stack)
3869+
return 0;
3870+
38723871
if (strcmp(field->type, hist_field->type) != 0) {
38733872
if (field->size != hist_field->size ||
38743873
(!field->is_string && field->is_signed != hist_field->is_signed))

kernel/trace/trace_events_synth.c

Lines changed: 79 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,14 @@ static int synth_field_is_string(char *type)
173173
return false;
174174
}
175175

176+
/*
 * Report whether a synthetic event field type denotes a stacktrace.
 *
 * Any type string that contains "long[" (for example "unsigned long[]")
 * is treated as a stack field.  Returns 1 on a match and 0 otherwise.
 */
static int synth_field_is_stack(char *type)
{
	return strstr(type, "long[") != NULL;
}
183+
176184
static int synth_field_string_size(char *type)
177185
{
178186
char buf[4], *end, *start;
@@ -248,6 +256,8 @@ static int synth_field_size(char *type)
248256
size = sizeof(gfp_t);
249257
else if (synth_field_is_string(type))
250258
size = synth_field_string_size(type);
259+
else if (synth_field_is_stack(type))
260+
size = 0;
251261

252262
return size;
253263
}
@@ -292,6 +302,8 @@ static const char *synth_field_fmt(char *type)
292302
fmt = "%x";
293303
else if (synth_field_is_string(type))
294304
fmt = "%.*s";
305+
else if (synth_field_is_stack(type))
306+
fmt = "%s";
295307

296308
return fmt;
297309
}
@@ -371,6 +383,23 @@ static enum print_line_t print_synth_event(struct trace_iterator *iter,
371383
i == se->n_fields - 1 ? "" : " ");
372384
n_u64 += STR_VAR_LEN_MAX / sizeof(u64);
373385
}
386+
} else if (se->fields[i]->is_stack) {
387+
u32 offset, data_offset, len;
388+
unsigned long *p, *end;
389+
390+
offset = (u32)entry->fields[n_u64];
391+
data_offset = offset & 0xffff;
392+
len = offset >> 16;
393+
394+
p = (void *)entry + data_offset;
395+
end = (void *)p + len - (sizeof(long) - 1);
396+
397+
trace_seq_printf(s, "%s=STACK:\n", se->fields[i]->name);
398+
399+
for (; *p && p < end; p++)
400+
trace_seq_printf(s, "=> %pS\n", (void *)*p);
401+
n_u64++;
402+
374403
} else {
375404
struct trace_print_flags __flags[] = {
376405
__def_gfpflag_names, {-1, NULL} };
@@ -446,6 +475,43 @@ static unsigned int trace_string(struct synth_trace_event *entry,
446475
return len;
447476
}
448477

478+
static unsigned int trace_stack(struct synth_trace_event *entry,
479+
struct synth_event *event,
480+
long *stack,
481+
unsigned int data_size,
482+
unsigned int *n_u64)
483+
{
484+
unsigned int len;
485+
u32 data_offset;
486+
void *data_loc;
487+
488+
data_offset = struct_size(entry, fields, event->n_u64);
489+
data_offset += data_size;
490+
491+
for (len = 0; len < HIST_STACKTRACE_DEPTH; len++) {
492+
if (!stack[len])
493+
break;
494+
}
495+
496+
/* Include the zero'd element if it fits */
497+
if (len < HIST_STACKTRACE_DEPTH)
498+
len++;
499+
500+
len *= sizeof(long);
501+
502+
/* Find the dynamic section to copy the stack into. */
503+
data_loc = (void *)entry + data_offset;
504+
memcpy(data_loc, stack, len);
505+
506+
/* Fill in the field that holds the offset/len combo */
507+
data_offset |= len << 16;
508+
*(u32 *)&entry->fields[*n_u64] = data_offset;
509+
510+
(*n_u64)++;
511+
512+
return len;
513+
}
514+
449515
static notrace void trace_event_raw_event_synth(void *__data,
450516
u64 *var_ref_vals,
451517
unsigned int *var_ref_idx)
@@ -498,6 +564,12 @@ static notrace void trace_event_raw_event_synth(void *__data,
498564
event->fields[i]->is_dynamic,
499565
data_size, &n_u64);
500566
data_size += len; /* only dynamic string increments */
567+
} if (event->fields[i]->is_stack) {
568+
long *stack = (long *)(long)var_ref_vals[val_idx];
569+
570+
len = trace_stack(entry, event, stack,
571+
data_size, &n_u64);
572+
data_size += len;
501573
} else {
502574
struct synth_field *field = event->fields[i];
503575
u64 val = var_ref_vals[val_idx];
@@ -560,6 +632,9 @@ static int __set_synth_event_print_fmt(struct synth_event *event,
560632
event->fields[i]->is_dynamic)
561633
pos += snprintf(buf + pos, LEN_OR_ZERO,
562634
", __get_str(%s)", event->fields[i]->name);
635+
else if (event->fields[i]->is_stack)
636+
pos += snprintf(buf + pos, LEN_OR_ZERO,
637+
", __get_stacktrace(%s)", event->fields[i]->name);
563638
else
564639
pos += snprintf(buf + pos, LEN_OR_ZERO,
565640
", REC->%s", event->fields[i]->name);
@@ -696,7 +771,8 @@ static struct synth_field *parse_synth_field(int argc, char **argv,
696771
ret = -EINVAL;
697772
goto free;
698773
} else if (size == 0) {
699-
if (synth_field_is_string(field->type)) {
774+
if (synth_field_is_string(field->type) ||
775+
synth_field_is_stack(field->type)) {
700776
char *type;
701777

702778
len = sizeof("__data_loc ") + strlen(field->type) + 1;
@@ -727,6 +803,8 @@ static struct synth_field *parse_synth_field(int argc, char **argv,
727803

728804
if (synth_field_is_string(field->type))
729805
field->is_string = true;
806+
else if (synth_field_is_stack(field->type))
807+
field->is_stack = true;
730808

731809
field->is_signed = synth_field_signed(field->type);
732810
out:

kernel/trace/trace_synth.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ struct synth_field {
1818
bool is_signed;
1919
bool is_string;
2020
bool is_dynamic;
21+
bool is_stack;
2122
};
2223

2324
struct synth_event {

0 commit comments

Comments
 (0)