66
#include "dwarf-regs.h" /* for EM_HOST */
#include "syscalltbl.h"
#include "util/cgroup.h"
#include "util/hashmap.h"
#include "util/trace.h"
#include "util/util.h"
#include <bpf/bpf.h>
#include <inttypes.h>
#include <linux/rbtree.h>
#include <linux/time64.h>
#include <tools/libc_compat.h> /* reallocarray */
1517
1820
1921
/* Loaded BPF skeleton carrying the syscall-summary programs and maps. */
static struct syscall_summary_bpf *skel;

/*
 * Cgroup-id keyed tree filled by read_all_cgroups() in per-cgroup mode;
 * used to resolve cgroup ids to names when printing.
 */
static struct rb_root cgroups = RB_ROOT;
2124
2225int trace_prepare_bpf_summary (enum trace_summary_mode mode )
2326{
@@ -29,9 +32,14 @@ int trace_prepare_bpf_summary(enum trace_summary_mode mode)
2932
3033 if (mode == SUMMARY__BY_THREAD )
3134 skel -> rodata -> aggr_mode = SYSCALL_AGGR_THREAD ;
35+ else if (mode == SUMMARY__BY_CGROUP )
36+ skel -> rodata -> aggr_mode = SYSCALL_AGGR_CGROUP ;
3237 else
3338 skel -> rodata -> aggr_mode = SYSCALL_AGGR_CPU ;
3439
40+ if (cgroup_is_v2 ("perf_event" ) > 0 )
41+ skel -> rodata -> use_cgroup_v2 = 1 ;
42+
3543 if (syscall_summary_bpf__load (skel ) < 0 ) {
3644 fprintf (stderr , "failed to load syscall summary bpf skeleton\n" );
3745 return -1 ;
@@ -42,6 +50,9 @@ int trace_prepare_bpf_summary(enum trace_summary_mode mode)
4250 return -1 ;
4351 }
4452
53+ if (mode == SUMMARY__BY_CGROUP )
54+ read_all_cgroups (& cgroups );
55+
4556 return 0 ;
4657}
4758
@@ -88,9 +99,13 @@ static double rel_stddev(struct syscall_stats *stat)
8899 * per-cpu analysis so it's keyed by the syscall number to combine stats
89100 * from different CPUs. And syscall_data always has a syscall_node so
90101 * it can effectively work as flat hierarchy.
102+ *
 * For per-cgroup stats, it uses a two-level data structure like the
 * per-thread stats: syscall_data is keyed by cgroup id and has an array
 * of nodes, each representing one syscall for the cgroup.
91106 */
92107struct syscall_data {
93- int key ; /* tid if AGGR_THREAD, syscall-nr if AGGR_CPU */
108+ u64 key ; /* tid if AGGR_THREAD, syscall-nr if AGGR_CPU, cgroup if AGGR_CGROUP */
94109 int nr_events ;
95110 int nr_nodes ;
96111 u64 total_time ;
@@ -191,7 +206,7 @@ static int print_thread_stat(struct syscall_data *data, FILE *fp)
191206
192207 qsort (data -> nodes , data -> nr_nodes , sizeof (* data -> nodes ), nodecmp );
193208
194- printed += fprintf (fp , " thread (%d), " , data -> key );
209+ printed += fprintf (fp , " thread (%d), " , ( int ) data -> key );
195210 printed += fprintf (fp , "%d events\n\n" , data -> nr_events );
196211
197212 printed += fprintf (fp , " syscall calls errors total min avg max stddev\n" );
@@ -283,6 +298,75 @@ static int print_total_stats(struct syscall_data **data, int nr_data, FILE *fp)
283298 return printed ;
284299}
285300
301+ static int update_cgroup_stats (struct hashmap * hash , struct syscall_key * map_key ,
302+ struct syscall_stats * map_data )
303+ {
304+ struct syscall_data * data ;
305+ struct syscall_node * nodes ;
306+
307+ if (!hashmap__find (hash , map_key -> cgroup , & data )) {
308+ data = zalloc (sizeof (* data ));
309+ if (data == NULL )
310+ return - ENOMEM ;
311+
312+ data -> key = map_key -> cgroup ;
313+ if (hashmap__add (hash , data -> key , data ) < 0 ) {
314+ free (data );
315+ return - ENOMEM ;
316+ }
317+ }
318+
319+ /* update thread total stats */
320+ data -> nr_events += map_data -> count ;
321+ data -> total_time += map_data -> total_time ;
322+
323+ nodes = reallocarray (data -> nodes , data -> nr_nodes + 1 , sizeof (* nodes ));
324+ if (nodes == NULL )
325+ return - ENOMEM ;
326+
327+ data -> nodes = nodes ;
328+ nodes = & data -> nodes [data -> nr_nodes ++ ];
329+ nodes -> syscall_nr = map_key -> nr ;
330+
331+ /* each thread has an entry for each syscall, just use the stat */
332+ memcpy (& nodes -> stats , map_data , sizeof (* map_data ));
333+ return 0 ;
334+ }
335+
336+ static int print_cgroup_stat (struct syscall_data * data , FILE * fp )
337+ {
338+ int printed = 0 ;
339+ struct cgroup * cgrp = __cgroup__find (& cgroups , data -> key );
340+
341+ qsort (data -> nodes , data -> nr_nodes , sizeof (* data -> nodes ), nodecmp );
342+
343+ if (cgrp )
344+ printed += fprintf (fp , " cgroup %s," , cgrp -> name );
345+ else
346+ printed += fprintf (fp , " cgroup id:%lu," , (unsigned long )data -> key );
347+
348+ printed += fprintf (fp , " %d events\n\n" , data -> nr_events );
349+
350+ printed += fprintf (fp , " syscall calls errors total min avg max stddev\n" );
351+ printed += fprintf (fp , " (msec) (msec) (msec) (msec) (%%)\n" );
352+ printed += fprintf (fp , " --------------- -------- ------ -------- --------- --------- --------- ------\n" );
353+
354+ printed += print_common_stats (data , fp );
355+ printed += fprintf (fp , "\n\n" );
356+
357+ return printed ;
358+ }
359+
/* Print the summary table of every collected cgroup; returns bytes written. */
static int print_cgroup_stats(struct syscall_data **data, int nr_data, FILE *fp)
{
	struct syscall_data **end = data + nr_data;
	int printed = 0;

	while (data < end)
		printed += print_cgroup_stat(*data++, fp);

	return printed;
}
369+
286370int trace_print_bpf_summary (FILE * fp )
287371{
288372 struct bpf_map * map = skel -> maps .syscall_stats_map ;
@@ -305,10 +389,19 @@ int trace_print_bpf_summary(FILE *fp)
305389 struct syscall_stats stat ;
306390
307391 if (!bpf_map__lookup_elem (map , & key , sizeof (key ), & stat , sizeof (stat ), 0 )) {
308- if (skel -> rodata -> aggr_mode == SYSCALL_AGGR_THREAD )
392+ switch (skel -> rodata -> aggr_mode ) {
393+ case SYSCALL_AGGR_THREAD :
309394 update_thread_stats (& schash , & key , & stat );
310- else
395+ break ;
396+ case SYSCALL_AGGR_CPU :
311397 update_total_stats (& schash , & key , & stat );
398+ break ;
399+ case SYSCALL_AGGR_CGROUP :
400+ update_cgroup_stats (& schash , & key , & stat );
401+ break ;
402+ default :
403+ break ;
404+ }
312405 }
313406
314407 prev_key = & key ;
@@ -325,10 +418,19 @@ int trace_print_bpf_summary(FILE *fp)
325418
326419 qsort (data , nr_data , sizeof (* data ), datacmp );
327420
328- if (skel -> rodata -> aggr_mode == SYSCALL_AGGR_THREAD )
421+ switch (skel -> rodata -> aggr_mode ) {
422+ case SYSCALL_AGGR_THREAD :
329423 printed += print_thread_stats (data , nr_data , fp );
330- else
424+ break ;
425+ case SYSCALL_AGGR_CPU :
331426 printed += print_total_stats (data , nr_data , fp );
427+ break ;
428+ case SYSCALL_AGGR_CGROUP :
429+ printed += print_cgroup_stats (data , nr_data , fp );
430+ break ;
431+ default :
432+ break ;
433+ }
332434
333435 for (i = 0 ; i < nr_data && data ; i ++ ) {
334436 free (data [i ]-> nodes );
@@ -343,5 +445,14 @@ int trace_print_bpf_summary(FILE *fp)
343445
344446void trace_cleanup_bpf_summary (void )
345447{
448+ if (!RB_EMPTY_ROOT (& cgroups )) {
449+ struct cgroup * cgrp , * tmp ;
450+
451+ rbtree_postorder_for_each_entry_safe (cgrp , tmp , & cgroups , node )
452+ cgroup__put (cgrp );
453+
454+ cgroups = RB_ROOT ;
455+ }
456+
346457 syscall_summary_bpf__destroy (skel );
347458}
0 commit comments