@@ -685,30 +685,32 @@ do { \
 	___p; \
 })
 
+#define for_each_epc(_epc, _ctx, _pmu, _cgroup)			\
+	list_for_each_entry(_epc, &((_ctx)->pmu_ctx_list), pmu_ctx_entry) \
+		if (_cgroup && !_epc->nr_cgroups)			\
+			continue;					\
+		else if (_pmu && _epc->pmu != _pmu)			\
+			continue;					\
+		else
+
 static void perf_ctx_disable(struct perf_event_context *ctx, bool cgroup)
 {
 	struct perf_event_pmu_context *pmu_ctx;
 
-	list_for_each_entry(pmu_ctx, &ctx->pmu_ctx_list, pmu_ctx_entry) {
-		if (cgroup && !pmu_ctx->nr_cgroups)
-			continue;
+	for_each_epc(pmu_ctx, ctx, NULL, cgroup)
 		perf_pmu_disable(pmu_ctx->pmu);
-	}
 }
 
 static void perf_ctx_enable(struct perf_event_context *ctx, bool cgroup)
 {
 	struct perf_event_pmu_context *pmu_ctx;
 
-	list_for_each_entry(pmu_ctx, &ctx->pmu_ctx_list, pmu_ctx_entry) {
-		if (cgroup && !pmu_ctx->nr_cgroups)
-			continue;
+	for_each_epc(pmu_ctx, ctx, NULL, cgroup)
 		perf_pmu_enable(pmu_ctx->pmu);
-	}
 }
 
-static void ctx_sched_out(struct perf_event_context *ctx, enum event_type_t event_type);
-static void ctx_sched_in(struct perf_event_context *ctx, enum event_type_t event_type);
+static void ctx_sched_out(struct perf_event_context *ctx, struct pmu *pmu, enum event_type_t event_type);
+static void ctx_sched_in(struct perf_event_context *ctx, struct pmu *pmu, enum event_type_t event_type);
 
 #ifdef CONFIG_CGROUP_PERF
 
@@ -865,7 +867,7 @@ static void perf_cgroup_switch(struct task_struct *task)
 	perf_ctx_lock(cpuctx, cpuctx->task_ctx);
 	perf_ctx_disable(&cpuctx->ctx, true);
 
-	ctx_sched_out(&cpuctx->ctx, EVENT_ALL|EVENT_CGROUP);
+	ctx_sched_out(&cpuctx->ctx, NULL, EVENT_ALL|EVENT_CGROUP);
 	/*
 	 * must not be done before ctxswout due
 	 * to update_cgrp_time_from_cpuctx() in
@@ -877,7 +879,7 @@ static void perf_cgroup_switch(struct task_struct *task)
 	 * perf_cgroup_set_timestamp() in ctx_sched_in()
 	 * to not have to pass task around
 	 */
-	ctx_sched_in(&cpuctx->ctx, EVENT_ALL|EVENT_CGROUP);
+	ctx_sched_in(&cpuctx->ctx, NULL, EVENT_ALL|EVENT_CGROUP);
 
 	perf_ctx_enable(&cpuctx->ctx, true);
 	perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
@@ -2656,7 +2658,8 @@ static void add_event_to_ctx(struct perf_event *event,
 }
 
 static void task_ctx_sched_out(struct perf_event_context *ctx,
-			       enum event_type_t event_type)
+			       struct pmu *pmu,
+			       enum event_type_t event_type)
 {
 	struct perf_cpu_context *cpuctx = this_cpu_ptr(&perf_cpu_context);
 
@@ -2666,18 +2669,19 @@ static void task_ctx_sched_out(struct perf_event_context *ctx,
 	if (WARN_ON_ONCE(ctx != cpuctx->task_ctx))
 		return;
 
-	ctx_sched_out(ctx, event_type);
+	ctx_sched_out(ctx, pmu, event_type);
 }
 
 static void perf_event_sched_in(struct perf_cpu_context *cpuctx,
-				struct perf_event_context *ctx)
+				struct perf_event_context *ctx,
+				struct pmu *pmu)
 {
-	ctx_sched_in(&cpuctx->ctx, EVENT_PINNED);
+	ctx_sched_in(&cpuctx->ctx, pmu, EVENT_PINNED);
 	if (ctx)
-		ctx_sched_in(ctx, EVENT_PINNED);
-	ctx_sched_in(&cpuctx->ctx, EVENT_FLEXIBLE);
+		ctx_sched_in(ctx, pmu, EVENT_PINNED);
+	ctx_sched_in(&cpuctx->ctx, pmu, EVENT_FLEXIBLE);
 	if (ctx)
-		ctx_sched_in(ctx, EVENT_FLEXIBLE);
+		ctx_sched_in(ctx, pmu, EVENT_FLEXIBLE);
 }
 
 /*
@@ -2695,16 +2699,12 @@ static void perf_event_sched_in(struct perf_cpu_context *cpuctx,
  * event_type is a bit mask of the types of events involved. For CPU events,
  * event_type is only either EVENT_PINNED or EVENT_FLEXIBLE.
  */
-/*
- * XXX: ctx_resched() reschedule entire perf_event_context while adding new
- * event to the context or enabling existing event in the context. We can
- * probably optimize it by rescheduling only affected pmu_ctx.
- */
 static void ctx_resched(struct perf_cpu_context *cpuctx,
 			struct perf_event_context *task_ctx,
-			enum event_type_t event_type)
+			struct pmu *pmu, enum event_type_t event_type)
 {
 	bool cpu_event = !!(event_type & EVENT_CPU);
+	struct perf_event_pmu_context *epc;
 
 	/*
 	 * If pinned groups are involved, flexible groups also need to be
@@ -2715,10 +2715,14 @@ static void ctx_resched(struct perf_cpu_context *cpuctx,
 
 	event_type &= EVENT_ALL;
 
-	perf_ctx_disable(&cpuctx->ctx, false);
+	for_each_epc(epc, &cpuctx->ctx, pmu, false)
+		perf_pmu_disable(epc->pmu);
+
 	if (task_ctx) {
-		perf_ctx_disable(task_ctx, false);
-		task_ctx_sched_out(task_ctx, event_type);
+		for_each_epc(epc, task_ctx, pmu, false)
+			perf_pmu_disable(epc->pmu);
+
+		task_ctx_sched_out(task_ctx, pmu, event_type);
 	}
 
 	/*
@@ -2729,15 +2733,19 @@ static void ctx_resched(struct perf_cpu_context *cpuctx,
 	 * - otherwise, do nothing more.
 	 */
 	if (cpu_event)
-		ctx_sched_out(&cpuctx->ctx, event_type);
+		ctx_sched_out(&cpuctx->ctx, pmu, event_type);
 	else if (event_type & EVENT_PINNED)
-		ctx_sched_out(&cpuctx->ctx, EVENT_FLEXIBLE);
+		ctx_sched_out(&cpuctx->ctx, pmu, EVENT_FLEXIBLE);
+
+	perf_event_sched_in(cpuctx, task_ctx, pmu);
 
-	perf_event_sched_in(cpuctx, task_ctx);
+	for_each_epc(epc, &cpuctx->ctx, pmu, false)
+		perf_pmu_enable(epc->pmu);
 
-	perf_ctx_enable(&cpuctx->ctx, false);
-	if (task_ctx)
-		perf_ctx_enable(task_ctx, false);
+	if (task_ctx) {
+		for_each_epc(epc, task_ctx, pmu, false)
+			perf_pmu_enable(epc->pmu);
+	}
 }
 
 void perf_pmu_resched(struct pmu *pmu)
@@ -2746,7 +2754,7 @@ void perf_pmu_resched(struct pmu *pmu)
 	struct perf_event_context *task_ctx = cpuctx->task_ctx;
 
 	perf_ctx_lock(cpuctx, task_ctx);
-	ctx_resched(cpuctx, task_ctx, EVENT_ALL|EVENT_CPU);
+	ctx_resched(cpuctx, task_ctx, pmu, EVENT_ALL|EVENT_CPU);
 	perf_ctx_unlock(cpuctx, task_ctx);
 }
 
@@ -2802,9 +2810,10 @@ static int __perf_install_in_context(void *info)
 #endif
 
 	if (reprogram) {
-		ctx_sched_out(ctx, EVENT_TIME);
+		ctx_sched_out(ctx, NULL, EVENT_TIME);
 		add_event_to_ctx(event, ctx);
-		ctx_resched(cpuctx, task_ctx, get_event_type(event));
+		ctx_resched(cpuctx, task_ctx, event->pmu_ctx->pmu,
+			    get_event_type(event));
 	} else {
 		add_event_to_ctx(event, ctx);
 	}
@@ -2948,7 +2957,7 @@ static void __perf_event_enable(struct perf_event *event,
 		return;
 
 	if (ctx->is_active)
-		ctx_sched_out(ctx, EVENT_TIME);
+		ctx_sched_out(ctx, NULL, EVENT_TIME);
 
 	perf_event_set_state(event, PERF_EVENT_STATE_INACTIVE);
 	perf_cgroup_event_enable(event, ctx);
@@ -2957,7 +2966,7 @@ static void __perf_event_enable(struct perf_event *event,
 		return;
 
 	if (!event_filter_match(event)) {
-		ctx_sched_in(ctx, EVENT_TIME);
+		ctx_sched_in(ctx, NULL, EVENT_TIME);
 		return;
 	}
 
@@ -2966,15 +2975,15 @@ static void __perf_event_enable(struct perf_event *event,
 	 * then don't put it on unless the group is on.
 	 */
 	if (leader != event && leader->state != PERF_EVENT_STATE_ACTIVE) {
-		ctx_sched_in(ctx, EVENT_TIME);
+		ctx_sched_in(ctx, NULL, EVENT_TIME);
 		return;
 	}
 
 	task_ctx = cpuctx->task_ctx;
 	if (ctx->task)
 		WARN_ON_ONCE(task_ctx != ctx);
 
-	ctx_resched(cpuctx, task_ctx, get_event_type(event));
+	ctx_resched(cpuctx, task_ctx, event->pmu_ctx->pmu, get_event_type(event));
 }
 
 /*
@@ -3276,8 +3285,17 @@ static void __pmu_ctx_sched_out(struct perf_event_pmu_context *pmu_ctx,
 	perf_pmu_enable(pmu);
 }
 
+/*
+ * Be very careful with the @pmu argument since this will change ctx state.
+ * The @pmu argument works for ctx_resched(), because that is symmetric in
+ * ctx_sched_out() / ctx_sched_in() usage and the ctx state ends up invariant.
+ *
+ * However, if you were to be asymmetrical, you could end up with messed up
+ * state, eg. ctx->is_active cleared even though most EPCs would still actually
+ * be active.
+ */
 static void
-ctx_sched_out(struct perf_event_context *ctx, enum event_type_t event_type)
+ctx_sched_out(struct perf_event_context *ctx, struct pmu *pmu, enum event_type_t event_type)
 {
 	struct perf_cpu_context *cpuctx = this_cpu_ptr(&perf_cpu_context);
 	struct perf_event_pmu_context *pmu_ctx;
@@ -3331,11 +3349,8 @@ ctx_sched_out(struct perf_event_context *ctx, enum event_type_t event_type)
 
 	is_active ^= ctx->is_active; /* changed bits */
 
-	list_for_each_entry(pmu_ctx, &ctx->pmu_ctx_list, pmu_ctx_entry) {
-		if (cgroup && !pmu_ctx->nr_cgroups)
-			continue;
+	for_each_epc(pmu_ctx, ctx, pmu, cgroup)
 		__pmu_ctx_sched_out(pmu_ctx, is_active);
-	}
 }
 
 /*
@@ -3579,7 +3594,7 @@ perf_event_context_sched_out(struct task_struct *task, struct task_struct *next)
 
 inside_switch:
 	perf_ctx_sched_task_cb(ctx, false);
-	task_ctx_sched_out(ctx, EVENT_ALL);
+	task_ctx_sched_out(ctx, NULL, EVENT_ALL);
 
 	perf_ctx_enable(ctx, false);
 	raw_spin_unlock(&ctx->lock);
@@ -3877,29 +3892,22 @@ static void pmu_groups_sched_in(struct perf_event_context *ctx,
 			   merge_sched_in, &can_add_hw);
 }
 
-static void ctx_groups_sched_in(struct perf_event_context *ctx,
-				struct perf_event_groups *groups,
-				bool cgroup)
+static void __pmu_ctx_sched_in(struct perf_event_pmu_context *pmu_ctx,
+			       enum event_type_t event_type)
 {
-	struct perf_event_pmu_context *pmu_ctx;
-
-	list_for_each_entry(pmu_ctx, &ctx->pmu_ctx_list, pmu_ctx_entry) {
-		if (cgroup && !pmu_ctx->nr_cgroups)
-			continue;
-		pmu_groups_sched_in(ctx, groups, pmu_ctx->pmu);
-	}
-}
+	struct perf_event_context *ctx = pmu_ctx->ctx;
 
-static void __pmu_ctx_sched_in(struct perf_event_context *ctx,
-			       struct pmu *pmu)
-{
-	pmu_groups_sched_in(ctx, &ctx->flexible_groups, pmu);
+	if (event_type & EVENT_PINNED)
+		pmu_groups_sched_in(ctx, &ctx->pinned_groups, pmu_ctx->pmu);
+	if (event_type & EVENT_FLEXIBLE)
+		pmu_groups_sched_in(ctx, &ctx->flexible_groups, pmu_ctx->pmu);
 }
 
 static void
-ctx_sched_in(struct perf_event_context *ctx, enum event_type_t event_type)
+ctx_sched_in(struct perf_event_context *ctx, struct pmu *pmu, enum event_type_t event_type)
 {
 	struct perf_cpu_context *cpuctx = this_cpu_ptr(&perf_cpu_context);
+	struct perf_event_pmu_context *pmu_ctx;
 	int is_active = ctx->is_active;
 	bool cgroup = event_type & EVENT_CGROUP;
 
@@ -3935,12 +3943,16 @@ ctx_sched_in(struct perf_event_context *ctx, enum event_type_t event_type)
 	 * First go through the list and put on any pinned groups
 	 * in order to give them the best chance of going on.
 	 */
-	if (is_active & EVENT_PINNED)
-		ctx_groups_sched_in(ctx, &ctx->pinned_groups, cgroup);
+	if (is_active & EVENT_PINNED) {
+		for_each_epc(pmu_ctx, ctx, pmu, cgroup)
+			__pmu_ctx_sched_in(pmu_ctx, EVENT_PINNED);
+	}
 
 	/* Then walk through the lower prio flexible groups */
-	if (is_active & EVENT_FLEXIBLE)
-		ctx_groups_sched_in(ctx, &ctx->flexible_groups, cgroup);
+	if (is_active & EVENT_FLEXIBLE) {
+		for_each_epc(pmu_ctx, ctx, pmu, cgroup)
+			__pmu_ctx_sched_in(pmu_ctx, EVENT_FLEXIBLE);
+	}
 }
 
 static void perf_event_context_sched_in(struct task_struct *task)
@@ -3983,10 +3995,10 @@ static void perf_event_context_sched_in(struct task_struct *task)
 	 */
 	if (!RB_EMPTY_ROOT(&ctx->pinned_groups.tree)) {
 		perf_ctx_disable(&cpuctx->ctx, false);
-		ctx_sched_out(&cpuctx->ctx, EVENT_FLEXIBLE);
+		ctx_sched_out(&cpuctx->ctx, NULL, EVENT_FLEXIBLE);
 	}
 
-	perf_event_sched_in(cpuctx, ctx);
+	perf_event_sched_in(cpuctx, ctx, NULL);
 
 	perf_ctx_sched_task_cb(cpuctx->task_ctx, true);
 
@@ -4327,14 +4339,14 @@ static bool perf_rotate_context(struct perf_cpu_pmu_context *cpc)
 		update_context_time(&cpuctx->ctx);
 		__pmu_ctx_sched_out(cpu_epc, EVENT_FLEXIBLE);
 		rotate_ctx(&cpuctx->ctx, cpu_event);
-		__pmu_ctx_sched_in(&cpuctx->ctx, pmu);
+		__pmu_ctx_sched_in(cpu_epc, EVENT_FLEXIBLE);
 	}
 
 	if (task_event)
 		rotate_ctx(task_epc->ctx, task_event);
 
 	if (task_event || (task_epc && cpu_event))
-		__pmu_ctx_sched_in(task_epc->ctx, pmu);
+		__pmu_ctx_sched_in(task_epc, EVENT_FLEXIBLE);
 
 	perf_pmu_enable(pmu);
 	perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
@@ -4400,7 +4412,7 @@ static void perf_event_enable_on_exec(struct perf_event_context *ctx)
 
 	cpuctx = this_cpu_ptr(&perf_cpu_context);
 	perf_ctx_lock(cpuctx, ctx);
-	ctx_sched_out(ctx, EVENT_TIME);
+	ctx_sched_out(ctx, NULL, EVENT_TIME);
 
 	list_for_each_entry(event, &ctx->event_list, event_entry) {
 		enabled |= event_enable_on_exec(event, ctx);
@@ -4412,9 +4424,9 @@ static void perf_event_enable_on_exec(struct perf_event_context *ctx)
 	 */
 	if (enabled) {
 		clone_ctx = unclone_ctx(ctx);
-		ctx_resched(cpuctx, ctx, event_type);
+		ctx_resched(cpuctx, ctx, NULL, event_type);
 	} else {
-		ctx_sched_in(ctx, EVENT_TIME);
+		ctx_sched_in(ctx, NULL, EVENT_TIME);
 	}
 	perf_ctx_unlock(cpuctx, ctx);
 
@@ -13202,7 +13214,7 @@ static void perf_event_exit_task_context(struct task_struct *child)
 	 * in.
 	 */
 	raw_spin_lock_irq(&child_ctx->lock);
-	task_ctx_sched_out(child_ctx, EVENT_ALL);
+	task_ctx_sched_out(child_ctx, NULL, EVENT_ALL);
 
 	/*
 	 * Now that the context is inactive, destroy the task <-> ctx relation
@@ -13751,7 +13763,7 @@ static void __perf_event_exit_context(void *__info)
 	struct perf_event *event;
 
 	raw_spin_lock(&ctx->lock);
-	ctx_sched_out(ctx, EVENT_TIME);
+	ctx_sched_out(ctx, NULL, EVENT_TIME);
 	list_for_each_entry(event, &ctx->event_list, event_entry)
 		__perf_remove_from_context(event, cpuctx, ctx, (void *)DETACH_GROUP);
 	raw_spin_unlock(&ctx->lock);
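
Aside, not part of the commit: a minimal userspace sketch of the control-flow trick the new for_each_epc() macro relies on. The struct names (epc, ctx), the sample PMUs and the plain next-pointer list below are invented for illustration; the kernel version walks ctx->pmu_ctx_list with list_for_each_entry(). The macro deliberately ends in a trailing "else", so the single statement the caller writes after for_each_epc(...) becomes the else branch, and filtered entries are skipped with "continue" without forcing braces on the caller.

/* Standalone illustration only; not the kernel implementation. */
#include <stdio.h>
#include <stddef.h>

struct pmu { const char *name; };

struct epc {				/* stand-in for perf_event_pmu_context */
	struct pmu *pmu;
	int nr_cgroups;
	struct epc *next;
};

struct ctx {				/* stand-in for perf_event_context */
	struct epc *pmu_ctx_list;
};

/* Same shape as the kernel macro: skip entries that fail the cgroup or PMU
 * filter, run the caller's statement (the trailing "else") for the rest. */
#define for_each_epc(_epc, _ctx, _pmu, _cgroup)				\
	for (_epc = (_ctx)->pmu_ctx_list; _epc; _epc = _epc->next)	\
		if ((_cgroup) && !(_epc)->nr_cgroups)			\
			continue;					\
		else if ((_pmu) && (_epc)->pmu != (_pmu))		\
			continue;					\
		else

int main(void)
{
	struct pmu cpu_pmu = { "cpu" }, ibs_pmu = { "ibs" };
	struct epc c = { &ibs_pmu, 0, NULL };
	struct epc b = { &cpu_pmu, 1, &c };
	struct epc a = { &cpu_pmu, 0, &b };
	struct ctx ctx = { &a };
	struct epc *epc;

	/* pmu == NULL, cgroup == 0: no filtering, visits a, b and c. */
	for_each_epc(epc, &ctx, (struct pmu *)NULL, 0)
		printf("all:    %s\n", epc->pmu->name);

	/* Only contexts belonging to cpu_pmu: visits a and b. */
	for_each_epc(epc, &ctx, &cpu_pmu, 0)
		printf("cpu:    %s\n", epc->pmu->name);

	/* Only contexts with cgroup events: visits b. */
	for_each_epc(epc, &ctx, (struct pmu *)NULL, 1)
		printf("cgroup: %s\n", epc->pmu->name);

	return 0;
}

Passing a NULL pmu, as the ctx_sched_out(ctx, NULL, ...) call sites above do, keeps the old visit-every-PMU behaviour, while a non-NULL pmu narrows the walk to that PMU's contexts, which is what lets ctx_resched() touch only the affected PMU.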