@@ -186,9 +186,14 @@ static void group_init(struct psi_group *group)
186186 seqcount_init (& per_cpu_ptr (group -> pcpu , cpu )-> seq );
187187 group -> avg_last_update = sched_clock ();
188188 group -> avg_next_update = group -> avg_last_update + psi_period ;
189- INIT_DELAYED_WORK (& group -> avgs_work , psi_avgs_work );
190189 mutex_init (& group -> avgs_lock );
191- /* Init trigger-related members */
190+
191+ /* Init avg trigger-related members */
192+ INIT_LIST_HEAD (& group -> avg_triggers );
193+ memset (group -> avg_nr_triggers , 0 , sizeof (group -> avg_nr_triggers ));
194+ INIT_DELAYED_WORK (& group -> avgs_work , psi_avgs_work );
195+
196+ /* Init rtpoll trigger-related members */
192197 atomic_set (& group -> rtpoll_scheduled , 0 );
193198 mutex_init (& group -> rtpoll_trigger_lock );
194199 INIT_LIST_HEAD (& group -> rtpoll_triggers );
@@ -430,21 +435,32 @@ static u64 window_update(struct psi_window *win, u64 now, u64 value)
430435 return growth ;
431436}
432437
433- static u64 update_triggers (struct psi_group * group , u64 now , bool * update_total )
438+ static u64 update_triggers (struct psi_group * group , u64 now , bool * update_total ,
439+ enum psi_aggregators aggregator )
434440{
435441 struct psi_trigger * t ;
436- u64 * total = group -> total [PSI_POLL ];
442+ u64 * total = group -> total [aggregator ];
443+ struct list_head * triggers ;
444+ u64 * aggregator_total ;
437445 * update_total = false;
438446
447+ if (aggregator == PSI_AVGS ) {
448+ triggers = & group -> avg_triggers ;
449+ aggregator_total = group -> avg_total ;
450+ } else {
451+ triggers = & group -> rtpoll_triggers ;
452+ aggregator_total = group -> rtpoll_total ;
453+ }
454+
439455 /*
440456 * On subsequent updates, calculate growth deltas and let
441457 * watchers know when their specified thresholds are exceeded.
442458 */
443- list_for_each_entry (t , & group -> rtpoll_triggers , node ) {
459+ list_for_each_entry (t , triggers , node ) {
444460 u64 growth ;
445461 bool new_stall ;
446462
447- new_stall = group -> rtpoll_total [t -> state ] != total [t -> state ];
463+ new_stall = aggregator_total [t -> state ] != total [t -> state ];
448464
449465 /* Check for stall activity or a previous threshold breach */
450466 if (!new_stall && !t -> pending_event )
@@ -546,6 +562,7 @@ static void psi_avgs_work(struct work_struct *work)
546562 struct delayed_work * dwork ;
547563 struct psi_group * group ;
548564 u32 changed_states ;
565+ bool update_total ;
549566 u64 now ;
550567
551568 dwork = to_delayed_work (work );
@@ -563,8 +580,10 @@ static void psi_avgs_work(struct work_struct *work)
563580 * Once restarted, we'll catch up the running averages in one
564581 * go - see calc_avgs() and missed_periods.
565582 */
566- if (now >= group -> avg_next_update )
583+ if (now >= group -> avg_next_update ) {
584+ update_triggers (group , now , & update_total , PSI_AVGS );
567585 group -> avg_next_update = update_averages (group , now );
586+ }
568587
569588 if (changed_states & PSI_STATE_RESCHEDULE ) {
570589 schedule_delayed_work (dwork , nsecs_to_jiffies (
@@ -574,7 +593,7 @@ static void psi_avgs_work(struct work_struct *work)
574593 mutex_unlock (& group -> avgs_lock );
575594}
576595
577- static void init_triggers (struct psi_group * group , u64 now )
596+ static void init_rtpoll_triggers (struct psi_group * group , u64 now )
578597{
579598 struct psi_trigger * t ;
580599
@@ -667,7 +686,7 @@ static void psi_rtpoll_work(struct psi_group *group)
667686 if (changed_states & group -> rtpoll_states ) {
668687 /* Initialize trigger windows when entering polling mode */
669688 if (now > group -> rtpoll_until )
670- init_triggers (group , now );
689+ init_rtpoll_triggers (group , now );
671690
672691 /*
673692 * Keep the monitor active for at least the duration of the
@@ -684,7 +703,7 @@ static void psi_rtpoll_work(struct psi_group *group)
684703 }
685704
686705 if (now >= group -> rtpoll_next_update ) {
687- group -> rtpoll_next_update = update_triggers (group , now , & update_total );
706+ group -> rtpoll_next_update = update_triggers (group , now , & update_total , PSI_POLL );
688707 if (update_total )
689708 memcpy (group -> rtpoll_total , group -> total [PSI_POLL ],
690709 sizeof (group -> rtpoll_total ));
@@ -1254,16 +1273,23 @@ int psi_show(struct seq_file *m, struct psi_group *group, enum psi_res res)
12541273}
12551274
12561275struct psi_trigger * psi_trigger_create (struct psi_group * group ,
1257- char * buf , enum psi_res res )
1276+ char * buf , enum psi_res res , struct file * file )
12581277{
12591278 struct psi_trigger * t ;
12601279 enum psi_states state ;
12611280 u32 threshold_us ;
1281+ bool privileged ;
12621282 u32 window_us ;
12631283
12641284 if (static_branch_likely (& psi_disabled ))
12651285 return ERR_PTR (- EOPNOTSUPP );
12661286
1287+ /*
1288+ * Checking the privilege here on file->f_cred implies that a privileged user
1289+ * could open the file and delegate the write to an unprivileged one.
1290+ */
1291+ privileged = cap_raised (file -> f_cred -> cap_effective , CAP_SYS_RESOURCE );
1292+
12671293 if (sscanf (buf , "some %u %u" , & threshold_us , & window_us ) == 2 )
12681294 state = PSI_IO_SOME + res * 2 ;
12691295 else if (sscanf (buf , "full %u %u" , & threshold_us , & window_us ) == 2 )
@@ -1283,6 +1309,13 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group,
12831309 window_us > WINDOW_MAX_US )
12841310 return ERR_PTR (- EINVAL );
12851311
1312+ /*
1313+ * Unprivileged users can only use windows that are a multiple of 2s, so
1314+ * that averages aggregation work is used and no RT threads need to be spawned.
1315+ */
1316+ if (!privileged && window_us % 2000000 )
1317+ return ERR_PTR (- EINVAL );
1318+
12861319 /* Check threshold */
12871320 if (threshold_us == 0 || threshold_us > window_us )
12881321 return ERR_PTR (- EINVAL );
@@ -1302,31 +1335,40 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group,
13021335 t -> last_event_time = 0 ;
13031336 init_waitqueue_head (& t -> event_wait );
13041337 t -> pending_event = false;
1338+ t -> aggregator = privileged ? PSI_POLL : PSI_AVGS ;
13051339
1306- mutex_lock (& group -> rtpoll_trigger_lock );
1340+ if (privileged ) {
1341+ mutex_lock (& group -> rtpoll_trigger_lock );
13071342
1308- if (!rcu_access_pointer (group -> rtpoll_task )) {
1309- struct task_struct * task ;
1343+ if (!rcu_access_pointer (group -> rtpoll_task )) {
1344+ struct task_struct * task ;
13101345
1311- task = kthread_create (psi_rtpoll_worker , group , "psimon" );
1312- if (IS_ERR (task )) {
1313- kfree (t );
1314- mutex_unlock (& group -> rtpoll_trigger_lock );
1315- return ERR_CAST (task );
1346+ task = kthread_create (psi_rtpoll_worker , group , "psimon" );
1347+ if (IS_ERR (task )) {
1348+ kfree (t );
1349+ mutex_unlock (& group -> rtpoll_trigger_lock );
1350+ return ERR_CAST (task );
1351+ }
1352+ atomic_set (& group -> rtpoll_wakeup , 0 );
1353+ wake_up_process (task );
1354+ rcu_assign_pointer (group -> rtpoll_task , task );
13161355 }
1317- atomic_set (& group -> rtpoll_wakeup , 0 );
1318- wake_up_process (task );
1319- rcu_assign_pointer (group -> rtpoll_task , task );
1320- }
13211356
1322- list_add (& t -> node , & group -> rtpoll_triggers );
1323- group -> rtpoll_min_period = min (group -> rtpoll_min_period ,
1324- div_u64 (t -> win .size , UPDATES_PER_WINDOW ));
1325- group -> rtpoll_nr_triggers [t -> state ]++ ;
1326- group -> rtpoll_states |= (1 << t -> state );
1357+ list_add (& t -> node , & group -> rtpoll_triggers );
1358+ group -> rtpoll_min_period = min (group -> rtpoll_min_period ,
1359+ div_u64 (t -> win .size , UPDATES_PER_WINDOW ));
1360+ group -> rtpoll_nr_triggers [t -> state ]++ ;
1361+ group -> rtpoll_states |= (1 << t -> state );
13271362
1328- mutex_unlock (& group -> rtpoll_trigger_lock );
1363+ mutex_unlock (& group -> rtpoll_trigger_lock );
1364+ } else {
1365+ mutex_lock (& group -> avgs_lock );
1366+
1367+ list_add (& t -> node , & group -> avg_triggers );
1368+ group -> avg_nr_triggers [t -> state ]++ ;
13291369
1370+ mutex_unlock (& group -> avgs_lock );
1371+ }
13301372 return t ;
13311373}
13321374
@@ -1350,34 +1392,41 @@ void psi_trigger_destroy(struct psi_trigger *t)
13501392 */
13511393 wake_up_pollfree (& t -> event_wait );
13521394
1353- mutex_lock (& group -> rtpoll_trigger_lock );
1354-
1355- if (!list_empty (& t -> node )) {
1356- struct psi_trigger * tmp ;
1357- u64 period = ULLONG_MAX ;
1358-
1359- list_del (& t -> node );
1360- group -> rtpoll_nr_triggers [t -> state ]-- ;
1361- if (!group -> rtpoll_nr_triggers [t -> state ])
1362- group -> rtpoll_states &= ~(1 << t -> state );
1363- /* reset min update period for the remaining triggers */
1364- list_for_each_entry (tmp , & group -> rtpoll_triggers , node )
1365- period = min (period , div_u64 (tmp -> win .size ,
1366- UPDATES_PER_WINDOW ));
1367- group -> rtpoll_min_period = period ;
1368- /* Destroy rtpoll_task when the last trigger is destroyed */
1369- if (group -> rtpoll_states == 0 ) {
1370- group -> rtpoll_until = 0 ;
1371- task_to_destroy = rcu_dereference_protected (
1372- group -> rtpoll_task ,
1373- lockdep_is_held (& group -> rtpoll_trigger_lock ));
1374- rcu_assign_pointer (group -> rtpoll_task , NULL );
1375- del_timer (& group -> rtpoll_timer );
1395+ if (t -> aggregator == PSI_AVGS ) {
1396+ mutex_lock (& group -> avgs_lock );
1397+ if (!list_empty (& t -> node )) {
1398+ list_del (& t -> node );
1399+ group -> avg_nr_triggers [t -> state ]-- ;
13761400 }
1401+ mutex_unlock (& group -> avgs_lock );
1402+ } else {
1403+ mutex_lock (& group -> rtpoll_trigger_lock );
1404+ if (!list_empty (& t -> node )) {
1405+ struct psi_trigger * tmp ;
1406+ u64 period = ULLONG_MAX ;
1407+
1408+ list_del (& t -> node );
1409+ group -> rtpoll_nr_triggers [t -> state ]-- ;
1410+ if (!group -> rtpoll_nr_triggers [t -> state ])
1411+ group -> rtpoll_states &= ~(1 << t -> state );
1412+ /* reset min update period for the remaining triggers */
1413+ list_for_each_entry (tmp , & group -> rtpoll_triggers , node )
1414+ period = min (period , div_u64 (tmp -> win .size ,
1415+ UPDATES_PER_WINDOW ));
1416+ group -> rtpoll_min_period = period ;
1417+ /* Destroy rtpoll_task when the last trigger is destroyed */
1418+ if (group -> rtpoll_states == 0 ) {
1419+ group -> rtpoll_until = 0 ;
1420+ task_to_destroy = rcu_dereference_protected (
1421+ group -> rtpoll_task ,
1422+ lockdep_is_held (& group -> rtpoll_trigger_lock ));
1423+ rcu_assign_pointer (group -> rtpoll_task , NULL );
1424+ del_timer (& group -> rtpoll_timer );
1425+ }
1426+ }
1427+ mutex_unlock (& group -> rtpoll_trigger_lock );
13771428 }
13781429
1379- mutex_unlock (& group -> rtpoll_trigger_lock );
1380-
13811430 /*
13821431 * Wait for psi_schedule_rtpoll_work RCU to complete its read-side
13831432 * critical section before destroying the trigger and optionally the
@@ -1437,27 +1486,19 @@ static int psi_cpu_show(struct seq_file *m, void *v)
14371486 return psi_show (m , & psi_system , PSI_CPU );
14381487}
14391488
1440- static int psi_open (struct file * file , int (* psi_show )(struct seq_file * , void * ))
1441- {
1442- if (file -> f_mode & FMODE_WRITE && !capable (CAP_SYS_RESOURCE ))
1443- return - EPERM ;
1444-
1445- return single_open (file , psi_show , NULL );
1446- }
1447-
14481489static int psi_io_open (struct inode * inode , struct file * file )
14491490{
1450- return psi_open (file , psi_io_show );
1491+ return single_open (file , psi_io_show , NULL );
14511492}
14521493
14531494static int psi_memory_open (struct inode * inode , struct file * file )
14541495{
1455- return psi_open (file , psi_memory_show );
1496+ return single_open (file , psi_memory_show , NULL );
14561497}
14571498
14581499static int psi_cpu_open (struct inode * inode , struct file * file )
14591500{
1460- return psi_open (file , psi_cpu_show );
1501+ return single_open (file , psi_cpu_show , NULL );
14611502}
14621503
14631504static ssize_t psi_write (struct file * file , const char __user * user_buf ,
@@ -1491,7 +1532,7 @@ static ssize_t psi_write(struct file *file, const char __user *user_buf,
14911532 return - EBUSY ;
14921533 }
14931534
1494- new = psi_trigger_create (& psi_system , buf , res );
1535+ new = psi_trigger_create (& psi_system , buf , res , file );
14951536 if (IS_ERR (new )) {
14961537 mutex_unlock (& seq -> lock );
14971538 return PTR_ERR (new );
@@ -1571,7 +1612,7 @@ static int psi_irq_show(struct seq_file *m, void *v)
15711612
15721613static int psi_irq_open (struct inode * inode , struct file * file )
15731614{
1574- return psi_open (file , psi_irq_show );
1615+ return single_open (file , psi_irq_show , NULL );
15751616}
15761617
15771618static ssize_t psi_irq_write (struct file * file , const char __user * user_buf ,
0 commit comments