@@ -384,92 +384,6 @@ static void collect_percpu_times(struct psi_group *group,
 	*pchanged_states = changed_states;
 }
 
-static u64 update_averages(struct psi_group *group, u64 now)
-{
-	unsigned long missed_periods = 0;
-	u64 expires, period;
-	u64 avg_next_update;
-	int s;
-
-	/* avgX= */
-	expires = group->avg_next_update;
-	if (now - expires >= psi_period)
-		missed_periods = div_u64(now - expires, psi_period);
-
-	/*
-	 * The periodic clock tick can get delayed for various
-	 * reasons, especially on loaded systems. To avoid clock
-	 * drift, we schedule the clock in fixed psi_period intervals.
-	 * But the deltas we sample out of the per-cpu buckets above
-	 * are based on the actual time elapsing between clock ticks.
-	 */
-	avg_next_update = expires + ((1 + missed_periods) * psi_period);
-	period = now - (group->avg_last_update + (missed_periods * psi_period));
-	group->avg_last_update = now;
-
-	for (s = 0; s < NR_PSI_STATES - 1; s++) {
-		u32 sample;
-
-		sample = group->total[PSI_AVGS][s] - group->avg_total[s];
-		/*
-		 * Due to the lockless sampling of the time buckets,
-		 * recorded time deltas can slip into the next period,
-		 * which under full pressure can result in samples in
-		 * excess of the period length.
-		 *
-		 * We don't want to report non-sensical pressures in
-		 * excess of 100%, nor do we want to drop such events
-		 * on the floor. Instead we punt any overage into the
-		 * future until pressure subsides. By doing this we
-		 * don't underreport the occurring pressure curve, we
-		 * just report it delayed by one period length.
-		 *
-		 * The error isn't cumulative. As soon as another
-		 * delta slips from a period P to P+1, by definition
-		 * it frees up its time T in P.
-		 */
-		if (sample > period)
-			sample = period;
-		group->avg_total[s] += sample;
-		calc_avgs(group->avg[s], missed_periods, sample, period);
-	}
-
-	return avg_next_update;
-}
-
-static void psi_avgs_work(struct work_struct *work)
-{
-	struct delayed_work *dwork;
-	struct psi_group *group;
-	u32 changed_states;
-	u64 now;
-
-	dwork = to_delayed_work(work);
-	group = container_of(dwork, struct psi_group, avgs_work);
-
-	mutex_lock(&group->avgs_lock);
-
-	now = sched_clock();
-
-	collect_percpu_times(group, PSI_AVGS, &changed_states);
-	/*
-	 * If there is task activity, periodically fold the per-cpu
-	 * times and feed samples into the running averages. If things
-	 * are idle and there is no data to process, stop the clock.
-	 * Once restarted, we'll catch up the running averages in one
-	 * go - see calc_avgs() and missed_periods.
-	 */
-	if (now >= group->avg_next_update)
-		group->avg_next_update = update_averages(group, now);
-
-	if (changed_states & PSI_STATE_RESCHEDULE) {
-		schedule_delayed_work(dwork, nsecs_to_jiffies(
-				group->avg_next_update - now) + 1);
-	}
-
-	mutex_unlock(&group->avgs_lock);
-}
-
 /* Trigger tracking window manipulations */
 static void window_reset(struct psi_window *win, u64 now, u64 value,
 			 u64 prev_growth)
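For reference while reading the moved code: the clock-drift comment in update_averages() above means the averaging worker is always rescheduled on a fixed psi_period grid, however late it actually ran. Below is a minimal standalone sketch of that catch-up arithmetic, not kernel code: the 2s period is assumed for illustration, plain division stands in for div_u64(), and the per-state folding done by calc_avgs() is not modeled.

/*
 * Sketch of the missed-period catch-up in update_averages(): the next
 * update goes back on the fixed grid, not to "now + period".
 */
#include <stdint.h>
#include <stdio.h>

#define PSI_PERIOD_NS 2000000000ULL		/* assumed 2s averaging period */

static uint64_t next_update(uint64_t expires, uint64_t now)
{
	uint64_t missed_periods = 0;

	if (now - expires >= PSI_PERIOD_NS)
		missed_periods = (now - expires) / PSI_PERIOD_NS;

	/* reschedule on the fixed psi_period grid */
	return expires + (1 + missed_periods) * PSI_PERIOD_NS;
}

int main(void)
{
	uint64_t expires = 20000000000ULL;	/* update was due at t=20s */
	uint64_t now = 25000000000ULL;		/* worker actually ran at t=25s */

	printf("missed periods: %llu\n",
	       (unsigned long long)((now - expires) / PSI_PERIOD_NS));
	printf("next update at %llu ns (t=26s, back on the grid)\n",
	       (unsigned long long)next_update(expires, now));
	return 0;
}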
@@ -516,18 +430,6 @@ static u64 window_update(struct psi_window *win, u64 now, u64 value)
 	return growth;
 }
 
-static void init_triggers(struct psi_group *group, u64 now)
-{
-	struct psi_trigger *t;
-
-	list_for_each_entry(t, &group->triggers, node)
-		window_reset(&t->win, now,
-				group->total[PSI_POLL][t->state], 0);
-	memcpy(group->polling_total, group->total[PSI_POLL],
-		   sizeof(group->polling_total));
-	group->polling_next_update = now + group->poll_min_period;
-}
-
 static u64 update_triggers(struct psi_group *group, u64 now)
 {
 	struct psi_trigger *t;
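init_triggers(), removed in the hunk above and re-added after update_triggers() further down, reseeds every trigger's observation window before polling starts. A simplified userspace model of what such a reset means for growth tracking follows; the start_time and start_value fields are inferred from window_reset()'s parameters and are only stand-ins, since struct psi_window itself is not part of this diff.

/*
 * Simplified model of a trigger window reset: after the reset, growth
 * is measured against the recorded baseline. Field names are assumed.
 */
#include <stdint.h>
#include <stdio.h>

struct win_model {
	uint64_t start_time;	/* when the window was (re)started */
	uint64_t start_value;	/* stall total recorded at that point */
};

static void model_window_reset(struct win_model *w, uint64_t now, uint64_t value)
{
	w->start_time = now;
	w->start_value = value;
}

int main(void)
{
	struct win_model w;
	uint64_t total = 500000000ULL;		/* 0.5s of stall time so far */

	model_window_reset(&w, 0, total);
	total += 120000000ULL;			/* 120ms more stall accrues */
	printf("growth in window: %llu ns\n",
	       (unsigned long long)(total - w.start_value));
	return 0;
}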
@@ -590,6 +492,104 @@ static u64 update_triggers(struct psi_group *group, u64 now)
 	return now + group->poll_min_period;
 }
 
+static u64 update_averages(struct psi_group *group, u64 now)
+{
+	unsigned long missed_periods = 0;
+	u64 expires, period;
+	u64 avg_next_update;
+	int s;
+
+	/* avgX= */
+	expires = group->avg_next_update;
+	if (now - expires >= psi_period)
+		missed_periods = div_u64(now - expires, psi_period);
+
+	/*
+	 * The periodic clock tick can get delayed for various
+	 * reasons, especially on loaded systems. To avoid clock
+	 * drift, we schedule the clock in fixed psi_period intervals.
+	 * But the deltas we sample out of the per-cpu buckets above
+	 * are based on the actual time elapsing between clock ticks.
+	 */
+	avg_next_update = expires + ((1 + missed_periods) * psi_period);
+	period = now - (group->avg_last_update + (missed_periods * psi_period));
+	group->avg_last_update = now;
+
+	for (s = 0; s < NR_PSI_STATES - 1; s++) {
+		u32 sample;
+
+		sample = group->total[PSI_AVGS][s] - group->avg_total[s];
+		/*
+		 * Due to the lockless sampling of the time buckets,
+		 * recorded time deltas can slip into the next period,
+		 * which under full pressure can result in samples in
+		 * excess of the period length.
+		 *
+		 * We don't want to report non-sensical pressures in
+		 * excess of 100%, nor do we want to drop such events
+		 * on the floor. Instead we punt any overage into the
+		 * future until pressure subsides. By doing this we
+		 * don't underreport the occurring pressure curve, we
+		 * just report it delayed by one period length.
+		 *
+		 * The error isn't cumulative. As soon as another
+		 * delta slips from a period P to P+1, by definition
+		 * it frees up its time T in P.
+		 */
+		if (sample > period)
+			sample = period;
+		group->avg_total[s] += sample;
+		calc_avgs(group->avg[s], missed_periods, sample, period);
+	}
+
+	return avg_next_update;
+}
+
+static void psi_avgs_work(struct work_struct *work)
+{
+	struct delayed_work *dwork;
+	struct psi_group *group;
+	u32 changed_states;
+	u64 now;
+
+	dwork = to_delayed_work(work);
+	group = container_of(dwork, struct psi_group, avgs_work);
+
+	mutex_lock(&group->avgs_lock);
+
+	now = sched_clock();
+
+	collect_percpu_times(group, PSI_AVGS, &changed_states);
+	/*
+	 * If there is task activity, periodically fold the per-cpu
+	 * times and feed samples into the running averages. If things
+	 * are idle and there is no data to process, stop the clock.
+	 * Once restarted, we'll catch up the running averages in one
+	 * go - see calc_avgs() and missed_periods.
+	 */
+	if (now >= group->avg_next_update)
+		group->avg_next_update = update_averages(group, now);
+
+	if (changed_states & PSI_STATE_RESCHEDULE) {
+		schedule_delayed_work(dwork, nsecs_to_jiffies(
+				group->avg_next_update - now) + 1);
+	}
+
+	mutex_unlock(&group->avgs_lock);
+}
+
+static void init_triggers(struct psi_group *group, u64 now)
+{
+	struct psi_trigger *t;
+
+	list_for_each_entry(t, &group->triggers, node)
+		window_reset(&t->win, now,
+				group->total[PSI_POLL][t->state], 0);
+	memcpy(group->polling_total, group->total[PSI_POLL],
+		   sizeof(group->polling_total));
+	group->polling_next_update = now + group->poll_min_period;
+}
+
 /* Schedule polling if it's not already scheduled or forced. */
 static void psi_schedule_poll_work(struct psi_group *group, unsigned long delay,
 				   bool force)
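The clamp inside the re-added update_averages() above is what keeps reported pressure at or below 100%: stall time that slips past a period boundary is deferred through avg_total rather than dropped. A small standalone illustration with made-up numbers, not kernel code:

/*
 * Sketch of the overage deferral described in update_averages(): under
 * full pressure a delta can exceed the period; the excess is carried
 * into the next period instead of being reported or discarded.
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t period = 2000000000ULL;	/* 2s sampling period */
	uint64_t total = 2300000000ULL;		/* 2.3s of stall time observed */
	uint64_t avg_total = 0;
	uint64_t sample;

	/* period 1: clamp to 100%; 0.3s of overage stays unaccounted */
	sample = total - avg_total;
	if (sample > period)
		sample = period;
	avg_total += sample;
	printf("period 1 sample: %llu ns\n", (unsigned long long)sample);

	/* period 2: no new stall time; the deferred 0.3s is reported now */
	sample = total - avg_total;
	printf("period 2 sample: %llu ns\n", (unsigned long long)sample);
	return 0;
}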