Commit 7fab21f

lelloman authored and Peter Zijlstra committed
sched/psi: Rearrange polling code in preparation
Move a few functions up in the file to avoid forward declaration needed in
the patch implementing unprivileged PSI triggers.

Suggested-by: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Domenico Cerasuolo <cerasuolodomenico@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Link: https://lore.kernel.org/r/20230330105418.77061-2-cerasuolodomenico@gmail.com
1 parent 39afe5d commit 7fab21f
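
Why moving the definitions removes the need for prototypes: in C, a static function must be declared or defined before its first call in the translation unit, so a later patch that calls the averaging helpers from an earlier point in psi.c would otherwise have to add forward declarations. A minimal standalone sketch of the two orderings, using hypothetical helper()/caller() names rather than the actual psi.c symbols:

/* Illustrative sketch only -- helper() and caller() are hypothetical,
 * not functions from kernel/sched/psi.c. */
#include <stdio.h>

/* Ordering 1: definition below its caller; a forward declaration is required. */
static int helper(int x);                /* prototype needed before first use */

static int caller(int x)
{
        return helper(x) + 1;            /* call site precedes the definition */
}

static int helper(int x)
{
        return x * 2;
}

/* Ordering 2 (what this commit prepares for): define the helper above its
 * first caller, so no separate prototype is needed. */
static int helper2(int x)
{
        return x * 2;
}

static int caller2(int x)
{
        return helper2(x) + 1;
}

int main(void)
{
        printf("%d %d\n", caller(20), caller2(20));   /* prints "41 41" */
        return 0;
}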

1 file changed: kernel/sched/psi.c (98 additions, 98 deletions)
@@ -384,92 +384,6 @@ static void collect_percpu_times(struct psi_group *group,
         *pchanged_states = changed_states;
 }
 
-static u64 update_averages(struct psi_group *group, u64 now)
-{
-        unsigned long missed_periods = 0;
-        u64 expires, period;
-        u64 avg_next_update;
-        int s;
-
-        /* avgX= */
-        expires = group->avg_next_update;
-        if (now - expires >= psi_period)
-                missed_periods = div_u64(now - expires, psi_period);
-
-        /*
-         * The periodic clock tick can get delayed for various
-         * reasons, especially on loaded systems. To avoid clock
-         * drift, we schedule the clock in fixed psi_period intervals.
-         * But the deltas we sample out of the per-cpu buckets above
-         * are based on the actual time elapsing between clock ticks.
-         */
-        avg_next_update = expires + ((1 + missed_periods) * psi_period);
-        period = now - (group->avg_last_update + (missed_periods * psi_period));
-        group->avg_last_update = now;
-
-        for (s = 0; s < NR_PSI_STATES - 1; s++) {
-                u32 sample;
-
-                sample = group->total[PSI_AVGS][s] - group->avg_total[s];
-                /*
-                 * Due to the lockless sampling of the time buckets,
-                 * recorded time deltas can slip into the next period,
-                 * which under full pressure can result in samples in
-                 * excess of the period length.
-                 *
-                 * We don't want to report non-sensical pressures in
-                 * excess of 100%, nor do we want to drop such events
-                 * on the floor. Instead we punt any overage into the
-                 * future until pressure subsides. By doing this we
-                 * don't underreport the occurring pressure curve, we
-                 * just report it delayed by one period length.
-                 *
-                 * The error isn't cumulative. As soon as another
-                 * delta slips from a period P to P+1, by definition
-                 * it frees up its time T in P.
-                 */
-                if (sample > period)
-                        sample = period;
-                group->avg_total[s] += sample;
-                calc_avgs(group->avg[s], missed_periods, sample, period);
-        }
-
-        return avg_next_update;
-}
-
-static void psi_avgs_work(struct work_struct *work)
-{
-        struct delayed_work *dwork;
-        struct psi_group *group;
-        u32 changed_states;
-        u64 now;
-
-        dwork = to_delayed_work(work);
-        group = container_of(dwork, struct psi_group, avgs_work);
-
-        mutex_lock(&group->avgs_lock);
-
-        now = sched_clock();
-
-        collect_percpu_times(group, PSI_AVGS, &changed_states);
-        /*
-         * If there is task activity, periodically fold the per-cpu
-         * times and feed samples into the running averages. If things
-         * are idle and there is no data to process, stop the clock.
-         * Once restarted, we'll catch up the running averages in one
-         * go - see calc_avgs() and missed_periods.
-         */
-        if (now >= group->avg_next_update)
-                group->avg_next_update = update_averages(group, now);
-
-        if (changed_states & PSI_STATE_RESCHEDULE) {
-                schedule_delayed_work(dwork, nsecs_to_jiffies(
-                                group->avg_next_update - now) + 1);
-        }
-
-        mutex_unlock(&group->avgs_lock);
-}
-
 /* Trigger tracking window manipulations */
 static void window_reset(struct psi_window *win, u64 now, u64 value,
                          u64 prev_growth)
@@ -516,18 +430,6 @@ static u64 window_update(struct psi_window *win, u64 now, u64 value)
         return growth;
 }
 
-static void init_triggers(struct psi_group *group, u64 now)
-{
-        struct psi_trigger *t;
-
-        list_for_each_entry(t, &group->triggers, node)
-                window_reset(&t->win, now,
-                                group->total[PSI_POLL][t->state], 0);
-        memcpy(group->polling_total, group->total[PSI_POLL],
-                        sizeof(group->polling_total));
-        group->polling_next_update = now + group->poll_min_period;
-}
-
 static u64 update_triggers(struct psi_group *group, u64 now)
 {
         struct psi_trigger *t;
@@ -590,6 +492,104 @@ static u64 update_triggers(struct psi_group *group, u64 now)
         return now + group->poll_min_period;
 }
 
+static u64 update_averages(struct psi_group *group, u64 now)
+{
+        unsigned long missed_periods = 0;
+        u64 expires, period;
+        u64 avg_next_update;
+        int s;
+
+        /* avgX= */
+        expires = group->avg_next_update;
+        if (now - expires >= psi_period)
+                missed_periods = div_u64(now - expires, psi_period);
+
+        /*
+         * The periodic clock tick can get delayed for various
+         * reasons, especially on loaded systems. To avoid clock
+         * drift, we schedule the clock in fixed psi_period intervals.
+         * But the deltas we sample out of the per-cpu buckets above
+         * are based on the actual time elapsing between clock ticks.
+         */
+        avg_next_update = expires + ((1 + missed_periods) * psi_period);
+        period = now - (group->avg_last_update + (missed_periods * psi_period));
+        group->avg_last_update = now;
+
+        for (s = 0; s < NR_PSI_STATES - 1; s++) {
+                u32 sample;
+
+                sample = group->total[PSI_AVGS][s] - group->avg_total[s];
+                /*
+                 * Due to the lockless sampling of the time buckets,
+                 * recorded time deltas can slip into the next period,
+                 * which under full pressure can result in samples in
+                 * excess of the period length.
+                 *
+                 * We don't want to report non-sensical pressures in
+                 * excess of 100%, nor do we want to drop such events
+                 * on the floor. Instead we punt any overage into the
+                 * future until pressure subsides. By doing this we
+                 * don't underreport the occurring pressure curve, we
+                 * just report it delayed by one period length.
+                 *
+                 * The error isn't cumulative. As soon as another
+                 * delta slips from a period P to P+1, by definition
+                 * it frees up its time T in P.
+                 */
+                if (sample > period)
+                        sample = period;
+                group->avg_total[s] += sample;
+                calc_avgs(group->avg[s], missed_periods, sample, period);
+        }
+
+        return avg_next_update;
+}
+
+static void psi_avgs_work(struct work_struct *work)
+{
+        struct delayed_work *dwork;
+        struct psi_group *group;
+        u32 changed_states;
+        u64 now;
+
+        dwork = to_delayed_work(work);
+        group = container_of(dwork, struct psi_group, avgs_work);
+
+        mutex_lock(&group->avgs_lock);
+
+        now = sched_clock();
+
+        collect_percpu_times(group, PSI_AVGS, &changed_states);
+        /*
+         * If there is task activity, periodically fold the per-cpu
+         * times and feed samples into the running averages. If things
+         * are idle and there is no data to process, stop the clock.
+         * Once restarted, we'll catch up the running averages in one
+         * go - see calc_avgs() and missed_periods.
+         */
+        if (now >= group->avg_next_update)
+                group->avg_next_update = update_averages(group, now);
+
+        if (changed_states & PSI_STATE_RESCHEDULE) {
+                schedule_delayed_work(dwork, nsecs_to_jiffies(
+                                group->avg_next_update - now) + 1);
+        }
+
+        mutex_unlock(&group->avgs_lock);
+}
+
+static void init_triggers(struct psi_group *group, u64 now)
+{
+        struct psi_trigger *t;
+
+        list_for_each_entry(t, &group->triggers, node)
+                window_reset(&t->win, now,
+                                group->total[PSI_POLL][t->state], 0);
+        memcpy(group->polling_total, group->total[PSI_POLL],
+                        sizeof(group->polling_total));
+        group->polling_next_update = now + group->poll_min_period;
+}
+
 /* Schedule polling if it's not already scheduled or forced. */
 static void psi_schedule_poll_work(struct psi_group *group, unsigned long delay,
                                    bool force)

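As a reading aid for the moved update_averages() (commentary on the existing code, not part of this commit): when psi_avgs_work() runs late, the function keeps the averaging clock on a fixed psi_period grid and only folds the not-yet-accounted time into the sample window. A standalone sketch of just that arithmetic, with made-up nanosecond timestamps and an assumed 2-second period:

#include <stdint.h>
#include <stdio.h>

/* Hypothetical standalone model of the avg_next_update/period arithmetic in
 * update_averages(); the kernel code uses u64 and div_u64() instead. */
int main(void)
{
        uint64_t psi_period = 2000000000ULL;        /* assume a 2s period, in ns */
        uint64_t expires = 10000000000ULL;          /* avg_next_update was t = 10s */
        uint64_t avg_last_update = 8000000000ULL;   /* last fold happened at t = 8s */
        uint64_t now = 15500000000ULL;              /* the work item ran late, at t = 15.5s */
        uint64_t missed_periods = 0;

        if (now - expires >= psi_period)
                missed_periods = (now - expires) / psi_period;  /* 5.5s / 2s -> 2 */

        /* Stay on the fixed grid: 10s + (1 + 2) * 2s = 16s. */
        uint64_t avg_next_update = expires + (1 + missed_periods) * psi_period;

        /* Sample only the unaccounted remainder: 15.5s - (8s + 2 * 2s) = 3.5s. */
        uint64_t period = now - (avg_last_update + missed_periods * psi_period);

        printf("missed_periods=%llu avg_next_update=%llu period=%llu\n",
               (unsigned long long)missed_periods,
               (unsigned long long)avg_next_update,
               (unsigned long long)period);
        return 0;
}

Each per-state sample is then clamped to this period, which is why reported pressure never exceeds 100% of elapsed time; the comment block in the moved code explains how the clamped overage is carried into the next period instead of being dropped.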