@@ -74,64 +74,109 @@ __bpf_kfunc void cgroup_rstat_updated(struct cgroup *cgrp, int cpu)
7474}
7575
7676/**
77- * cgroup_rstat_cpu_pop_updated - iterate and dismantle rstat_cpu updated tree
78- * @pos : current position
79- * @root: root of the tree to traversal
77+ * cgroup_rstat_push_children - push children cgroups into the given list
78+ * @head : current head of the list (= subtree root)
79+ * @child: first child of the root
8080 * @cpu: target cpu
81+ * Return: A new singly linked list of cgroups to be flushed
8182 *
82- * Walks the updated rstat_cpu tree on @cpu from @root. %NULL @pos starts
83- * the traversal and %NULL return indicates the end. During traversal,
84- * each returned cgroup is unlinked from the tree. Must be called with the
85- * matching cgroup_rstat_cpu_lock held.
83+ * Iteratively traverse down the cgroup_rstat_cpu updated tree level by
84+ * level and push all the parents first before their next level children
85+ * into a singly linked list built from the tail backward like "pushing"
86+ * cgroups into a stack. The root is pushed by the caller.
87+ */
88+ static struct cgroup * cgroup_rstat_push_children (struct cgroup * head ,
89+ struct cgroup * child , int cpu )
90+ {
91+ struct cgroup * chead = child ; /* Work list of the level being processed, linked via rstat_flush_next */
92+ struct cgroup * ghead = NULL ; /* Work list collected for the next level down */
93+ struct cgroup * parent , * grandchild ;
94+ struct cgroup_rstat_cpu * crstatc ;
95+
96+ child -> rstat_flush_next = NULL ; /* @child is the only entry on the initial work list */
97+
98+ next_level :
99+ while (chead ) {
100+ child = chead ; /* Pop one entry off the current level's work list */
101+ chead = child -> rstat_flush_next ;
102+ parent = cgroup_parent (child );
103+
104+ /* Walk the siblings via updated_next; the chain is terminated by the parent cgroup */
105+ while (child != parent ) {
106+ child -> rstat_flush_next = head ; /* Push onto the front of the output list */
107+ head = child ;
108+ crstatc = cgroup_rstat_cpu (child , cpu );
109+ grandchild = crstatc -> updated_children ;
110+ if (grandchild != child ) { /* Not self-pointing: this cgroup has updated children */
111+ /* Detach the children and queue their first entry for the next level */
112+ crstatc -> updated_children = child ;
113+ grandchild -> rstat_flush_next = ghead ;
114+ ghead = grandchild ;
115+ }
116+ child = crstatc -> updated_next ; /* Advance to the next sibling (or the parent terminator) */
117+ crstatc -> updated_next = NULL ; /* NULL updated_next marks the cgroup as off the updated tree */
118+ }
119+ }
120+
121+ if (ghead ) { /* Current level exhausted: descend to the queued next level, if any */
122+ chead = ghead ;
123+ ghead = NULL ;
124+ goto next_level ;
125+ }
126+ return head ;
127+ }
128+
129+ /**
130+ * cgroup_rstat_updated_list - return a list of updated cgroups to be flushed
131+ * @root: root of the cgroup subtree to traverse
132+ * @cpu: target cpu
133+ * Return: A singly linked list of cgroups to be flushed
134+ *
135+ * Walks the updated rstat_cpu tree on @cpu from @root. During traversal,
136+ * each returned cgroup is unlinked from the updated tree.
86137 *
87138 * The only ordering guarantee is that, for a parent and a child pair
88- * covered by a given traversal, if a child is visited, its parent is
89- * guaranteed to be visited afterwards.
139+ * covered by a given traversal, the child is before its parent in
140+ * the list.
141+ *
142+ * Note that updated_children is self terminated and points to a list of
143+ * child cgroups if not empty. Whereas updated_next is like a sibling link
144+ * within the children list and terminated by the parent cgroup. An exception
145+ * here is the cgroup root whose updated_next can be self terminated.
90146 */
91- static struct cgroup * cgroup_rstat_cpu_pop_updated (struct cgroup * pos ,
92- struct cgroup * root , int cpu )
147+ static struct cgroup * cgroup_rstat_updated_list (struct cgroup * root , int cpu )
93148{
94- struct cgroup_rstat_cpu * rstatc ;
95- struct cgroup * parent ;
96-
97- if (pos == root )
98- return NULL ;
149+ raw_spinlock_t * cpu_lock = per_cpu_ptr (& cgroup_rstat_cpu_lock , cpu );
150+ struct cgroup_rstat_cpu * rstatc = cgroup_rstat_cpu (root , cpu );
151+ struct cgroup * head = NULL , * parent , * child ;
152+ unsigned long flags ;
99153
100154 /*
101- * We're gonna walk down to the first leaf and visit/remove it. We
102- * can pick whatever unvisited node as the starting point.
155+ * The _irqsave() is needed because cgroup_rstat_lock is
156+ * spinlock_t which is a sleeping lock on PREEMPT_RT. Acquiring
157+ * this lock with the _irq() suffix only disables interrupts on
158+ * a non-PREEMPT_RT kernel. The raw_spinlock_t below disables
159+ * interrupts on both configurations. The _irqsave() ensures
160+ * that interrupts are always disabled and later restored.
103161 */
104- if (!pos ) {
105- pos = root ;
106- /* return NULL if this subtree is not on-list */
107- if (!cgroup_rstat_cpu (pos , cpu )-> updated_next )
108- return NULL ;
109- } else {
110- pos = cgroup_parent (pos );
111- }
162+ raw_spin_lock_irqsave (cpu_lock , flags );
112163
113- /* walk down to the first leaf */
114- while (true) {
115- rstatc = cgroup_rstat_cpu (pos , cpu );
116- if (rstatc -> updated_children == pos )
117- break ;
118- pos = rstatc -> updated_children ;
119- }
164+ /* Return NULL if this subtree is not on-list */
165+ if (!rstatc -> updated_next )
166+ goto unlock_ret ;
120167
121168 /*
122- * Unlink @pos from the tree. As the updated_children list is
169+ * Unlink @root from its parent. As the updated_children list is
123170 * singly linked, we have to walk it to find the removal point.
124- * However, due to the way we traverse, @pos will be the first
125- * child in most cases. The only exception is @root.
126171 */
127- parent = cgroup_parent (pos );
172+ parent = cgroup_parent (root );
128173 if (parent ) {
129174 struct cgroup_rstat_cpu * prstatc ;
130175 struct cgroup * * nextp ;
131176
132177 prstatc = cgroup_rstat_cpu (parent , cpu );
133178 nextp = & prstatc -> updated_children ;
134- while (* nextp != pos ) {
179+ while (* nextp != root ) {
135180 struct cgroup_rstat_cpu * nrstatc ;
136181
137182 nrstatc = cgroup_rstat_cpu (* nextp , cpu );
@@ -142,31 +187,15 @@ static struct cgroup *cgroup_rstat_cpu_pop_updated(struct cgroup *pos,
142187 }
143188
144189 rstatc -> updated_next = NULL ;
145- return pos ;
146- }
147190
148- /* Return a list of updated cgroups to be flushed */
149- static struct cgroup * cgroup_rstat_updated_list (struct cgroup * root , int cpu )
150- {
151- raw_spinlock_t * cpu_lock = per_cpu_ptr (& cgroup_rstat_cpu_lock , cpu );
152- struct cgroup * head , * tail , * next ;
153- unsigned long flags ;
154-
155- /*
156- * The _irqsave() is needed because cgroup_rstat_lock is
157- * spinlock_t which is a sleeping lock on PREEMPT_RT. Acquiring
158- * this lock with the _irq() suffix only disables interrupts on
159- * a non-PREEMPT_RT kernel. The raw_spinlock_t below disables
160- * interrupts on both configurations. The _irqsave() ensures
161- * that interrupts are always disabled and later restored.
162- */
163- raw_spin_lock_irqsave (cpu_lock , flags );
164- head = tail = cgroup_rstat_cpu_pop_updated (NULL , root , cpu );
165- while (tail ) {
166- next = cgroup_rstat_cpu_pop_updated (tail , root , cpu );
167- tail -> rstat_flush_next = next ;
168- tail = next ;
169- }
191+ /* Push @root to the list first before pushing the children */
192+ head = root ;
193+ root -> rstat_flush_next = NULL ;
194+ child = rstatc -> updated_children ;
195+ rstatc -> updated_children = root ;
196+ if (child != root )
197+ head = cgroup_rstat_push_children (head , child , cpu );
198+ unlock_ret :
170199 raw_spin_unlock_irqrestore (cpu_lock , flags );
171200 return head ;
172201}
0 commit comments