 #include "blk-ioprio.h"
 #include "blk-throttle.h"
 
+static void __blkcg_rstat_flush(struct blkcg *blkcg, int cpu);
+
 /*
  * blkcg_pol_mutex protects blkcg_policy[] and policy [de]activation.
  * blkcg_pol_register_mutex nests outside of it and synchronizes entire
@@ -56,6 +58,8 @@ static LIST_HEAD(all_blkcgs); /* protected by blkcg_pol_mutex */
 
 bool blkcg_debug_stats = false;
 
+static DEFINE_RAW_SPINLOCK(blkg_stat_lock);
+
 #define BLKG_DESTROY_BATCH_SIZE 64
 
 /*
@@ -163,10 +167,20 @@ static void blkg_free(struct blkcg_gq *blkg)
 static void __blkg_release(struct rcu_head *rcu)
 {
 	struct blkcg_gq *blkg = container_of(rcu, struct blkcg_gq, rcu_head);
+	struct blkcg *blkcg = blkg->blkcg;
+	int cpu;
 
 #ifdef CONFIG_BLK_CGROUP_PUNT_BIO
 	WARN_ON(!bio_list_empty(&blkg->async_bios));
 #endif
+	/*
+	 * Flush all the non-empty percpu lockless lists before releasing
+	 * us, given these stats belong to us.
+	 *
+	 * blkg_stat_lock is for serializing blkg stat updates.
+	 */
+	for_each_possible_cpu(cpu)
+		__blkcg_rstat_flush(blkcg, cpu);
 
 	/* release the blkcg and parent blkg refs this blkg has been holding */
 	css_put(&blkg->blkcg->css);
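
The flush above leans on the kernel's lockless-list API: producers publish
per-CPU stat nodes with llist_add(), and a flusher detaches the whole list
atomically with llist_del_all(). A minimal sketch of that consumer pattern
(fold_into_blkg() is a hypothetical helper standing in for the real stat
aggregation):

	struct blkg_iostat_set *bisc, *next_bisc;
	struct llist_node *lnode;

	/* Atomically detach every node queued on this CPU's list;
	 * producers racing with us simply start a new list. */
	lnode = llist_del_all(lhead);
	if (!lnode)
		return;

	/* The _safe variant allows each node to be reused or requeued
	 * once its stats have been folded into the aggregate. */
	llist_for_each_entry_safe(bisc, next_bisc, lnode, lnode)
		fold_into_blkg(bisc);	/* hypothetical */
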
@@ -951,23 +965,26 @@ static void blkcg_iostat_update(struct blkcg_gq *blkg, struct blkg_iostat *cur,
 	u64_stats_update_end_irqrestore(&blkg->iostat.sync, flags);
 }
 
-static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu)
+static void __blkcg_rstat_flush(struct blkcg *blkcg, int cpu)
 {
-	struct blkcg *blkcg = css_to_blkcg(css);
 	struct llist_head *lhead = per_cpu_ptr(blkcg->lhead, cpu);
 	struct llist_node *lnode;
 	struct blkg_iostat_set *bisc, *next_bisc;
 
-	/* Root-level stats are sourced from system-wide IO stats */
-	if (!cgroup_parent(css->cgroup))
-		return;
-
 	rcu_read_lock();
 
 	lnode = llist_del_all(lhead);
 	if (!lnode)
 		goto out;
 
+	/*
+	 * Cover concurrent parent blkg updates from blkg_release().
+	 *
+	 * When flushing from cgroup, cgroup_rstat_lock is always held, so
+	 * this lock won't cause contention most of the time.
+	 */
+	raw_spin_lock(&blkg_stat_lock);
+
 	/*
 	 * Iterate only the iostat_cpu's queued in the lockless list.
 	 */
@@ -991,13 +1008,19 @@ static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu)
 		if (parent && parent->parent)
 			blkcg_iostat_update(parent, &blkg->iostat.cur,
 					    &blkg->iostat.last);
-		percpu_ref_put(&blkg->refcnt);
 	}
-
+	raw_spin_unlock(&blkg_stat_lock);
 out:
 	rcu_read_unlock();
 }
 
+static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu)
+{
+	/* Root-level stats are sourced from system-wide IO stats */
+	if (cgroup_parent(css->cgroup))
+		__blkcg_rstat_flush(css_to_blkcg(css), cpu);
+}
+
 /*
  * We source root cgroup stats from the system-wide stats to avoid
  * tracking the same information twice and incurring overhead when no
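
The new blkg_stat_lock exists because two flushers can now run concurrently
against the same parent: the regular cgroup rstat path and the flush added
to __blkg_release(). Propagating a child's delta into the parent is a plain
read-modify-write, so without serialization one flusher's update could be
lost. A sketch of the pattern, assuming blkg_iostat's bytes[]/ios[] layout
(the demo_* names are illustrative, not part of the patch):

	static DEFINE_RAW_SPINLOCK(demo_stat_lock);

	static void demo_propagate(struct blkg_iostat *parent_sum,
				   const struct blkg_iostat *delta)
	{
		int i;

		/* Without the lock, two concurrent flushers could both
		 * load the same old counter value and one delta would
		 * be silently dropped. */
		raw_spin_lock(&demo_stat_lock);
		for (i = 0; i < BLKG_IOSTAT_NR; i++) {
			parent_sum->bytes[i] += delta->bytes[i];
			parent_sum->ios[i] += delta->ios[i];
		}
		raw_spin_unlock(&demo_stat_lock);
	}
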
@@ -2075,7 +2098,6 @@ void blk_cgroup_bio_start(struct bio *bio)
 
 		llist_add(&bis->lnode, lhead);
 		WRITE_ONCE(bis->lqueued, true);
-		percpu_ref_get(&bis->blkg->refcnt);
 	}
 
 	u64_stats_update_end_irqrestore(&bis->sync, flags);
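
With the flush moved into __blkg_release(), a queued per-CPU node can no
longer outlive its blkg, so the percpu_ref_get() pinning is dropped above.
The surrounding queue-once logic (a sketch mirroring the READ_ONCE/WRITE_ONCE
pairing visible in the hunk) links a CPU's stat set at most once per flush
cycle:

	/* First update since the last flush queues the per-CPU node;
	 * later updates on this CPU see lqueued == true and skip it. */
	if (!READ_ONCE(bis->lqueued)) {
		llist_add(&bis->lnode, lhead);
		WRITE_ONCE(bis->lqueued, true);
	}
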