@@ -50,7 +50,7 @@ static char *sb_writers_name[SB_FREEZE_LEVELS] = {
5050 "sb_internal" ,
5151};
5252
53- static inline void super_lock (struct super_block * sb , bool excl )
53+ static inline void __super_lock (struct super_block * sb , bool excl )
5454{
5555 if (excl )
5656 down_write (& sb -> s_umount );
@@ -66,14 +66,9 @@ static inline void super_unlock(struct super_block *sb, bool excl)
6666 up_read (& sb -> s_umount );
6767}
6868
69- static inline void super_lock_excl (struct super_block * sb )
69+ static inline void __super_lock_excl (struct super_block * sb )
7070{
71- super_lock (sb , true);
72- }
73-
74- static inline void super_lock_shared (struct super_block * sb )
75- {
76- super_lock (sb , false);
71+ __super_lock (sb , true);
7772}
7873
7974static inline void super_unlock_excl (struct super_block * sb )
@@ -86,6 +81,99 @@ static inline void super_unlock_shared(struct super_block *sb)
8681 super_unlock (sb , false);
8782}
8883
84+ static inline bool wait_born (struct super_block * sb )
85+ {
86+ unsigned int flags ;
87+
88+ /*
89+ * Pairs with smp_store_release() in super_wake() and ensures
90+ * that we see SB_BORN or SB_DYING after we're woken.
91+ */
92+ flags = smp_load_acquire (& sb -> s_flags );
93+ return flags & (SB_BORN | SB_DYING );
94+ }
95+
96+ /**
97+ * super_lock - wait for superblock to become ready and lock it
98+ * @sb: superblock to wait for
99+ * @excl: whether exclusive access is required
100+ *
101+ * If the superblock has neither passed through vfs_get_tree() or
102+ * generic_shutdown_super() yet wait for it to happen. Either superblock
103+ * creation will succeed and SB_BORN is set by vfs_get_tree() or we're
104+ * woken and we'll see SB_DYING.
105+ *
106+ * The caller must have acquired a temporary reference on @sb->s_count.
107+ *
108+ * Return: This returns true if SB_BORN was set, false if SB_DYING was
109+ * set. The function acquires s_umount and returns with it held.
110+ */
111+ static __must_check bool super_lock (struct super_block * sb , bool excl )
112+ {
113+
114+ lockdep_assert_not_held (& sb -> s_umount );
115+
116+ relock :
117+ __super_lock (sb , excl );
118+
119+ /*
120+ * Has gone through generic_shutdown_super() in the meantime.
121+ * @sb->s_root is NULL and @sb->s_active is 0. No one needs to
122+ * grab a reference to this. Tell them so.
123+ */
124+ if (sb -> s_flags & SB_DYING )
125+ return false;
126+
127+ /* Has called ->get_tree() successfully. */
128+ if (sb -> s_flags & SB_BORN )
129+ return true;
130+
131+ super_unlock (sb , excl );
132+
133+ /* wait until the superblock is ready or dying */
134+ wait_var_event (& sb -> s_flags , wait_born (sb ));
135+
136+ /*
137+ * Neither SB_BORN nor SB_DYING are ever unset so we never loop.
138+ * Just reacquire @sb->s_umount for the caller.
139+ */
140+ goto relock ;
141+ }
142+
143+ /* wait and acquire read-side of @sb->s_umount */
144+ static inline bool super_lock_shared (struct super_block * sb )
145+ {
146+ return super_lock (sb , false);
147+ }
148+
149+ /* wait and acquire write-side of @sb->s_umount */
150+ static inline bool super_lock_excl (struct super_block * sb )
151+ {
152+ return super_lock (sb , true);
153+ }
154+
155+ /* wake waiters */
156+ #define SUPER_WAKE_FLAGS (SB_BORN | SB_DYING)
157+ static void super_wake (struct super_block * sb , unsigned int flag )
158+ {
159+ WARN_ON_ONCE ((flag & ~SUPER_WAKE_FLAGS ));
160+ WARN_ON_ONCE (hweight32 (flag & SUPER_WAKE_FLAGS ) > 1 );
161+
162+ /*
163+ * Pairs with smp_load_acquire() in super_lock() to make sure
164+ * all initializations in the superblock are seen by the user
165+ * seeing SB_BORN sent.
166+ */
167+ smp_store_release (& sb -> s_flags , sb -> s_flags | flag );
168+ /*
169+ * Pairs with the barrier in prepare_to_wait_event() to make sure
170+ * ___wait_var_event() either sees SB_BORN set or
171+ * waitqueue_active() check in wake_up_var() sees the waiter.
172+ */
173+ smp_mb ();
174+ wake_up_var (& sb -> s_flags );
175+ }
176+
89177/*
90178 * One thing we have to be careful of with a per-sb shrinker is that we don't
91179 * drop the last active reference to the superblock from within the shrinker.
@@ -393,7 +481,7 @@ EXPORT_SYMBOL(deactivate_locked_super);
393481void deactivate_super (struct super_block * s )
394482{
395483 if (!atomic_add_unless (& s -> s_active , -1 , 1 )) {
396- super_lock_excl (s );
484+ __super_lock_excl (s );
397485 deactivate_locked_super (s );
398486 }
399487}
@@ -415,10 +503,12 @@ EXPORT_SYMBOL(deactivate_super);
415503 */
416504static int grab_super (struct super_block * s ) __releases (sb_lock )
417505{
506+ bool born ;
507+
418508 s -> s_count ++ ;
419509 spin_unlock (& sb_lock );
420- super_lock_excl (s );
421- if (( s -> s_flags & SB_BORN ) && atomic_inc_not_zero (& s -> s_active )) {
510+ born = super_lock_excl (s );
511+ if (born && atomic_inc_not_zero (& s -> s_active )) {
422512 put_super (s );
423513 return 1 ;
424514 }
@@ -447,8 +537,8 @@ static int grab_super(struct super_block *s) __releases(sb_lock)
447537bool super_trylock_shared (struct super_block * sb )
448538{
449539 if (down_read_trylock (& sb -> s_umount )) {
450- if (!hlist_unhashed ( & sb -> s_instances ) &&
451- sb -> s_root && (sb -> s_flags & SB_BORN ))
540+ if (!( sb -> s_flags & SB_DYING ) && sb -> s_root &&
541+ (sb -> s_flags & SB_BORN ))
452542 return true;
453543 super_unlock_shared (sb );
454544 }
@@ -475,7 +565,7 @@ bool super_trylock_shared(struct super_block *sb)
475565void retire_super (struct super_block * sb )
476566{
477567 WARN_ON (!sb -> s_bdev );
478- super_lock_excl (sb );
568+ __super_lock_excl (sb );
479569 if (sb -> s_iflags & SB_I_PERSB_BDI ) {
480570 bdi_unregister (sb -> s_bdi );
481571 sb -> s_iflags &= ~SB_I_PERSB_BDI ;
@@ -557,6 +647,13 @@ void generic_shutdown_super(struct super_block *sb)
557647 /* should be initialized for __put_super_and_need_restart() */
558648 hlist_del_init (& sb -> s_instances );
559649 spin_unlock (& sb_lock );
650+ /*
651+ * Broadcast to everyone that grabbed a temporary reference to this
652+ * superblock before we removed it from @fs_supers that the superblock
653+ * is dying. Every walker of @fs_supers outside of sget{_fc}() will now
654+ * discard this superblock and treat it as dead.
655+ */
656+ super_wake (sb , SB_DYING );
560657 super_unlock_excl (sb );
561658 if (sb -> s_bdi != & noop_backing_dev_info ) {
562659 if (sb -> s_iflags & SB_I_PERSB_BDI )
@@ -631,6 +728,11 @@ struct super_block *sget_fc(struct fs_context *fc,
631728 s -> s_type = fc -> fs_type ;
632729 s -> s_iflags |= fc -> s_iflags ;
633730 strscpy (s -> s_id , s -> s_type -> name , sizeof (s -> s_id ));
731+ /*
732+ * Make the superblock visible on @super_blocks and @fs_supers.
733+ * It's in a nascent state and users should wait on SB_BORN or
734+ * SB_DYING to be set.
735+ */
634736 list_add_tail (& s -> s_list , & super_blocks );
635737 hlist_add_head (& s -> s_instances , & s -> s_type -> fs_supers );
636738 spin_unlock (& sb_lock );
@@ -740,7 +842,8 @@ static void __iterate_supers(void (*f)(struct super_block *))
740842
741843 spin_lock (& sb_lock );
742844 list_for_each_entry (sb , & super_blocks , s_list ) {
743- if (hlist_unhashed (& sb -> s_instances ))
845+ /* Pairs with memory barrier in super_wake(). */
846+ if (smp_load_acquire (& sb -> s_flags ) & SB_DYING )
744847 continue ;
745848 sb -> s_count ++ ;
746849 spin_unlock (& sb_lock );
@@ -770,13 +873,13 @@ void iterate_supers(void (*f)(struct super_block *, void *), void *arg)
770873
771874 spin_lock (& sb_lock );
772875 list_for_each_entry (sb , & super_blocks , s_list ) {
773- if ( hlist_unhashed ( & sb -> s_instances ))
774- continue ;
876+ bool born ;
877+
775878 sb -> s_count ++ ;
776879 spin_unlock (& sb_lock );
777880
778- super_lock_shared (sb );
779- if (sb -> s_root && ( sb -> s_flags & SB_BORN ) )
881+ born = super_lock_shared (sb );
882+ if (born && sb -> s_root )
780883 f (sb , arg );
781884 super_unlock_shared (sb );
782885
@@ -806,11 +909,13 @@ void iterate_supers_type(struct file_system_type *type,
806909
807910 spin_lock (& sb_lock );
808911 hlist_for_each_entry (sb , & type -> fs_supers , s_instances ) {
912+ bool born ;
913+
809914 sb -> s_count ++ ;
810915 spin_unlock (& sb_lock );
811916
812- super_lock_shared (sb );
813- if (sb -> s_root && ( sb -> s_flags & SB_BORN ) )
917+ born = super_lock_shared (sb );
918+ if (born && sb -> s_root )
814919 f (sb , arg );
815920 super_unlock_shared (sb );
816921
@@ -841,14 +946,11 @@ struct super_block *get_active_super(struct block_device *bdev)
841946 if (!bdev )
842947 return NULL ;
843948
844- restart :
845949 spin_lock (& sb_lock );
846950 list_for_each_entry (sb , & super_blocks , s_list ) {
847- if (hlist_unhashed (& sb -> s_instances ))
848- continue ;
849951 if (sb -> s_bdev == bdev ) {
850952 if (!grab_super (sb ))
851- goto restart ;
953+ return NULL ;
852954 super_unlock_excl (sb );
853955 return sb ;
854956 }
@@ -862,22 +964,21 @@ struct super_block *user_get_super(dev_t dev, bool excl)
862964 struct super_block * sb ;
863965
864966 spin_lock (& sb_lock );
865- rescan :
866967 list_for_each_entry (sb , & super_blocks , s_list ) {
867- if (hlist_unhashed (& sb -> s_instances ))
868- continue ;
869968 if (sb -> s_dev == dev ) {
969+ bool born ;
970+
870971 sb -> s_count ++ ;
871972 spin_unlock (& sb_lock );
872- super_lock (sb , excl );
873973 /* still alive? */
874- if (sb -> s_root && (sb -> s_flags & SB_BORN ))
974+ born = super_lock (sb , excl );
975+ if (born && sb -> s_root )
875976 return sb ;
876977 super_unlock (sb , excl );
877978 /* nope, got unmounted */
878979 spin_lock (& sb_lock );
879980 __put_super (sb );
880- goto rescan ;
981+ break ;
881982 }
882983 }
883984 spin_unlock (& sb_lock );
@@ -921,7 +1022,7 @@ int reconfigure_super(struct fs_context *fc)
9211022 if (!hlist_empty (& sb -> s_pins )) {
9221023 super_unlock_excl (sb );
9231024 group_pin_kill (& sb -> s_pins );
924- super_lock_excl (sb );
1025+ __super_lock_excl (sb );
9251026 if (!sb -> s_root )
9261027 return 0 ;
9271028 if (sb -> s_writers .frozen != SB_UNFROZEN )
@@ -984,9 +1085,9 @@ int reconfigure_super(struct fs_context *fc)
9841085
9851086static void do_emergency_remount_callback (struct super_block * sb )
9861087{
987- super_lock_excl (sb );
988- if ( sb -> s_root && sb -> s_bdev && ( sb -> s_flags & SB_BORN ) &&
989- !sb_rdonly (sb )) {
1088+ bool born = super_lock_excl (sb );
1089+
1090+ if ( born && sb -> s_root && sb -> s_bdev && !sb_rdonly (sb )) {
9901091 struct fs_context * fc ;
9911092
9921093 fc = fs_context_for_reconfigure (sb -> s_root ,
@@ -1020,8 +1121,9 @@ void emergency_remount(void)
10201121
10211122static void do_thaw_all_callback (struct super_block * sb )
10221123{
1023- super_lock_excl (sb );
1024- if (sb -> s_root && sb -> s_flags & SB_BORN ) {
1124+ bool born = super_lock_excl (sb );
1125+
1126+ if (born && sb -> s_root ) {
10251127 emergency_thaw_bdev (sb );
10261128 thaw_super_locked (sb );
10271129 } else {
@@ -1212,9 +1314,9 @@ EXPORT_SYMBOL(get_tree_keyed);
12121314 */
12131315static bool super_lock_shared_active (struct super_block * sb )
12141316{
1215- super_lock_shared (sb );
1216- if (! sb -> s_root ||
1217- ( sb -> s_flags & ( SB_ACTIVE | SB_BORN )) != ( SB_ACTIVE | SB_BORN )) {
1317+ bool born = super_lock_shared (sb );
1318+
1319+ if (! born || ! sb -> s_root || !( sb -> s_flags & SB_ACTIVE )) {
12181320 super_unlock_shared (sb );
12191321 return false;
12201322 }
@@ -1374,7 +1476,7 @@ int get_tree_bdev(struct fs_context *fc,
13741476 */
13751477 super_unlock_excl (s );
13761478 error = setup_bdev_super (s , fc -> sb_flags , fc );
1377- super_lock_excl (s );
1479+ __super_lock_excl (s );
13781480 if (!error )
13791481 error = fill_super (s , fc );
13801482 if (error ) {
@@ -1426,7 +1528,7 @@ struct dentry *mount_bdev(struct file_system_type *fs_type,
14261528 */
14271529 super_unlock_excl (s );
14281530 error = setup_bdev_super (s , flags , NULL );
1429- super_lock_excl (s );
1531+ __super_lock_excl (s );
14301532 if (!error )
14311533 error = fill_super (s , data , flags & SB_SILENT ? 1 : 0 );
14321534 if (error ) {
@@ -1566,13 +1668,13 @@ int vfs_get_tree(struct fs_context *fc)
15661668 WARN_ON (!sb -> s_bdi );
15671669
15681670 /*
1569- * Write barrier is for super_cache_count(). We place it before setting
1570- * SB_BORN as the data dependency between the two functions is the
1571- * superblock structure contents that we just set up, not the SB_BORN
1572- * flag.
1671+ * super_wake() contains a memory barrier which also takes care of
1672+ * ordering for super_cache_count(). We place it before setting
1673+ * SB_BORN as the data dependency between the two functions is
1674+ * the superblock structure contents that we just set up, not
1675+ * the SB_BORN flag.
15731676 */
1574- smp_wmb ();
1575- sb -> s_flags |= SB_BORN ;
1677+ super_wake (sb , SB_BORN );
15761678
15771679 error = security_sb_set_mnt_opts (sb , fc -> security , 0 , NULL );
15781680 if (unlikely (error )) {
@@ -1715,7 +1817,7 @@ int freeze_super(struct super_block *sb)
17151817 int ret ;
17161818
17171819 atomic_inc (& sb -> s_active );
1718- super_lock_excl (sb );
1820+ __super_lock_excl (sb );
17191821 if (sb -> s_writers .frozen != SB_UNFROZEN ) {
17201822 deactivate_locked_super (sb );
17211823 return - EBUSY ;
@@ -1737,7 +1839,7 @@ int freeze_super(struct super_block *sb)
17371839 /* Release s_umount to preserve sb_start_write -> s_umount ordering */
17381840 super_unlock_excl (sb );
17391841 sb_wait_write (sb , SB_FREEZE_WRITE );
1740- super_lock_excl (sb );
1842+ __super_lock_excl (sb );
17411843
17421844 /* Now we go and block page faults... */
17431845 sb -> s_writers .frozen = SB_FREEZE_PAGEFAULT ;
@@ -1820,7 +1922,7 @@ static int thaw_super_locked(struct super_block *sb)
18201922 */
18211923int thaw_super (struct super_block * sb )
18221924{
1823- super_lock_excl (sb );
1925+ __super_lock_excl (sb );
18241926 return thaw_super_locked (sb );
18251927}
18261928EXPORT_SYMBOL (thaw_super );
0 commit comments