@@ -629,19 +629,23 @@ void fd_install(unsigned int fd, struct file *file)
629629EXPORT_SYMBOL (fd_install );
630630
631631/**
632- * pick_file - return file associatd with fd
632+ * file_close_fd_locked - return file associated with fd
633633 * @files: file struct to retrieve file from
634634 * @fd: file descriptor to retrieve file for
635635 *
636+ * Doesn't take a separate reference count.
637+ *
636638 * Context: files_lock must be held.
637639 *
638640 * Returns: The file associated with @fd (NULL if @fd is not open)
639641 */
640- static struct file * pick_file (struct files_struct * files , unsigned fd )
642+ struct file * file_close_fd_locked (struct files_struct * files , unsigned fd )
641643{
642644 struct fdtable * fdt = files_fdtable (files );
643645 struct file * file ;
644646
647+ lockdep_assert_held (& files -> file_lock );
648+
645649 if (fd >= fdt -> max_fds )
646650 return NULL ;
647651
@@ -660,7 +664,7 @@ int close_fd(unsigned fd)
660664 struct file * file ;
661665
662666 spin_lock (& files -> file_lock );
663- file = pick_file (files , fd );
667+ file = file_close_fd_locked (files , fd );
664668 spin_unlock (& files -> file_lock );
665669 if (!file )
666670 return - EBADF ;
@@ -707,7 +711,7 @@ static inline void __range_close(struct files_struct *files, unsigned int fd,
707711 max_fd = min (max_fd , n );
708712
709713 for (; fd <= max_fd ; fd ++ ) {
710- file = pick_file (files , fd );
714+ file = file_close_fd_locked (files , fd );
711715 if (file ) {
712716 spin_unlock (& files -> file_lock );
713717 filp_close (file , files );
@@ -795,26 +799,21 @@ int __close_range(unsigned fd, unsigned max_fd, unsigned int flags)
795799 return 0 ;
796800}
797801
798- /*
799- * See close_fd_get_file() below, this variant assumes current->files->file_lock
800- * is held.
801- */
802- struct file * __close_fd_get_file (unsigned int fd )
803- {
804- return pick_file (current -> files , fd );
805- }
806-
807- /*
808- * variant of close_fd that gets a ref on the file for later fput.
809- * The caller must ensure that filp_close() called on the file.
802+ /**
803+ * file_close_fd - return file associated with fd
804+ * @fd: file descriptor to retrieve file for
805+ *
806+ * Doesn't take a separate reference count.
807+ *
808+ * Returns: The file associated with @fd (NULL if @fd is not open)
810809 */
811- struct file * close_fd_get_file (unsigned int fd )
810+ struct file * file_close_fd (unsigned int fd )
812811{
813812 struct files_struct * files = current -> files ;
814813 struct file * file ;
815814
816815 spin_lock (& files -> file_lock );
817- file = pick_file (files , fd );
816+ file = file_close_fd_locked (files , fd );
818817 spin_unlock (& files -> file_lock );
819818
820819 return file ;
@@ -959,39 +958,54 @@ static inline struct file *__fget_files_rcu(struct files_struct *files,
959958 struct file * file ;
960959 struct fdtable * fdt = rcu_dereference_raw (files -> fdt );
961960 struct file __rcu * * fdentry ;
961+ unsigned long nospec_mask ;
962962
963- if (unlikely (fd >= fdt -> max_fds ))
964- return NULL ;
965-
966- fdentry = fdt -> fd + array_index_nospec (fd , fdt -> max_fds );
963+ /* Mask is a 0 for invalid fd's, ~0 for valid ones */
964+ nospec_mask = array_index_mask_nospec (fd , fdt -> max_fds );
967965
968966 /*
969- * Ok, we have a file pointer. However, because we do
970- * this all locklessly under RCU, we may be racing with
971- * that file being closed.
972- *
973- * Such a race can take two forms:
974- *
975- * (a) the file ref already went down to zero and the
976- * file hasn't been reused yet or the file count
977- * isn't zero but the file has already been reused.
967+ * fdentry points to the 'fd' offset, or fdt->fd[0].
968+ * Loading from fdt->fd[0] is always safe, because the
969+ * array always exists.
978970 */
979- file = __get_file_rcu (fdentry );
971+ fdentry = fdt -> fd + (fd & nospec_mask );
972+
973+ /* Do the load, then mask any invalid result */
974+ file = rcu_dereference_raw (* fdentry );
975+ file = (void * )(nospec_mask & (unsigned long )file );
980976 if (unlikely (!file ))
981977 return NULL ;
982978
983- if (unlikely (IS_ERR (file )))
979+ /*
980+ * Ok, we have a file pointer that was valid at
981+ * some point, but it might have become stale since.
982+ *
983+ * We need to confirm it by incrementing the refcount
984+ * and then check the lookup again.
985+ *
986+ * atomic_long_inc_not_zero() gives us a full memory
987+ * barrier. We only really need an 'acquire' one to
988+ * protect the loads below, but we don't have that.
989+ */
990+ if (unlikely (!atomic_long_inc_not_zero (& file -> f_count )))
984991 continue ;
985992
986993 /*
994+ * A race with the file being closed can take two forms:
995+ *
996+ * (a) the file ref already went down to zero and the
997+ * file hasn't been reused yet or the file count
998+ * isn't zero but the file has already been reused.
999+ *
9871000 * (b) the file table entry has changed under us.
9881001 * Note that we don't need to re-check the 'fdt->fd'
9891002 * pointer having changed, because it always goes
9901003 * hand-in-hand with 'fdt'.
9911004 *
9921005 * If so, we need to put our ref and try again.
9931006 */
994- if (unlikely (rcu_dereference_raw (files -> fdt ) != fdt )) {
1007+ if (unlikely (file != rcu_dereference_raw (* fdentry )) ||
1008+ unlikely (rcu_dereference_raw (files -> fdt ) != fdt )) {
9951009 fput (file );
9961010 continue ;
9971011 }
@@ -1128,13 +1142,13 @@ static unsigned long __fget_light(unsigned int fd, fmode_t mask)
11281142 * atomic_read_acquire() pairs with atomic_dec_and_test() in
11291143 * put_files_struct().
11301144 */
1131- if (atomic_read_acquire (& files -> count ) == 1 ) {
1145+ if (likely ( atomic_read_acquire (& files -> count ) == 1 ) ) {
11321146 file = files_lookup_fd_raw (files , fd );
11331147 if (!file || unlikely (file -> f_mode & mask ))
11341148 return 0 ;
11351149 return (unsigned long )file ;
11361150 } else {
1137- file = __fget ( fd , mask );
1151+ file = __fget_files ( files , fd , mask );
11381152 if (!file )
11391153 return 0 ;
11401154 return FDPUT_FPUT | (unsigned long )file ;
@@ -1282,7 +1296,7 @@ int replace_fd(unsigned fd, struct file *file, unsigned flags)
12821296}
12831297
12841298/**
1285- * __receive_fd () - Install received file into file descriptor table
1299+ * receive_fd () - Install received file into file descriptor table
12861300 * @file: struct file that was received from another process
12871301 * @ufd: __user pointer to write new fd number to
12881302 * @o_flags: the O_* flags to apply to the new fd entry
@@ -1296,7 +1310,7 @@ int replace_fd(unsigned fd, struct file *file, unsigned flags)
12961310 *
12971311 * Returns the newly installed fd, or a negative error code on failure.
12981312 */
1299- int __receive_fd (struct file * file , int __user * ufd , unsigned int o_flags )
1313+ int receive_fd (struct file * file , int __user * ufd , unsigned int o_flags )
13001314{
13011315 int new_fd ;
13021316 int error ;
@@ -1321,6 +1335,7 @@ int __receive_fd(struct file *file, int __user *ufd, unsigned int o_flags)
13211335 __receive_sock (file );
13221336 return new_fd ;
13231337}
1338+ EXPORT_SYMBOL_GPL (receive_fd );
13241339
13251340int receive_fd_replace (int new_fd , struct file * file , unsigned int o_flags )
13261341{
@@ -1336,12 +1351,6 @@ int receive_fd_replace(int new_fd, struct file *file, unsigned int o_flags)
13361351 return new_fd ;
13371352}
13381353
1339- int receive_fd (struct file * file , unsigned int o_flags )
1340- {
1341- return __receive_fd (file , NULL , o_flags );
1342- }
1343- EXPORT_SYMBOL_GPL (receive_fd );
1344-
13451354static int ksys_dup3 (unsigned int oldfd , unsigned int newfd , int flags )
13461355{
13471356 int err = - EBADF ;
0 commit comments