Skip to content

Commit ec40758

Browse files
committed
Merge tag 'v6.4/pidfd.file' of git://git.kernel.org/pub/scm/linux/kernel/git/brauner/linux
Pull pidfd updates from Christian Brauner: "This adds a new pidfd_prepare() helper which allows the caller to reserve a pidfd number and allocates a new pidfd file that stashes the provided struct pid. Installing a file descriptor into a task's file descriptor table just to close it again via close_fd() in case an error occurs should be avoided. The fd has been visible to userspace and might already be in use. Instead, a file descriptor should be reserved but not installed into the caller's file descriptor table. If another failure path is hit then the reserved file descriptor and file can just be put without any userspace-visible side-effects. And if all failure paths are cleared the file descriptor and file can be installed into the task's file descriptor table. This helper is now used in all places that open-coded this functionality before. For example, this is currently done during copy_process() and fanotify used pidfd_create(), which returns a pidfd that has already been made visible in the caller's file descriptor table, but then closed it using close_fd(). In one of the next merge windows there is also new functionality coming to unix domain sockets that will have to rely on pidfd_prepare()." * tag 'v6.4/pidfd.file' of git://git.kernel.org/pub/scm/linux/kernel/git/brauner/linux: fanotify: use pidfd_prepare(); fork: use pidfd_prepare(); pid: add pidfd_prepare()
2 parents 3323ddc + eee3a0e commit ec40758

4 files changed

Lines changed: 104 additions & 27 deletions

File tree

fs/notify/fanotify/fanotify_user.c

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -663,7 +663,7 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
663663
struct fanotify_info *info = fanotify_event_info(event);
664664
unsigned int info_mode = FAN_GROUP_FLAG(group, FANOTIFY_INFO_MODES);
665665
unsigned int pidfd_mode = info_mode & FAN_REPORT_PIDFD;
666-
struct file *f = NULL;
666+
struct file *f = NULL, *pidfd_file = NULL;
667667
int ret, pidfd = FAN_NOPIDFD, fd = FAN_NOFD;
668668

669669
pr_debug("%s: group=%p event=%p\n", __func__, group, event);
@@ -718,7 +718,7 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
718718
!pid_has_task(event->pid, PIDTYPE_TGID)) {
719719
pidfd = FAN_NOPIDFD;
720720
} else {
721-
pidfd = pidfd_create(event->pid, 0);
721+
pidfd = pidfd_prepare(event->pid, 0, &pidfd_file);
722722
if (pidfd < 0)
723723
pidfd = FAN_EPIDFD;
724724
}
@@ -751,6 +751,9 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
751751
if (f)
752752
fd_install(fd, f);
753753

754+
if (pidfd_file)
755+
fd_install(pidfd, pidfd_file);
756+
754757
return metadata.event_len;
755758

756759
out_close_fd:
@@ -759,8 +762,10 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
759762
fput(f);
760763
}
761764

762-
if (pidfd >= 0)
763-
close_fd(pidfd);
765+
if (pidfd >= 0) {
766+
put_unused_fd(pidfd);
767+
fput(pidfd_file);
768+
}
764769

765770
return ret;
766771
}

include/linux/pid.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@ extern struct pid *pidfd_pid(const struct file *file);
8080
struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags);
8181
struct task_struct *pidfd_get_task(int pidfd, unsigned int *flags);
8282
int pidfd_create(struct pid *pid, unsigned int flags);
83+
int pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret);
8384

8485
static inline struct pid *get_pid(struct pid *pid)
8586
{

kernel/fork.c

Lines changed: 87 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1961,6 +1961,91 @@ const struct file_operations pidfd_fops = {
19611961
#endif
19621962
};
19631963

1964+
/**
1965+
* __pidfd_prepare - allocate a new pidfd_file and reserve a pidfd
1966+
* @pid: the struct pid for which to create a pidfd
1967+
* @flags: flags of the new @pidfd
1968+
* @pidfd: the pidfd to return
1969+
*
1970+
* Allocate a new file that stashes @pid and reserve a new pidfd number in the
1971+
* caller's file descriptor table. The pidfd is reserved but not installed yet.
1972+
1973+
* The helper doesn't perform checks on @pid which makes it useful for pidfds
1974+
* created via CLONE_PIDFD where @pid has no task attached when the pidfd and
1975+
* pidfd file are prepared.
1976+
*
1977+
* If this function returns successfully the caller is responsible to either
1978+
* call fd_install() passing the returned pidfd and pidfd file as arguments in
1979+
* order to install the pidfd into its file descriptor table or they must use
1980+
* put_unused_fd() and fput() on the returned pidfd and pidfd file
1981+
* respectively.
1982+
*
1983+
* This function is useful when a pidfd must already be reserved but there
1984+
* might still be points of failure afterwards and the caller wants to ensure
1985+
* that no pidfd is leaked into its file descriptor table.
1986+
*
1987+
* Return: On success, a reserved pidfd is returned from the function and a new
1988+
* pidfd file is returned in the last argument to the function. On
1989+
* error, a negative error code is returned from the function and the
1990+
* last argument remains unchanged.
1991+
*/
1992+
static int __pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret)
1993+
{
1994+
int pidfd;
1995+
struct file *pidfd_file;
1996+
1997+
if (flags & ~(O_NONBLOCK | O_RDWR | O_CLOEXEC))
1998+
return -EINVAL;
1999+
2000+
pidfd = get_unused_fd_flags(O_RDWR | O_CLOEXEC);
2001+
if (pidfd < 0)
2002+
return pidfd;
2003+
2004+
pidfd_file = anon_inode_getfile("[pidfd]", &pidfd_fops, pid,
2005+
flags | O_RDWR | O_CLOEXEC);
2006+
if (IS_ERR(pidfd_file)) {
2007+
put_unused_fd(pidfd);
2008+
return PTR_ERR(pidfd_file);
2009+
}
2010+
get_pid(pid); /* held by pidfd_file now */
2011+
*ret = pidfd_file;
2012+
return pidfd;
2013+
}
2014+
2015+
/**
2016+
* pidfd_prepare - allocate a new pidfd_file and reserve a pidfd
2017+
* @pid: the struct pid for which to create a pidfd
2018+
* @flags: flags of the new @pidfd
2019+
* @ret: where to return the new pidfd file
2020+
*
2021+
* Allocate a new file that stashes @pid and reserve a new pidfd number in the
2022+
* caller's file descriptor table. The pidfd is reserved but not installed yet.
2023+
*
2024+
* The helper verifies that @pid is used as a thread group leader.
2025+
*
2026+
* If this function returns successfully the caller is responsible to either
2027+
* call fd_install() passing the returned pidfd and pidfd file as arguments in
2028+
* order to install the pidfd into its file descriptor table or they must use
2029+
* put_unused_fd() and fput() on the returned pidfd and pidfd file
2030+
* respectively.
2031+
*
2032+
* This function is useful when a pidfd must already be reserved but there
2033+
* might still be points of failure afterwards and the caller wants to ensure
2034+
* that no pidfd is leaked into its file descriptor table.
2035+
*
2036+
* Return: On success, a reserved pidfd is returned from the function and a new
2037+
* pidfd file is returned in the last argument to the function. On
2038+
* error, a negative error code is returned from the function and the
2039+
* last argument remains unchanged.
2040+
*/
2041+
int pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret)
2042+
{
2043+
if (!pid || !pid_has_task(pid, PIDTYPE_TGID))
2044+
return -EINVAL;
2045+
2046+
return __pidfd_prepare(pid, flags, ret);
2047+
}
2048+
19642049
static void __delayed_free_task(struct rcu_head *rhp)
19652050
{
19662051
struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
@@ -2309,21 +2394,12 @@ __latent_entropy struct task_struct *copy_process(
23092394
* if the fd table isn't shared).
23102395
*/
23112396
if (clone_flags & CLONE_PIDFD) {
2312-
retval = get_unused_fd_flags(O_RDWR | O_CLOEXEC);
2397+
/* Note that no task has been attached to @pid yet. */
2398+
retval = __pidfd_prepare(pid, O_RDWR | O_CLOEXEC, &pidfile);
23132399
if (retval < 0)
23142400
goto bad_fork_free_pid;
2315-
23162401
pidfd = retval;
23172402

2318-
pidfile = anon_inode_getfile("[pidfd]", &pidfd_fops, pid,
2319-
O_RDWR | O_CLOEXEC);
2320-
if (IS_ERR(pidfile)) {
2321-
put_unused_fd(pidfd);
2322-
retval = PTR_ERR(pidfile);
2323-
goto bad_fork_free_pid;
2324-
}
2325-
get_pid(pid); /* held by pidfile now */
2326-
23272403
retval = put_user(pidfd, args->pidfd);
23282404
if (retval)
23292405
goto bad_fork_put_pidfd;

kernel/pid.c

Lines changed: 7 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -594,20 +594,15 @@ struct task_struct *pidfd_get_task(int pidfd, unsigned int *flags)
594594
*/
595595
int pidfd_create(struct pid *pid, unsigned int flags)
596596
{
597-
int fd;
598-
599-
if (!pid || !pid_has_task(pid, PIDTYPE_TGID))
600-
return -EINVAL;
597+
int pidfd;
598+
struct file *pidfd_file;
601599

602-
if (flags & ~(O_NONBLOCK | O_RDWR | O_CLOEXEC))
603-
return -EINVAL;
604-
605-
fd = anon_inode_getfd("[pidfd]", &pidfd_fops, get_pid(pid),
606-
flags | O_RDWR | O_CLOEXEC);
607-
if (fd < 0)
608-
put_pid(pid);
600+
pidfd = pidfd_prepare(pid, flags, &pidfd_file);
601+
if (pidfd < 0)
602+
return pidfd;
609603

610-
return fd;
604+
fd_install(pidfd, pidfd_file);
605+
return pidfd;
611606
}
612607

613608
/**

0 commit comments

Comments
 (0)