Skip to content

Commit 7477d7d

Browse files
committed
pidfs: allow to retrieve exit information
Some tools like systemd's journal need to retrieve the exit and cgroup information after a process has already been reaped. This can happen, e.g., when retrieving a pidfd via SCM_PIDFD or SCM_PEERPIDFD. Link: https://lore.kernel.org/r/20250305-work-pidfs-kill_on_last_close-v3-6-c8c3d8361705@kernel.org Reviewed-by: Jeff Layton <jlayton@kernel.org> Reviewed-by: Oleg Nesterov <oleg@redhat.com> Signed-off-by: Christian Brauner <brauner@kernel.org>
1 parent 4513522 commit 7477d7d

2 files changed

Lines changed: 70 additions & 19 deletions

File tree

fs/pidfs.c

Lines changed: 68 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,8 @@ struct pidfs_exit_info {
3636
};
3737

3838
struct pidfs_inode {
39-
struct pidfs_exit_info exit_info;
39+
struct pidfs_exit_info __pei;
40+
struct pidfs_exit_info *exit_info;
4041
struct inode vfs_inode;
4142
};
4243

@@ -228,17 +229,28 @@ static __poll_t pidfd_poll(struct file *file, struct poll_table_struct *pts)
228229
return poll_flags;
229230
}
230231

231-
static long pidfd_info(struct task_struct *task, unsigned int cmd, unsigned long arg)
232+
static inline bool pid_in_current_pidns(const struct pid *pid)
233+
{
234+
const struct pid_namespace *ns = task_active_pid_ns(current);
235+
236+
if (ns->level <= pid->level)
237+
return pid->numbers[ns->level].ns == ns;
238+
239+
return false;
240+
}
241+
242+
static long pidfd_info(struct file *file, unsigned int cmd, unsigned long arg)
232243
{
233244
struct pidfd_info __user *uinfo = (struct pidfd_info __user *)arg;
245+
struct inode *inode = file_inode(file);
246+
struct pid *pid = pidfd_pid(file);
234247
size_t usize = _IOC_SIZE(cmd);
235248
struct pidfd_info kinfo = {};
249+
struct pidfs_exit_info *exit_info;
236250
struct user_namespace *user_ns;
251+
struct task_struct *task;
237252
const struct cred *c;
238253
__u64 mask;
239-
#ifdef CONFIG_CGROUPS
240-
struct cgroup *cgrp;
241-
#endif
242254

243255
if (!uinfo)
244256
return -EINVAL;
@@ -248,6 +260,37 @@ static long pidfd_info(struct task_struct *task, unsigned int cmd, unsigned long
248260
if (copy_from_user(&mask, &uinfo->mask, sizeof(mask)))
249261
return -EFAULT;
250262

263+
/*
264+
* Restrict information retrieval to tasks within the caller's pid
265+
* namespace hierarchy.
266+
*/
267+
if (!pid_in_current_pidns(pid))
268+
return -ESRCH;
269+
270+
if (mask & PIDFD_INFO_EXIT) {
271+
exit_info = READ_ONCE(pidfs_i(inode)->exit_info);
272+
if (exit_info) {
273+
kinfo.mask |= PIDFD_INFO_EXIT;
274+
#ifdef CONFIG_CGROUPS
275+
kinfo.cgroupid = exit_info->cgroupid;
276+
kinfo.mask |= PIDFD_INFO_CGROUPID;
277+
#endif
278+
kinfo.exit_code = exit_info->exit_code;
279+
}
280+
}
281+
282+
task = get_pid_task(pid, PIDTYPE_PID);
283+
if (!task) {
284+
/*
285+
* If the task has already been reaped, only exit
286+
* information is available
287+
*/
288+
if (!(mask & PIDFD_INFO_EXIT))
289+
return -ESRCH;
290+
291+
goto copy_out;
292+
}
293+
251294
c = get_task_cred(task);
252295
if (!c)
253296
return -ESRCH;
@@ -267,11 +310,15 @@ static long pidfd_info(struct task_struct *task, unsigned int cmd, unsigned long
267310
put_cred(c);
268311

269312
#ifdef CONFIG_CGROUPS
270-
rcu_read_lock();
271-
cgrp = task_dfl_cgroup(task);
272-
kinfo.cgroupid = cgroup_id(cgrp);
273-
kinfo.mask |= PIDFD_INFO_CGROUPID;
274-
rcu_read_unlock();
313+
if (!kinfo.cgroupid) {
314+
struct cgroup *cgrp;
315+
316+
rcu_read_lock();
317+
cgrp = task_dfl_cgroup(task);
318+
kinfo.cgroupid = cgroup_id(cgrp);
319+
kinfo.mask |= PIDFD_INFO_CGROUPID;
320+
rcu_read_unlock();
321+
}
275322
#endif
276323

277324
/*
@@ -291,6 +338,7 @@ static long pidfd_info(struct task_struct *task, unsigned int cmd, unsigned long
291338
if (kinfo.pid == 0 || kinfo.tgid == 0 || (kinfo.ppid == 0 && kinfo.pid != 1))
292339
return -ESRCH;
293340

341+
copy_out:
294342
/*
295343
* If userspace and the kernel have the same struct size it can just
296344
* be copied. If userspace provides an older struct, only the bits that
@@ -325,7 +373,6 @@ static long pidfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
325373
{
326374
struct task_struct *task __free(put_task) = NULL;
327375
struct nsproxy *nsp __free(put_nsproxy) = NULL;
328-
struct pid *pid = pidfd_pid(file);
329376
struct ns_common *ns_common = NULL;
330377
struct pid_namespace *pid_ns;
331378

@@ -340,13 +387,13 @@ static long pidfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
340387
return put_user(file_inode(file)->i_generation, argp);
341388
}
342389

343-
task = get_pid_task(pid, PIDTYPE_PID);
344-
if (!task)
345-
return -ESRCH;
346-
347390
/* Extensible IOCTL that does not open namespace FDs, take a shortcut */
348391
if (_IOC_NR(cmd) == _IOC_NR(PIDFD_GET_INFO))
349-
return pidfd_info(task, cmd, arg);
392+
return pidfd_info(file, cmd, arg);
393+
394+
task = get_pid_task(pidfd_pid(file), PIDTYPE_PID);
395+
if (!task)
396+
return -ESRCH;
350397

351398
if (arg)
352399
return -EINVAL;
@@ -484,7 +531,7 @@ void pidfs_exit(struct task_struct *tsk)
484531
dentry = stashed_dentry_get(&task_pid(tsk)->stashed);
485532
if (dentry) {
486533
struct inode *inode = d_inode(dentry);
487-
struct pidfs_exit_info *exit_info = &pidfs_i(inode)->exit_info;
534+
struct pidfs_exit_info *exit_info = &pidfs_i(inode)->__pei;
488535
#ifdef CONFIG_CGROUPS
489536
struct cgroup *cgrp;
490537

@@ -495,6 +542,8 @@ void pidfs_exit(struct task_struct *tsk)
495542
#endif
496543
exit_info->exit_code = tsk->exit_code;
497544

545+
/* Ensure that PIDFD_GET_INFO sees either all or nothing. */
546+
smp_store_release(&pidfs_i(inode)->exit_info, &pidfs_i(inode)->__pei);
498547
dput(dentry);
499548
}
500549
}
@@ -562,7 +611,8 @@ static struct inode *pidfs_alloc_inode(struct super_block *sb)
562611
if (!pi)
563612
return NULL;
564613

565-
memset(&pi->exit_info, 0, sizeof(pi->exit_info));
614+
memset(&pi->__pei, 0, sizeof(pi->__pei));
615+
pi->exit_info = NULL;
566616

567617
return &pi->vfs_inode;
568618
}

include/uapi/linux/pidfd.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#define PIDFD_INFO_PID (1UL << 0) /* Always returned, even if not requested */
2121
#define PIDFD_INFO_CREDS (1UL << 1) /* Always returned, even if not requested */
2222
#define PIDFD_INFO_CGROUPID (1UL << 2) /* Always returned if available, even if not requested */
23+
#define PIDFD_INFO_EXIT (1UL << 3) /* Only returned if requested. */
2324

2425
#define PIDFD_INFO_SIZE_VER0 64 /* sizeof first published struct */
2526

@@ -86,7 +87,7 @@ struct pidfd_info {
8687
__u32 sgid;
8788
__u32 fsuid;
8889
__u32 fsgid;
89-
__u32 spare0[1];
90+
__s32 exit_code;
9091
};
9192

9293
#define PIDFS_IOCTL_MAGIC 0xFF

0 commit comments

Comments
 (0)