Skip to content

Commit b8cf8fd

Browse files
Miklos Szeredi authored and jankara committed
fanotify: add watchdog for permission events
This is to make it easier to debug issues with AV software, which time and again deadlocks with no indication of where the issue comes from, and the kernel being blamed for the deadlock. Then we need to analyze dumps to prove that the kernel is not in fact at fault.

The deadlock comes from recursion: handling the event triggers another permission event, in some roundabout way, obviously, otherwise it would have been found in testing.

With this patch a warning is printed when a permission event is received by userspace but not answered for more than the timeout specified in /proc/sys/fs/fanotify/watchdog_timeout. The watchdog can be turned off by setting the timeout to zero (which is the default).

The timeout is very coarse (T <= t < 2T) but I guess it's good enough for the purpose.

Overhead should be minimal.

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Link: https://patch.msgid.link/20250909143053.112171-1-mszeredi@redhat.com
Signed-off-by: Jan Kara <jack@suse.cz>
1 parent 62e59ff commit b8cf8fd

3 files changed

Lines changed: 106 additions & 0 deletions

File tree

fs/notify/fanotify/fanotify.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -441,7 +441,9 @@ struct fanotify_perm_event {
441441
size_t count;
442442
u32 response; /* userspace answer to the event */
443443
unsigned short state; /* state of the event */
444+
unsigned short watchdog_cnt; /* already scanned by watchdog? */
444445
int fd; /* fd we passed to userspace for this event */
446+
pid_t recv_pid; /* pid of task receiving the event */
445447
union {
446448
struct fanotify_response_info_header hdr;
447449
struct fanotify_response_info_audit_rule audit_rule;

fs/notify/fanotify/fanotify_user.c

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@
5050

5151
/* configurable via /proc/sys/fs/fanotify/ */
5252
static int fanotify_max_queued_events __read_mostly;
53+
static int perm_group_timeout __read_mostly;
5354

5455
#ifdef CONFIG_SYSCTL
5556

@@ -85,6 +86,14 @@ static const struct ctl_table fanotify_table[] = {
8586
.proc_handler = proc_dointvec_minmax,
8687
.extra1 = SYSCTL_ZERO
8788
},
89+
{
90+
.procname = "watchdog_timeout",
91+
.data = &perm_group_timeout,
92+
.maxlen = sizeof(int),
93+
.mode = 0644,
94+
.proc_handler = proc_dointvec_minmax,
95+
.extra1 = SYSCTL_ZERO,
96+
},
8897
};
8998

9099
static void __init fanotify_sysctls_init(void)
@@ -95,6 +104,91 @@ static void __init fanotify_sysctls_init(void)
95104
#define fanotify_sysctls_init() do { } while (0)
96105
#endif /* CONFIG_SYSCTL */
97106

107+
/* Groups currently registered for monitoring by the permission event watchdog. */
static LIST_HEAD(perm_group_list);
108+
/* Taken around every walk and modification of perm_group_list. */
static DEFINE_SPINLOCK(perm_group_lock);
109+
/* Forward declaration: needed to initialise perm_group_work below. */
static void perm_group_watchdog(struct work_struct *work);
110+
/* Delayed work item whose handler is perm_group_watchdog(). */
static DECLARE_DELAYED_WORK(perm_group_work, perm_group_watchdog);
111+
112+
static void perm_group_watchdog_schedule(void)
113+
{
114+
schedule_delayed_work(&perm_group_work, secs_to_jiffies(perm_group_timeout));
115+
}
116+
117+
static void perm_group_watchdog(struct work_struct *work)
118+
{
119+
struct fsnotify_group *group;
120+
struct fanotify_perm_event *event;
121+
struct task_struct *task;
122+
pid_t failed_pid = 0;
123+
124+
guard(spinlock)(&perm_group_lock);
125+
if (list_empty(&perm_group_list))
126+
return;
127+
128+
list_for_each_entry(group, &perm_group_list,
129+
fanotify_data.perm_grp_list) {
130+
/*
131+
* Ok to test without lock, racing with an addition is
132+
* fine, will deal with it next round
133+
*/
134+
if (list_empty(&group->fanotify_data.access_list))
135+
continue;
136+
137+
spin_lock(&group->notification_lock);
138+
list_for_each_entry(event, &group->fanotify_data.access_list,
139+
fae.fse.list) {
140+
if (likely(event->watchdog_cnt == 0)) {
141+
event->watchdog_cnt = 1;
142+
} else if (event->watchdog_cnt == 1) {
143+
/* Report on event only once */
144+
event->watchdog_cnt = 2;
145+
146+
/* Do not report same pid repeatedly */
147+
if (event->recv_pid == failed_pid)
148+
continue;
149+
150+
failed_pid = event->recv_pid;
151+
rcu_read_lock();
152+
task = find_task_by_pid_ns(event->recv_pid,
153+
&init_pid_ns);
154+
pr_warn_ratelimited(
155+
"PID %u (%s) failed to respond to fanotify queue for more than %d seconds\n",
156+
event->recv_pid,
157+
task ? task->comm : NULL,
158+
perm_group_timeout);
159+
rcu_read_unlock();
160+
}
161+
}
162+
spin_unlock(&group->notification_lock);
163+
}
164+
perm_group_watchdog_schedule();
165+
}
166+
167+
static void fanotify_perm_watchdog_group_remove(struct fsnotify_group *group)
168+
{
169+
if (!list_empty(&group->fanotify_data.perm_grp_list)) {
170+
/* Perm event watchdog can no longer scan this group. */
171+
spin_lock(&perm_group_lock);
172+
list_del_init(&group->fanotify_data.perm_grp_list);
173+
spin_unlock(&perm_group_lock);
174+
}
175+
}
176+
177+
static void fanotify_perm_watchdog_group_add(struct fsnotify_group *group)
178+
{
179+
if (!perm_group_timeout)
180+
return;
181+
182+
spin_lock(&perm_group_lock);
183+
if (list_empty(&group->fanotify_data.perm_grp_list)) {
184+
/* Add to perm_group_list for monitoring by watchdog. */
185+
if (list_empty(&perm_group_list))
186+
perm_group_watchdog_schedule();
187+
list_add_tail(&group->fanotify_data.perm_grp_list, &perm_group_list);
188+
}
189+
spin_unlock(&perm_group_lock);
190+
}
191+
98192
/*
99193
* All flags that may be specified in parameter event_f_flags of fanotify_init.
100194
*
@@ -953,6 +1047,7 @@ static ssize_t fanotify_read(struct file *file, char __user *buf,
9531047
spin_lock(&group->notification_lock);
9541048
list_add_tail(&event->fse.list,
9551049
&group->fanotify_data.access_list);
1050+
FANOTIFY_PERM(event)->recv_pid = current->pid;
9561051
spin_unlock(&group->notification_lock);
9571052
}
9581053
}
@@ -1012,6 +1107,8 @@ static int fanotify_release(struct inode *ignored, struct file *file)
10121107
*/
10131108
fsnotify_group_stop_queueing(group);
10141109

1110+
fanotify_perm_watchdog_group_remove(group);
1111+
10151112
/*
10161113
* Process all permission events on access_list and notification queue
10171114
* and simulate reply from userspace.
@@ -1465,6 +1562,10 @@ static int fanotify_add_mark(struct fsnotify_group *group,
14651562
fsnotify_group_unlock(group);
14661563

14671564
fsnotify_put_mark(fsn_mark);
1565+
1566+
if (!ret && (mask & FANOTIFY_PERM_EVENTS))
1567+
fanotify_perm_watchdog_group_add(group);
1568+
14681569
return ret;
14691570
}
14701571

@@ -1625,6 +1726,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
16251726
group->fanotify_data.f_flags = event_f_flags;
16261727
init_waitqueue_head(&group->fanotify_data.access_waitq);
16271728
INIT_LIST_HEAD(&group->fanotify_data.access_list);
1729+
INIT_LIST_HEAD(&group->fanotify_data.perm_grp_list);
16281730
switch (class) {
16291731
case FAN_CLASS_NOTIF:
16301732
group->priority = FSNOTIFY_PRIO_NORMAL;

include/linux/fsnotify_backend.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -273,6 +273,8 @@ struct fsnotify_group {
273273
int f_flags; /* event_f_flags from fanotify_init() */
274274
struct ucounts *ucounts;
275275
mempool_t error_events_pool;
276+
/* chained on perm_group_list */
277+
struct list_head perm_grp_list;
276278
} fanotify_data;
277279
#endif /* CONFIG_FANOTIFY */
278280
};

0 commit comments

Comments
 (0)