Skip to content

Commit dcb8177

Browse files
beaubelgrave authored and rostedt committed
tracing/user_events: Add ioctl for disabling addresses
Enablements are now tracked by the lifetime of the task/mm. User
processes need to be able to disable their addresses if tracing is
requested to be turned off. Before, unmapping the page would suffice.
However, we now need a stronger contract. Add an ioctl to enable this.

A new flag bit is added, freeing, to user_event_enabler to ensure that
if the event is attempted to be removed while a fault is being handled,
the remove is delayed until after the fault is reattempted.

Link: https://lkml.kernel.org/r/20230328235219.203-6-beaub@linux.microsoft.com

Signed-off-by: Beau Belgrave <beaub@linux.microsoft.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
1 parent 81f8fb6 commit dcb8177

2 files changed

Lines changed: 119 additions & 2 deletions

File tree

include/uapi/linux/user_events.h

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,27 @@ struct user_reg {
4646
__u32 write_index;
4747
} __attribute__((__packed__));
4848

49+
/*
 * Describes an event unregister, callers must set the size, address and bit.
 * This structure is passed to the DIAG_IOCSUNREG ioctl to disable bit updates
 * for a previously registered enablement address/bit pair.
 *
 * Packed and sized explicitly so the layout is stable ABI; the size field
 * lets the kernel accept both older (smaller) and newer (larger) callers.
 */
struct user_unreg {
	/* Input: Size of the user_unreg structure being used */
	__u32 size;

	/* Input: Bit to unregister */
	__u8 disable_bit;

	/* Input: Reserved, set to 0 */
	__u8 __reserved;

	/* Input: Reserved, set to 0 */
	__u16 __reserved2;

	/* Input: Address to unregister */
	__u64 disable_addr;
} __attribute__((__packed__));
69+
4970
#define DIAG_IOC_MAGIC '*'
5071

5172
/* Request to register a user_event */
@@ -54,4 +75,7 @@ struct user_reg {
5475
/* Request to delete a user_event */
5576
#define DIAG_IOCSDEL _IOW(DIAG_IOC_MAGIC, 1, char *)
5677

78+
/* Requests to unregister a user_event */
79+
#define DIAG_IOCSUNREG _IOW(DIAG_IOC_MAGIC, 2, struct user_unreg*)
80+
5781
#endif /* _UAPI_LINUX_USER_EVENTS_H */

kernel/trace/trace_events_user.c

Lines changed: 95 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,9 @@ struct user_event_enabler {
102102
/* Bit 6 is for faulting status of enablement */
103103
#define ENABLE_VAL_FAULTING_BIT 6
104104

105+
/* Bit 7 is for freeing status of enablement */
106+
#define ENABLE_VAL_FREEING_BIT 7
107+
105108
/* Only duplicate the bit value */
106109
#define ENABLE_VAL_DUP_MASK ENABLE_VAL_BIT_MASK
107110

@@ -301,6 +304,12 @@ static void user_event_enabler_fault_fixup(struct work_struct *work)
301304
/* Prevent state changes from racing */
302305
mutex_lock(&event_mutex);
303306

307+
/* User asked for enabler to be removed during fault */
308+
if (test_bit(ENABLE_VAL_FREEING_BIT, ENABLE_BITOPS(enabler))) {
309+
user_event_enabler_destroy(enabler);
310+
goto out;
311+
}
312+
304313
/*
305314
* If we managed to get the page, re-issue the write. We do not
306315
* want to get into a possible infinite loop, which is why we only
@@ -315,7 +324,7 @@ static void user_event_enabler_fault_fixup(struct work_struct *work)
315324
user_event_enabler_write(mm, enabler, true);
316325
mmap_read_unlock(mm->mm);
317326
}
318-
327+
out:
319328
mutex_unlock(&event_mutex);
320329

321330
/* In all cases we no longer need the mm or fault */
@@ -370,7 +379,8 @@ static int user_event_enabler_write(struct user_event_mm *mm,
370379
if (refcount_read(&mm->tasks) == 0)
371380
return -ENOENT;
372381

373-
if (unlikely(test_bit(ENABLE_VAL_FAULTING_BIT, ENABLE_BITOPS(enabler))))
382+
if (unlikely(test_bit(ENABLE_VAL_FAULTING_BIT, ENABLE_BITOPS(enabler)) ||
383+
test_bit(ENABLE_VAL_FREEING_BIT, ENABLE_BITOPS(enabler))))
374384
return -EBUSY;
375385

376386
ret = pin_user_pages_remote(mm->mm, uaddr, 1, FOLL_WRITE | FOLL_NOFAULT,
@@ -428,6 +438,10 @@ static bool user_event_enabler_dup(struct user_event_enabler *orig,
428438
{
429439
struct user_event_enabler *enabler;
430440

441+
/* Skip pending frees */
442+
if (unlikely(test_bit(ENABLE_VAL_FREEING_BIT, ENABLE_BITOPS(orig))))
443+
return true;
444+
431445
enabler = kzalloc(sizeof(*enabler), GFP_NOWAIT);
432446

433447
if (!enabler)
@@ -2086,6 +2100,79 @@ static long user_events_ioctl_del(struct user_event_file_info *info,
20862100
return ret;
20872101
}
20882102

2103+
static long user_unreg_get(struct user_unreg __user *ureg,
2104+
struct user_unreg *kreg)
2105+
{
2106+
u32 size;
2107+
long ret;
2108+
2109+
ret = get_user(size, &ureg->size);
2110+
2111+
if (ret)
2112+
return ret;
2113+
2114+
if (size > PAGE_SIZE)
2115+
return -E2BIG;
2116+
2117+
if (size < offsetofend(struct user_unreg, disable_addr))
2118+
return -EINVAL;
2119+
2120+
ret = copy_struct_from_user(kreg, sizeof(*kreg), ureg, size);
2121+
2122+
/* Ensure no reserved values, since we don't support any yet */
2123+
if (kreg->__reserved || kreg->__reserved2)
2124+
return -EINVAL;
2125+
2126+
return ret;
2127+
}
2128+
2129+
/*
2130+
* Unregisters an enablement address/bit within a task/user mm.
2131+
*/
2132+
static long user_events_ioctl_unreg(unsigned long uarg)
2133+
{
2134+
struct user_unreg __user *ureg = (struct user_unreg __user *)uarg;
2135+
struct user_event_mm *mm = current->user_event_mm;
2136+
struct user_event_enabler *enabler, *next;
2137+
struct user_unreg reg;
2138+
long ret;
2139+
2140+
ret = user_unreg_get(ureg, &reg);
2141+
2142+
if (ret)
2143+
return ret;
2144+
2145+
if (!mm)
2146+
return -ENOENT;
2147+
2148+
ret = -ENOENT;
2149+
2150+
/*
2151+
* Flags freeing and faulting are used to indicate if the enabler is in
2152+
* use at all. When faulting is set a page-fault is occurring asyncly.
2153+
* During async fault if freeing is set, the enabler will be destroyed.
2154+
* If no async fault is happening, we can destroy it now since we hold
2155+
* the event_mutex during these checks.
2156+
*/
2157+
mutex_lock(&event_mutex);
2158+
2159+
list_for_each_entry_safe(enabler, next, &mm->enablers, link)
2160+
if (enabler->addr == reg.disable_addr &&
2161+
(enabler->values & ENABLE_VAL_BIT_MASK) == reg.disable_bit) {
2162+
set_bit(ENABLE_VAL_FREEING_BIT, ENABLE_BITOPS(enabler));
2163+
2164+
if (!test_bit(ENABLE_VAL_FAULTING_BIT, ENABLE_BITOPS(enabler)))
2165+
user_event_enabler_destroy(enabler);
2166+
2167+
/* Removed at least one */
2168+
ret = 0;
2169+
}
2170+
2171+
mutex_unlock(&event_mutex);
2172+
2173+
return ret;
2174+
}
2175+
20892176
/*
20902177
* Handles the ioctl from user mode to register or alter operations.
20912178
*/
@@ -2108,6 +2195,12 @@ static long user_events_ioctl(struct file *file, unsigned int cmd,
21082195
ret = user_events_ioctl_del(info, uarg);
21092196
mutex_unlock(&group->reg_mutex);
21102197
break;
2198+
2199+
case DIAG_IOCSUNREG:
2200+
mutex_lock(&group->reg_mutex);
2201+
ret = user_events_ioctl_unreg(uarg);
2202+
mutex_unlock(&group->reg_mutex);
2203+
break;
21112204
}
21122205

21132206
return ret;

0 commit comments

Comments
 (0)