Skip to content

Commit 7235759

Browse files
beaubelgrave authored and rostedt committed
tracing/user_events: Use remote writes for event enablement
As part of the discussions for user_events aligned with user space tracers, it was determined that user programs should register an aligned value to set or clear a bit when an event becomes enabled. Currently a shared page is being used that requires mmap(). Remove the shared page implementation and move to a user-registered address implementation. In this new model, 3 new values are specified during event registration from user programs. The first is the address to update when the event is either enabled or disabled. The second is the bit to set/clear to reflect the event being enabled. The third is the size of the value at the specified address. This allows for a local 32/64-bit value in user programs to support both kernel and user tracers. As an example, setting bit 31 for kernel tracers when the event becomes enabled allows for user tracers to use the other bits for ref counts or other flags. The kernel side updates the bit atomically; user programs also need to update these values atomically. User-provided addresses must be aligned on a natural boundary; this allows for single-page checking and prevents odd behaviors such as an enable value straddling 2 pages instead of a single page. Currently page faults are only logged; future patches will handle these. Link: https://lkml.kernel.org/r/20230328235219.203-4-beaub@linux.microsoft.com Suggested-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com> Signed-off-by: Beau Belgrave <beaub@linux.microsoft.com> Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
1 parent fd59351 commit 7235759

4 files changed

Lines changed: 517 additions & 142 deletions

File tree

include/linux/user_events.h

Lines changed: 52 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,63 @@
99
#ifndef _LINUX_USER_EVENTS_H
1010
#define _LINUX_USER_EVENTS_H
1111

12+
#include <linux/list.h>
13+
#include <linux/refcount.h>
14+
#include <linux/mm_types.h>
15+
#include <linux/workqueue.h>
1216
#include <uapi/linux/user_events.h>
1317

1418
#ifdef CONFIG_USER_EVENTS
1519
struct user_event_mm {
20+
struct list_head link;
21+
struct list_head enablers;
22+
struct mm_struct *mm;
23+
struct user_event_mm *next;
24+
refcount_t refcnt;
25+
refcount_t tasks;
26+
struct rcu_work put_rwork;
1627
};
17-
#endif
1828

29+
extern void user_event_mm_dup(struct task_struct *t,
30+
struct user_event_mm *old_mm);
31+
32+
extern void user_event_mm_remove(struct task_struct *t);
33+
34+
static inline void user_events_fork(struct task_struct *t,
35+
unsigned long clone_flags)
36+
{
37+
struct user_event_mm *old_mm;
38+
39+
if (!t || !current->user_event_mm)
40+
return;
41+
42+
old_mm = current->user_event_mm;
43+
44+
if (clone_flags & CLONE_VM) {
45+
t->user_event_mm = old_mm;
46+
refcount_inc(&old_mm->tasks);
47+
return;
48+
}
49+
50+
user_event_mm_dup(t, old_mm);
51+
}
52+
53+
static inline void user_events_execve(struct task_struct *t)
54+
{
55+
if (!t || !t->user_event_mm)
56+
return;
57+
58+
user_event_mm_remove(t);
59+
}
60+
61+
static inline void user_events_exit(struct task_struct *t)
62+
{
63+
if (!t || !t->user_event_mm)
64+
return;
65+
66+
user_event_mm_remove(t);
67+
}
68+
#else
1969
static inline void user_events_fork(struct task_struct *t,
2070
unsigned long clone_flags)
2171
{
@@ -28,5 +78,6 @@ static inline void user_events_execve(struct task_struct *t)
2878
static inline void user_events_exit(struct task_struct *t)
2979
{
3080
}
81+
#endif /* CONFIG_USER_EVENTS */
3182

3283
#endif /* _LINUX_USER_EVENTS_H */

include/uapi/linux/user_events.h

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,12 +27,21 @@ struct user_reg {
2727
/* Input: Size of the user_reg structure being used */
2828
__u32 size;
2929

30+
/* Input: Bit in enable address to use */
31+
__u8 enable_bit;
32+
33+
/* Input: Enable size in bytes at address */
34+
__u8 enable_size;
35+
36+
/* Input: Flags for future use, set to 0 */
37+
__u16 flags;
38+
39+
/* Input: Address to update when enabled */
40+
__u64 enable_addr;
41+
3042
/* Input: Pointer to string with event name, description and flags */
3143
__u64 name_args;
3244

33-
/* Output: Bitwise index of the event within the status page */
34-
__u32 status_bit;
35-
3645
/* Output: Index of the event to use when writing data */
3746
__u32 write_index;
3847
} __attribute__((__packed__));

kernel/trace/Kconfig

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -798,9 +798,10 @@ config USER_EVENTS
798798
can be used like an existing kernel trace event. User trace
799799
events are generated by writing to a tracefs file. User
800800
processes can determine if their tracing events should be
801-
generated by memory mapping a tracefs file and checking for
802-
an associated byte being non-zero.
801+
generated by registering a value and bit with the kernel
802+
that reflects when it is enabled or not.
803803

804+
See Documentation/trace/user_events.rst.
804805
If in doubt, say N.
805806

806807
config HIST_TRIGGERS

0 commit comments

Comments
 (0)