@@ -99,9 +99,23 @@ struct user_event_enabler {
 /* Bits 0-5 are for the bit to update upon enable/disable (0-63 allowed) */
 #define ENABLE_VAL_BIT_MASK 0x3F
 
+/* Bit 6 is for faulting status of enablement */
+#define ENABLE_VAL_FAULTING_BIT 6
+
 /* Only duplicate the bit value */
 #define ENABLE_VAL_DUP_MASK ENABLE_VAL_BIT_MASK
 
+#define ENABLE_BITOPS(e) ((unsigned long *)&(e)->values)
+
+/* Used for asynchronous faulting in of pages */
+struct user_event_enabler_fault {
+	struct work_struct work;
+	struct user_event_mm *mm;
+	struct user_event_enabler *enabler;
+};
+
+static struct kmem_cache *fault_cache;
+
 /* Global list of memory descriptors using user_events */
 static LIST_HEAD(user_event_mms);
 static DEFINE_SPINLOCK(user_event_mms_lock);
@@ -263,7 +277,85 @@ static int user_event_mm_fault_in(struct user_event_mm *mm, unsigned long uaddr)
 }
 
 static int user_event_enabler_write(struct user_event_mm *mm,
-				    struct user_event_enabler *enabler)
+				    struct user_event_enabler *enabler,
+				    bool fixup_fault);
+
+static void user_event_enabler_fault_fixup(struct work_struct *work)
+{
+	struct user_event_enabler_fault *fault = container_of(
+		work, struct user_event_enabler_fault, work);
+	struct user_event_enabler *enabler = fault->enabler;
+	struct user_event_mm *mm = fault->mm;
+	unsigned long uaddr = enabler->addr;
+	int ret;
+
+	ret = user_event_mm_fault_in(mm, uaddr);
+
+	if (ret && ret != -ENOENT) {
+		struct user_event *user = enabler->event;
+
+		pr_warn("user_events: Fault for mm: 0x%pK @ 0x%llx event: %s\n",
+			mm->mm, (unsigned long long)uaddr, EVENT_NAME(user));
+	}
+
+	/* Prevent state changes from racing */
+	mutex_lock(&event_mutex);
+
+	/*
+	 * If we managed to get the page, re-issue the write. We do not
+	 * want to get into a possible infinite loop, which is why we only
+	 * attempt again directly if the page came in. If we couldn't get
+	 * the page here, then we will try again the next time the event is
+	 * enabled/disabled.
+	 */
+	clear_bit(ENABLE_VAL_FAULTING_BIT, ENABLE_BITOPS(enabler));
+
+	if (!ret) {
+		mmap_read_lock(mm->mm);
+		user_event_enabler_write(mm, enabler, true);
+		mmap_read_unlock(mm->mm);
+	}
+
+	mutex_unlock(&event_mutex);
+
+	/* In all cases we no longer need the mm or fault */
+	user_event_mm_put(mm);
+	kmem_cache_free(fault_cache, fault);
+}
+
+static bool user_event_enabler_queue_fault(struct user_event_mm *mm,
+					   struct user_event_enabler *enabler)
+{
+	struct user_event_enabler_fault *fault;
+
+	fault = kmem_cache_zalloc(fault_cache, GFP_NOWAIT | __GFP_NOWARN);
+
+	if (!fault)
+		return false;
+
+	INIT_WORK(&fault->work, user_event_enabler_fault_fixup);
+	fault->mm = user_event_mm_get(mm);
+	fault->enabler = enabler;
+
+	/* Don't try to queue in again while we have a pending fault */
+	set_bit(ENABLE_VAL_FAULTING_BIT, ENABLE_BITOPS(enabler));
+
+	if (!schedule_work(&fault->work)) {
+		/* Allow another attempt later */
+		clear_bit(ENABLE_VAL_FAULTING_BIT, ENABLE_BITOPS(enabler));
+
+		user_event_mm_put(mm);
+		kmem_cache_free(fault_cache, fault);
+
+		return false;
+	}
+
+	return true;
+}
+
+static int user_event_enabler_write(struct user_event_mm *mm,
+				    struct user_event_enabler *enabler,
+				    bool fixup_fault)
 {
 	unsigned long uaddr = enabler->addr;
 	unsigned long *ptr;
@@ -278,11 +370,19 @@ static int user_event_enabler_write(struct user_event_mm *mm,
 	if (refcount_read(&mm->tasks) == 0)
 		return -ENOENT;
 
+	if (unlikely(test_bit(ENABLE_VAL_FAULTING_BIT, ENABLE_BITOPS(enabler))))
+		return -EBUSY;
+
 	ret = pin_user_pages_remote(mm->mm, uaddr, 1, FOLL_WRITE | FOLL_NOFAULT,
 				    &page, NULL, NULL);
 
-	if (ret <= 0) {
-		pr_warn("user_events: Enable write failed\n");
+	if (unlikely(ret <= 0)) {
+		if (!fixup_fault)
+			return -EFAULT;
+
+		if (!user_event_enabler_queue_fault(mm, enabler))
+			pr_warn("user_events: Unable to queue fault handler\n");
+
 		return -EFAULT;
 	}
 
@@ -314,7 +414,7 @@ static void user_event_enabler_update(struct user_event *user)
 
 		list_for_each_entry_rcu(enabler, &mm->enablers, link)
 			if (enabler->event == user)
-				user_event_enabler_write(mm, enabler);
+				user_event_enabler_write(mm, enabler, true);
 
 		rcu_read_unlock();
 		mmap_read_unlock(mm->mm);
@@ -562,7 +662,7 @@ static struct user_event_enabler
 
 	/* Attempt to reflect the current state within the process */
 	mmap_read_lock(user_mm->mm);
-	*write_result = user_event_enabler_write(user_mm, enabler);
+	*write_result = user_event_enabler_write(user_mm, enabler, false);
 	mmap_read_unlock(user_mm->mm);
 
 	/*
@@ -2201,16 +2301,24 @@ static int __init trace_events_user_init(void)
 {
 	int ret;
 
+	fault_cache = KMEM_CACHE(user_event_enabler_fault, 0);
+
+	if (!fault_cache)
+		return -ENOMEM;
+
 	init_group = user_event_group_create(&init_user_ns);
 
-	if (!init_group)
+	if (!init_group) {
+		kmem_cache_destroy(fault_cache);
 		return -ENOMEM;
+	}
 
 	ret = create_user_tracefs();
 
 	if (ret) {
 		pr_warn("user_events could not register with tracefs\n");
 		user_event_group_destroy(init_group);
+		kmem_cache_destroy(fault_cache);
 		init_group = NULL;
 		return ret;
 	}
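
For context on what the remote write above updates: user space registers an enable address and bit with the user_events ABI, and the kernel flips that bit in the process's memory whenever the event is enabled or disabled; the async fault path added here covers the case where that page is not resident at write time. Below is a rough userspace sketch, assuming the registration interface exposed by <linux/user_events.h> (struct user_reg with enable_addr/enable_bit and the DIAG_IOCSREG ioctl on /sys/kernel/tracing/user_events_data); the event name is purely illustrative and the sketch is not part of this patch.

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/types.h>
#include <linux/user_events.h>

/* Bit 0 of this word is what the kernel-side enabler remotely updates */
static __u32 enabled;

int main(void)
{
	struct user_reg reg = {0};
	int fd = open("/sys/kernel/tracing/user_events_data", O_RDWR);

	if (fd < 0)
		return 1;

	reg.size = sizeof(reg);
	reg.enable_bit = 0;				/* within ENABLE_VAL_BIT_MASK (0-63) */
	reg.enable_size = sizeof(enabled);		/* 32-bit enable word */
	reg.enable_addr = (__u64)(unsigned long)&enabled;	/* address the kernel writes */
	reg.name_args = (__u64)(unsigned long)"example_event u32 value";

	if (ioctl(fd, DIAG_IOCSREG, &reg) < 0) {
		close(fd);
		return 1;
	}

	/*
	 * From here on, enabling/disabling the event (e.g. via tracefs)
	 * makes the kernel set/clear bit 0 of 'enabled'. If this page is
	 * swapped out at that moment, the async fixup path in the patch
	 * pages it in from a workqueue and retries the write.
	 */
	printf("write_index=%u enabled=%u\n", reg.write_index, enabled);

	close(fd);
	return 0;
}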