
Commit 187fd85

rppt authored and akpm00 committed
execmem: rework execmem_cache_free()
Currently execmem_cache_free() ignores potential allocation failures that may happen in execmem_cache_add(). Besides, it uses text poking to fill the memory with trapping instructions before returning it to the cache, although it would be more efficient to make that memory writable, update it using memcpy and then restore ROX protection.

Rework execmem_cache_free() so that in case of an error it will defer freeing of the memory to a delayed work.

With this, the happy fast path will now change permissions to RW, fill the memory with trapping instructions using memcpy, restore ROX permissions, add the memory back to the free cache and clear the relevant entry in busy_areas.

If any step in the fast path fails, the entry in busy_areas will be marked as pending_free. These entries will be handled by a delayed work and freed asynchronously.

To make the fast path faster, use __GFP_NORETRY for memory allocations and let the asynchronous handler try harder with GFP_KERNEL.

Link: https://lkml.kernel.org/r/20250713071730.4117334-4-rppt@kernel.org
Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Daniel Gomez <da.gomez@samsung.com>
Cc: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Cc: Petr Pavlu <petr.pavlu@suse.com>
Cc: Steven Rostedt (Google) <rostedt@goodmis.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
1 parent 838955f commit 187fd85
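
Editor's note: the fast path described in the message flips the area writable, fills it with trapping instructions via a plain memory write, and then restores ROX. Below is a rough userspace analogue of that permission dance; it is a sketch only, using mmap()/mprotect() and the x86 breakpoint byte 0xcc, whereas the kernel goes through its own execmem_force_rw()/execmem_restore_rox() helpers.

#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	long page = sysconf(_SC_PAGESIZE);
	/* stands in for a cached ROX area */
	unsigned char *p = mmap(NULL, page, PROT_READ | PROT_EXEC,
				MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	unsigned char trap = 0xcc;	/* x86 int3, as a trapping instruction */

	if (p == MAP_FAILED)
		return 1;

	/* fast path: make writable, fill with traps, restore ROX */
	if (mprotect(p, page, PROT_READ | PROT_WRITE))
		return 1;	/* the kernel would defer instead of bailing */
	memset(p, trap, page);
	if (mprotect(p, page, PROT_READ | PROT_EXEC))
		return 1;

	printf("first byte: %#x\n", p[0]);
	munmap(p, page);
	return 0;
}

Where this sketch simply bails on failure, the point of the patch is that the kernel instead tags the busy_areas entry as pending_free and retries from a delayed work.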

1 file changed: mm/execmem.c

Lines changed: 102 additions & 23 deletions
--- a/mm/execmem.c
+++ b/mm/execmem.c
@@ -93,8 +93,15 @@ struct execmem_cache {
 	struct mutex mutex;
 	struct maple_tree busy_areas;
 	struct maple_tree free_areas;
+	unsigned int pending_free_cnt;	/* protected by mutex */
 };
 
+/* delay to schedule asynchronous free if fast path free fails */
+#define FREE_DELAY	(msecs_to_jiffies(10))
+
+/* mark entries in busy_areas that should be freed asynchronously */
+#define PENDING_FREE_MASK	(1 << (PAGE_SHIFT - 1))
+
 static struct execmem_cache execmem_cache = {
 	.mutex = __MUTEX_INITIALIZER(execmem_cache.mutex),
 	.busy_areas = MTREE_INIT_EXT(busy_areas, MT_FLAGS_LOCK_EXTERN,
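
A note on PENDING_FREE_MASK: the mask choice implies that areas stored in busy_areas are page-aligned, so the low PAGE_SHIFT bits of a stored pointer are zero and bit PAGE_SHIFT - 1 can carry the pending-free tag without colliding with a real address. A standalone sketch of the tag round-trip (an assumption-laden illustration: PAGE_SHIFT is hard-coded to 12 for 4K pages and the address is made up):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT	12	/* assumption: 4K pages */
#define PENDING_FREE_MASK	(1UL << (PAGE_SHIFT - 1))

int main(void)
{
	uintptr_t area = 0x7f0000042000UL;	/* made-up page-aligned address */
	uintptr_t tagged;

	/* page alignment guarantees the tag bit is clear in real pointers */
	assert((area & PENDING_FREE_MASK) == 0);

	tagged = area | PENDING_FREE_MASK;		/* pending_free_set() */
	assert(tagged & PENDING_FREE_MASK);		/* is_pending_free() */
	assert((tagged & ~PENDING_FREE_MASK) == area);	/* pending_free_clear() */

	printf("%#lx <-> %#lx\n", (unsigned long)area, (unsigned long)tagged);
	return 0;
}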
@@ -155,20 +162,17 @@ static void execmem_cache_clean(struct work_struct *work)
 
 static DECLARE_WORK(execmem_cache_clean_work, execmem_cache_clean);
 
-static int execmem_cache_add(void *ptr, size_t size)
+static int execmem_cache_add_locked(void *ptr, size_t size, gfp_t gfp_mask)
 {
 	struct maple_tree *free_areas = &execmem_cache.free_areas;
-	struct mutex *mutex = &execmem_cache.mutex;
 	unsigned long addr = (unsigned long)ptr;
 	MA_STATE(mas, free_areas, addr - 1, addr + 1);
 	unsigned long lower, upper;
 	void *area = NULL;
-	int err;
 
 	lower = addr;
 	upper = addr + size - 1;
 
-	mutex_lock(mutex);
 	area = mas_walk(&mas);
 	if (area && mas.last == addr - 1)
 		lower = mas.index;
@@ -178,12 +182,14 @@ static int execmem_cache_add(void *ptr, size_t size)
 		upper = mas.last;
 
 	mas_set_range(&mas, lower, upper);
-	err = mas_store_gfp(&mas, (void *)lower, GFP_KERNEL);
-	mutex_unlock(mutex);
-	if (err)
-		return err;
+	return mas_store_gfp(&mas, (void *)lower, gfp_mask);
+}
 
-	return 0;
+static int execmem_cache_add(void *ptr, size_t size, gfp_t gfp_mask)
+{
+	guard(mutex)(&execmem_cache.mutex);
+
+	return execmem_cache_add_locked(ptr, size, gfp_mask);
 }
 
 static bool within_range(struct execmem_range *range, struct ma_state *mas,
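
The split into execmem_cache_add_locked() plus a thin execmem_cache_add() wrapper leans on guard(mutex) from the kernel's <linux/cleanup.h>, which drops the mutex automatically when the scope is left on any return path. A userspace sketch of the same scope-based-unlock idea, built on the GCC/Clang cleanup attribute that the kernel helper also uses (MUTEX_GUARD, unlock_cleanup and the cache_* names are invented for this example):

#include <pthread.h>
#include <stdio.h>

static void unlock_cleanup(pthread_mutex_t **m)
{
	pthread_mutex_unlock(*m);
}

/* lock now, unlock automatically when the enclosing scope exits */
#define MUTEX_GUARD(m) \
	pthread_mutex_t *__guard __attribute__((cleanup(unlock_cleanup))) = \
		(pthread_mutex_lock(m), (m))

static pthread_mutex_t cache_mutex = PTHREAD_MUTEX_INITIALIZER;
static int cache_size;

static int cache_add_locked(int size)
{
	cache_size += size;	/* caller must hold cache_mutex */
	return 0;
}

static int cache_add(int size)
{
	MUTEX_GUARD(&cache_mutex);	/* released on every return path */
	return cache_add_locked(size);
}

int main(void)
{
	cache_add(64);
	printf("cache_size = %d\n", cache_size);
	return 0;
}

The _locked suffix follows the usual kernel convention: callers such as the new __execmem_cache_free() below already hold the mutex and call the _locked variant directly.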
@@ -278,7 +284,7 @@ static int execmem_cache_populate(struct execmem_range *range, size_t size)
 	if (err)
 		goto err_free_mem;
 
-	err = execmem_cache_add(p, alloc_size);
+	err = execmem_cache_add(p, alloc_size, GFP_KERNEL);
 	if (err)
 		goto err_reset_direct_map;
 
@@ -307,29 +313,102 @@ static void *execmem_cache_alloc(struct execmem_range *range, size_t size)
 	return __execmem_cache_alloc(range, size);
 }
 
+static inline bool is_pending_free(void *ptr)
+{
+	return ((unsigned long)ptr & PENDING_FREE_MASK);
+}
+
+static inline void *pending_free_set(void *ptr)
+{
+	return (void *)((unsigned long)ptr | PENDING_FREE_MASK);
+}
+
+static inline void *pending_free_clear(void *ptr)
+{
+	return (void *)((unsigned long)ptr & ~PENDING_FREE_MASK);
+}
+
+static int execmem_force_rw(void *ptr, size_t size);
+
+static int __execmem_cache_free(struct ma_state *mas, void *ptr, gfp_t gfp_mask)
+{
+	size_t size = mas_range_len(mas);
+	int err;
+
+	err = execmem_force_rw(ptr, size);
+	if (err)
+		return err;
+
+	execmem_fill_trapping_insns(ptr, size, /* writable = */ true);
+	execmem_restore_rox(ptr, size);
+
+	err = execmem_cache_add_locked(ptr, size, gfp_mask);
+	if (err)
+		return err;
+
+	mas_store_gfp(mas, NULL, gfp_mask);
+	return 0;
+}
+
+static void execmem_cache_free_slow(struct work_struct *work);
+static DECLARE_DELAYED_WORK(execmem_cache_free_work, execmem_cache_free_slow);
+
+static void execmem_cache_free_slow(struct work_struct *work)
+{
+	struct maple_tree *busy_areas = &execmem_cache.busy_areas;
+	MA_STATE(mas, busy_areas, 0, ULONG_MAX);
+	void *area;
+
+	guard(mutex)(&execmem_cache.mutex);
+
+	if (!execmem_cache.pending_free_cnt)
+		return;
+
+	mas_for_each(&mas, area, ULONG_MAX) {
+		if (!is_pending_free(area))
+			continue;
+
+		area = pending_free_clear(area);
+		if (__execmem_cache_free(&mas, area, GFP_KERNEL))
+			continue;
+
+		execmem_cache.pending_free_cnt--;
+	}
+
+	if (execmem_cache.pending_free_cnt)
+		schedule_delayed_work(&execmem_cache_free_work, FREE_DELAY);
+	else
+		schedule_work(&execmem_cache_clean_work);
+}
+
 static bool execmem_cache_free(void *ptr)
 {
 	struct maple_tree *busy_areas = &execmem_cache.busy_areas;
-	struct mutex *mutex = &execmem_cache.mutex;
 	unsigned long addr = (unsigned long)ptr;
 	MA_STATE(mas, busy_areas, addr, addr);
-	size_t size;
 	void *area;
+	int err;
+
+	guard(mutex)(&execmem_cache.mutex);
 
-	mutex_lock(mutex);
 	area = mas_walk(&mas);
-	if (!area) {
-		mutex_unlock(mutex);
+	if (!area)
 		return false;
-	}
-	size = mas_range_len(&mas);
 
-	mas_store_gfp(&mas, NULL, GFP_KERNEL);
-	mutex_unlock(mutex);
-
-	execmem_fill_trapping_insns(ptr, size, /* writable = */ false);
-
-	execmem_cache_add(ptr, size);
+	err = __execmem_cache_free(&mas, area, GFP_KERNEL | __GFP_NORETRY);
+	if (err) {
+		/*
+		 * mas points to exact slot we've got the area from, nothing
+		 * else can modify the tree because of the mutex, so there
+		 * won't be any allocations in mas_store_gfp() and it will just
+		 * change the pointer.
+		 */
+		area = pending_free_set(area);
+		mas_store_gfp(&mas, area, GFP_KERNEL);
+		execmem_cache.pending_free_cnt++;
+		schedule_delayed_work(&execmem_cache_free_work, FREE_DELAY);
+		return true;
+	}
 
 	schedule_work(&execmem_cache_clean_work);
 