Skip to content

Commit 8ae84a2

Browse files
committed
drm/xe: Move lockdep protection from mem_access to xe_pm_runtime
The mem_access itself is not holding any lock, but it attempts to train lockdep on the potentially scary locks taken during runtime pm. We will soon remove the mem_access get and put helpers in favor of direct xe_pm_runtime calls, so let's move this lock to where it now belongs. v2: s/lockdep_training/lockdep_prime (Matt Auld) Reviewed-by: Matthew Auld <matthew.auld@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20240417203952.25503-4-rodrigo.vivi@intel.com Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
1 parent 77e619a commit 8ae84a2

3 files changed

Lines changed: 37 additions & 35 deletions

File tree

drivers/gpu/drm/xe/xe_device.c

Lines changed: 0 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -46,12 +46,6 @@
4646
#include "xe_vm.h"
4747
#include "xe_wait_user_fence.h"
4848

49-
#ifdef CONFIG_LOCKDEP
50-
struct lockdep_map xe_device_mem_access_lockdep_map = {
51-
.name = "xe_device_mem_access_lockdep_map"
52-
};
53-
#endif
54-
5549
static int xe_file_open(struct drm_device *dev, struct drm_file *file)
5650
{
5751
struct xe_device *xe = to_xe_device(dev);
@@ -780,23 +774,6 @@ void xe_device_mem_access_get(struct xe_device *xe)
780774
if (xe_pm_read_callback_task(xe) == current)
781775
return;
782776

783-
/*
784-
* Since the resume here is synchronous it can be quite easy to deadlock
785-
* if we are not careful. Also in practice it might be quite timing
786-
* sensitive to ever see the 0 -> 1 transition with the callers locks
787-
* held, so deadlocks might exist but are hard for lockdep to ever see.
788-
* With this in mind, help lockdep learn about the potentially scary
789-
* stuff that can happen inside the runtime_resume callback by acquiring
790-
* a dummy lock (it doesn't protect anything and gets compiled out on
791-
* non-debug builds). Lockdep then only needs to see the
792-
* mem_access_lockdep_map -> runtime_resume callback once, and then can
793-
* hopefully validate all the (callers_locks) -> mem_access_lockdep_map.
794-
* For example if the (callers_locks) are ever grabbed in the
795-
* runtime_resume callback, lockdep should give us a nice splat.
796-
*/
797-
lock_map_acquire(&xe_device_mem_access_lockdep_map);
798-
lock_map_release(&xe_device_mem_access_lockdep_map);
799-
800777
xe_pm_runtime_get(xe);
801778
ref = atomic_inc_return(&xe->mem_access.ref);
802779

drivers/gpu/drm/xe/xe_device.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,6 @@ struct xe_file;
1616
#include "xe_force_wake.h"
1717
#include "xe_macros.h"
1818

19-
#ifdef CONFIG_LOCKDEP
20-
extern struct lockdep_map xe_device_mem_access_lockdep_map;
21-
#endif
22-
2319
static inline struct xe_device *to_xe_device(const struct drm_device *dev)
2420
{
2521
return container_of(dev, struct xe_device, drm);

drivers/gpu/drm/xe/xe_pm.c

Lines changed: 37 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,12 @@
6868
* management (RPS).
6969
*/
7070

71+
#ifdef CONFIG_LOCKDEP
72+
struct lockdep_map xe_pm_runtime_lockdep_map = {
73+
.name = "xe_pm_runtime_lockdep_map"
74+
};
75+
#endif
76+
7177
/**
7278
* xe_pm_suspend - Helper for System suspend, i.e. S0->S3 / S0->S2idle
7379
* @xe: xe device instance
@@ -307,27 +313,27 @@ int xe_pm_runtime_suspend(struct xe_device *xe)
307313
xe_pm_write_callback_task(xe, current);
308314

309315
/*
310-
* The actual xe_device_mem_access_put() is always async underneath, so
316+
* The actual xe_pm_runtime_put() is always async underneath, so
311317
* exactly where that is called should make no difference to us. However
312318
* we still need to be very careful with the locks that this callback
313319
* acquires and the locks that are acquired and held by any callers of
314-
* xe_device_mem_access_get(). We already have the matching annotation
320+
* xe_pm_runtime_get(). We already have the matching annotation
315321
* on that side, but we also need it here. For example lockdep should be
316322
* able to tell us if the following scenario is in theory possible:
317323
*
318324
* CPU0 | CPU1 (kworker)
319325
* lock(A) |
320326
* | xe_pm_runtime_suspend()
321327
* | lock(A)
322-
* xe_device_mem_access_get() |
328+
* xe_pm_runtime_get() |
323329
*
324330
* This will clearly deadlock since rpm core needs to wait for
325331
* xe_pm_runtime_suspend() to complete, but here we are holding lock(A)
326332
* on CPU0 which prevents CPU1 making forward progress. With the
327-
* annotation here and in xe_device_mem_access_get() lockdep will see
333+
* annotation here and in xe_pm_runtime_get() lockdep will see
328334
* the potential lock inversion and give us a nice splat.
329335
*/
330-
lock_map_acquire(&xe_device_mem_access_lockdep_map);
336+
lock_map_acquire(&xe_pm_runtime_lockdep_map);
331337

332338
/*
333339
* Applying lock for entire list op as xe_ttm_bo_destroy and xe_bo_move_notify
@@ -353,7 +359,7 @@ int xe_pm_runtime_suspend(struct xe_device *xe)
353359

354360
xe_irq_suspend(xe);
355361
out:
356-
lock_map_release(&xe_device_mem_access_lockdep_map);
362+
lock_map_release(&xe_pm_runtime_lockdep_map);
357363
xe_pm_write_callback_task(xe, NULL);
358364
return err;
359365
}
@@ -373,7 +379,7 @@ int xe_pm_runtime_resume(struct xe_device *xe)
373379
/* Disable access_ongoing asserts and prevent recursive pm calls */
374380
xe_pm_write_callback_task(xe, current);
375381

376-
lock_map_acquire(&xe_device_mem_access_lockdep_map);
382+
lock_map_acquire(&xe_pm_runtime_lockdep_map);
377383

378384
/*
379385
* It can be possible that xe has allowed d3cold but other pcie devices
@@ -408,11 +414,31 @@ int xe_pm_runtime_resume(struct xe_device *xe)
408414
goto out;
409415
}
410416
out:
411-
lock_map_release(&xe_device_mem_access_lockdep_map);
417+
lock_map_release(&xe_pm_runtime_lockdep_map);
412418
xe_pm_write_callback_task(xe, NULL);
413419
return err;
414420
}
415421

422+
/*
423+
* For places where resume is synchronous it can be quite easy to deadlock
424+
* if we are not careful. Also in practice it might be quite timing
425+
* sensitive to ever see the 0 -> 1 transition with the callers locks
426+
* held, so deadlocks might exist but are hard for lockdep to ever see.
427+
* With this in mind, help lockdep learn about the potentially scary
428+
* stuff that can happen inside the runtime_resume callback by acquiring
429+
* a dummy lock (it doesn't protect anything and gets compiled out on
430+
* non-debug builds). Lockdep then only needs to see the
431+
* xe_pm_runtime_lockdep_map -> runtime_resume callback once, and then can
432+
* hopefully validate all the (callers_locks) -> xe_pm_runtime_lockdep_map.
433+
* For example if the (callers_locks) are ever grabbed in the
434+
* runtime_resume callback, lockdep should give us a nice splat.
435+
*/
436+
static void pm_runtime_lockdep_prime(void)
437+
{
438+
lock_map_acquire(&xe_pm_runtime_lockdep_map);
439+
lock_map_release(&xe_pm_runtime_lockdep_map);
440+
}
441+
416442
/**
417443
* xe_pm_runtime_get - Get a runtime_pm reference and resume synchronously
418444
* @xe: xe device instance
@@ -424,6 +450,7 @@ void xe_pm_runtime_get(struct xe_device *xe)
424450
if (xe_pm_read_callback_task(xe) == current)
425451
return;
426452

453+
pm_runtime_lockdep_prime();
427454
pm_runtime_resume(xe->drm.dev);
428455
}
429456

@@ -453,6 +480,7 @@ int xe_pm_runtime_get_ioctl(struct xe_device *xe)
453480
if (WARN_ON(xe_pm_read_callback_task(xe) == current))
454481
return -ELOOP;
455482

483+
pm_runtime_lockdep_prime();
456484
return pm_runtime_get_sync(xe->drm.dev);
457485
}
458486

@@ -519,6 +547,7 @@ bool xe_pm_runtime_resume_and_get(struct xe_device *xe)
519547
return true;
520548
}
521549

550+
pm_runtime_lockdep_prime();
522551
return pm_runtime_resume_and_get(xe->drm.dev) >= 0;
523552
}
524553

0 commit comments

Comments
 (0)