Skip to content

Commit 7e3f4a3

Browse files
committed
drm/xe: handle pinned memory in PM notifier
Userspace is still alive and kicking at this point, so actually moving pinned memory here is tricky. However, we can instead pre-allocate the backup storage upfront from the notifier, such that we scoop up as much as we can, and then leave the final .suspend() to do the actual copy (or allocate anything that we missed). That way the bulk of our allocations will hopefully be done outside the more restrictive .suspend(). We do need to be extra careful, though, since the pinned handling can now race with the PM notifier — for example, something becoming unpinned after we prepare it from the notifier.

v2 (Thomas):
  - Fix kernel doc and drop the pin as soon as we are done with the restore, instead of deferring to later.

Suggested-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Link: https://lore.kernel.org/r/20250416150913.434369-8-matthew.auld@intel.com
1 parent e28647b commit 7e3f4a3

5 files changed

Lines changed: 176 additions & 19 deletions

File tree

drivers/gpu/drm/xe/xe_bo.c

Lines changed: 107 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1084,6 +1084,80 @@ long xe_bo_shrink(struct ttm_operation_ctx *ctx, struct ttm_buffer_object *bo,
10841084
return lret;
10851085
}
10861086

1087+
/**
1088+
* xe_bo_notifier_prepare_pinned() - Prepare a pinned VRAM object to be backed
1089+
* up in system memory.
1090+
* @bo: The buffer object to prepare.
1091+
*
1092+
* On successful completion, the object backup pages are allocated. Expectation
1093+
* is that this is called from the PM notifier, prior to suspend/hibernation.
1094+
*
1095+
* Return: 0 on success. Negative error code on failure.
1096+
*/
1097+
int xe_bo_notifier_prepare_pinned(struct xe_bo *bo)
1098+
{
1099+
struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
1100+
struct xe_bo *backup;
1101+
int ret = 0;
1102+
1103+
xe_bo_lock(bo, false);
1104+
1105+
xe_assert(xe, !bo->backup_obj);
1106+
1107+
/*
1108+
* Since this is called from the PM notifier we might have raced with
1109+
* someone unpinning this after we dropped the pinned list lock and
1110+
* grabbing the above bo lock.
1111+
*/
1112+
if (!xe_bo_is_pinned(bo))
1113+
goto out_unlock_bo;
1114+
1115+
if (!xe_bo_is_vram(bo))
1116+
goto out_unlock_bo;
1117+
1118+
if (bo->flags & XE_BO_FLAG_PINNED_NORESTORE)
1119+
goto out_unlock_bo;
1120+
1121+
backup = ___xe_bo_create_locked(xe, NULL, NULL, bo->ttm.base.resv, NULL, bo->size,
1122+
DRM_XE_GEM_CPU_CACHING_WB, ttm_bo_type_kernel,
1123+
XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS |
1124+
XE_BO_FLAG_PINNED);
1125+
if (IS_ERR(backup)) {
1126+
ret = PTR_ERR(backup);
1127+
goto out_unlock_bo;
1128+
}
1129+
1130+
backup->parent_obj = xe_bo_get(bo); /* Released by bo_destroy */
1131+
ttm_bo_pin(&backup->ttm);
1132+
bo->backup_obj = backup;
1133+
1134+
out_unlock_bo:
1135+
xe_bo_unlock(bo);
1136+
return ret;
1137+
}
1138+
1139+
/**
1140+
* xe_bo_notifier_unprepare_pinned() - Undo the previous prepare operation.
1141+
* @bo: The buffer object to undo the prepare for.
1142+
*
1143+
* Always returns 0. The backup object is removed, if still present. Expectation
1144+
* it that this called from the PM notifier when undoing the prepare step.
1145+
*
1146+
* Return: Always returns 0.
1147+
*/
1148+
int xe_bo_notifier_unprepare_pinned(struct xe_bo *bo)
1149+
{
1150+
xe_bo_lock(bo, false);
1151+
if (bo->backup_obj) {
1152+
ttm_bo_unpin(&bo->backup_obj->ttm);
1153+
xe_bo_put(bo->backup_obj);
1154+
bo->backup_obj = NULL;
1155+
}
1156+
xe_bo_unlock(bo);
1157+
1158+
return 0;
1159+
}
1160+
10871161
/**
10881162
* xe_bo_evict_pinned() - Evict a pinned VRAM object to system memory
10891163
* @bo: The buffer object to move.
@@ -1098,7 +1172,8 @@ long xe_bo_shrink(struct ttm_operation_ctx *ctx, struct ttm_buffer_object *bo,
10981172
int xe_bo_evict_pinned(struct xe_bo *bo)
10991173
{
11001174
struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
1101-
struct xe_bo *backup;
1175+
struct xe_bo *backup = bo->backup_obj;
1176+
bool backup_created = false;
11021177
bool unmap = false;
11031178
int ret = 0;
11041179

@@ -1120,15 +1195,18 @@ int xe_bo_evict_pinned(struct xe_bo *bo)
11201195
if (bo->flags & XE_BO_FLAG_PINNED_NORESTORE)
11211196
goto out_unlock_bo;
11221197

1123-
backup = ___xe_bo_create_locked(xe, NULL, NULL, bo->ttm.base.resv, NULL, bo->size,
1124-
DRM_XE_GEM_CPU_CACHING_WB, ttm_bo_type_kernel,
1125-
XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS |
1126-
XE_BO_FLAG_PINNED);
1127-
if (IS_ERR(backup)) {
1128-
ret = PTR_ERR(backup);
1129-
goto out_unlock_bo;
1198+
if (!backup) {
1199+
backup = ___xe_bo_create_locked(xe, NULL, NULL, bo->ttm.base.resv, NULL, bo->size,
1200+
DRM_XE_GEM_CPU_CACHING_WB, ttm_bo_type_kernel,
1201+
XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS |
1202+
XE_BO_FLAG_PINNED);
1203+
if (IS_ERR(backup)) {
1204+
ret = PTR_ERR(backup);
1205+
goto out_unlock_bo;
1206+
}
1207+
backup->parent_obj = xe_bo_get(bo); /* Released by bo_destroy */
1208+
backup_created = true;
11301209
}
1131-
backup->parent_obj = xe_bo_get(bo); /* Released by bo_destroy */
11321210

11331211
if (xe_bo_is_user(bo) || (bo->flags & XE_BO_FLAG_PINNED_LATE_RESTORE)) {
11341212
struct xe_migrate *migrate;
@@ -1175,11 +1253,12 @@ int xe_bo_evict_pinned(struct xe_bo *bo)
11751253
bo->size);
11761254
}
11771255

1178-
bo->backup_obj = backup;
1256+
if (!bo->backup_obj)
1257+
bo->backup_obj = backup;
11791258

11801259
out_backup:
11811260
xe_bo_vunmap(backup);
1182-
if (ret)
1261+
if (ret && backup_created)
11831262
xe_bo_put(backup);
11841263
out_unlock_bo:
11851264
if (unmap)
@@ -1215,9 +1294,11 @@ int xe_bo_restore_pinned(struct xe_bo *bo)
12151294

12161295
xe_bo_lock(bo, false);
12171296

1218-
ret = ttm_bo_validate(&backup->ttm, &backup->placement, &ctx);
1219-
if (ret)
1220-
goto out_backup;
1297+
if (!xe_bo_is_pinned(backup)) {
1298+
ret = ttm_bo_validate(&backup->ttm, &backup->placement, &ctx);
1299+
if (ret)
1300+
goto out_unlock_bo;
1301+
}
12211302

12221303
if (xe_bo_is_user(bo) || (bo->flags & XE_BO_FLAG_PINNED_LATE_RESTORE)) {
12231304
struct xe_migrate *migrate;
@@ -1257,7 +1338,7 @@ int xe_bo_restore_pinned(struct xe_bo *bo)
12571338
if (iosys_map_is_null(&bo->vmap)) {
12581339
ret = xe_bo_vmap(bo);
12591340
if (ret)
1260-
goto out_unlock_bo;
1341+
goto out_backup;
12611342
unmap = true;
12621343
}
12631344

@@ -1269,8 +1350,11 @@ int xe_bo_restore_pinned(struct xe_bo *bo)
12691350

12701351
out_backup:
12711352
xe_bo_vunmap(backup);
1272-
if (!bo->backup_obj)
1353+
if (!bo->backup_obj) {
1354+
if (xe_bo_is_pinned(backup))
1355+
ttm_bo_unpin(&backup->ttm);
12731356
xe_bo_put(backup);
1357+
}
12741358
out_unlock_bo:
12751359
if (unmap)
12761360
xe_bo_vunmap(bo);
@@ -2304,6 +2388,13 @@ void xe_bo_unpin(struct xe_bo *bo)
23042388
xe_assert(xe, !list_empty(&bo->pinned_link));
23052389
list_del_init(&bo->pinned_link);
23062390
spin_unlock(&xe->pinned.lock);
2391+
2392+
if (bo->backup_obj) {
2393+
if (xe_bo_is_pinned(bo->backup_obj))
2394+
ttm_bo_unpin(&bo->backup_obj->ttm);
2395+
xe_bo_put(bo->backup_obj);
2396+
bo->backup_obj = NULL;
2397+
}
23072398
}
23082399
ttm_bo_unpin(&bo->ttm);
23092400
if (bo->ttm.ttm && ttm_tt_is_populated(bo->ttm.ttm))

drivers/gpu/drm/xe/xe_bo.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -277,6 +277,8 @@ int xe_bo_migrate(struct xe_bo *bo, u32 mem_type);
277277
int xe_bo_evict(struct xe_bo *bo, bool force_alloc);
278278

279279
int xe_bo_evict_pinned(struct xe_bo *bo);
280+
int xe_bo_notifier_prepare_pinned(struct xe_bo *bo);
281+
int xe_bo_notifier_unprepare_pinned(struct xe_bo *bo);
280282
int xe_bo_restore_pinned(struct xe_bo *bo);
281283

282284
int xe_bo_dma_unmap_pinned(struct xe_bo *bo);

drivers/gpu/drm/xe/xe_bo_evict.c

Lines changed: 50 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,13 @@ static int xe_bo_apply_to_pinned(struct xe_device *xe,
3434
ret = pinned_fn(bo);
3535
if (ret && pinned_list != new_list) {
3636
spin_lock(&xe->pinned.lock);
37-
list_move(&bo->pinned_link, pinned_list);
37+
/*
38+
* We might no longer be pinned, since PM notifier can
39+
* call this. If the pinned link is now empty, keep it
40+
* that way.
41+
*/
42+
if (!list_empty(&bo->pinned_link))
43+
list_move(&bo->pinned_link, pinned_list);
3844
spin_unlock(&xe->pinned.lock);
3945
}
4046
xe_bo_put(bo);
@@ -46,6 +52,49 @@ static int xe_bo_apply_to_pinned(struct xe_device *xe,
4652
return ret;
4753
}
4854

55+
/**
56+
* xe_bo_notifier_prepare_all_pinned() - Pre-allocate the backing pages for all
57+
* pinned VRAM objects which need to be saved.
58+
* @xe: xe device
59+
*
60+
* Should be called from PM notifier when preparing for s3/s4.
61+
*
62+
* Return: 0 on success, negative error code on error.
63+
*/
64+
int xe_bo_notifier_prepare_all_pinned(struct xe_device *xe)
65+
{
66+
int ret;
67+
68+
ret = xe_bo_apply_to_pinned(xe, &xe->pinned.early.kernel_bo_present,
69+
&xe->pinned.early.kernel_bo_present,
70+
xe_bo_notifier_prepare_pinned);
71+
if (!ret)
72+
ret = xe_bo_apply_to_pinned(xe, &xe->pinned.late.kernel_bo_present,
73+
&xe->pinned.late.kernel_bo_present,
74+
xe_bo_notifier_prepare_pinned);
75+
76+
return ret;
77+
}
78+
79+
/**
80+
* xe_bo_notifier_unprepare_all_pinned() - Remove the backing pages for all
81+
* pinned VRAM objects which have been restored.
82+
* @xe: xe device
83+
*
84+
* Should be called from PM notifier after exiting s3/s4 (either on success or
85+
* failure).
86+
*/
87+
void xe_bo_notifier_unprepare_all_pinned(struct xe_device *xe)
88+
{
89+
(void)xe_bo_apply_to_pinned(xe, &xe->pinned.early.kernel_bo_present,
90+
&xe->pinned.early.kernel_bo_present,
91+
xe_bo_notifier_unprepare_pinned);
92+
93+
(void)xe_bo_apply_to_pinned(xe, &xe->pinned.late.kernel_bo_present,
94+
&xe->pinned.late.kernel_bo_present,
95+
xe_bo_notifier_unprepare_pinned);
96+
}
97+
4998
/**
5099
* xe_bo_evict_all_user - evict all non-pinned user BOs from VRAM
51100
* @xe: xe device

drivers/gpu/drm/xe/xe_bo_evict.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ struct xe_device;
1010

1111
int xe_bo_evict_all(struct xe_device *xe);
1212
int xe_bo_evict_all_user(struct xe_device *xe);
13+
int xe_bo_notifier_prepare_all_pinned(struct xe_device *xe);
14+
void xe_bo_notifier_unprepare_all_pinned(struct xe_device *xe);
1315
int xe_bo_restore_early(struct xe_device *xe);
1416
int xe_bo_restore_late(struct xe_device *xe);
1517

drivers/gpu/drm/xe/xe_pm.c

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -297,9 +297,22 @@ static int xe_pm_notifier_callback(struct notifier_block *nb,
297297
case PM_SUSPEND_PREPARE:
298298
xe_pm_runtime_get(xe);
299299
err = xe_bo_evict_all_user(xe);
300-
xe_pm_runtime_put(xe);
301-
if (err)
300+
if (err) {
302301
drm_dbg(&xe->drm, "Notifier evict user failed (%d)\n", err);
302+
xe_pm_runtime_put(xe);
303+
break;
304+
}
305+
306+
err = xe_bo_notifier_prepare_all_pinned(xe);
307+
if (err) {
308+
drm_dbg(&xe->drm, "Notifier prepare pin failed (%d)\n", err);
309+
xe_pm_runtime_put(xe);
310+
}
311+
break;
312+
case PM_POST_HIBERNATION:
313+
case PM_POST_SUSPEND:
314+
xe_bo_notifier_unprepare_all_pinned(xe);
315+
xe_pm_runtime_put(xe);
303316
break;
304317
}
305318

0 commit comments

Comments
 (0)