Skip to content

Commit c4a2e5f

Browse files
committed
drm/xe: Add devcoredump chunking
Chunk devcoredump into 1.5G pieces to avoid hitting the kvmalloc limit of 2G. A simple algorithm reads 1.5G at a time in the xe_devcoredump_read callback as needed. Some memory allocations are changed to GFP_ATOMIC as they are done in xe_devcoredump_read, which holds a lock in the path of reclaim. The allocations are small, so in practice they should never fail. v2: - Update commit message wrt GFP_ATOMIC (John H) v6: - Drop GFP_ATOMIC change for hwconfig (John H) Signed-off-by: Matthew Brost <matthew.brost@intel.com> Reviewed-by: Jonathan Cavitt <jonathan.cavitt@intel.com> Link: https://lore.kernel.org/r/20250423171725.597955-2-matthew.brost@intel.com
1 parent f9e4d8b commit c4a2e5f

2 files changed

Lines changed: 49 additions & 12 deletions

File tree

drivers/gpu/drm/xe/xe_devcoredump.c

Lines changed: 47 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,8 @@ static struct xe_guc *exec_queue_to_guc(struct xe_exec_queue *q)
8080
return &q->gt->uc.guc;
8181
}
8282

83-
static ssize_t __xe_devcoredump_read(char *buffer, size_t count,
83+
static ssize_t __xe_devcoredump_read(char *buffer, ssize_t count,
84+
ssize_t start,
8485
struct xe_devcoredump *coredump)
8586
{
8687
struct xe_device *xe;
@@ -94,7 +95,7 @@ static ssize_t __xe_devcoredump_read(char *buffer, size_t count,
9495
ss = &coredump->snapshot;
9596

9697
iter.data = buffer;
97-
iter.start = 0;
98+
iter.start = start;
9899
iter.remain = count;
99100

100101
p = drm_coredump_printer(&iter);
@@ -168,6 +169,8 @@ static void xe_devcoredump_snapshot_free(struct xe_devcoredump_snapshot *ss)
168169
ss->vm = NULL;
169170
}
170171

172+
#define XE_DEVCOREDUMP_CHUNK_MAX (SZ_512M + SZ_1G)
173+
171174
static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
172175
size_t count, void *data, size_t datalen)
173176
{
@@ -183,6 +186,9 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
183186
/* Ensure delayed work is captured before continuing */
184187
flush_work(&ss->work);
185188

189+
if (ss->read.size > XE_DEVCOREDUMP_CHUNK_MAX)
190+
xe_pm_runtime_get(gt_to_xe(ss->gt));
191+
186192
mutex_lock(&coredump->lock);
187193

188194
if (!ss->read.buffer) {
@@ -195,12 +201,26 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
195201
return 0;
196202
}
197203

204+
if (offset >= ss->read.chunk_position + XE_DEVCOREDUMP_CHUNK_MAX ||
205+
offset < ss->read.chunk_position) {
206+
ss->read.chunk_position =
207+
ALIGN_DOWN(offset, XE_DEVCOREDUMP_CHUNK_MAX);
208+
209+
__xe_devcoredump_read(ss->read.buffer,
210+
XE_DEVCOREDUMP_CHUNK_MAX,
211+
ss->read.chunk_position, coredump);
212+
}
213+
198214
byte_copied = count < ss->read.size - offset ? count :
199215
ss->read.size - offset;
200-
memcpy(buffer, ss->read.buffer + offset, byte_copied);
216+
memcpy(buffer, ss->read.buffer +
217+
(offset % XE_DEVCOREDUMP_CHUNK_MAX), byte_copied);
201218

202219
mutex_unlock(&coredump->lock);
203220

221+
if (ss->read.size > XE_DEVCOREDUMP_CHUNK_MAX)
222+
xe_pm_runtime_put(gt_to_xe(ss->gt));
223+
204224
return byte_copied;
205225
}
206226

@@ -254,17 +274,32 @@ static void xe_devcoredump_deferred_snap_work(struct work_struct *work)
254274
xe_guc_exec_queue_snapshot_capture_delayed(ss->ge);
255275
xe_force_wake_put(gt_to_fw(ss->gt), fw_ref);
256276

257-
xe_pm_runtime_put(xe);
277+
ss->read.chunk_position = 0;
258278

259279
/* Calculate devcoredump size */
260-
ss->read.size = __xe_devcoredump_read(NULL, INT_MAX, coredump);
261-
262-
ss->read.buffer = kvmalloc(ss->read.size, GFP_USER);
263-
if (!ss->read.buffer)
264-
return;
280+
ss->read.size = __xe_devcoredump_read(NULL, LONG_MAX, 0, coredump);
281+
282+
if (ss->read.size > XE_DEVCOREDUMP_CHUNK_MAX) {
283+
ss->read.buffer = kvmalloc(XE_DEVCOREDUMP_CHUNK_MAX,
284+
GFP_USER);
285+
if (!ss->read.buffer)
286+
goto put_pm;
287+
288+
__xe_devcoredump_read(ss->read.buffer,
289+
XE_DEVCOREDUMP_CHUNK_MAX,
290+
0, coredump);
291+
} else {
292+
ss->read.buffer = kvmalloc(ss->read.size, GFP_USER);
293+
if (!ss->read.buffer)
294+
goto put_pm;
295+
296+
__xe_devcoredump_read(ss->read.buffer, ss->read.size, 0,
297+
coredump);
298+
xe_devcoredump_snapshot_free(ss);
299+
}
265300

266-
__xe_devcoredump_read(ss->read.buffer, ss->read.size, coredump);
267-
xe_devcoredump_snapshot_free(ss);
301+
put_pm:
302+
xe_pm_runtime_put(xe);
268303
}
269304

270305
static void devcoredump_snapshot(struct xe_devcoredump *coredump,
@@ -425,7 +460,7 @@ void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix, char suffi
425460
if (offset & 3)
426461
drm_printf(p, "Offset not word aligned: %zu", offset);
427462

428-
line_buff = kzalloc(DMESG_MAX_LINE_LEN, GFP_KERNEL);
463+
line_buff = kzalloc(DMESG_MAX_LINE_LEN, GFP_ATOMIC);
429464
if (!line_buff) {
430465
drm_printf(p, "Failed to allocate line buffer\n");
431466
return;

drivers/gpu/drm/xe/xe_devcoredump_types.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,8 @@ struct xe_devcoredump_snapshot {
6666
struct {
6767
/** @read.size: size of devcoredump in human readable format */
6868
ssize_t size;
69+
/** @read.chunk_position: position of devcoredump chunk */
70+
ssize_t chunk_position;
6971
/** @read.buffer: buffer of devcoredump in human readable format */
7072
char *buffer;
7173
} read;

0 commit comments

Comments
 (0)