Skip to content

Commit 1919d16

Browse files
committed
drm/xe: Implement xe_pagefault_init
Create pagefault queues and initialize them. v2: - Fix kernel doc + add comment for number of PF queues (Francois) v4: - Move init after GT init (CI, Francois) Signed-off-by: Matthew Brost <matthew.brost@intel.com> Reviewed-by: Francois Dugast <francois.dugast@intel.com> Tested-by: Francois Dugast <francois.dugast@intel.com> Link: https://patch.msgid.link/20251031165416.2871503-3-matthew.brost@intel.com
1 parent 620a09f commit 1919d16

3 files changed

Lines changed: 112 additions & 2 deletions

File tree

drivers/gpu/drm/xe/xe_device.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@
5252
#include "xe_nvm.h"
5353
#include "xe_oa.h"
5454
#include "xe_observation.h"
55+
#include "xe_pagefault.h"
5556
#include "xe_pat.h"
5657
#include "xe_pcode.h"
5758
#include "xe_pm.h"
@@ -896,6 +897,10 @@ int xe_device_probe(struct xe_device *xe)
896897
return err;
897898
}
898899

900+
err = xe_pagefault_init(xe);
901+
if (err)
902+
return err;
903+
899904
if (xe->tiles->media_gt &&
900905
XE_GT_WA(xe->tiles->media_gt, 15015404425_disable))
901906
XE_DEVICE_WA_DISABLE(xe, 15015404425);

drivers/gpu/drm/xe/xe_device_types.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#include "xe_lmtt_types.h"
1919
#include "xe_memirq_types.h"
2020
#include "xe_oa_types.h"
21+
#include "xe_pagefault_types.h"
2122
#include "xe_platform_types.h"
2223
#include "xe_pmu_types.h"
2324
#include "xe_pt_types.h"
@@ -413,6 +414,16 @@ struct xe_device {
413414
u32 next_asid;
414415
/** @usm.lock: protects UM state */
415416
struct rw_semaphore lock;
417+
/** @usm.pf_wq: page fault work queue, unbound, high priority */
418+
struct workqueue_struct *pf_wq;
419+
/*
420+
* We pick 4 here because, in the current implementation, it
421+
* yields the best bandwidth utilization of the kernel paging
422+
* engine.
423+
*/
424+
#define XE_PAGEFAULT_QUEUE_COUNT 4
425+
/** @usm.pf_queue: Page fault queues */
426+
struct xe_pagefault_queue pf_queue[XE_PAGEFAULT_QUEUE_COUNT];
416427
} usm;
417428

418429
/** @pinned: pinned BO state */

drivers/gpu/drm/xe/xe_pagefault.c

Lines changed: 96 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,10 @@
33
* Copyright © 2025 Intel Corporation
44
*/
55

6+
#include <drm/drm_managed.h>
7+
8+
#include "xe_device.h"
9+
#include "xe_gt_types.h"
610
#include "xe_pagefault.h"
711
#include "xe_pagefault_types.h"
812

@@ -21,6 +25,76 @@
2125
* xe_pagefault.c implements the consumer layer.
2226
*/
2327

28+
static int xe_pagefault_entry_size(void)
29+
{
30+
/*
31+
* Power of two alignment is not a hardware requirement, rather a
32+
* software restriction which makes the math for page fault queue
33+
* management simplier.
34+
*/
35+
return roundup_pow_of_two(sizeof(struct xe_pagefault));
36+
}
37+
38+
/* Worker entry point for a page fault queue; consumer side is not wired up yet */
static void xe_pagefault_queue_work(struct work_struct *w)
{
	/* TODO: Implement processing of queued page faults */
}
42+
43+
static int xe_pagefault_queue_init(struct xe_device *xe,
44+
struct xe_pagefault_queue *pf_queue)
45+
{
46+
struct xe_gt *gt;
47+
int total_num_eus = 0;
48+
u8 id;
49+
50+
for_each_gt(gt, xe, id) {
51+
xe_dss_mask_t all_dss;
52+
int num_dss, num_eus;
53+
54+
bitmap_or(all_dss, gt->fuse_topo.g_dss_mask,
55+
gt->fuse_topo.c_dss_mask, XE_MAX_DSS_FUSE_BITS);
56+
57+
num_dss = bitmap_weight(all_dss, XE_MAX_DSS_FUSE_BITS);
58+
num_eus = bitmap_weight(gt->fuse_topo.eu_mask_per_dss,
59+
XE_MAX_EU_FUSE_BITS) * num_dss;
60+
61+
total_num_eus += num_eus;
62+
}
63+
64+
xe_assert(xe, total_num_eus);
65+
66+
/*
67+
* user can issue separate page faults per EU and per CS
68+
*
69+
* XXX: Multiplier required as compute UMD are getting PF queue errors
70+
* without it. Follow on why this multiplier is required.
71+
*/
72+
#define PF_MULTIPLIER 8
73+
pf_queue->size = (total_num_eus + XE_NUM_HW_ENGINES) *
74+
xe_pagefault_entry_size() * PF_MULTIPLIER;
75+
pf_queue->size = roundup_pow_of_two(pf_queue->size);
76+
#undef PF_MULTIPLIER
77+
78+
drm_dbg(&xe->drm, "xe_pagefault_entry_size=%d, total_num_eus=%d, pf_queue->size=%u",
79+
xe_pagefault_entry_size(), total_num_eus, pf_queue->size);
80+
81+
spin_lock_init(&pf_queue->lock);
82+
INIT_WORK(&pf_queue->worker, xe_pagefault_queue_work);
83+
84+
pf_queue->data = drmm_kzalloc(&xe->drm, pf_queue->size, GFP_KERNEL);
85+
if (!pf_queue->data)
86+
return -ENOMEM;
87+
88+
return 0;
89+
}
90+
91+
static void xe_pagefault_fini(void *arg)
92+
{
93+
struct xe_device *xe = arg;
94+
95+
destroy_workqueue(xe->usm.pf_wq);
96+
}
97+
2498
/**
2599
* xe_pagefault_init() - Page fault init
26100
* @xe: xe device instance
@@ -31,8 +105,28 @@
31105
*/
32106
int xe_pagefault_init(struct xe_device *xe)
33107
{
34-
/* TODO - implement */
35-
return 0;
108+
int err, i;
109+
110+
if (!xe->info.has_usm)
111+
return 0;
112+
113+
xe->usm.pf_wq = alloc_workqueue("xe_page_fault_work_queue",
114+
WQ_UNBOUND | WQ_HIGHPRI,
115+
XE_PAGEFAULT_QUEUE_COUNT);
116+
if (!xe->usm.pf_wq)
117+
return -ENOMEM;
118+
119+
for (i = 0; i < XE_PAGEFAULT_QUEUE_COUNT; ++i) {
120+
err = xe_pagefault_queue_init(xe, xe->usm.pf_queue + i);
121+
if (err)
122+
goto err_out;
123+
}
124+
125+
return devm_add_action_or_reset(xe->drm.dev, xe_pagefault_fini, xe);
126+
127+
err_out:
128+
destroy_workqueue(xe->usm.pf_wq);
129+
return err;
36130
}
37131

38132
/**

0 commit comments

Comments
 (0)