Skip to content

Commit 620a09f

Browse files
committed
drm/xe: Stub out new pagefault layer
Stub out the new page fault layer and add kernel documentation. This is intended as a replacement for the GT page fault layer, enabling multiple producers to hook into a shared page fault consumer interface. v2: - Fix kernel doc typo (checkpatch) - Remove comment around GT (Stuart) - Add explaination around reclaim (Francois) - Add comment around u8 vs enum (Francois) - Include engine instance (Stuart) v3: - Fix XE_PAGEFAULT_TYPE_ATOMIC_ACCESS_VIOLATION kernel doc (Stuart) Signed-off-by: Matthew Brost <matthew.brost@intel.com> Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com> Tested-by: Francois Dugast <francois.dugast@intel.com> Link: https://patch.msgid.link/20251031165416.2871503-2-matthew.brost@intel.com
1 parent 1a2cf01 commit 620a09f

4 files changed

Lines changed: 221 additions & 0 deletions

File tree

drivers/gpu/drm/xe/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,7 @@ xe-y += xe_bb.o \
9494
xe_nvm.o \
9595
xe_oa.o \
9696
xe_observation.o \
97+
xe_pagefault.o \
9798
xe_pat.o \
9899
xe_pci.o \
99100
xe_pcode.o \

drivers/gpu/drm/xe/xe_pagefault.c

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
// SPDX-License-Identifier: MIT
2+
/*
3+
* Copyright © 2025 Intel Corporation
4+
*/
5+
6+
#include "xe_pagefault.h"
7+
#include "xe_pagefault_types.h"
8+
9+
/**
10+
* DOC: Xe page faults
11+
*
12+
* Xe page faults are handled in two layers. The producer layer interacts with
13+
* hardware or firmware to receive and parse faults into struct xe_pagefault,
14+
* then forwards them to the consumer. The consumer layer services the faults
15+
* (e.g., memory migration, page table updates) and acknowledges the result back
16+
* to the producer, which then forwards the results to the hardware or firmware.
17+
* The consumer uses a page fault queue sized to absorb all potential faults and
18+
* a multi-threaded worker to process them. Multiple producers are supported,
19+
* with a single shared consumer.
20+
*
21+
* xe_pagefault.c implements the consumer layer.
22+
*/
23+
24+
/**
 * xe_pagefault_init() - Initialize the Xe page fault consumer
 * @xe: xe device instance
 *
 * Set up the device-wide page fault state. Must be called after fuses have
 * been read.
 *
 * Return: 0 on success, errno on failure
 */
int xe_pagefault_init(struct xe_device *xe)
{
	/* TODO - implement */
	return 0;
}
37+
38+
/**
 * xe_pagefault_reset() - Reset page fault state for a GT
 * @xe: xe device instance
 * @gt: GT being reset
 *
 * Squash any page faults still pending on @gt so they are not serviced after
 * the reset.
 */
void xe_pagefault_reset(struct xe_device *xe, struct xe_gt *gt)
{
	/* TODO - implement */
}
50+
51+
/**
 * xe_pagefault_handler() - Queue a page fault for deferred servicing
 * @xe: xe device instance
 * @pf: Page fault
 *
 * Copy the fault into the page fault queue (a plain memory buffer) and kick
 * the worker that services it. Safe to be called from IRQ or process context.
 * Reclaim safe.
 *
 * Return: 0 on success, errno on failure
 */
int xe_pagefault_handler(struct xe_device *xe, struct xe_pagefault *pf)
{
	/* TODO - implement */
	return 0;
}

drivers/gpu/drm/xe/xe_pagefault.h

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
/* SPDX-License-Identifier: MIT */
/*
 * Copyright © 2025 Intel Corporation
 */

#ifndef _XE_PAGEFAULT_H_
#define _XE_PAGEFAULT_H_

struct xe_device;
struct xe_gt;
struct xe_pagefault;

/* Consumer-side page fault interface; see DOC comment in xe_pagefault.c */

int xe_pagefault_init(struct xe_device *xe);

void xe_pagefault_reset(struct xe_device *xe, struct xe_gt *gt);

int xe_pagefault_handler(struct xe_device *xe, struct xe_pagefault *pf);

#endif
Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
/* SPDX-License-Identifier: MIT */
/*
 * Copyright © 2025 Intel Corporation
 */

#ifndef _XE_PAGEFAULT_TYPES_H_
#define _XE_PAGEFAULT_TYPES_H_

#include <linux/workqueue.h>

struct xe_gt;
struct xe_pagefault;

/** enum xe_pagefault_access_type - Xe page fault access type */
enum xe_pagefault_access_type {
	/** @XE_PAGEFAULT_ACCESS_TYPE_READ: Read access type */
	XE_PAGEFAULT_ACCESS_TYPE_READ = 0,
	/** @XE_PAGEFAULT_ACCESS_TYPE_WRITE: Write access type */
	XE_PAGEFAULT_ACCESS_TYPE_WRITE = 1,
	/** @XE_PAGEFAULT_ACCESS_TYPE_ATOMIC: Atomic access type */
	XE_PAGEFAULT_ACCESS_TYPE_ATOMIC = 2,
};

/** enum xe_pagefault_type - Xe page fault type */
enum xe_pagefault_type {
	/** @XE_PAGEFAULT_TYPE_NOT_PRESENT: Not present */
	XE_PAGEFAULT_TYPE_NOT_PRESENT = 0,
	/** @XE_PAGEFAULT_TYPE_WRITE_ACCESS_VIOLATION: Write access violation */
	XE_PAGEFAULT_TYPE_WRITE_ACCESS_VIOLATION = 1,
	/** @XE_PAGEFAULT_TYPE_ATOMIC_ACCESS_VIOLATION: Atomic access violation */
	XE_PAGEFAULT_TYPE_ATOMIC_ACCESS_VIOLATION = 2,
};

/** struct xe_pagefault_ops - Xe pagefault ops (producer) */
struct xe_pagefault_ops {
	/**
	 * @ack_fault: Ack fault
	 * @pf: Page fault
	 * @err: Error state of fault
	 *
	 * Page fault producer receives acknowledgment from the consumer and
	 * sends the result to the HW/FW interface.
	 */
	void (*ack_fault)(struct xe_pagefault *pf, int err);
};

/**
 * struct xe_pagefault - Xe page fault
 *
 * Generic page fault structure for communication between producer and consumer.
 * Carefully sized to be 64 bytes. Upon a device page fault, the producer
 * populates this structure, and the consumer copies it into the page-fault
 * queue for deferred handling.
 */
struct xe_pagefault {
	/**
	 * @gt: GT of fault
	 */
	struct xe_gt *gt;
	/**
	 * @consumer: State for the software handling the fault. Populated by
	 * the producer and may be modified by the consumer to communicate
	 * information back to the producer upon fault acknowledgment.
	 */
	struct {
		/** @consumer.page_addr: address of page fault */
		u64 page_addr;
		/** @consumer.asid: address space ID */
		u32 asid;
		/**
		 * @consumer.access_type: access type, u8 rather than enum to
		 * keep size compact
		 */
		u8 access_type;
		/**
		 * @consumer.fault_type: fault type, u8 rather than enum to
		 * keep size compact
		 */
		u8 fault_type;
#define XE_PAGEFAULT_LEVEL_NACK	0xff	/* Producer indicates nack fault */
		/** @consumer.fault_level: fault level */
		u8 fault_level;
		/** @consumer.engine_class: engine class */
		u8 engine_class;
		/** @consumer.engine_instance: engine instance */
		u8 engine_instance;
		/** @consumer.reserved: reserved bits for future expansion */
		u8 reserved[7];
	} consumer;
	/**
	 * @producer: State for the producer (i.e., HW/FW interface). Populated
	 * by the producer and should not be modified - or even inspected - by
	 * the consumer, except for calling operations.
	 */
	struct {
		/** @producer.private: private pointer */
		void *private;
		/** @producer.ops: operations */
		const struct xe_pagefault_ops *ops;
#define XE_PAGEFAULT_PRODUCER_MSG_LEN_DW	4
		/**
		 * @producer.msg: page fault message, used by producer in fault
		 * acknowledgment to formulate response to HW/FW interface.
		 * Included in the page-fault message because the producer
		 * typically receives the fault in a context where memory cannot
		 * be allocated (e.g., atomic context or the reclaim path).
		 */
		u32 msg[XE_PAGEFAULT_PRODUCER_MSG_LEN_DW];
	} producer;
};

/**
 * struct xe_pagefault_queue - Xe pagefault queue (consumer)
 *
 * Used to capture all device page faults for deferred processing. Size this
 * queue to absorb the device's worst-case number of outstanding faults.
 */
struct xe_pagefault_queue {
	/**
	 * @data: Data in queue containing struct xe_pagefault, protected by
	 * @lock
	 */
	void *data;
	/** @size: Size of queue in bytes */
	u32 size;
	/** @head: Head pointer in bytes, moved by producer, protected by @lock */
	u32 head;
	/** @tail: Tail pointer in bytes, moved by consumer, protected by @lock */
	u32 tail;
	/** @lock: protects page fault queue */
	spinlock_t lock;
	/** @worker: to process page faults */
	struct work_struct worker;
};

#endif

0 commit comments

Comments
 (0)