Skip to content

Commit db16f9d

Browse files
committed
drm/xe: Split TLB invalidation code in frontend and backend
The frontend exposes an API to the driver to send invalidations, handles sequence number assignment, synchronization (fences), and provides a timeout mechanism. The backend issues the actual invalidation to the hardware (or firmware). The new layering easily allows issuing TLB invalidations to different hardware or firmware interfaces. Normalize some naming while here too. Signed-off-by: Stuart Summers <stuart.summers@intel.com> Reviewed-by: Stuart Summers <stuart.summers@intel.com> Signed-off-by: Matthew Brost <matthew.brost@intel.com> Link: https://lore.kernel.org/r/20250826182911.392550-10-stuart.summers@intel.com
1 parent 8443e8c commit db16f9d

8 files changed

Lines changed: 500 additions & 385 deletions

File tree

drivers/gpu/drm/xe/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ xe-y += xe_bb.o \
7575
xe_guc_log.o \
7676
xe_guc_pc.o \
7777
xe_guc_submit.o \
78+
xe_guc_tlb_inval.o \
7879
xe_heci_gsc.o \
7980
xe_huc.o \
8081
xe_hw_engine.o \

drivers/gpu/drm/xe/xe_gt.c

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -603,8 +603,6 @@ static void xe_gt_fini(void *arg)
603603
struct xe_gt *gt = arg;
604604
int i;
605605

606-
xe_gt_tlb_inval_fini(gt);
607-
608606
for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i)
609607
xe_hw_fence_irq_finish(&gt->fence_irq[i]);
610608

drivers/gpu/drm/xe/xe_guc_ct.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,9 @@
3030
#include "xe_guc_log.h"
3131
#include "xe_guc_relay.h"
3232
#include "xe_guc_submit.h"
33+
#include "xe_guc_tlb_inval.h"
3334
#include "xe_map.h"
3435
#include "xe_pm.h"
35-
#include "xe_tlb_inval.h"
3636
#include "xe_trace_guc.h"
3737

3838
static void receive_g2h(struct xe_guc_ct *ct);
Lines changed: 242 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,242 @@
1+
// SPDX-License-Identifier: MIT
2+
/*
3+
* Copyright © 2025 Intel Corporation
4+
*/
5+
6+
#include "abi/guc_actions_abi.h"
7+
8+
#include "xe_device.h"
9+
#include "xe_gt_stats.h"
10+
#include "xe_gt_types.h"
11+
#include "xe_guc.h"
12+
#include "xe_guc_ct.h"
13+
#include "xe_guc_tlb_inval.h"
14+
#include "xe_force_wake.h"
15+
#include "xe_mmio.h"
16+
#include "xe_tlb_inval.h"
17+
18+
#include "regs/xe_guc_regs.h"
19+
20+
/*
21+
* XXX: The seqno algorithm relies on TLB invalidation being processed in order
22+
* which they currently are by the GuC, if that changes the algorithm will need
23+
* to be updated.
24+
*/
25+
26+
static int send_tlb_inval(struct xe_guc *guc, const u32 *action, int len)
27+
{
28+
struct xe_gt *gt = guc_to_gt(guc);
29+
30+
xe_gt_assert(gt, action[1]); /* Seqno */
31+
32+
xe_gt_stats_incr(gt, XE_GT_STATS_ID_TLB_INVAL, 1);
33+
return xe_guc_ct_send(&guc->ct, action, len,
34+
G2H_LEN_DW_TLB_INVALIDATE, 1);
35+
}
36+
37+
#define MAKE_INVAL_OP(type) ((type << XE_GUC_TLB_INVAL_TYPE_SHIFT) | \
38+
XE_GUC_TLB_INVAL_MODE_HEAVY << XE_GUC_TLB_INVAL_MODE_SHIFT | \
39+
XE_GUC_TLB_INVAL_FLUSH_CACHE)
40+
41+
static int send_tlb_inval_all(struct xe_tlb_inval *tlb_inval, u32 seqno)
42+
{
43+
struct xe_guc *guc = tlb_inval->private;
44+
u32 action[] = {
45+
XE_GUC_ACTION_TLB_INVALIDATION_ALL,
46+
seqno,
47+
MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL),
48+
};
49+
50+
return send_tlb_inval(guc, action, ARRAY_SIZE(action));
51+
}
52+
53+
/*
 * Backend .ggtt hook: invalidate GGTT TLB entries, via the GuC when the CTB
 * is up, otherwise by writing the invalidation registers directly.
 */
static int send_tlb_inval_ggtt(struct xe_tlb_inval *tlb_inval, u32 seqno)
{
	struct xe_guc *guc = tlb_inval->private;
	struct xe_gt *gt = guc_to_gt(guc);
	struct xe_device *xe = guc_to_xe(guc);

	/*
	 * Returning -ECANCELED in this function is squashed at the caller and
	 * signals waiters.
	 */

	if (xe_guc_ct_enabled(&guc->ct) && guc->submission_state.enabled) {
		/* Normal path: GuC alive, send the invalidation over CTB */
		u32 action[] = {
			XE_GUC_ACTION_TLB_INVALIDATION,
			seqno,
			MAKE_INVAL_OP(XE_GUC_TLB_INVAL_GUC),
		};

		return send_tlb_inval(guc, action, ARRAY_SIZE(action));
	} else if (xe_device_uc_enabled(xe) && !xe_device_wedged(xe)) {
		/*
		 * CT not usable (e.g. during load/reset): poke the GuC TLB
		 * invalidation registers via MMIO instead.
		 */
		struct xe_mmio *mmio = &gt->mmio;
		unsigned int fw_ref;

		/* VFs have no direct register access for this */
		if (IS_SRIOV_VF(xe))
			return -ECANCELED;

		/* NOTE(review): fw_ref not checked for failure — confirm intended */
		fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
		if (xe->info.platform == XE_PVC || GRAPHICS_VER(xe) >= 20) {
			/* Two-register descriptor: DESC1 first, then DESC0 */
			xe_mmio_write32(mmio, PVC_GUC_TLB_INV_DESC1,
					PVC_GUC_TLB_INV_DESC1_INVALIDATE);
			xe_mmio_write32(mmio, PVC_GUC_TLB_INV_DESC0,
					PVC_GUC_TLB_INV_DESC0_VALID);
		} else {
			xe_mmio_write32(mmio, GUC_TLB_INV_CR,
					GUC_TLB_INV_CR_INVALIDATE);
		}
		xe_force_wake_put(gt_to_fw(gt), fw_ref);
	}

	return -ECANCELED;
}
94+
95+
/*
 * Ensure that roundup_pow_of_two(length) doesn't overflow.
 * Note that roundup_pow_of_two() operates on unsigned long,
 * not on u64.
 */
#define MAX_RANGE_TLB_INVALIDATION_LENGTH (rounddown_pow_of_two(ULONG_MAX))

/*
 * Backend .ppgtt hook: invalidate PPGTT TLB entries for [start, end) in the
 * address space identified by @asid. Uses a ranged invalidation when the
 * platform supports it and the range fits, otherwise a full invalidation.
 */
static int send_tlb_inval_ppgtt(struct xe_tlb_inval *tlb_inval, u32 seqno,
				u64 start, u64 end, u32 asid)
{
/* Worst-case action length: selective invalidation takes 7 dwords */
#define MAX_TLB_INVALIDATION_LEN 7
	struct xe_guc *guc = tlb_inval->private;
	struct xe_gt *gt = guc_to_gt(guc);
	u32 action[MAX_TLB_INVALIDATION_LEN];
	u64 length = end - start;
	int len = 0;

	/* No TLBs to maintain in execlist mode; squashed at the caller */
	if (guc_to_xe(guc)->info.force_execlist)
		return -ECANCELED;

	action[len++] = XE_GUC_ACTION_TLB_INVALIDATION;
	action[len++] = seqno;
	if (!gt_to_xe(gt)->info.has_range_tlb_inval ||
	    length > MAX_RANGE_TLB_INVALIDATION_LENGTH) {
		/* Ranged invalidation unavailable/too big: invalidate it all */
		action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL);
	} else {
		u64 orig_start = start;
		u64 align;

		/* Hardware minimum granule is 4K */
		if (length < SZ_4K)
			length = SZ_4K;

		/*
		 * We need to invalidate a higher granularity if start address
		 * is not aligned to length. When start is not aligned with
		 * length we need to find the length large enough to create an
		 * address mask covering the required range.
		 */
		align = roundup_pow_of_two(length);
		start = ALIGN_DOWN(start, align);
		end = ALIGN(end, align);
		length = align;
		/* Grow length until one aligned block covers [start, end) */
		while (start + length < end) {
			length <<= 1;
			start = ALIGN_DOWN(orig_start, length);
		}

		/*
		 * Minimum invalidation size for a 2MB page that the hardware
		 * expects is 16MB
		 */
		if (length >= SZ_2M) {
			length = max_t(u64, SZ_16M, length);
			start = ALIGN_DOWN(orig_start, length);
		}

		/* Sanity: power-of-two length >= 4K, no (2M, 16M) sizes */
		xe_gt_assert(gt, length >= SZ_4K);
		xe_gt_assert(gt, is_power_of_2(length));
		xe_gt_assert(gt, !(length & GENMASK(ilog2(SZ_16M) - 1,
						    ilog2(SZ_2M) + 1)));
		xe_gt_assert(gt, IS_ALIGNED(start, length));

		action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_PAGE_SELECTIVE);
		action[len++] = asid;
		action[len++] = lower_32_bits(start);
		action[len++] = upper_32_bits(start);
		/* Encoded as log2 of the number of 4K pages */
		action[len++] = ilog2(length) - ilog2(SZ_4K);
	}

	xe_gt_assert(gt, len <= MAX_TLB_INVALIDATION_LEN);

	return send_tlb_inval(guc, action, len);
}
168+
169+
static bool tlb_inval_initialized(struct xe_tlb_inval *tlb_inval)
170+
{
171+
struct xe_guc *guc = tlb_inval->private;
172+
173+
return xe_guc_ct_initialized(&guc->ct);
174+
}
175+
176+
static void tlb_inval_flush(struct xe_tlb_inval *tlb_inval)
177+
{
178+
struct xe_guc *guc = tlb_inval->private;
179+
180+
LNL_FLUSH_WORK(&guc->ct.g2h_worker);
181+
}
182+
183+
static long tlb_inval_timeout_delay(struct xe_tlb_inval *tlb_inval)
184+
{
185+
struct xe_guc *guc = tlb_inval->private;
186+
187+
/* this reflects what HW/GuC needs to process TLB inv request */
188+
const long hw_tlb_timeout = HZ / 4;
189+
190+
/* this estimates actual delay caused by the CTB transport */
191+
long delay = xe_guc_ct_queue_proc_time_jiffies(&guc->ct);
192+
193+
return hw_tlb_timeout + 2 * delay;
194+
}
195+
196+
/* GuC backend implementation of the TLB invalidation frontend ops */
static const struct xe_tlb_inval_ops guc_tlb_inval_ops = {
	.all = send_tlb_inval_all,
	.ggtt = send_tlb_inval_ggtt,
	.ppgtt = send_tlb_inval_ppgtt,
	.initialized = tlb_inval_initialized,
	.flush = tlb_inval_flush,
	.timeout_delay = tlb_inval_timeout_delay,
};
204+
205+
/**
206+
* xe_guc_tlb_inval_init_early() - Init GuC TLB invalidation early
207+
* @guc: GuC object
208+
* @tlb_inval: TLB invalidation client
209+
*
210+
* Inititialize GuC TLB invalidation by setting back pointer in TLB invalidation
211+
* client to the GuC and setting GuC backend ops.
212+
*/
213+
void xe_guc_tlb_inval_init_early(struct xe_guc *guc,
214+
struct xe_tlb_inval *tlb_inval)
215+
{
216+
tlb_inval->private = guc;
217+
tlb_inval->ops = &guc_tlb_inval_ops;
218+
}
219+
220+
/**
221+
* xe_guc_tlb_inval_done_handler() - TLB invalidation done handler
222+
* @guc: guc
223+
* @msg: message indicating TLB invalidation done
224+
* @len: length of message
225+
*
226+
* Parse seqno of TLB invalidation, wake any waiters for seqno, and signal any
227+
* invalidation fences for seqno. Algorithm for this depends on seqno being
228+
* received in-order and asserts this assumption.
229+
*
230+
* Return: 0 on success, -EPROTO for malformed messages.
231+
*/
232+
int xe_guc_tlb_inval_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
233+
{
234+
struct xe_gt *gt = guc_to_gt(guc);
235+
236+
if (unlikely(len != 1))
237+
return -EPROTO;
238+
239+
xe_tlb_inval_done_handler(&gt->tlb_inval, msg[0]);
240+
241+
return 0;
242+
}
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
/* SPDX-License-Identifier: MIT */
/*
 * Copyright © 2025 Intel Corporation
 */

#ifndef _XE_GUC_TLB_INVAL_H_
#define _XE_GUC_TLB_INVAL_H_

#include <linux/types.h>

struct xe_guc;
struct xe_tlb_inval;

/* Install the GuC backend ops and back pointer on a TLB invalidation client */
void xe_guc_tlb_inval_init_early(struct xe_guc *guc,
				 struct xe_tlb_inval *tlb_inval);

/* G2H invalidation-done handler; returns 0, or -EPROTO on malformed message */
int xe_guc_tlb_inval_done_handler(struct xe_guc *guc, u32 *msg, u32 len);

#endif

0 commit comments

Comments
 (0)