Skip to content

Commit 82a499d

Browse files
committed
Merge tag 'drm-xe-fixes-2026-02-26' of https://gitlab.freedesktop.org/drm/xe/kernel into drm-fixes
- W/a fix for multi-cast registers (Roper) - Fix xe_sync initialization issues (Shuicheng) Signed-off-by: Dave Airlie <airlied@redhat.com> From: Rodrigo Vivi <rodrigo.vivi@intel.com> Link: https://patch.msgid.link/aaBGHy_0RLGGIBP5@intel.com
2 parents 5e061aa + 0879c3f commit 82a499d

3 files changed

Lines changed: 81 additions & 21 deletions

File tree

drivers/gpu/drm/xe/regs/xe_engine_regs.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,12 @@
9696
#define ENABLE_SEMAPHORE_POLL_BIT REG_BIT(13)
9797

9898
#define RING_CMD_CCTL(base) XE_REG((base) + 0xc4, XE_REG_OPTION_MASKED)
99+
100+
#define CS_MMIO_GROUP_INSTANCE_SELECT(base) XE_REG((base) + 0xcc)
101+
#define SELECTIVE_READ_ADDRESSING REG_BIT(30)
102+
#define SELECTIVE_READ_GROUP REG_GENMASK(29, 23)
103+
#define SELECTIVE_READ_INSTANCE REG_GENMASK(22, 16)
104+
99105
/*
100106
* CMD_CCTL read/write fields take a MOCS value and _not_ a table index.
101107
* The lsb of each can be considered a separate enabling bit for encryption.

drivers/gpu/drm/xe/xe_gt.c

Lines changed: 54 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -210,11 +210,15 @@ static int emit_nop_job(struct xe_gt *gt, struct xe_exec_queue *q)
210210
return ret;
211211
}
212212

213+
/* Dwords required to emit a RMW of a register */
214+
#define EMIT_RMW_DW 20
215+
213216
static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
214217
{
215-
struct xe_reg_sr *sr = &q->hwe->reg_lrc;
218+
struct xe_hw_engine *hwe = q->hwe;
219+
struct xe_reg_sr *sr = &hwe->reg_lrc;
216220
struct xe_reg_sr_entry *entry;
217-
int count_rmw = 0, count = 0, ret;
221+
int count_rmw = 0, count_rmw_mcr = 0, count = 0, ret;
218222
unsigned long idx;
219223
struct xe_bb *bb;
220224
size_t bb_len = 0;
@@ -224,24 +228,44 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
224228
xa_for_each(&sr->xa, idx, entry) {
225229
if (entry->reg.masked || entry->clr_bits == ~0)
226230
++count;
231+
else if (entry->reg.mcr)
232+
++count_rmw_mcr;
227233
else
228234
++count_rmw;
229235
}
230236

231237
if (count)
232238
bb_len += count * 2 + 1;
233239

234-
if (count_rmw)
235-
bb_len += count_rmw * 20 + 7;
240+
/*
241+
* RMW of MCR registers is the same as a normal RMW, except an
242+
* additional LRI (3 dwords) is required per register to steer the read
243+
to a non-terminated instance.
244+
*
245+
* We could probably shorten the batch slightly by eliding the
246+
* steering for consecutive MCR registers that have the same
247+
* group/instance target, but it's not worth the extra complexity to do
248+
* so.
249+
*/
250+
bb_len += count_rmw * EMIT_RMW_DW;
251+
bb_len += count_rmw_mcr * (EMIT_RMW_DW + 3);
252+
253+
/*
254+
* After doing all RMW, we need 7 trailing dwords to clean up,
255+
* plus an additional 3 dwords to reset steering if any of the
256+
* registers were MCR.
257+
*/
258+
if (count_rmw || count_rmw_mcr)
259+
bb_len += 7 + (count_rmw_mcr ? 3 : 0);
236260

237-
if (q->hwe->class == XE_ENGINE_CLASS_RENDER)
261+
if (hwe->class == XE_ENGINE_CLASS_RENDER)
238262
/*
239263
* Big enough to emit all of the context's 3DSTATE via
240264
* xe_lrc_emit_hwe_state_instructions()
241265
*/
242-
bb_len += xe_gt_lrc_size(gt, q->hwe->class) / sizeof(u32);
266+
bb_len += xe_gt_lrc_size(gt, hwe->class) / sizeof(u32);
243267

244-
xe_gt_dbg(gt, "LRC %s WA job: %zu dwords\n", q->hwe->name, bb_len);
268+
xe_gt_dbg(gt, "LRC %s WA job: %zu dwords\n", hwe->name, bb_len);
245269

246270
bb = xe_bb_new(gt, bb_len, false);
247271
if (IS_ERR(bb))
@@ -276,13 +300,23 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
276300
}
277301
}
278302

279-
if (count_rmw) {
280-
/* Emit MI_MATH for each RMW reg: 20dw per reg + 7 trailing dw */
281-
303+
if (count_rmw || count_rmw_mcr) {
282304
xa_for_each(&sr->xa, idx, entry) {
283305
if (entry->reg.masked || entry->clr_bits == ~0)
284306
continue;
285307

308+
if (entry->reg.mcr) {
309+
struct xe_reg_mcr reg = { .__reg.raw = entry->reg.raw };
310+
u8 group, instance;
311+
312+
xe_gt_mcr_get_nonterminated_steering(gt, reg, &group, &instance);
313+
*cs++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(1);
314+
*cs++ = CS_MMIO_GROUP_INSTANCE_SELECT(hwe->mmio_base).addr;
315+
*cs++ = SELECTIVE_READ_ADDRESSING |
316+
REG_FIELD_PREP(SELECTIVE_READ_GROUP, group) |
317+
REG_FIELD_PREP(SELECTIVE_READ_INSTANCE, instance);
318+
}
319+
286320
*cs++ = MI_LOAD_REGISTER_REG | MI_LRR_DST_CS_MMIO;
287321
*cs++ = entry->reg.addr;
288322
*cs++ = CS_GPR_REG(0, 0).addr;
@@ -308,8 +342,9 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
308342
*cs++ = CS_GPR_REG(0, 0).addr;
309343
*cs++ = entry->reg.addr;
310344

311-
xe_gt_dbg(gt, "REG[%#x] = ~%#x|%#x\n",
312-
entry->reg.addr, entry->clr_bits, entry->set_bits);
345+
xe_gt_dbg(gt, "REG[%#x] = ~%#x|%#x%s\n",
346+
entry->reg.addr, entry->clr_bits, entry->set_bits,
347+
entry->reg.mcr ? " (MCR)" : "");
313348
}
314349

315350
/* reset used GPR */
@@ -321,6 +356,13 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
321356
*cs++ = 0;
322357
*cs++ = CS_GPR_REG(0, 2).addr;
323358
*cs++ = 0;
359+
360+
/* reset steering */
361+
if (count_rmw_mcr) {
362+
*cs++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(1);
363+
*cs++ = CS_MMIO_GROUP_INSTANCE_SELECT(q->hwe->mmio_base).addr;
364+
*cs++ = 0;
365+
}
324366
}
325367

326368
cs = xe_lrc_emit_hwe_state_instructions(q, cs);

drivers/gpu/drm/xe/xe_sync.c

Lines changed: 21 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -146,8 +146,10 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
146146

147147
if (!signal) {
148148
sync->fence = drm_syncobj_fence_get(sync->syncobj);
149-
if (XE_IOCTL_DBG(xe, !sync->fence))
150-
return -EINVAL;
149+
if (XE_IOCTL_DBG(xe, !sync->fence)) {
150+
err = -EINVAL;
151+
goto free_sync;
152+
}
151153
}
152154
break;
153155

@@ -167,17 +169,21 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
167169

168170
if (signal) {
169171
sync->chain_fence = dma_fence_chain_alloc();
170-
if (!sync->chain_fence)
171-
return -ENOMEM;
172+
if (!sync->chain_fence) {
173+
err = -ENOMEM;
174+
goto free_sync;
175+
}
172176
} else {
173177
sync->fence = drm_syncobj_fence_get(sync->syncobj);
174-
if (XE_IOCTL_DBG(xe, !sync->fence))
175-
return -EINVAL;
178+
if (XE_IOCTL_DBG(xe, !sync->fence)) {
179+
err = -EINVAL;
180+
goto free_sync;
181+
}
176182

177183
err = dma_fence_chain_find_seqno(&sync->fence,
178184
sync_in.timeline_value);
179185
if (err)
180-
return err;
186+
goto free_sync;
181187
}
182188
break;
183189

@@ -200,8 +206,10 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
200206
if (XE_IOCTL_DBG(xe, IS_ERR(sync->ufence)))
201207
return PTR_ERR(sync->ufence);
202208
sync->ufence_chain_fence = dma_fence_chain_alloc();
203-
if (!sync->ufence_chain_fence)
204-
return -ENOMEM;
209+
if (!sync->ufence_chain_fence) {
210+
err = -ENOMEM;
211+
goto free_sync;
212+
}
205213
sync->ufence_syncobj = ufence_syncobj;
206214
}
207215

@@ -216,6 +224,10 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
216224
sync->timeline_value = sync_in.timeline_value;
217225

218226
return 0;
227+
228+
free_sync:
229+
xe_sync_entry_cleanup(sync);
230+
return err;
219231
}
220232
ALLOW_ERROR_INJECTION(xe_sync_entry_parse, ERRNO);
221233

0 commit comments

Comments
 (0)