Skip to content

Commit 43d37df

Browse files
mattrope authored and rodrigovivi committed
drm/xe/wa: Steer RMW of MCR registers while building default LRC
When generating the default LRC, if a register is not masked, we apply any save-restore programming necessary via a read-modify-write sequence that will ensure we only update the relevant bits/fields without clobbering the rest of the register. However some of the registers that need to be updated might be MCR registers which require steering to a non-terminated instance to ensure we can read back a valid, non-zero value. The steering of reads originating from a command streamer is controlled by register CS_MMIO_GROUP_INSTANCE_SELECT. Emit additional MI_LRI commands to update the steering before any RMW of an MCR register to ensure the reads are performed properly. Note that needing to perform a RMW of an MCR register while building the default LRC is pretty rare. Most of the MCR registers that are part of an engine's LRCs are also masked registers, so no RMW is necessary. Fixes: f2f9098 ("drm/xe: Avoid reading RMW registers in emit_wa_job") Cc: Michal Wajdeczko <michal.wajdeczko@intel.com> Reviewed-by: Balasubramani Vivekanandan <balasubramani.vivekanandan@intel.com> Link: https://patch.msgid.link/20260206223058.387014-2-matthew.d.roper@intel.com Signed-off-by: Matt Roper <matthew.d.roper@intel.com> (cherry picked from commit 6c2e331) Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
1 parent 6de23f8 commit 43d37df

2 files changed

Lines changed: 60 additions & 12 deletions

File tree

drivers/gpu/drm/xe/regs/xe_engine_regs.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,12 @@
9696
#define ENABLE_SEMAPHORE_POLL_BIT REG_BIT(13)
9797

9898
#define RING_CMD_CCTL(base) XE_REG((base) + 0xc4, XE_REG_OPTION_MASKED)
99+
100+
#define CS_MMIO_GROUP_INSTANCE_SELECT(base) XE_REG((base) + 0xcc)
101+
#define SELECTIVE_READ_ADDRESSING REG_BIT(30)
102+
#define SELECTIVE_READ_GROUP REG_GENMASK(29, 23)
103+
#define SELECTIVE_READ_INSTANCE REG_GENMASK(22, 16)
104+
99105
/*
100106
* CMD_CCTL read/write fields take a MOCS value and _not_ a table index.
101107
* The lsb of each can be considered a separate enabling bit for encryption.

drivers/gpu/drm/xe/xe_gt.c

Lines changed: 54 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -210,11 +210,15 @@ static int emit_nop_job(struct xe_gt *gt, struct xe_exec_queue *q)
210210
return ret;
211211
}
212212

213+
/* Dwords required to emit a RMW of a register */
214+
#define EMIT_RMW_DW 20
215+
213216
static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
214217
{
215-
struct xe_reg_sr *sr = &q->hwe->reg_lrc;
218+
struct xe_hw_engine *hwe = q->hwe;
219+
struct xe_reg_sr *sr = &hwe->reg_lrc;
216220
struct xe_reg_sr_entry *entry;
217-
int count_rmw = 0, count = 0, ret;
221+
int count_rmw = 0, count_rmw_mcr = 0, count = 0, ret;
218222
unsigned long idx;
219223
struct xe_bb *bb;
220224
size_t bb_len = 0;
@@ -224,24 +228,44 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
224228
xa_for_each(&sr->xa, idx, entry) {
225229
if (entry->reg.masked || entry->clr_bits == ~0)
226230
++count;
231+
else if (entry->reg.mcr)
232+
++count_rmw_mcr;
227233
else
228234
++count_rmw;
229235
}
230236

231237
if (count)
232238
bb_len += count * 2 + 1;
233239

234-
if (count_rmw)
235-
bb_len += count_rmw * 20 + 7;
240+
/*
241+
* RMW of MCR registers is the same as a normal RMW, except an
242+
* additional LRI (3 dwords) is required per register to steer the read
243+
* to a non-terminated instance.
244+
*
245+
* We could probably shorten the batch slightly by eliding the
246+
* steering for consecutive MCR registers that have the same
247+
* group/instance target, but it's not worth the extra complexity to do
248+
* so.
249+
*/
250+
bb_len += count_rmw * EMIT_RMW_DW;
251+
bb_len += count_rmw_mcr * (EMIT_RMW_DW + 3);
252+
253+
/*
254+
* After doing all RMW, we need 7 trailing dwords to clean up,
255+
* plus an additional 3 dwords to reset steering if any of the
256+
* registers were MCR.
257+
*/
258+
if (count_rmw || count_rmw_mcr)
259+
bb_len += 7 + (count_rmw_mcr ? 3 : 0);
236260

237-
if (q->hwe->class == XE_ENGINE_CLASS_RENDER)
261+
if (hwe->class == XE_ENGINE_CLASS_RENDER)
238262
/*
239263
* Big enough to emit all of the context's 3DSTATE via
240264
* xe_lrc_emit_hwe_state_instructions()
241265
*/
242-
bb_len += xe_gt_lrc_size(gt, q->hwe->class) / sizeof(u32);
266+
bb_len += xe_gt_lrc_size(gt, hwe->class) / sizeof(u32);
243267

244-
xe_gt_dbg(gt, "LRC %s WA job: %zu dwords\n", q->hwe->name, bb_len);
268+
xe_gt_dbg(gt, "LRC %s WA job: %zu dwords\n", hwe->name, bb_len);
245269

246270
bb = xe_bb_new(gt, bb_len, false);
247271
if (IS_ERR(bb))
@@ -276,13 +300,23 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
276300
}
277301
}
278302

279-
if (count_rmw) {
280-
/* Emit MI_MATH for each RMW reg: 20dw per reg + 7 trailing dw */
281-
303+
if (count_rmw || count_rmw_mcr) {
282304
xa_for_each(&sr->xa, idx, entry) {
283305
if (entry->reg.masked || entry->clr_bits == ~0)
284306
continue;
285307

308+
if (entry->reg.mcr) {
309+
struct xe_reg_mcr reg = { .__reg.raw = entry->reg.raw };
310+
u8 group, instance;
311+
312+
xe_gt_mcr_get_nonterminated_steering(gt, reg, &group, &instance);
313+
*cs++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(1);
314+
*cs++ = CS_MMIO_GROUP_INSTANCE_SELECT(hwe->mmio_base).addr;
315+
*cs++ = SELECTIVE_READ_ADDRESSING |
316+
REG_FIELD_PREP(SELECTIVE_READ_GROUP, group) |
317+
REG_FIELD_PREP(SELECTIVE_READ_INSTANCE, instance);
318+
}
319+
286320
*cs++ = MI_LOAD_REGISTER_REG | MI_LRR_DST_CS_MMIO;
287321
*cs++ = entry->reg.addr;
288322
*cs++ = CS_GPR_REG(0, 0).addr;
@@ -308,8 +342,9 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
308342
*cs++ = CS_GPR_REG(0, 0).addr;
309343
*cs++ = entry->reg.addr;
310344

311-
xe_gt_dbg(gt, "REG[%#x] = ~%#x|%#x\n",
312-
entry->reg.addr, entry->clr_bits, entry->set_bits);
345+
xe_gt_dbg(gt, "REG[%#x] = ~%#x|%#x%s\n",
346+
entry->reg.addr, entry->clr_bits, entry->set_bits,
347+
entry->reg.mcr ? " (MCR)" : "");
313348
}
314349

315350
/* reset used GPR */
@@ -321,6 +356,13 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
321356
*cs++ = 0;
322357
*cs++ = CS_GPR_REG(0, 2).addr;
323358
*cs++ = 0;
359+
360+
/* reset steering */
361+
if (count_rmw_mcr) {
362+
*cs++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(1);
363+
*cs++ = CS_MMIO_GROUP_INSTANCE_SELECT(q->hwe->mmio_base).addr;
364+
*cs++ = 0;
365+
}
324366
}
325367

326368
cs = xe_lrc_emit_hwe_state_instructions(q, cs);

0 commit comments

Comments
 (0)