@@ -210,11 +210,15 @@ static int emit_nop_job(struct xe_gt *gt, struct xe_exec_queue *q)
210210 return ret ;
211211}
212212
213+ /* Dwords required to emit a RMW of a register */
214+ #define EMIT_RMW_DW 20
215+
213216static int emit_wa_job (struct xe_gt * gt , struct xe_exec_queue * q )
214217{
215- struct xe_reg_sr * sr = & q -> hwe -> reg_lrc ;
218+ struct xe_hw_engine * hwe = q -> hwe ;
219+ struct xe_reg_sr * sr = & hwe -> reg_lrc ;
216220 struct xe_reg_sr_entry * entry ;
217- int count_rmw = 0 , count = 0 , ret ;
221+ int count_rmw = 0 , count_rmw_mcr = 0 , count = 0 , ret ;
218222 unsigned long idx ;
219223 struct xe_bb * bb ;
220224 size_t bb_len = 0 ;
@@ -224,24 +228,44 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
224228 xa_for_each (& sr -> xa , idx , entry ) {
225229 if (entry -> reg .masked || entry -> clr_bits == ~0 )
226230 ++ count ;
231+ else if (entry -> reg .mcr )
232+ ++ count_rmw_mcr ;
227233 else
228234 ++ count_rmw ;
229235 }
230236
231237 if (count )
232238 bb_len += count * 2 + 1 ;
233239
234- if (count_rmw )
235- bb_len += count_rmw * 20 + 7 ;
240+ /*
241+ * RMW of MCR registers is the same as a normal RMW, except an
242+ * additional LRI (3 dwords) is required per register to steer the read
243+ * to a non-terminated instance.
244+ *
245+ * We could probably shorten the batch slightly by eliding the
246+ * steering for consecutive MCR registers that have the same
247+ * group/instance target, but it's not worth the extra complexity to do
248+ * so.
249+ */
250+ bb_len += count_rmw * EMIT_RMW_DW ;
251+ bb_len += count_rmw_mcr * (EMIT_RMW_DW + 3 );
252+
253+ /*
254+ * After doing all RMW, we need 7 trailing dwords to clean up,
255+ * plus an additional 3 dwords to reset steering if any of the
256+ * registers were MCR.
257+ */
258+ if (count_rmw || count_rmw_mcr )
259+ bb_len += 7 + (count_rmw_mcr ? 3 : 0 );
236260
237- if (q -> hwe -> class == XE_ENGINE_CLASS_RENDER )
261+ if (hwe -> class == XE_ENGINE_CLASS_RENDER )
238262 /*
239263 * Big enough to emit all of the context's 3DSTATE via
240264 * xe_lrc_emit_hwe_state_instructions()
241265 */
242- bb_len += xe_gt_lrc_size (gt , q -> hwe -> class ) / sizeof (u32 );
266+ bb_len += xe_gt_lrc_size (gt , hwe -> class ) / sizeof (u32 );
243267
244- xe_gt_dbg (gt , "LRC %s WA job: %zu dwords\n" , q -> hwe -> name , bb_len );
268+ xe_gt_dbg (gt , "LRC %s WA job: %zu dwords\n" , hwe -> name , bb_len );
245269
246270 bb = xe_bb_new (gt , bb_len , false);
247271 if (IS_ERR (bb ))
@@ -276,13 +300,23 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
276300 }
277301 }
278302
279- if (count_rmw ) {
280- /* Emit MI_MATH for each RMW reg: 20dw per reg + 7 trailing dw */
281-
303+ if (count_rmw || count_rmw_mcr ) {
282304 xa_for_each (& sr -> xa , idx , entry ) {
283305 if (entry -> reg .masked || entry -> clr_bits == ~0 )
284306 continue ;
285307
308+ if (entry -> reg .mcr ) {
309+ struct xe_reg_mcr reg = { .__reg .raw = entry -> reg .raw };
310+ u8 group , instance ;
311+
312+ xe_gt_mcr_get_nonterminated_steering (gt , reg , & group , & instance );
313+ * cs ++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS (1 );
314+ * cs ++ = CS_MMIO_GROUP_INSTANCE_SELECT (hwe -> mmio_base ).addr ;
315+ * cs ++ = SELECTIVE_READ_ADDRESSING |
316+ REG_FIELD_PREP (SELECTIVE_READ_GROUP , group ) |
317+ REG_FIELD_PREP (SELECTIVE_READ_INSTANCE , instance );
318+ }
319+
286320 * cs ++ = MI_LOAD_REGISTER_REG | MI_LRR_DST_CS_MMIO ;
287321 * cs ++ = entry -> reg .addr ;
288322 * cs ++ = CS_GPR_REG (0 , 0 ).addr ;
@@ -308,8 +342,9 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
308342 * cs ++ = CS_GPR_REG (0 , 0 ).addr ;
309343 * cs ++ = entry -> reg .addr ;
310344
311- xe_gt_dbg (gt , "REG[%#x] = ~%#x|%#x\n" ,
312- entry -> reg .addr , entry -> clr_bits , entry -> set_bits );
345+ xe_gt_dbg (gt , "REG[%#x] = ~%#x|%#x%s\n" ,
346+ entry -> reg .addr , entry -> clr_bits , entry -> set_bits ,
347+ entry -> reg .mcr ? " (MCR)" : "" );
313348 }
314349
315350 /* reset used GPR */
@@ -321,6 +356,13 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
321356 * cs ++ = 0 ;
322357 * cs ++ = CS_GPR_REG (0 , 2 ).addr ;
323358 * cs ++ = 0 ;
359+
360+ /* reset steering */
361+ if (count_rmw_mcr ) {
362+ * cs ++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS (1 );
363+ * cs ++ = CS_MMIO_GROUP_INSTANCE_SELECT (q -> hwe -> mmio_base ).addr ;
364+ * cs ++ = 0 ;
365+ }
324366 }
325367
326368 cs = xe_lrc_emit_hwe_state_instructions (q , cs );
0 commit comments