Skip to content

Commit 81a45cb

Browse files
committed
drm/xe/migrate: make MI_TLB_INVALIDATE conditional
When clearing VRAM we should be able to skip invalidating the TLBs if we are only using the identity map to access VRAM (which is the common case), since no modifications are made to PTEs on the fly. Also, since we use huge 1G entries within the identity map, there should be a pretty decent chance that the next packet(s) (if also clears) can avoid a tree walk if we don't shoot down the TLBs, such as when we have to process a long stream of clears. For normal moves/copies we usually end up with the src or dst being system memory, meaning we can't rely only on the identity map and will also need to emit PTEs, and so will always require a TLB flush.

v2:
  - Update commit to explain the situation for normal copies (Matt B)
  - Rebase on latest changes

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Himal Prasad Ghimiray <himal.prasad.ghimiray@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Link: https://lore.kernel.org/r/20250808110452.467513-2-matthew.auld@intel.com
1 parent db16f9d commit 81a45cb

2 files changed

Lines changed: 16 additions & 12 deletions

File tree

drivers/gpu/drm/xe/xe_migrate.c

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -904,7 +904,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
904904
goto err;
905905
}
906906

907-
xe_sched_job_add_migrate_flush(job, flush_flags);
907+
xe_sched_job_add_migrate_flush(job, flush_flags | MI_INVALIDATE_TLB);
908908
if (!fence) {
909909
err = xe_sched_job_add_deps(job, src_bo->ttm.base.resv,
910910
DMA_RESV_USAGE_BOOKKEEP);
@@ -1288,11 +1288,13 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
12881288

12891289
size -= clear_L0;
12901290
/* Preemption is enabled again by the ring ops. */
1291-
if (clear_vram && xe_migrate_allow_identity(clear_L0, &src_it))
1291+
if (clear_vram && xe_migrate_allow_identity(clear_L0, &src_it)) {
12921292
xe_res_next(&src_it, clear_L0);
1293-
else
1294-
emit_pte(m, bb, clear_L0_pt, clear_vram, clear_only_system_ccs,
1295-
&src_it, clear_L0, dst);
1293+
} else {
1294+
emit_pte(m, bb, clear_L0_pt, clear_vram,
1295+
clear_only_system_ccs, &src_it, clear_L0, dst);
1296+
flush_flags |= MI_INVALIDATE_TLB;
1297+
}
12961298

12971299
bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
12981300
update_idx = bb->len;
@@ -1303,7 +1305,7 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
13031305
if (xe_migrate_needs_ccs_emit(xe)) {
13041306
emit_copy_ccs(gt, bb, clear_L0_ofs, true,
13051307
m->cleared_mem_ofs, false, clear_L0);
1306-
flush_flags = MI_FLUSH_DW_CCS;
1308+
flush_flags |= MI_FLUSH_DW_CCS;
13071309
}
13081310

13091311
job = xe_bb_create_migration_job(m->q, bb,
@@ -1638,6 +1640,8 @@ __xe_migrate_update_pgtables(struct xe_migrate *m,
16381640
goto err_sa;
16391641
}
16401642

1643+
xe_sched_job_add_migrate_flush(job, MI_INVALIDATE_TLB);
1644+
16411645
if (ops->pre_commit) {
16421646
pt_update->job = job;
16431647
err = ops->pre_commit(pt_update);
@@ -1863,7 +1867,7 @@ static struct dma_fence *xe_migrate_vram(struct xe_migrate *m,
18631867
goto err;
18641868
}
18651869

1866-
xe_sched_job_add_migrate_flush(job, 0);
1870+
xe_sched_job_add_migrate_flush(job, MI_INVALIDATE_TLB);
18671871

18681872
mutex_lock(&m->job_mutex);
18691873
xe_sched_job_arm(job);

drivers/gpu/drm/xe/xe_ring_ops.c

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -110,10 +110,10 @@ static int emit_bb_start(u64 batch_addr, u32 ppgtt_flag, u32 *dw, int i)
110110
return i;
111111
}
112112

113-
static int emit_flush_invalidate(u32 addr, u32 val, u32 *dw, int i)
113+
static int emit_flush_invalidate(u32 addr, u32 val, u32 flush_flags, u32 *dw, int i)
114114
{
115-
dw[i++] = MI_FLUSH_DW | MI_INVALIDATE_TLB | MI_FLUSH_DW_OP_STOREDW |
116-
MI_FLUSH_IMM_DW;
115+
dw[i++] = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW |
116+
MI_FLUSH_IMM_DW | (flush_flags & MI_INVALIDATE_TLB) ?: 0;
117117

118118
dw[i++] = addr | MI_FLUSH_DW_USE_GTT;
119119
dw[i++] = 0;
@@ -410,13 +410,13 @@ static void emit_migration_job_gen12(struct xe_sched_job *job,
410410
i = emit_bb_start(job->ptrs[0].batch_addr, BIT(8), dw, i);
411411

412412
dw[i++] = preparser_disable(true);
413-
i = emit_flush_invalidate(saddr, seqno, dw, i);
413+
i = emit_flush_invalidate(saddr, seqno, job->migrate_flush_flags, dw, i);
414414
dw[i++] = preparser_disable(false);
415415

416416
i = emit_bb_start(job->ptrs[1].batch_addr, BIT(8), dw, i);
417417

418418
i = emit_flush_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno,
419-
MI_INVALIDATE_TLB | job->migrate_flush_flags,
419+
job->migrate_flush_flags,
420420
dw, i);
421421

422422
i = emit_user_interrupt(dw, i);

0 commit comments

Comments (0)