Skip to content

Commit 3d98a71

Browse files
mbrost05 authored and Thomas Hellström committed
drm/xe/vf: Start re-emission from first unsignaled job during VF migration
The LRC software ring tail is reset to the first unsignaled pending job's head.

Fix the re-emission logic to begin submitting from the first unsignaled job detected, rather than scanning all pending jobs, which can cause imbalance.

v2:
 - Include missing local changes
v3:
 - s/skip_replay/restore_replay (Tomasz)

Fixes: c25c101 ("drm/xe/vf: Replay GuC submission state on pause / unpause")
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Tomasz Lis <tomasz.lis@intel.com>
Link: https://patch.msgid.link/20251121152750.240557-1-matthew.brost@intel.com
(cherry picked from commit 00937fe)
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
1 parent 14a8d83 commit 3d98a71

3 files changed

Lines changed: 19 additions & 15 deletions

File tree

drivers/gpu/drm/xe/xe_gpu_scheduler.h

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -54,13 +54,14 @@ static inline void xe_sched_tdr_queue_imm(struct xe_gpu_scheduler *sched)
5454
static inline void xe_sched_resubmit_jobs(struct xe_gpu_scheduler *sched)
5555
{
5656
struct drm_sched_job *s_job;
57+
bool restore_replay = false;
5758

5859
list_for_each_entry(s_job, &sched->base.pending_list, list) {
5960
struct drm_sched_fence *s_fence = s_job->s_fence;
6061
struct dma_fence *hw_fence = s_fence->parent;
6162

62-
if (to_xe_sched_job(s_job)->skip_emit ||
63-
(hw_fence && !dma_fence_is_signaled(hw_fence)))
63+
restore_replay |= to_xe_sched_job(s_job)->restore_replay;
64+
if (restore_replay || (hw_fence && !dma_fence_is_signaled(hw_fence)))
6465
sched->base.ops->run_job(s_job);
6566
}
6667
}

drivers/gpu/drm/xe/xe_guc_submit.c

Lines changed: 14 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -822,7 +822,7 @@ static void submit_exec_queue(struct xe_exec_queue *q, struct xe_sched_job *job)
822822

823823
xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));
824824

825-
if (!job->skip_emit || job->last_replay) {
825+
if (!job->restore_replay || job->last_replay) {
826826
if (xe_exec_queue_is_parallel(q))
827827
wq_item_append(q);
828828
else
@@ -881,10 +881,10 @@ guc_exec_queue_run_job(struct drm_sched_job *drm_job)
881881
if (!killed_or_banned_or_wedged && !xe_sched_job_is_error(job)) {
882882
if (!exec_queue_registered(q))
883883
register_exec_queue(q, GUC_CONTEXT_NORMAL);
884-
if (!job->skip_emit)
884+
if (!job->restore_replay)
885885
q->ring_ops->emit_job(job);
886886
submit_exec_queue(q, job);
887-
job->skip_emit = false;
887+
job->restore_replay = false;
888888
}
889889

890890
/*
@@ -2152,6 +2152,8 @@ static void guc_exec_queue_pause(struct xe_guc *guc, struct xe_exec_queue *q)
21522152

21532153
job = xe_sched_first_pending_job(sched);
21542154
if (job) {
2155+
job->restore_replay = true;
2156+
21552157
/*
21562158
* Adjust software tail so jobs submitted overwrite previous
21572159
* position in ring buffer with new GGTT addresses.
@@ -2241,17 +2243,18 @@ static void guc_exec_queue_unpause_prepare(struct xe_guc *guc,
22412243
struct xe_exec_queue *q)
22422244
{
22432245
struct xe_gpu_scheduler *sched = &q->guc->sched;
2244-
struct drm_sched_job *s_job;
22452246
struct xe_sched_job *job = NULL;
2247+
bool restore_replay = false;
22462248

2247-
list_for_each_entry(s_job, &sched->base.pending_list, list) {
2248-
job = to_xe_sched_job(s_job);
2249-
2250-
xe_gt_dbg(guc_to_gt(guc), "Replay JOB - guc_id=%d, seqno=%d",
2251-
q->guc->id, xe_sched_job_seqno(job));
2249+
list_for_each_entry(job, &sched->base.pending_list, drm.list) {
2250+
restore_replay |= job->restore_replay;
2251+
if (restore_replay) {
2252+
xe_gt_dbg(guc_to_gt(guc), "Replay JOB - guc_id=%d, seqno=%d",
2253+
q->guc->id, xe_sched_job_seqno(job));
22522254

2253-
q->ring_ops->emit_job(job);
2254-
job->skip_emit = true;
2255+
q->ring_ops->emit_job(job);
2256+
job->restore_replay = true;
2257+
}
22552258
}
22562259

22572260
if (job)

drivers/gpu/drm/xe/xe_sched_job_types.h

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -63,8 +63,8 @@ struct xe_sched_job {
6363
bool ring_ops_flush_tlb;
6464
/** @ggtt: mapped in ggtt. */
6565
bool ggtt;
66-
/** @skip_emit: skip emitting the job */
67-
bool skip_emit;
66+
/** @restore_replay: job being replayed for restore */
67+
bool restore_replay;
6868
/** @last_replay: last job being replayed */
6969
bool last_replay;
7070
/** @ptrs: per instance pointers. */

0 commit comments

Comments
 (0)