1717#include "xe_trace.h"
1818#include "regs/xe_guc_regs.h"
1919
20+ #define FENCE_STACK_BIT DMA_FENCE_FLAG_USER_BITS
21+
2022/*
2123 * TLB inval depends on pending commands in the CT queue and then the real
2224 * invalidation time. Double up the time to process full CT queue
@@ -33,6 +35,23 @@ static long tlb_timeout_jiffies(struct xe_gt *gt)
3335 return hw_tlb_timeout + 2 * delay ;
3436}
3537
38+ static void
39+ __invalidation_fence_signal (struct xe_device * xe , struct xe_gt_tlb_invalidation_fence * fence )
40+ {
41+ bool stack = test_bit (FENCE_STACK_BIT , & fence -> base .flags );
42+
43+ trace_xe_gt_tlb_invalidation_fence_signal (xe , fence );
44+ dma_fence_signal (& fence -> base );
45+ if (!stack )
46+ dma_fence_put (& fence -> base );
47+ }
48+
49+ static void
50+ invalidation_fence_signal (struct xe_device * xe , struct xe_gt_tlb_invalidation_fence * fence )
51+ {
52+ list_del (& fence -> link );
53+ __invalidation_fence_signal (xe , fence );
54+ }
3655
3756static void xe_gt_tlb_fence_timeout (struct work_struct * work )
3857{
@@ -54,10 +73,8 @@ static void xe_gt_tlb_fence_timeout(struct work_struct *work)
5473 xe_gt_err (gt , "TLB invalidation fence timeout, seqno=%d recv=%d" ,
5574 fence -> seqno , gt -> tlb_invalidation .seqno_recv );
5675
57- list_del (& fence -> link );
5876 fence -> base .error = - ETIME ;
59- dma_fence_signal (& fence -> base );
60- dma_fence_put (& fence -> base );
77+ invalidation_fence_signal (xe , fence );
6178 }
6279 if (!list_empty (& gt -> tlb_invalidation .pending_fences ))
6380 queue_delayed_work (system_wq ,
@@ -87,21 +104,6 @@ int xe_gt_tlb_invalidation_init(struct xe_gt *gt)
87104 return 0 ;
88105}
89106
90- static void
91- __invalidation_fence_signal (struct xe_device * xe , struct xe_gt_tlb_invalidation_fence * fence )
92- {
93- trace_xe_gt_tlb_invalidation_fence_signal (xe , fence );
94- dma_fence_signal (& fence -> base );
95- dma_fence_put (& fence -> base );
96- }
97-
98- static void
99- invalidation_fence_signal (struct xe_device * xe , struct xe_gt_tlb_invalidation_fence * fence )
100- {
101- list_del (& fence -> link );
102- __invalidation_fence_signal (xe , fence );
103- }
104-
105107/**
106108 * xe_gt_tlb_invalidation_reset - Initialize GT TLB invalidation reset
107109 * @gt: graphics tile
@@ -111,7 +113,6 @@ invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fe
111113void xe_gt_tlb_invalidation_reset (struct xe_gt * gt )
112114{
113115 struct xe_gt_tlb_invalidation_fence * fence , * next ;
114- struct xe_guc * guc = & gt -> uc .guc ;
115116 int pending_seqno ;
116117
117118 /*
@@ -134,7 +135,6 @@ void xe_gt_tlb_invalidation_reset(struct xe_gt *gt)
134135 else
135136 pending_seqno = gt -> tlb_invalidation .seqno - 1 ;
136137 WRITE_ONCE (gt -> tlb_invalidation .seqno_recv , pending_seqno );
137- wake_up_all (& guc -> ct .wq );
138138
139139 list_for_each_entry_safe (fence , next ,
140140 & gt -> tlb_invalidation .pending_fences , link )
@@ -165,6 +165,8 @@ static int send_tlb_invalidation(struct xe_guc *guc,
165165 int seqno ;
166166 int ret ;
167167
168+ xe_gt_assert (gt , fence );
169+
168170 /*
169171 * XXX: The seqno algorithm relies on TLB invalidation being processed
170172 * in order which they currently are, if that changes the algorithm will
@@ -173,10 +175,8 @@ static int send_tlb_invalidation(struct xe_guc *guc,
173175
174176 mutex_lock (& guc -> ct .lock );
175177 seqno = gt -> tlb_invalidation .seqno ;
176- if (fence ) {
177- fence -> seqno = seqno ;
178- trace_xe_gt_tlb_invalidation_fence_send (xe , fence );
179- }
178+ fence -> seqno = seqno ;
179+ trace_xe_gt_tlb_invalidation_fence_send (xe , fence );
180180 action [1 ] = seqno ;
181181 ret = xe_guc_ct_send_locked (& guc -> ct , action , len ,
182182 G2H_LEN_DW_TLB_INVALIDATE , 1 );
@@ -209,7 +209,6 @@ static int send_tlb_invalidation(struct xe_guc *guc,
209209 TLB_INVALIDATION_SEQNO_MAX ;
210210 if (!gt -> tlb_invalidation .seqno )
211211 gt -> tlb_invalidation .seqno = 1 ;
212- ret = seqno ;
213212 }
214213 mutex_unlock (& guc -> ct .lock );
215214
@@ -223,22 +222,24 @@ static int send_tlb_invalidation(struct xe_guc *guc,
223222/**
224223 * xe_gt_tlb_invalidation_guc - Issue a TLB invalidation on this GT for the GuC
225224 * @gt: graphics tile
225+ * @fence: invalidation fence which will be signaled on TLB invalidation
226+ * completion
226227 *
227228 * Issue a TLB invalidation for the GuC. Completion of TLB is asynchronous and
228- * caller can use seqno + xe_gt_tlb_invalidation_wait to wait for completion.
229+ * caller can use the invalidation fence to wait for completion.
229230 *
230- * Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success,
231- * negative error code on error.
231+ * Return: 0 on success, negative error code on error
232232 */
233- static int xe_gt_tlb_invalidation_guc (struct xe_gt * gt )
233+ static int xe_gt_tlb_invalidation_guc (struct xe_gt * gt ,
234+ struct xe_gt_tlb_invalidation_fence * fence )
234235{
235236 u32 action [] = {
236237 XE_GUC_ACTION_TLB_INVALIDATION ,
237238 0 , /* seqno, replaced in send_tlb_invalidation */
238239 MAKE_INVAL_OP (XE_GUC_TLB_INVAL_GUC ),
239240 };
240241
241- return send_tlb_invalidation (& gt -> uc .guc , NULL , action ,
242+ return send_tlb_invalidation (& gt -> uc .guc , fence , action ,
242243 ARRAY_SIZE (action ));
243244}
244245
@@ -257,13 +258,15 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt)
257258
258259 if (xe_guc_ct_enabled (& gt -> uc .guc .ct ) &&
259260 gt -> uc .guc .submission_state .enabled ) {
260- int seqno ;
261+ struct xe_gt_tlb_invalidation_fence fence ;
262+ int ret ;
261263
262- seqno = xe_gt_tlb_invalidation_guc (gt );
263- if (seqno <= 0 )
264- return seqno ;
264+ xe_gt_tlb_invalidation_fence_init (gt , & fence , true);
265+ ret = xe_gt_tlb_invalidation_guc (gt , & fence );
266+ if (ret < 0 )
267+ return ret ;
265268
266- xe_gt_tlb_invalidation_wait ( gt , seqno );
269+ xe_gt_tlb_invalidation_fence_wait ( & fence );
267270 } else if (xe_device_uc_enabled (xe ) && !xe_device_wedged (xe )) {
268271 if (IS_SRIOV_VF (xe ))
269272 return 0 ;
@@ -290,18 +293,16 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt)
290293 *
291294 * @gt: graphics tile
292295 * @fence: invalidation fence which will be signaled on TLB invalidation
293- * completion, can be NULL
296+ * completion
294297 * @start: start address
295298 * @end: end address
296299 * @asid: address space id
297300 *
298301 * Issue a range based TLB invalidation if supported, if not fallback to a full
299- * TLB invalidation. Completion of TLB is asynchronous and caller can either use
300- * the invalidation fence or seqno + xe_gt_tlb_invalidation_wait to wait for
301- * completion.
302+ * TLB invalidation. Completion of TLB is asynchronous and caller can use
303+ * the invalidation fence to wait for completion.
302304 *
303- * Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success,
304- * negative error code on error.
305+ * Return: Negative error code on error, 0 on success
305306 */
306307int xe_gt_tlb_invalidation_range (struct xe_gt * gt ,
307308 struct xe_gt_tlb_invalidation_fence * fence ,
@@ -312,11 +313,11 @@ int xe_gt_tlb_invalidation_range(struct xe_gt *gt,
312313 u32 action [MAX_TLB_INVALIDATION_LEN ];
313314 int len = 0 ;
314315
316+ xe_gt_assert (gt , fence );
317+
315318 /* Execlists not supported */
316319 if (gt_to_xe (gt )-> info .force_execlist ) {
317- if (fence )
318- __invalidation_fence_signal (xe , fence );
319-
320+ __invalidation_fence_signal (xe , fence );
320321 return 0 ;
321322 }
322323
@@ -382,12 +383,10 @@ int xe_gt_tlb_invalidation_range(struct xe_gt *gt,
382383 * @vma: VMA to invalidate
383384 *
384385 * Issue a range based TLB invalidation if supported, if not fallback to a full
385- * TLB invalidation. Completion of TLB is asynchronous and caller can either use
386- * the invalidation fence or seqno + xe_gt_tlb_invalidation_wait to wait for
387- * completion.
386+ * TLB invalidation. Completion of TLB is asynchronous and caller can use
387+ * the invalidation fence to wait for completion.
388388 *
389- * Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success,
390- * negative error code on error.
389+ * Return: Negative error code on error, 0 on success
391390 */
392391int xe_gt_tlb_invalidation_vma (struct xe_gt * gt ,
393392 struct xe_gt_tlb_invalidation_fence * fence ,
@@ -400,43 +399,6 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
400399 xe_vma_vm (vma )-> usm .asid );
401400}
402401
403- /**
404- * xe_gt_tlb_invalidation_wait - Wait for TLB to complete
405- * @gt: graphics tile
406- * @seqno: seqno to wait which was returned from xe_gt_tlb_invalidation
407- *
408- * Wait for tlb_timeout_jiffies() for a TLB invalidation to complete.
409- *
410- * Return: 0 on success, -ETIME on TLB invalidation timeout
411- */
412- int xe_gt_tlb_invalidation_wait (struct xe_gt * gt , int seqno )
413- {
414- struct xe_guc * guc = & gt -> uc .guc ;
415- int ret ;
416-
417- /* Execlists not supported */
418- if (gt_to_xe (gt )-> info .force_execlist )
419- return 0 ;
420-
421- /*
422- * XXX: See above, this algorithm only works if seqno are always in
423- * order
424- */
425- ret = wait_event_timeout (guc -> ct .wq ,
426- tlb_invalidation_seqno_past (gt , seqno ),
427- tlb_timeout_jiffies (gt ));
428- if (!ret ) {
429- struct drm_printer p = xe_gt_err_printer (gt );
430-
431- xe_gt_err (gt , "TLB invalidation time'd out, seqno=%d, recv=%d\n" ,
432- seqno , gt -> tlb_invalidation .seqno_recv );
433- xe_guc_ct_print (& guc -> ct , & p , true);
434- return - ETIME ;
435- }
436-
437- return 0 ;
438- }
439-
440402/**
441403 * xe_guc_tlb_invalidation_done_handler - TLB invalidation done handler
442404 * @guc: guc
@@ -480,12 +442,7 @@ int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
480442 return 0 ;
481443 }
482444
483- /*
484- * wake_up_all() and wait_event_timeout() already have the correct
485- * barriers.
486- */
487445 WRITE_ONCE (gt -> tlb_invalidation .seqno_recv , msg [0 ]);
488- wake_up_all (& guc -> ct .wq );
489446
490447 list_for_each_entry_safe (fence , next ,
491448 & gt -> tlb_invalidation .pending_fences , link ) {
@@ -530,17 +487,22 @@ static const struct dma_fence_ops invalidation_fence_ops = {
530487 * xe_gt_tlb_invalidation_fence_init - Initialize TLB invalidation fence
531488 * @gt: GT
532489 * @fence: TLB invalidation fence to initialize
490+ * @stack: fence is stack variable
533491 *
534492 * Initialize TLB invalidation fence for use
535493 */
536494void xe_gt_tlb_invalidation_fence_init (struct xe_gt * gt ,
537- struct xe_gt_tlb_invalidation_fence * fence )
495+ struct xe_gt_tlb_invalidation_fence * fence ,
496+ bool stack )
538497{
539498 spin_lock_irq (& gt -> tlb_invalidation .lock );
540499 dma_fence_init (& fence -> base , & invalidation_fence_ops ,
541500 & gt -> tlb_invalidation .lock ,
542501 dma_fence_context_alloc (1 ), 1 );
543502 spin_unlock_irq (& gt -> tlb_invalidation .lock );
544503 INIT_LIST_HEAD (& fence -> link );
545- dma_fence_get (& fence -> base );
504+ if (stack )
505+ set_bit (FENCE_STACK_BIT , & fence -> base .flags );
506+ else
507+ dma_fence_get (& fence -> base );
546508}
0 commit comments