@@ -120,13 +120,15 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f,
 		am_fence = kzalloc(sizeof(*am_fence), GFP_KERNEL);
 		if (!am_fence)
 			return -ENOMEM;
+		am_fence->context = 0;
 	} else {
 		am_fence = af;
 	}
 	fence = &am_fence->base;
 	am_fence->ring = ring;
 
 	seq = ++ring->fence_drv.sync_seq;
+	am_fence->seq = seq;
 	if (af) {
 		dma_fence_init(fence, &amdgpu_job_fence_ops,
 			       &ring->fence_drv.lock,
@@ -141,6 +143,7 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f,
 
 	amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
 			       seq, flags | AMDGPU_FENCE_FLAG_INT);
+	amdgpu_fence_save_wptr(fence);
 	pm_runtime_get_noresume(adev_to_drm(adev)->dev);
 	ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask];
 	if (unlikely(rcu_dereference_protected(*ptr, 1))) {
@@ -253,6 +256,7 @@ bool amdgpu_fence_process(struct amdgpu_ring *ring)
 
 	do {
 		struct dma_fence *fence, **ptr;
+		struct amdgpu_fence *am_fence;
 
 		++last_seq;
 		last_seq &= drv->num_fences_mask;
@@ -265,6 +269,12 @@ bool amdgpu_fence_process(struct amdgpu_ring *ring)
 		if (!fence)
 			continue;
 
+		/* Save the wptr in the fence driver so we know what the last processed
+		 * wptr was.  This is required for re-emitting the ring state for
+		 * queues that are reset but are not guilty and thus have no guilty fence.
+		 */
+		am_fence = container_of(fence, struct amdgpu_fence, base);
+		drv->signalled_wptr = am_fence->wptr;
 		dma_fence_signal(fence);
 		dma_fence_put(fence);
 		pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
@@ -727,6 +737,86 @@ void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring)
 	amdgpu_fence_process(ring);
 }
 
+
+/**
+ * Kernel queue reset handling
+ *
+ * The driver can reset individual queues for most engines, but those queues
+ * may contain work from multiple contexts.  Resetting the queue will lose
+ * all of that state.  In order to minimize the collateral damage, the driver
+ * saves the ring contents which are not associated with the guilty context
+ * prior to resetting the queue.  After resetting the queue, the queue
+ * contents from the other contexts are re-emitted to the ring so that they
+ * can be processed by the engine.  To handle this, we save the queue's write
+ * pointer (wptr) in the fences associated with each context.  If we get a
+ * queue timeout, we can then use the wptrs from the fences to determine
+ * which data needs to be saved out of the queue's ring buffer.
+ */
+
+/**
+ * amdgpu_fence_driver_guilty_force_completion - force signal of specified sequence
+ *
+ * @fence: fence of the ring to signal
+ *
+ */
+void amdgpu_fence_driver_guilty_force_completion(struct amdgpu_fence *fence)
+{
+	dma_fence_set_error(&fence->base, -ETIME);
+	amdgpu_fence_write(fence->ring, fence->seq);
+	amdgpu_fence_process(fence->ring);
+}
+
+void amdgpu_fence_save_wptr(struct dma_fence *fence)
+{
+	struct amdgpu_fence *am_fence = container_of(fence, struct amdgpu_fence, base);
+
+	am_fence->wptr = am_fence->ring->wptr;
+}
+
+static void amdgpu_ring_backup_unprocessed_command(struct amdgpu_ring *ring,
+						    u64 start_wptr, u32 end_wptr)
+{
+	unsigned int first_idx = start_wptr & ring->buf_mask;
+	unsigned int last_idx = end_wptr & ring->buf_mask;
+	unsigned int i;
+
+	/* Backup the contents of the ring buffer. */
+	for (i = first_idx; i != last_idx; ++i, i &= ring->buf_mask)
+		ring->ring_backup[ring->ring_backup_entries_to_copy++] = ring->ring[i];
+}
+
+void amdgpu_ring_backup_unprocessed_commands(struct amdgpu_ring *ring,
+					      struct amdgpu_fence *guilty_fence)
+{
+	struct dma_fence *unprocessed;
+	struct dma_fence __rcu **ptr;
+	struct amdgpu_fence *fence;
+	u64 wptr, i, seqno;
+
+	seqno = amdgpu_fence_read(ring);
+	wptr = ring->fence_drv.signalled_wptr;
+	ring->ring_backup_entries_to_copy = 0;
+
+	for (i = seqno + 1; i <= ring->fence_drv.sync_seq; ++i) {
+		ptr = &ring->fence_drv.fences[i & ring->fence_drv.num_fences_mask];
+		rcu_read_lock();
+		unprocessed = rcu_dereference(*ptr);
+
+		if (unprocessed && !dma_fence_is_signaled(unprocessed)) {
+			fence = container_of(unprocessed, struct amdgpu_fence, base);
+
+			/* save everything if the ring is not guilty, otherwise
+			 * just save the content from other contexts.
+			 */
+			if (!guilty_fence || (fence->context != guilty_fence->context))
+				amdgpu_ring_backup_unprocessed_command(ring, wptr,
+								       fence->wptr);
+			wptr = fence->wptr;
+		}
+		rcu_read_unlock();
+	}
+}
+
 /*
  * Common fence implementation
  */
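
The "Kernel queue reset handling" comment added above describes how these helpers are meant to be combined during a per-queue reset: back up the unprocessed, non-guilty ring contents, reset the queue, force-complete the guilty fence, and re-emit the saved commands. The sketch below is not part of this patch; it is a rough illustration of a possible caller, where example_ring_reset() and example_hw_queue_reset() are assumed names and locking/error handling are simplified. It only relies on the helpers introduced here plus the existing amdgpu_ring_alloc()/amdgpu_ring_write()/amdgpu_ring_commit() ring APIs.

/* Hypothetical usage sketch, not part of this patch.  The reset entry point
 * and the hardware reset helper are assumed names; the real per-IP ring
 * reset callbacks live elsewhere in the driver.
 */
static int example_ring_reset(struct amdgpu_ring *ring,
			      struct amdgpu_fence *guilty_fence)
{
	unsigned int i;
	int r;

	/* Copy every unsignalled command that does not belong to the guilty
	 * context out of the ring buffer, starting from the last signalled
	 * wptr recorded by amdgpu_fence_process().
	 */
	amdgpu_ring_backup_unprocessed_commands(ring, guilty_fence);

	/* Reset the queue hardware (IP specific, assumed helper). */
	r = example_hw_queue_reset(ring);
	if (r)
		return r;

	/* Mark the guilty fence as timed out and force it to signal so that
	 * waiters on the guilty context are released.
	 */
	if (guilty_fence)
		amdgpu_fence_driver_guilty_force_completion(guilty_fence);

	/* Re-emit the backed-up dwords from the innocent contexts. */
	r = amdgpu_ring_alloc(ring, ring->ring_backup_entries_to_copy);
	if (r)
		return r;
	for (i = 0; i < ring->ring_backup_entries_to_copy; i++)
		amdgpu_ring_write(ring, ring->ring_backup[i]);
	amdgpu_ring_commit(ring);

	return 0;
}

The key invariant this relies on is that amdgpu_fence_process() keeps fence_drv.signalled_wptr up to date, so the backup loop knows where the unprocessed region of the ring buffer begins.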