@@ -266,6 +266,32 @@ static void amdgpu_vm_bo_done(struct amdgpu_vm_bo_base *vm_bo)
266266 spin_unlock (& vm_bo -> vm -> status_lock );
267267}
268268
269+ /**
270+ * amdgpu_vm_bo_reset_state_machine - reset the vm_bo state machine
271+ * @vm: the VM which state machine to reset
272+ *
273+ * Move all vm_bo object in the VM into a state where they will be updated
274+ * again during validation.
275+ */
276+ static void amdgpu_vm_bo_reset_state_machine (struct amdgpu_vm * vm )
277+ {
278+ struct amdgpu_vm_bo_base * vm_bo , * tmp ;
279+
280+ spin_lock (& vm -> status_lock );
281+ list_splice_init (& vm -> done , & vm -> invalidated );
282+ list_for_each_entry (vm_bo , & vm -> invalidated , vm_status )
283+ vm_bo -> moved = true;
284+ list_for_each_entry_safe (vm_bo , tmp , & vm -> idle , vm_status ) {
285+ struct amdgpu_bo * bo = vm_bo -> bo ;
286+
287+ if (!bo || bo -> tbo .type != ttm_bo_type_kernel )
288+ list_move (& vm_bo -> vm_status , & vm_bo -> vm -> moved );
289+ else if (bo -> parent )
290+ list_move (& vm_bo -> vm_status , & vm_bo -> vm -> relocated );
291+ }
292+ spin_unlock (& vm -> status_lock );
293+ }
294+
269295/**
270296 * amdgpu_vm_bo_base_init - Adds bo to the list of bos associated with the vm
271297 *
@@ -351,6 +377,34 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
351377 spin_unlock (& adev -> mman .bdev .lru_lock );
352378}
353379
380+ /* Create scheduler entities for page table updates */
381+ static int amdgpu_vm_init_entities (struct amdgpu_device * adev ,
382+ struct amdgpu_vm * vm )
383+ {
384+ int r ;
385+
386+ r = drm_sched_entity_init (& vm -> immediate , DRM_SCHED_PRIORITY_NORMAL ,
387+ adev -> vm_manager .vm_pte_scheds ,
388+ adev -> vm_manager .vm_pte_num_scheds , NULL );
389+ if (r )
390+ goto error ;
391+
392+ return drm_sched_entity_init (& vm -> delayed , DRM_SCHED_PRIORITY_NORMAL ,
393+ adev -> vm_manager .vm_pte_scheds ,
394+ adev -> vm_manager .vm_pte_num_scheds , NULL );
395+
396+ error :
397+ drm_sched_entity_destroy (& vm -> immediate );
398+ return r ;
399+ }
400+
401+ /* Destroy the entities for page table updates again */
402+ static void amdgpu_vm_fini_entities (struct amdgpu_vm * vm )
403+ {
404+ drm_sched_entity_destroy (& vm -> immediate );
405+ drm_sched_entity_destroy (& vm -> delayed );
406+ }
407+
354408/**
355409 * amdgpu_vm_validate_pt_bos - validate the page table BOs
356410 *
@@ -373,6 +427,14 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
373427 struct amdgpu_bo * bo ;
374428 int r ;
375429
430+ if (drm_sched_entity_error (& vm -> delayed )) {
431+ amdgpu_vm_bo_reset_state_machine (vm );
432+ amdgpu_vm_fini_entities (vm );
433+ r = amdgpu_vm_init_entities (adev , vm );
434+ if (r )
435+ return r ;
436+ }
437+
376438 spin_lock (& vm -> status_lock );
377439 while (!list_empty (& vm -> evicted )) {
378440 bo_base = list_first_entry (& vm -> evicted ,
@@ -2048,19 +2110,10 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
20482110 INIT_LIST_HEAD (& vm -> pt_freed );
20492111 INIT_WORK (& vm -> pt_free_work , amdgpu_vm_pt_free_work );
20502112
2051- /* create scheduler entities for page table updates */
2052- r = drm_sched_entity_init (& vm -> immediate , DRM_SCHED_PRIORITY_NORMAL ,
2053- adev -> vm_manager .vm_pte_scheds ,
2054- adev -> vm_manager .vm_pte_num_scheds , NULL );
2113+ r = amdgpu_vm_init_entities (adev , vm );
20552114 if (r )
20562115 return r ;
20572116
2058- r = drm_sched_entity_init (& vm -> delayed , DRM_SCHED_PRIORITY_NORMAL ,
2059- adev -> vm_manager .vm_pte_scheds ,
2060- adev -> vm_manager .vm_pte_num_scheds , NULL );
2061- if (r )
2062- goto error_free_immediate ;
2063-
20642117 vm -> pte_support_ats = false;
20652118 vm -> is_compute_context = false;
20662119
@@ -2121,10 +2174,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
21212174error_free_delayed :
21222175 dma_fence_put (vm -> last_tlb_flush );
21232176 dma_fence_put (vm -> last_unlocked );
2124- drm_sched_entity_destroy (& vm -> delayed );
2125-
2126- error_free_immediate :
2127- drm_sched_entity_destroy (& vm -> immediate );
2177+ amdgpu_vm_fini_entities (vm );
21282178
21292179 return r ;
21302180}
@@ -2277,8 +2327,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
22772327 amdgpu_bo_unref (& root );
22782328 WARN_ON (vm -> root .bo );
22792329
2280- drm_sched_entity_destroy (& vm -> immediate );
2281- drm_sched_entity_destroy (& vm -> delayed );
2330+ amdgpu_vm_fini_entities (vm );
22822331
22832332 if (!RB_EMPTY_ROOT (& vm -> va .rb_root )) {
22842333 dev_err (adev -> dev , "still active bo inside vm\n" );
0 commit comments