@@ -706,108 +706,106 @@ amdgpu_userq_restore_all(struct amdgpu_userq_mgr *uq_mgr)
706706 return ret ;
707707}
708708
709+ static int amdgpu_userq_validate_vm (void * param , struct amdgpu_bo * bo )
710+ {
711+ struct ttm_operation_ctx ctx = { false, false };
712+
713+ amdgpu_bo_placement_from_domain (bo , bo -> allowed_domains );
714+ return ttm_bo_validate (& bo -> tbo , & bo -> placement , & ctx );
715+ }
716+
717+ /* Handle all BOs on the invalidated list, validate them and update the PTs */
709718static int
710- amdgpu_userq_validate_vm_bo (void * _unused , struct amdgpu_bo * bo )
719+ amdgpu_userq_bo_validate (struct amdgpu_device * adev , struct drm_exec * exec ,
720+ struct amdgpu_vm * vm )
711721{
712722 struct ttm_operation_ctx ctx = { false, false };
723+ struct amdgpu_bo_va * bo_va ;
724+ struct amdgpu_bo * bo ;
713725 int ret ;
714726
715- amdgpu_bo_placement_from_domain (bo , bo -> allowed_domains );
727+ spin_lock (& vm -> status_lock );
728+ while (!list_empty (& vm -> invalidated )) {
729+ bo_va = list_first_entry (& vm -> invalidated ,
730+ struct amdgpu_bo_va ,
731+ base .vm_status );
732+ spin_unlock (& vm -> status_lock );
716733
717- ret = ttm_bo_validate (& bo -> tbo , & bo -> placement , & ctx );
718- if (ret )
719- DRM_ERROR ("Fail to validate\n" );
734+ bo = bo_va -> base .bo ;
735+ ret = drm_exec_prepare_obj (exec , & bo -> tbo .base , 2 );
736+ if (unlikely (ret ))
737+ return ret ;
720738
721- return ret ;
739+ amdgpu_bo_placement_from_domain (bo , bo -> allowed_domains );
740+ ret = ttm_bo_validate (& bo -> tbo , & bo -> placement , & ctx );
741+ if (ret )
742+ return ret ;
743+
744+ /* This moves the bo_va to the done list */
745+ ret = amdgpu_vm_bo_update (adev , bo_va , false);
746+ if (ret )
747+ return ret ;
748+
749+ spin_lock (& vm -> status_lock );
750+ }
751+ spin_unlock (& vm -> status_lock );
752+
753+ return 0 ;
722754}
723755
756+ /* Make sure the whole VM is ready to be used */
724757static int
725- amdgpu_userq_validate_bos (struct amdgpu_userq_mgr * uq_mgr )
758+ amdgpu_userq_vm_validate (struct amdgpu_userq_mgr * uq_mgr )
726759{
727760 struct amdgpu_fpriv * fpriv = uq_mgr_to_fpriv (uq_mgr );
728- struct amdgpu_vm * vm = & fpriv -> vm ;
729761 struct amdgpu_device * adev = uq_mgr -> adev ;
762+ struct amdgpu_vm * vm = & fpriv -> vm ;
730763 struct amdgpu_bo_va * bo_va ;
731- struct ww_acquire_ctx * ticket ;
732764 struct drm_exec exec ;
733- struct amdgpu_bo * bo ;
734- struct dma_resv * resv ;
735- bool clear , unlock ;
736- int ret = 0 ;
765+ int ret ;
737766
738767 drm_exec_init (& exec , DRM_EXEC_IGNORE_DUPLICATES , 0 );
739768 drm_exec_until_all_locked (& exec ) {
740- ret = amdgpu_vm_lock_pd (vm , & exec , 2 );
769+ ret = amdgpu_vm_lock_pd (vm , & exec , 1 );
741770 drm_exec_retry_on_contention (& exec );
742- if (unlikely (ret )) {
743- drm_file_err (uq_mgr -> file , "Failed to lock PD\n" );
771+ if (unlikely (ret ))
744772 goto unlock_all ;
745- }
746-
747- /* Lock the done list */
748- list_for_each_entry (bo_va , & vm -> done , base .vm_status ) {
749- bo = bo_va -> base .bo ;
750- if (!bo )
751- continue ;
752773
753- ret = drm_exec_lock_obj (& exec , & bo -> tbo .base );
754- drm_exec_retry_on_contention (& exec );
755- if (unlikely (ret ))
756- goto unlock_all ;
757- }
758- }
759-
760- spin_lock (& vm -> status_lock );
761- while (!list_empty (& vm -> moved )) {
762- bo_va = list_first_entry (& vm -> moved , struct amdgpu_bo_va ,
763- base .vm_status );
764- spin_unlock (& vm -> status_lock );
765-
766- /* Per VM BOs never need to bo cleared in the page tables */
767- ret = amdgpu_vm_bo_update (adev , bo_va , false);
768- if (ret )
774+ ret = amdgpu_vm_lock_done_list (vm , & exec , 1 );
775+ drm_exec_retry_on_contention (& exec );
776+ if (unlikely (ret ))
769777 goto unlock_all ;
770- spin_lock (& vm -> status_lock );
771- }
772-
773- ticket = & exec .ticket ;
774- while (!list_empty (& vm -> invalidated )) {
775- bo_va = list_first_entry (& vm -> invalidated , struct amdgpu_bo_va ,
776- base .vm_status );
777- resv = bo_va -> base .bo -> tbo .base .resv ;
778- spin_unlock (& vm -> status_lock );
779778
780- bo = bo_va -> base .bo ;
781- ret = amdgpu_userq_validate_vm_bo (NULL , bo );
782- if (ret ) {
783- drm_file_err (uq_mgr -> file , "Failed to validate BO\n" );
779+ /* This validates PDs, PTs and per VM BOs */
780+ ret = amdgpu_vm_validate (adev , vm , NULL ,
781+ amdgpu_userq_validate_vm ,
782+ NULL );
783+ if (unlikely (ret ))
784784 goto unlock_all ;
785- }
786785
787- /* Try to reserve the BO to avoid clearing its ptes */
788- if (!adev -> debug_vm && dma_resv_trylock (resv )) {
789- clear = false;
790- unlock = true;
791- /* The caller is already holding the reservation lock */
792- } else if (dma_resv_locking_ctx (resv ) == ticket ) {
793- clear = false;
794- unlock = false;
795- /* Somebody else is using the BO right now */
796- } else {
797- clear = true;
798- unlock = false;
799- }
786+ /* This locks and validates the remaining evicted BOs */
787+ ret = amdgpu_userq_bo_validate (adev , & exec , vm );
788+ drm_exec_retry_on_contention (& exec );
789+ if (unlikely (ret ))
790+ goto unlock_all ;
791+ }
800792
801- ret = amdgpu_vm_bo_update (adev , bo_va , clear );
793+ ret = amdgpu_vm_handle_moved (adev , vm , NULL );
794+ if (ret )
795+ goto unlock_all ;
802796
803- if (unlock )
804- dma_resv_unlock (resv );
805- if (ret )
806- goto unlock_all ;
797+ ret = amdgpu_vm_update_pdes (adev , vm , false);
798+ if (ret )
799+ goto unlock_all ;
807800
808- spin_lock (& vm -> status_lock );
809- }
810- spin_unlock (& vm -> status_lock );
801+ /*
802+ * We need to wait for all VM updates to finish before restarting the
803+ * queues. Using the done list like that is now ok since everything is
804+ * locked in place.
805+ */
806+ list_for_each_entry (bo_va , & vm -> done , base .vm_status )
807+ dma_fence_wait (bo_va -> last_pt_update , false);
808+ dma_fence_wait (vm -> last_update , false);
811809
812810 ret = amdgpu_eviction_fence_replace_fence (& fpriv -> evf_mgr , & exec );
813811 if (ret )
@@ -828,7 +826,7 @@ static void amdgpu_userq_restore_worker(struct work_struct *work)
828826
829827 mutex_lock (& uq_mgr -> userq_mutex );
830828
831- ret = amdgpu_userq_validate_bos (uq_mgr );
829+ ret = amdgpu_userq_vm_validate (uq_mgr );
832830 if (ret ) {
833831 drm_file_err (uq_mgr -> file , "Failed to validate BOs to restore\n" );
834832 goto unlock ;
0 commit comments