@@ -4294,6 +4294,11 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	mutex_init(&adev->gfx.reset_sem_mutex);
 	/* Initialize the mutex for cleaner shader isolation between GFX and compute processes */
 	mutex_init(&adev->enforce_isolation_mutex);
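+	/* Per-partition isolation state starts with a signaled stub fence and empty sync containers */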
+	for (i = 0; i < MAX_XCP; ++i) {
+		adev->isolation[i].spearhead = dma_fence_get_stub();
+		amdgpu_sync_create(&adev->isolation[i].active);
+		amdgpu_sync_create(&adev->isolation[i].prev);
+	}
 	mutex_init(&adev->gfx.kfd_sch_mutex);
 	mutex_init(&adev->gfx.workload_profile_mutex);
 	mutex_init(&adev->vcn.workload_profile_mutex);
@@ -4799,14 +4804,19 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev)
 
 void amdgpu_device_fini_sw(struct amdgpu_device *adev)
 {
-	int idx;
+	int i, idx;
 	bool px;
 
 	amdgpu_device_ip_fini(adev);
 	amdgpu_fence_driver_sw_fini(adev);
 	amdgpu_ucode_release(&adev->firmware.gpu_info_fw);
 	adev->accel_working = false;
 	dma_fence_put(rcu_dereference_protected(adev->gang_submit, true));
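+	/* Drop the per-partition spearhead fences and free the sync containers again */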
+	for (i = 0; i < MAX_XCP; ++i) {
+		dma_fence_put(adev->isolation[i].spearhead);
+		amdgpu_sync_free(&adev->isolation[i].active);
+		amdgpu_sync_free(&adev->isolation[i].prev);
+	}
 
 	amdgpu_reset_fini(adev);
 
@@ -6953,6 +6963,92 @@ struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
 	return NULL;
 }
 
+/**
+ * amdgpu_device_enforce_isolation - enforce HW isolation
+ * @adev: the amdgpu device pointer
+ * @ring: the HW ring the job is supposed to run on
+ * @job: the job which is about to be pushed to the HW ring
+ *
+ * Makes sure that only one client at a time can use the GFX block.
+ * Returns: The dependency to wait on before the job can be pushed to the HW.
+ * The function is called multiple times until NULL is returned.
+ */
+struct dma_fence *amdgpu_device_enforce_isolation(struct amdgpu_device *adev,
+						  struct amdgpu_ring *ring,
+						  struct amdgpu_job *job)
+{
+	struct amdgpu_isolation *isolation = &adev->isolation[ring->xcp_id];
+	struct drm_sched_fence *f = job->base.s_fence;
+	struct dma_fence *dep;
+	void *owner;
+	int r;
+
+	/*
+	 * For now enforce isolation only for the GFX block since we only need
+	 * the cleaner shader on those rings.
+	 */
+	if (ring->funcs->type != AMDGPU_RING_TYPE_GFX &&
+	    ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
+		return NULL;
+
+	/*
+	 * All submissions where enforce isolation is false are handled as if
+	 * they come from a single client. Use ~0l as the owner to distinguish
+	 * them from kernel submissions where the owner is NULL.
+	 */
+	owner = job->enforce_isolation ? f->owner : (void *)~0l;
+
+	mutex_lock(&adev->enforce_isolation_mutex);
+
+	/*
+	 * The "spearhead" submission is the first one which changes the
+	 * ownership to its client. We always need to wait for it to be
+	 * pushed to the HW before proceeding with anything.
+	 */
+	if (&f->scheduled != isolation->spearhead &&
+	    !dma_fence_is_signaled(isolation->spearhead)) {
+		dep = isolation->spearhead;
+		goto out_grab_ref;
+	}
+
+	if (isolation->owner != owner) {
+
+		/*
+		 * Wait for any gang to be assembled before switching to a
+		 * different owner, otherwise we could deadlock the
+		 * submissions.
+		 */
+		if (!job->gang_submit) {
+			dep = amdgpu_device_get_gang(adev);
+			if (!dma_fence_is_signaled(dep))
+				goto out_return_dep;
+			dma_fence_put(dep);
+		}
+
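+		/* This job becomes the new spearhead; the previous owner's submissions move to the prev container */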
+		dma_fence_put(isolation->spearhead);
+		isolation->spearhead = dma_fence_get(&f->scheduled);
+		amdgpu_sync_move(&isolation->active, &isolation->prev);
+		isolation->owner = owner;
+	}
+
+	/*
+	 * Specifying the ring here helps to pipeline submissions even when
+	 * isolation is enabled. If that is not desired for testing, NULL can
+	 * be used instead of the ring to enforce a CPU round trip while
+	 * switching between clients.
+	 */
+	dep = amdgpu_sync_peek_fence(&isolation->prev, ring);
+	r = amdgpu_sync_fence(&isolation->active, &f->finished, GFP_NOWAIT);
+	if (r)
+		DRM_WARN("OOM tracking isolation\n");
+
+out_grab_ref:
+	dma_fence_get(dep);
+out_return_dep:
+	mutex_unlock(&adev->enforce_isolation_mutex);
+	return dep;
+}
+
 bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)
 {
 	switch (adev->asic_type) {
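
For context, a minimal sketch of how a caller might drive the new function, assuming it is hooked into the scheduler's dependency resolution, where the prepare callback is invoked repeatedly until no dependency is left (the name example_prepare_job and the surrounding flow are illustrative assumptions, not part of this patch):

static struct dma_fence *
example_prepare_job(struct amdgpu_job *job, struct amdgpu_ring *ring)
{
	struct dma_fence *fence;

	/* Hands back one dependency per call; NULL once nothing is left to wait for */
	fence = amdgpu_device_enforce_isolation(ring->adev, ring, job);
	if (fence)
		return fence;

	/* ... resolve the job's remaining dependencies here ... */
	return NULL;
}

Note that both return paths hand out a reference (via dma_fence_get() on the out_grab_ref path and amdgpu_device_get_gang() on the out_return_dep path), so the caller owns the returned fence and is responsible for dropping it after waiting on it.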