@@ -5461,6 +5461,11 @@ static void gfx_v11_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
54615461 amdgpu_ring_write (ring , PACKET3 (PACKET3_PFP_SYNC_ME , 0 ));
54625462 amdgpu_ring_write (ring , 0x0 );
54635463 }
5464+
5465+ /* Make sure that we can't skip the SET_Q_MODE packets when the VM
5466+ * changed in any way.
5467+ */
5468+ ring -> set_q_mode_ptr = NULL ;
54645469}
54655470
54665471static void gfx_v11_0_ring_emit_fence_kiq (struct amdgpu_ring * ring , u64 addr ,
@@ -5510,16 +5515,81 @@ static void gfx_v11_0_ring_emit_cntxcntl(struct amdgpu_ring *ring,
55105515 amdgpu_ring_write (ring , 0 );
55115516}
55125517
5518+ static unsigned gfx_v11_0_ring_emit_init_cond_exec (struct amdgpu_ring * ring ,
5519+ uint64_t addr )
5520+ {
5521+ unsigned ret ;
5522+
5523+ amdgpu_ring_write (ring , PACKET3 (PACKET3_COND_EXEC , 3 ));
5524+ amdgpu_ring_write (ring , lower_32_bits (addr ));
5525+ amdgpu_ring_write (ring , upper_32_bits (addr ));
5526+ /* discard following DWs if *cond_exec_gpu_addr==0 */
5527+ amdgpu_ring_write (ring , 0 );
5528+ ret = ring -> wptr & ring -> buf_mask ;
5529+ /* patch dummy value later */
5530+ amdgpu_ring_write (ring , 0 );
5531+
5532+ return ret ;
5533+ }
5534+
55135535static void gfx_v11_0_ring_emit_gfx_shadow (struct amdgpu_ring * ring ,
55145536 u64 shadow_va , u64 csa_va ,
55155537 u64 gds_va , bool init_shadow ,
55165538 int vmid )
55175539{
55185540 struct amdgpu_device * adev = ring -> adev ;
5541+ unsigned int offs , end ;
55195542
5520- if (!adev -> gfx .cp_gfx_shadow )
5543+ if (!adev -> gfx .cp_gfx_shadow || ! ring -> ring_obj )
55215544 return ;
55225545
5546+ /*
5547+ * The logic here isn't easy to understand because we need to keep state
5548+ * across multiple executions of the function as well as between the
5549+ * CPU and GPU. The general idea is that the newly written GPU command
5550+ * has a condition on the previous one and is only executed if really
5551+ * necessary.
5552+ */
5553+
5554+ /*
5555+ * The dw in the NOP controls if the next SET_Q_MODE packet should be
5556+ * executed or not. Reserve 64bits just to be on the safe side.
5557+ */
5558+ amdgpu_ring_write (ring , PACKET3 (PACKET3_NOP , 1 ));
5559+ offs = ring -> wptr & ring -> buf_mask ;
5560+
5561+ /*
5562+ * We start with skipping the prefix SET_Q_MODE and always executing
5563+ * the postfix SET_Q_MODE packet. This is changed below with a
5564+ * WRITE_DATA command when the postfix is executed.
5565+ */
5566+ amdgpu_ring_write (ring , shadow_va ? 1 : 0 );
5567+ amdgpu_ring_write (ring , 0 );
5568+
5569+ if (ring -> set_q_mode_offs ) {
5570+ uint64_t addr ;
5571+
5572+ addr = amdgpu_bo_gpu_offset (ring -> ring_obj );
5573+ addr += ring -> set_q_mode_offs << 2 ;
5574+ end = gfx_v11_0_ring_emit_init_cond_exec (ring , addr );
5575+ }
5576+
5577+ /*
5578+ * When the postfix SET_Q_MODE packet executes we need to make sure that the
5579+ * next prefix SET_Q_MODE packet executes as well.
5580+ */
5581+ if (!shadow_va ) {
5582+ uint64_t addr ;
5583+
5584+ addr = amdgpu_bo_gpu_offset (ring -> ring_obj );
5585+ addr += offs << 2 ;
5586+ amdgpu_ring_write (ring , PACKET3 (PACKET3_WRITE_DATA , 3 ));
5587+ amdgpu_ring_write (ring , WRITE_DATA_DST_SEL (5 ) | WR_CONFIRM );
5588+ amdgpu_ring_write (ring , lower_32_bits (addr ));
5589+ amdgpu_ring_write (ring , upper_32_bits (addr ));
5590+ amdgpu_ring_write (ring , 0x1 );
5591+ }
5592+
55235593 amdgpu_ring_write (ring , PACKET3 (PACKET3_SET_Q_PREEMPTION_MODE , 7 ));
55245594 amdgpu_ring_write (ring , lower_32_bits (shadow_va ));
55255595 amdgpu_ring_write (ring , upper_32_bits (shadow_va ));
@@ -5531,23 +5601,26 @@ static void gfx_v11_0_ring_emit_gfx_shadow(struct amdgpu_ring *ring,
55315601 PACKET3_SET_Q_PREEMPTION_MODE_IB_VMID (vmid ) : 0 );
55325602 amdgpu_ring_write (ring , init_shadow ?
55335603 PACKET3_SET_Q_PREEMPTION_MODE_INIT_SHADOW_MEM : 0 );
5534- }
55355604
5536- static unsigned gfx_v11_0_ring_emit_init_cond_exec (struct amdgpu_ring * ring ,
5537- uint64_t addr )
5538- {
5539- unsigned ret ;
5605+ if (ring -> set_q_mode_offs )
5606+ amdgpu_ring_patch_cond_exec (ring , end );
55405607
5541- amdgpu_ring_write (ring , PACKET3 (PACKET3_COND_EXEC , 3 ));
5542- amdgpu_ring_write (ring , lower_32_bits (addr ));
5543- amdgpu_ring_write (ring , upper_32_bits (addr ));
5544- /* discard following DWs if *cond_exec_gpu_addr==0 */
5545- amdgpu_ring_write (ring , 0 );
5546- ret = ring -> wptr & ring -> buf_mask ;
5547- /* patch dummy value later */
5548- amdgpu_ring_write (ring , 0 );
5608+ if (shadow_va ) {
5609+ uint64_t token = shadow_va ^ csa_va ^ gds_va ^ vmid ;
55495610
5550- return ret ;
5611+ /*
5612+ * If the tokens match try to skip the last postfix SET_Q_MODE
5613+ * packet to avoid saving/restoring the state all the time.
5614+ */
5615+ if (ring -> set_q_mode_ptr && ring -> set_q_mode_token == token )
5616+ * ring -> set_q_mode_ptr = 0 ;
5617+
5618+ ring -> set_q_mode_token = token ;
5619+ } else {
5620+ ring -> set_q_mode_ptr = & ring -> ring [ring -> set_q_mode_offs ];
5621+ }
5622+
5623+ ring -> set_q_mode_offs = offs ;
55515624}
55525625
55535626static int gfx_v11_0_ring_preempt_ib (struct amdgpu_ring * ring )
@@ -6114,7 +6187,7 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = {
61146187 .emit_frame_size = /* totally 247 maximum if 16 IBs */
61156188 5 + /* update_spm_vmid */
61166189 5 + /* COND_EXEC */
6117- 9 + /* SET_Q_PREEMPTION_MODE */
6190+ 22 + /* SET_Q_PREEMPTION_MODE */
61186191 7 + /* PIPELINE_SYNC */
61196192 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
61206193 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
@@ -6127,6 +6200,7 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = {
61276200 31 + /* DE_META */
61286201 3 + /* CNTX_CTRL */
61296202 5 + /* HDP_INVL */
6203+ 22 + /* SET_Q_PREEMPTION_MODE */
61306204 8 + 8 + /* FENCE x2 */
61316205 8 , /* gfx_v11_0_emit_mem_sync */
61326206 .emit_ib_size = 4 , /* gfx_v11_0_ring_emit_ib_gfx */
0 commit comments