3131#include "soc15d.h"
3232#include "soc15_hw_ip.h"
3333#include "vcn_v2_0.h"
34+ #include "mmsch_v4_0_3.h"
3435
3536#include "vcn/vcn_4_0_3_offset.h"
3637#include "vcn/vcn_4_0_3_sh_mask.h"
4445#define VCN_VID_SOC_ADDRESS_2_0 0x1fb00
4546#define VCN1_VID_SOC_ADDRESS_3_0 0x48300
4647
48+ static int vcn_v4_0_3_start_sriov (struct amdgpu_device * adev );
4749static void vcn_v4_0_3_set_unified_ring_funcs (struct amdgpu_device * adev );
4850static void vcn_v4_0_3_set_irq_funcs (struct amdgpu_device * adev );
4951static int vcn_v4_0_3_set_powergating_state (void * handle ,
@@ -137,6 +139,12 @@ static int vcn_v4_0_3_sw_init(void *handle)
137139 amdgpu_vcn_fwlog_init (& adev -> vcn .inst [i ]);
138140 }
139141
142+ if (amdgpu_sriov_vf (adev )) {
143+ r = amdgpu_virt_alloc_mm_table (adev );
144+ if (r )
145+ return r ;
146+ }
147+
140148 if (adev -> pg_flags & AMD_PG_SUPPORT_VCN_DPG )
141149 adev -> vcn .pause_dpg_mode = vcn_v4_0_3_pause_dpg_mode ;
142150
@@ -174,6 +182,9 @@ static int vcn_v4_0_3_sw_fini(void *handle)
174182 drm_dev_exit (idx );
175183 }
176184
185+ if (amdgpu_sriov_vf (adev ))
186+ amdgpu_virt_free_mm_table (adev );
187+
177188 r = amdgpu_vcn_suspend (adev );
178189 if (r )
179190 return r ;
@@ -196,33 +207,47 @@ static int vcn_v4_0_3_hw_init(void *handle)
196207 struct amdgpu_ring * ring ;
197208 int i , r , vcn_inst ;
198209
199- for (i = 0 ; i < adev -> vcn .num_vcn_inst ; ++ i ) {
200- vcn_inst = GET_INST (VCN , i );
201- ring = & adev -> vcn .inst [i ].ring_enc [0 ];
210+ if (amdgpu_sriov_vf (adev )) {
211+ r = vcn_v4_0_3_start_sriov (adev );
212+ if (r )
213+ goto done ;
202214
203- if (ring -> use_doorbell ) {
204- adev -> nbio .funcs -> vcn_doorbell_range (
205- adev , ring -> use_doorbell ,
206- (adev -> doorbell_index .vcn .vcn_ring0_1 << 1 ) +
207- 9 * vcn_inst ,
208- adev -> vcn .inst [i ].aid_id );
209-
210- WREG32_SOC15 (
211- VCN , GET_INST (VCN , ring -> me ),
212- regVCN_RB1_DB_CTRL ,
213- ring -> doorbell_index
214- << VCN_RB1_DB_CTRL__OFFSET__SHIFT |
215- VCN_RB1_DB_CTRL__EN_MASK );
216-
217- /* Read DB_CTRL to flush the write DB_CTRL command. */
218- RREG32_SOC15 (
219- VCN , GET_INST (VCN , ring -> me ),
220- regVCN_RB1_DB_CTRL );
215+ for (i = 0 ; i < adev -> vcn .num_vcn_inst ; ++ i ) {
216+ ring = & adev -> vcn .inst [i ].ring_enc [0 ];
217+ ring -> wptr = 0 ;
218+ ring -> wptr_old = 0 ;
219+ vcn_v4_0_3_unified_ring_set_wptr (ring );
220+ ring -> sched .ready = true;
221221 }
222+ } else {
223+ for (i = 0 ; i < adev -> vcn .num_vcn_inst ; ++ i ) {
224+ vcn_inst = GET_INST (VCN , i );
225+ ring = & adev -> vcn .inst [i ].ring_enc [0 ];
226+
227+ if (ring -> use_doorbell ) {
228+ adev -> nbio .funcs -> vcn_doorbell_range (
229+ adev , ring -> use_doorbell ,
230+ (adev -> doorbell_index .vcn .vcn_ring0_1 << 1 ) +
231+ 9 * vcn_inst ,
232+ adev -> vcn .inst [i ].aid_id );
233+
234+ WREG32_SOC15 (
235+ VCN , GET_INST (VCN , ring -> me ),
236+ regVCN_RB1_DB_CTRL ,
237+ ring -> doorbell_index
238+ << VCN_RB1_DB_CTRL__OFFSET__SHIFT |
239+ VCN_RB1_DB_CTRL__EN_MASK );
240+
241+ /* Read DB_CTRL to flush the write DB_CTRL command. */
242+ RREG32_SOC15 (
243+ VCN , GET_INST (VCN , ring -> me ),
244+ regVCN_RB1_DB_CTRL );
245+ }
222246
223- r = amdgpu_ring_test_helper (ring );
224- if (r )
225- goto done ;
247+ r = amdgpu_ring_test_helper (ring );
248+ if (r )
249+ goto done ;
250+ }
226251 }
227252
228253done :
@@ -820,6 +845,193 @@ static int vcn_v4_0_3_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, b
820845 return 0 ;
821846}
822847
/**
 * vcn_v4_0_3_start_sriov - start VCN through the MMSCH when running as an SR-IOV VF
 *
 * @adev: amdgpu_device pointer
 *
 * For every VCN instance this builds an init table in adev->virt.mm_table
 * (an mmsch_v4_0_3_init_header followed by register-programming commands
 * and an end command), programs the MMSCH_VF_CTX_* registers with the
 * table's GPU address, VMID and size, writes the MMSCH_VF_MAILBOX_HOST
 * register and then polls MMSCH_VF_MAILBOX_RESP for a non-zero response.
 *
 * Returns 0 once all instances have been processed (an unexpected response
 * is only logged), or -EBUSY if an instance gives no response before the
 * polling timeout expires.
 */
848+ static int vcn_v4_0_3_start_sriov (struct amdgpu_device * adev )
849+ {
850+ int i , vcn_inst ;
851+ struct amdgpu_ring * ring_enc ;
852+ uint64_t cache_addr ;
853+ uint64_t rb_enc_addr ;
854+ uint64_t ctx_addr ;
855+ uint32_t param , resp , expected ;
856+ uint32_t offset , cache_size ;
857+ uint32_t tmp , timeout ;
858+
/*
 * NOTE(review): size_dw, direct_wt, direct_rd_mod_wt and end are never
 * referenced directly in this function; presumably the MMSCH_V4_0_INSERT_*
 * macros (defined elsewhere) use them, together with size, table_loc and
 * table_size, by name - confirm before renaming or removing any of them.
 */
859+ struct amdgpu_mm_table * table = & adev -> virt .mm_table ;
860+ uint32_t * table_loc ;
861+ uint32_t table_size ;
862+ uint32_t size , size_dw ;
863+ uint32_t init_status ;
864+ uint32_t enabled_vcn ;
865+
866+ struct mmsch_v4_0_cmd_direct_write
867+ direct_wt = { {0 } };
868+ struct mmsch_v4_0_cmd_direct_read_modify_write
869+ direct_rd_mod_wt = { {0 } };
870+ struct mmsch_v4_0_cmd_end end = { {0 } };
871+ struct mmsch_v4_0_3_init_header header ;
872+
873+ volatile struct amdgpu_vcn4_fw_shared * fw_shared ;
874+ volatile struct amdgpu_fw_shared_rb_setup * rb_setup ;
875+
/* Set the command type of each command template once, up front. */
876+ direct_wt .cmd_header .command_type =
877+ MMSCH_COMMAND__DIRECT_REG_WRITE ;
878+ direct_rd_mod_wt .cmd_header .command_type =
879+ MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE ;
880+ end .cmd_header .command_type = MMSCH_COMMAND__END ;
881+
/* One complete table build + mailbox handshake per VCN instance. */
882+ for (i = 0 ; i < adev -> vcn .num_vcn_inst ; i ++ ) {
883+ vcn_inst = GET_INST (VCN , i );
884+
/* Fresh header; total_size starts as the header size in dwords. */
885+ memset (& header , 0 , sizeof (struct mmsch_v4_0_3_init_header ));
886+ header .version = MMSCH_VERSION ;
887+ header .total_size = sizeof (struct mmsch_v4_0_3_init_header ) >> 2 ;
888+
/* Commands are laid out in the table right after the header. */
889+ table_loc = (uint32_t * )table -> cpu_addr ;
890+ table_loc += header .total_size ;
891+
892+ table_size = 0 ;
893+
894+ MMSCH_V4_0_INSERT_DIRECT_RD_MOD_WT (SOC15_REG_OFFSET (VCN , 0 , regUVD_STATUS ),
895+ ~UVD_STATUS__UVD_BUSY , UVD_STATUS__UVD_BUSY );
896+
/* Firmware image size plus 4 bytes, rounded up to a GPU page. */
897+ cache_size = AMDGPU_GPU_PAGE_ALIGN (adev -> vcn .fw -> size + 4 );
898+
/*
 * Cache window 0: with PSP firmware loading use the TMR address recorded
 * for this instance's ucode; otherwise use the instance's own buffer, in
 * which case the following windows start cache_size bytes into it.
 */
899+ if (adev -> firmware .load_type == AMDGPU_FW_LOAD_PSP ) {
900+ MMSCH_V4_0_INSERT_DIRECT_WT (SOC15_REG_OFFSET (VCN , 0 ,
901+ regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW ),
902+ adev -> firmware .ucode [AMDGPU_UCODE_ID_VCN + i ].tmr_mc_addr_lo );
903+
904+ MMSCH_V4_0_INSERT_DIRECT_WT (SOC15_REG_OFFSET (VCN , 0 ,
905+ regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH ),
906+ adev -> firmware .ucode [AMDGPU_UCODE_ID_VCN + i ].tmr_mc_addr_hi );
907+
908+ offset = 0 ;
909+ MMSCH_V4_0_INSERT_DIRECT_WT (SOC15_REG_OFFSET (VCN , 0 ,
910+ regUVD_VCPU_CACHE_OFFSET0 ), 0 );
911+ } else {
912+ MMSCH_V4_0_INSERT_DIRECT_WT (SOC15_REG_OFFSET (VCN , 0 ,
913+ regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW ),
914+ lower_32_bits (adev -> vcn .inst [i ].gpu_addr ));
915+ MMSCH_V4_0_INSERT_DIRECT_WT (SOC15_REG_OFFSET (VCN , 0 ,
916+ regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH ),
917+ upper_32_bits (adev -> vcn .inst [i ].gpu_addr ));
918+ offset = cache_size ;
919+ MMSCH_V4_0_INSERT_DIRECT_WT (SOC15_REG_OFFSET (VCN , 0 ,
920+ regUVD_VCPU_CACHE_OFFSET0 ),
921+ AMDGPU_UVD_FIRMWARE_OFFSET >> 3 );
922+ }
923+
924+ MMSCH_V4_0_INSERT_DIRECT_WT (SOC15_REG_OFFSET (VCN , 0 ,
925+ regUVD_VCPU_CACHE_SIZE0 ),
926+ cache_size );
927+
/*
 * Cache window 1 (AMDGPU_VCN_STACK_SIZE bytes) at gpu_addr + offset.
 * NOTE(review): adev->vcn.inst[] is indexed with vcn_inst from here on,
 * but with i in the non-PSP branch above - confirm the two always agree.
 */
928+ cache_addr = adev -> vcn .inst [vcn_inst ].gpu_addr + offset ;
929+ MMSCH_V4_0_INSERT_DIRECT_WT (SOC15_REG_OFFSET (VCN , 0 ,
930+ regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW ), lower_32_bits (cache_addr ));
931+ MMSCH_V4_0_INSERT_DIRECT_WT (SOC15_REG_OFFSET (VCN , 0 ,
932+ regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH ), upper_32_bits (cache_addr ));
933+ MMSCH_V4_0_INSERT_DIRECT_WT (SOC15_REG_OFFSET (VCN , 0 ,
934+ regUVD_VCPU_CACHE_OFFSET1 ), 0 );
935+ MMSCH_V4_0_INSERT_DIRECT_WT (SOC15_REG_OFFSET (VCN , 0 ,
936+ regUVD_VCPU_CACHE_SIZE1 ), AMDGPU_VCN_STACK_SIZE );
937+
/* Cache window 2 (AMDGPU_VCN_CONTEXT_SIZE bytes) directly follows window 1. */
938+ cache_addr = adev -> vcn .inst [vcn_inst ].gpu_addr + offset +
939+ AMDGPU_VCN_STACK_SIZE ;
940+
941+ MMSCH_V4_0_INSERT_DIRECT_WT (SOC15_REG_OFFSET (VCN , 0 ,
942+ regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW ), lower_32_bits (cache_addr ));
943+
944+ MMSCH_V4_0_INSERT_DIRECT_WT (SOC15_REG_OFFSET (VCN , 0 ,
945+ regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH ), upper_32_bits (cache_addr ));
946+
947+ MMSCH_V4_0_INSERT_DIRECT_WT (SOC15_REG_OFFSET (VCN , 0 ,
948+ regUVD_VCPU_CACHE_OFFSET2 ), 0 );
949+
950+ MMSCH_V4_0_INSERT_DIRECT_WT (SOC15_REG_OFFSET (VCN , 0 ,
951+ regUVD_VCPU_CACHE_SIZE2 ), AMDGPU_VCN_CONTEXT_SIZE );
952+
/*
 * Describe encode ring 0 in the fw_shared rb_setup area: reset the
 * driver-side write pointer, then record the ring's GPU address and its
 * size as ring_size / 4, and set the RB-setup presence flag.
 */
953+ fw_shared = adev -> vcn .inst [vcn_inst ].fw_shared .cpu_addr ;
954+ rb_setup = & fw_shared -> rb_setup ;
955+
956+ ring_enc = & adev -> vcn .inst [vcn_inst ].ring_enc [0 ];
957+ ring_enc -> wptr = 0 ;
958+ rb_enc_addr = ring_enc -> gpu_addr ;
959+
960+ rb_setup -> is_rb_enabled_flags |= RB_ENABLED ;
961+ rb_setup -> rb_addr_lo = lower_32_bits (rb_enc_addr );
962+ rb_setup -> rb_addr_hi = upper_32_bits (rb_enc_addr );
963+ rb_setup -> rb_size = ring_enc -> ring_size / 4 ;
964+ fw_shared -> present_flag_0 |= cpu_to_le32 (AMDGPU_VCN_VF_RB_SETUP_FLAG );
965+
/* Non-cached window 0 covers the page-aligned fw_shared structure. */
966+ MMSCH_V4_0_INSERT_DIRECT_WT (SOC15_REG_OFFSET (VCN , 0 ,
967+ regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW ),
968+ lower_32_bits (adev -> vcn .inst [vcn_inst ].fw_shared .gpu_addr ));
969+ MMSCH_V4_0_INSERT_DIRECT_WT (SOC15_REG_OFFSET (VCN , 0 ,
970+ regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH ),
971+ upper_32_bits (adev -> vcn .inst [vcn_inst ].fw_shared .gpu_addr ));
972+ MMSCH_V4_0_INSERT_DIRECT_WT (SOC15_REG_OFFSET (VCN , 0 ,
973+ regUVD_VCPU_NONCACHE_SIZE0 ),
974+ AMDGPU_GPU_PAGE_ALIGN (sizeof (struct amdgpu_vcn4_fw_shared )));
975+ MMSCH_V4_0_INSERT_END ();
976+
/*
 * Finalize the header: the vcn0 entry starts right after the header
 * (table_offset is the header-only total_size) and spans table_size;
 * total_size then grows to cover header plus commands.
 */
977+ header .vcn0 .init_status = 0 ;
978+ header .vcn0 .table_offset = header .total_size ;
979+ header .vcn0 .table_size = table_size ;
980+ header .total_size += table_size ;
981+
982+ /* Send init table to mmsch */
983+ size = sizeof (struct mmsch_v4_0_3_init_header );
984+ table_loc = (uint32_t * )table -> cpu_addr ;
985+ memcpy ((void * )table_loc , & header , size );
986+
/* Give this instance's MMSCH the table's GPU address, VMID 0 and size. */
987+ ctx_addr = table -> gpu_addr ;
988+ WREG32_SOC15 (VCN , vcn_inst , regMMSCH_VF_CTX_ADDR_LO , lower_32_bits (ctx_addr ));
989+ WREG32_SOC15 (VCN , vcn_inst , regMMSCH_VF_CTX_ADDR_HI , upper_32_bits (ctx_addr ));
990+
991+ tmp = RREG32_SOC15 (VCN , vcn_inst , regMMSCH_VF_VMID );
992+ tmp &= ~MMSCH_VF_VMID__VF_CTX_VMID_MASK ;
993+ tmp |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT );
994+ WREG32_SOC15 (VCN , vcn_inst , regMMSCH_VF_VMID , tmp );
995+
996+ size = header .total_size ;
997+ WREG32_SOC15 (VCN , vcn_inst , regMMSCH_VF_CTX_SIZE , size );
998+
/*
 * Clear the response register, then write 1 to the host mailbox
 * (presumably the request that makes the MMSCH process the table -
 * TODO confirm against the MMSCH interface description).
 */
999+ WREG32_SOC15 (VCN , vcn_inst , regMMSCH_VF_MAILBOX_RESP , 0 );
1000+
1001+ param = 0x00000001 ;
1002+ WREG32_SOC15 (VCN , vcn_inst , regMMSCH_VF_MAILBOX_HOST , param );
/*
 * Poll the response register every 10 us. Any non-zero value ends the
 * wait (not only the expected OK value); once the accumulated delay
 * reaches timeout (1000) the function fails with -EBUSY.
 */
1003+ tmp = 0 ;
1004+ timeout = 1000 ;
1005+ resp = 0 ;
1006+ expected = MMSCH_VF_MAILBOX_RESP__OK ;
1007+ while (resp != expected ) {
1008+ resp = RREG32_SOC15 (VCN , vcn_inst , regMMSCH_VF_MAILBOX_RESP );
1009+ if (resp != 0 )
1010+ break ;
1011+
1012+ udelay (10 );
1013+ tmp = tmp + 10 ;
1014+ if (tmp >= timeout ) {
1015+ DRM_ERROR ("failed to init MMSCH. TIME-OUT after %d usec" \
1016+ " waiting for regMMSCH_VF_MAILBOX_RESP " \
1017+ "(expected=0x%08x, readback=0x%08x)\n" ,
1018+ tmp , expected , resp );
1019+ return - EBUSY ;
1020+ }
1021+ }
1022+
/*
 * Re-read vcn0.init_status from the header in table memory and report a
 * response that is neither OK nor INCOMPLETE when that status is not
 * PASS. The condition is only logged; the loop carries on and the
 * function can still return 0.
 * NOTE(review): enabled_vcn is 1 when amdgpu_vcn_is_disabled_vcn()
 * returns true and is printed as the VCN number - confirm this is intended.
 */
1023+ enabled_vcn = amdgpu_vcn_is_disabled_vcn (adev , VCN_DECODE_RING , 0 ) ? 1 : 0 ;
1024+ init_status = ((struct mmsch_v4_0_3_init_header * )(table_loc ))-> vcn0 .init_status ;
1025+ if (resp != expected && resp != MMSCH_VF_MAILBOX_RESP__INCOMPLETE
1026+ && init_status != MMSCH_VF_ENGINE_STATUS__PASS ) {
1027+ DRM_ERROR ("MMSCH init status is incorrect! readback=0x%08x, header init " \
1028+ "status for VCN%x: 0x%x\n" , resp , enabled_vcn , init_status );
1029+ }
1030+ }
1031+
1032+ return 0 ;
1033+ }
1034+
8231035/**
8241036 * vcn_v4_0_3_start - VCN start
8251037 *
0 commit comments