2626#include "amdgpu.h"
2727#include "amdgpu_ucode.h"
2828#include "amdgpu_vpe.h"
29+ #include "amdgpu_smu.h"
2930#include "soc15_common.h"
3031#include "vpe_v6_1.h"
3132
#define AMDGPU_CSA_VPE_SIZE	64
/* VPE CSA resides in the 4th page of CSA */
#define AMDGPU_CSA_VPE_OFFSET	(4096 * 3)

/* 1 second timeout */
#define VPE_IDLE_TIMEOUT	msecs_to_jiffies(1000)

#define VPE_MAX_DPM_LEVEL			4
#define FIXED1_8_BITS_PER_FRACTIONAL_PART	8
/*
 * Integer part of an unsigned 1.8 fixed-point value (the bits above the
 * 8 fractional bits).  Note: must be a function-like macro — a space
 * between the name and "(x)" would turn it into an object-like macro
 * whose expansion breaks every call site.
 */
#define GET_PRATIO_INTEGER_PART(x)		((x) >> FIXED1_8_BITS_PER_FRACTIONAL_PART)
3644static void vpe_set_ring_funcs (struct amdgpu_device * adev );
3745
/*
 * Divide two u16 values, returning the quotient and storing the
 * remainder through @remainder.  @divisor must be non-zero.
 */
static inline uint16_t div16_u16_rem(uint16_t dividend, uint16_t divisor, uint16_t *remainder)
{
	uint16_t quotient = dividend / divisor;

	*remainder = dividend - (uint16_t)(quotient * divisor);
	return quotient;
}
51+
/*
 * Integer-divide @dividend by @divisor, returning the quotient and
 * storing the remainder through @remainder.  Thin wrapper around
 * div16_u16_rem(), kept for naming parity with the display-core
 * fixed-point helpers this code was modeled on.
 *
 * Fix: dropped the pointless (uint16_t *) cast — @remainder already
 * has that exact type, and the cast only defeated type checking.
 */
static inline uint16_t complete_integer_division_u16(
	uint16_t dividend,
	uint16_t divisor,
	uint16_t *remainder)
{
	return div16_u16_rem(dividend, divisor, remainder);
}
59+
60+ static uint16_t vpe_u1_8_from_fraction (uint16_t numerator , uint16_t denominator )
61+ {
62+ bool arg1_negative = numerator < 0 ;
63+ bool arg2_negative = denominator < 0 ;
64+
65+ uint16_t arg1_value = (uint16_t )(arg1_negative ? - numerator : numerator );
66+ uint16_t arg2_value = (uint16_t )(arg2_negative ? - denominator : denominator );
67+
68+ uint16_t remainder ;
69+
70+ /* determine integer part */
71+ uint16_t res_value = complete_integer_division_u16 (
72+ arg1_value , arg2_value , & remainder );
73+
74+ if (res_value > 127 /* CHAR_MAX */ )
75+ return 0 ;
76+
77+ /* determine fractional part */
78+ {
79+ unsigned int i = FIXED1_8_BITS_PER_FRACTIONAL_PART ;
80+
81+ do {
82+ remainder <<= 1 ;
83+
84+ res_value <<= 1 ;
85+
86+ if (remainder >= arg2_value ) {
87+ res_value |= 1 ;
88+ remainder -= arg2_value ;
89+ }
90+ } while (-- i != 0 );
91+ }
92+
93+ /* round up LSB */
94+ {
95+ uint16_t summand = (remainder << 1 ) >= arg2_value ;
96+
97+ if ((res_value + summand ) > 32767 /* SHRT_MAX */ )
98+ return 0 ;
99+
100+ res_value += summand ;
101+ }
102+
103+ if (arg1_negative ^ arg2_negative )
104+ res_value = - res_value ;
105+
106+ return res_value ;
107+ }
108+
/*
 * Compute the u1.8 power ratio used when stepping from @from_frequency
 * to @to_frequency.  A ratio whose integer part exceeds 1 cannot be
 * programmed and is reported as 0 (invalid).
 */
static uint16_t vpe_internal_get_pratio(uint16_t from_frequency, uint16_t to_frequency)
{
	uint16_t pratio = vpe_u1_8_from_fraction(from_frequency, to_frequency);

	return (GET_PRATIO_INTEGER_PART(pratio) > 1) ? 0 : pratio;
}
118+
/*
 * VPE has 4 DPM levels from level 0 (lowest) to 3 (highest),
 * VPE FW will dynamically decide which level should be used according to current loading.
 *
 * Get VPE and SOC clocks from PM, and select the appropriate four clock values,
 * calculate the ratios of adjusting from one clock to another.
 * The VPE FW can then request the appropriate frequency from the PMFW.
 *
 * Always returns 0: if clocks cannot be fetched or are invalid, DPM is
 * simply disabled rather than failing the caller.
 */
int amdgpu_vpe_configure_dpm(struct amdgpu_vpe *vpe)
{
	struct amdgpu_device *adev = vpe->ring.adev;
	uint32_t dpm_ctl;

	if (adev->pm.dpm_enabled) {
		struct dpm_clocks clock_table = { 0 };
		struct dpm_clock *VPEClks;
		struct dpm_clock *SOCClks;
		uint32_t idx;
		uint32_t pratio_vmax_vnorm = 0, pratio_vnorm_vmid = 0, pratio_vmid_vmin = 0;
		uint16_t pratio_vmin_freq = 0, pratio_vmid_freq = 0, pratio_vnorm_freq = 0, pratio_vmax_freq = 0;

		/* Set the DPM enable bit before programming the ratio table. */
		dpm_ctl = RREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_enable));
		dpm_ctl |= 1; /* DPM enablement */
		WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_enable), dpm_ctl);

		/* Get VPECLK and SOCCLK */
		if (amdgpu_dpm_get_dpm_clock_table(adev, &clock_table)) {
			dev_dbg(adev->dev, "%s: get clock failed!\n", __func__);
			goto disable_dpm;
		}

		SOCClks = clock_table.SocClocks;
		VPEClks = clock_table.VPEClocks;

		/* vpe dpm only cares 4 levels. */
		for (idx = 0; idx < VPE_MAX_DPM_LEVEL; idx++) {
			uint32_t soc_dpm_level;
			uint32_t min_freq;

			/* Level 0 maps to SOC level 0; higher levels sample
			 * every other SOC level starting at 3 (i.e. 3, 5, 7).
			 */
			if (idx == 0)
				soc_dpm_level = 0;
			else
				soc_dpm_level = (idx * 2) + 1;

			/* clamp the max level */
			if (soc_dpm_level > PP_SMU_NUM_VPECLK_DPM_LEVELS - 1)
				soc_dpm_level = PP_SMU_NUM_VPECLK_DPM_LEVELS - 1;

			/* The usable frequency at this level is bounded by the
			 * slower of SOCCLK and VPECLK.
			 * NOTE(review): min_freq (u32) is stored into a u16
			 * below — assumes PM reports frequencies that fit in
			 * 16 bits (MHz range); confirm against the PMFW table.
			 */
			min_freq = (SOCClks[soc_dpm_level].Freq < VPEClks[soc_dpm_level].Freq) ?
				   SOCClks[soc_dpm_level].Freq : VPEClks[soc_dpm_level].Freq;

			switch (idx) {
			case 0:
				pratio_vmin_freq = min_freq;
				break;
			case 1:
				pratio_vmid_freq = min_freq;
				break;
			case 2:
				pratio_vnorm_freq = min_freq;
				break;
			case 3:
				pratio_vmax_freq = min_freq;
				break;
			default:
				break;
			}
		}

		/* All four frequencies must be known to build the ratio table. */
		if (pratio_vmin_freq && pratio_vmid_freq && pratio_vnorm_freq && pratio_vmax_freq) {
			uint32_t pratio_ctl;

			/* u1.8 step-down ratios between adjacent levels,
			 * packed 9 bits apiece into one register value.
			 */
			pratio_vmax_vnorm = (uint32_t)vpe_internal_get_pratio(pratio_vmax_freq, pratio_vnorm_freq);
			pratio_vnorm_vmid = (uint32_t)vpe_internal_get_pratio(pratio_vnorm_freq, pratio_vmid_freq);
			pratio_vmid_vmin = (uint32_t)vpe_internal_get_pratio(pratio_vmid_freq, pratio_vmin_freq);

			pratio_ctl = pratio_vmax_vnorm | (pratio_vnorm_vmid << 9) | (pratio_vmid_vmin << 18);
			WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_pratio), pratio_ctl);		/* PRatio */
			WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_request_interval), 24000);	/* 1ms, unit=1/24MHz */
			WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_decision_threshold), 1200000);	/* 50ms */
			WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_busy_clamp_threshold), 1200000);/* 50ms */
			WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_idle_clamp_threshold), 1200000);/* 50ms */
			dev_dbg(adev->dev, "%s: configure vpe dpm pratio done!\n", __func__);
		} else {
			dev_dbg(adev->dev, "%s: invalid pratio parameters!\n", __func__);
			goto disable_dpm;
		}
	}
	return 0;

disable_dpm:
	/* Clear bit 0 set above so the FW does not act on a half-programmed table. */
	dpm_ctl = RREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_enable));
	dpm_ctl &= 0xfffffffe; /* Disable DPM */
	WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_enable), dpm_ctl);
	dev_dbg(adev->dev, "%s: disable vpe dpm\n", __func__);
	return 0;
}
216+
38217int amdgpu_vpe_psp_update_sram (struct amdgpu_device * adev )
39218{
40219 struct amdgpu_firmware_info ucode = {
@@ -134,6 +313,19 @@ static int vpe_early_init(void *handle)
134313 return 0 ;
135314}
136315
316+ static void vpe_idle_work_handler (struct work_struct * work )
317+ {
318+ struct amdgpu_device * adev =
319+ container_of (work , struct amdgpu_device , vpe .idle_work .work );
320+ unsigned int fences = 0 ;
321+
322+ fences += amdgpu_fence_count_emitted (& adev -> vpe .ring );
323+
324+ if (fences == 0 )
325+ amdgpu_device_ip_set_powergating_state (adev , AMD_IP_BLOCK_TYPE_VPE , AMD_PG_STATE_GATE );
326+ else
327+ schedule_delayed_work (& adev -> vpe .idle_work , VPE_IDLE_TIMEOUT );
328+ }
137329
138330static int vpe_common_init (struct amdgpu_vpe * vpe )
139331{
@@ -150,6 +342,9 @@ static int vpe_common_init(struct amdgpu_vpe *vpe)
150342 return r ;
151343 }
152344
345+ vpe -> context_started = false;
346+ INIT_DELAYED_WORK (& adev -> vpe .idle_work , vpe_idle_work_handler );
347+
153348 return 0 ;
154349}
155350
@@ -219,13 +414,18 @@ static int vpe_hw_fini(void *handle)
219414
220415 vpe_ring_stop (vpe );
221416
417+ /* Power off VPE */
418+ amdgpu_device_ip_set_powergating_state (adev , AMD_IP_BLOCK_TYPE_VPE , AMD_PG_STATE_GATE );
419+
222420 return 0 ;
223421}
224422
225423static int vpe_suspend (void * handle )
226424{
227425 struct amdgpu_device * adev = (struct amdgpu_device * )handle ;
228426
427+ cancel_delayed_work_sync (& adev -> vpe .idle_work );
428+
229429 return vpe_hw_fini (adev );
230430}
231431
@@ -430,6 +630,21 @@ static int vpe_set_clockgating_state(void *handle,
430630static int vpe_set_powergating_state (void * handle ,
431631 enum amd_powergating_state state )
432632{
633+ struct amdgpu_device * adev = (struct amdgpu_device * )handle ;
634+ struct amdgpu_vpe * vpe = & adev -> vpe ;
635+
636+ if (!adev -> pm .dpm_enabled )
637+ dev_err (adev -> dev , "Without PM, cannot support powergating\n" );
638+
639+ dev_dbg (adev -> dev , "%s: %s!\n" , __func__ , (state == AMD_PG_STATE_GATE ) ? "GATE" :"UNGATE" );
640+
641+ if (state == AMD_PG_STATE_GATE ) {
642+ amdgpu_dpm_enable_vpe (adev , false);
643+ vpe -> context_started = false;
644+ } else {
645+ amdgpu_dpm_enable_vpe (adev , true);
646+ }
647+
433648 return 0 ;
434649}
435650
@@ -595,6 +810,38 @@ static int vpe_ring_test_ib(struct amdgpu_ring *ring, long timeout)
595810 return ret ;
596811}
597812
813+ static void vpe_ring_begin_use (struct amdgpu_ring * ring )
814+ {
815+ struct amdgpu_device * adev = ring -> adev ;
816+ struct amdgpu_vpe * vpe = & adev -> vpe ;
817+
818+ cancel_delayed_work_sync (& adev -> vpe .idle_work );
819+
820+ /* Power on VPE and notify VPE of new context */
821+ if (!vpe -> context_started ) {
822+ uint32_t context_notify ;
823+
824+ /* Power on VPE */
825+ amdgpu_device_ip_set_powergating_state (adev , AMD_IP_BLOCK_TYPE_VPE , AMD_PG_STATE_UNGATE );
826+
827+ /* Indicates that a job from a new context has been submitted. */
828+ context_notify = RREG32 (vpe_get_reg_offset (vpe , 0 , vpe -> regs .context_indicator ));
829+ if ((context_notify & 0x1 ) == 0 )
830+ context_notify |= 0x1 ;
831+ else
832+ context_notify &= ~(0x1 );
833+ WREG32 (vpe_get_reg_offset (vpe , 0 , vpe -> regs .context_indicator ), context_notify );
834+ vpe -> context_started = true;
835+ }
836+ }
837+
838+ static void vpe_ring_end_use (struct amdgpu_ring * ring )
839+ {
840+ struct amdgpu_device * adev = ring -> adev ;
841+
842+ schedule_delayed_work (& adev -> vpe .idle_work , VPE_IDLE_TIMEOUT );
843+ }
844+
598845static const struct amdgpu_ring_funcs vpe_ring_funcs = {
599846 .type = AMDGPU_RING_TYPE_VPE ,
600847 .align_mask = 0xf ,
@@ -625,6 +872,8 @@ static const struct amdgpu_ring_funcs vpe_ring_funcs = {
625872 .init_cond_exec = vpe_ring_init_cond_exec ,
626873 .patch_cond_exec = vpe_ring_patch_cond_exec ,
627874 .preempt_ib = vpe_ring_preempt_ib ,
875+ .begin_use = vpe_ring_begin_use ,
876+ .end_use = vpe_ring_end_use ,
628877};
629878
630879static void vpe_set_ring_funcs (struct amdgpu_device * adev )
0 commit comments