Skip to content

Commit d117fd2

Browse files
Samir Dhume authored and Alex Deucher committed
drm/amdgpu/vcn: sriov support for vcn_v4_0_3
initialization table handshake with mmsch

Signed-off-by: Samir Dhume <samir.dhume@amd.com>
Acked-by: Leo Liu <leo.liu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
1 parent 44fd83e commit d117fd2

1 file changed

Lines changed: 236 additions & 24 deletions

File tree

drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c

Lines changed: 236 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
#include "soc15d.h"
3232
#include "soc15_hw_ip.h"
3333
#include "vcn_v2_0.h"
34+
#include "mmsch_v4_0_3.h"
3435

3536
#include "vcn/vcn_4_0_3_offset.h"
3637
#include "vcn/vcn_4_0_3_sh_mask.h"
@@ -44,6 +45,7 @@
4445
#define VCN_VID_SOC_ADDRESS_2_0 0x1fb00
4546
#define VCN1_VID_SOC_ADDRESS_3_0 0x48300
4647

48+
static int vcn_v4_0_3_start_sriov(struct amdgpu_device *adev);
4749
static void vcn_v4_0_3_set_unified_ring_funcs(struct amdgpu_device *adev);
4850
static void vcn_v4_0_3_set_irq_funcs(struct amdgpu_device *adev);
4951
static int vcn_v4_0_3_set_powergating_state(void *handle,
@@ -137,6 +139,12 @@ static int vcn_v4_0_3_sw_init(void *handle)
137139
amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]);
138140
}
139141

142+
if (amdgpu_sriov_vf(adev)) {
143+
r = amdgpu_virt_alloc_mm_table(adev);
144+
if (r)
145+
return r;
146+
}
147+
140148
if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
141149
adev->vcn.pause_dpg_mode = vcn_v4_0_3_pause_dpg_mode;
142150

@@ -174,6 +182,9 @@ static int vcn_v4_0_3_sw_fini(void *handle)
174182
drm_dev_exit(idx);
175183
}
176184

185+
if (amdgpu_sriov_vf(adev))
186+
amdgpu_virt_free_mm_table(adev);
187+
177188
r = amdgpu_vcn_suspend(adev);
178189
if (r)
179190
return r;
@@ -196,33 +207,47 @@ static int vcn_v4_0_3_hw_init(void *handle)
196207
struct amdgpu_ring *ring;
197208
int i, r, vcn_inst;
198209

199-
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
200-
vcn_inst = GET_INST(VCN, i);
201-
ring = &adev->vcn.inst[i].ring_enc[0];
210+
if (amdgpu_sriov_vf(adev)) {
211+
r = vcn_v4_0_3_start_sriov(adev);
212+
if (r)
213+
goto done;
202214

203-
if (ring->use_doorbell) {
204-
adev->nbio.funcs->vcn_doorbell_range(
205-
adev, ring->use_doorbell,
206-
(adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
207-
9 * vcn_inst,
208-
adev->vcn.inst[i].aid_id);
209-
210-
WREG32_SOC15(
211-
VCN, GET_INST(VCN, ring->me),
212-
regVCN_RB1_DB_CTRL,
213-
ring->doorbell_index
214-
<< VCN_RB1_DB_CTRL__OFFSET__SHIFT |
215-
VCN_RB1_DB_CTRL__EN_MASK);
216-
217-
/* Read DB_CTRL to flush the write DB_CTRL command. */
218-
RREG32_SOC15(
219-
VCN, GET_INST(VCN, ring->me),
220-
regVCN_RB1_DB_CTRL);
215+
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
216+
ring = &adev->vcn.inst[i].ring_enc[0];
217+
ring->wptr = 0;
218+
ring->wptr_old = 0;
219+
vcn_v4_0_3_unified_ring_set_wptr(ring);
220+
ring->sched.ready = true;
221221
}
222+
} else {
223+
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
224+
vcn_inst = GET_INST(VCN, i);
225+
ring = &adev->vcn.inst[i].ring_enc[0];
226+
227+
if (ring->use_doorbell) {
228+
adev->nbio.funcs->vcn_doorbell_range(
229+
adev, ring->use_doorbell,
230+
(adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
231+
9 * vcn_inst,
232+
adev->vcn.inst[i].aid_id);
233+
234+
WREG32_SOC15(
235+
VCN, GET_INST(VCN, ring->me),
236+
regVCN_RB1_DB_CTRL,
237+
ring->doorbell_index
238+
<< VCN_RB1_DB_CTRL__OFFSET__SHIFT |
239+
VCN_RB1_DB_CTRL__EN_MASK);
240+
241+
/* Read DB_CTRL to flush the write DB_CTRL command. */
242+
RREG32_SOC15(
243+
VCN, GET_INST(VCN, ring->me),
244+
regVCN_RB1_DB_CTRL);
245+
}
222246

223-
r = amdgpu_ring_test_helper(ring);
224-
if (r)
225-
goto done;
247+
r = amdgpu_ring_test_helper(ring);
248+
if (r)
249+
goto done;
250+
}
226251
}
227252

228253
done:
@@ -820,6 +845,193 @@ static int vcn_v4_0_3_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, b
820845
return 0;
821846
}
822847

848+
/**
 * vcn_v4_0_3_start_sriov - start VCN under SRIOV via an MMSCH handshake
 *
 * @adev: amdgpu device pointer
 *
 * For each VCN instance, build an MMSCH initialization table in the shared
 * mm_table buffer (firmware cache/stack/context addresses and non-cached
 * region), publish the unified encode ring parameters through the fw_shared
 * buffer, then hand the table to the MMSCH through the VF mailbox registers
 * and poll for completion.
 *
 * Return: 0 on success, -EBUSY if the MMSCH does not respond within the
 * polling timeout.
 */
static int vcn_v4_0_3_start_sriov(struct amdgpu_device *adev)
{
	int i, vcn_inst;
	struct amdgpu_ring *ring_enc;
	uint64_t cache_addr;
	uint64_t rb_enc_addr;
	uint64_t ctx_addr;
	uint32_t param, resp, expected;
	uint32_t offset, cache_size;
	uint32_t tmp, timeout;

	struct amdgpu_mm_table *table = &adev->virt.mm_table;
	uint32_t *table_loc;
	uint32_t table_size;
	/*
	 * NOTE(review): size, size_dw, table_loc, table_size and the cmd
	 * structs below appear to be referenced implicitly by the
	 * MMSCH_V4_0_INSERT_* macros — confirm against mmsch_v4_0.h before
	 * treating any of them (e.g. size_dw) as unused.
	 */
	uint32_t size, size_dw;
	uint32_t init_status;
	uint32_t enabled_vcn;

	struct mmsch_v4_0_cmd_direct_write
		direct_wt = { {0} };
	struct mmsch_v4_0_cmd_direct_read_modify_write
		direct_rd_mod_wt = { {0} };
	struct mmsch_v4_0_cmd_end end = { {0} };
	struct mmsch_v4_0_3_init_header header;

	volatile struct amdgpu_vcn4_fw_shared *fw_shared;
	volatile struct amdgpu_fw_shared_rb_setup *rb_setup;

	/* Command headers consumed by the table-building macros below. */
	direct_wt.cmd_header.command_type =
		MMSCH_COMMAND__DIRECT_REG_WRITE;
	direct_rd_mod_wt.cmd_header.command_type =
		MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
	end.cmd_header.command_type = MMSCH_COMMAND__END;

	for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
		/* i is the logical instance; vcn_inst is the physical one. */
		vcn_inst = GET_INST(VCN, i);

		/* Header sizes/offsets are in dwords, hence the >> 2. */
		memset(&header, 0, sizeof(struct mmsch_v4_0_3_init_header));
		header.version = MMSCH_VERSION;
		header.total_size = sizeof(struct mmsch_v4_0_3_init_header) >> 2;

		/* Table payload starts right after the header in mm_table. */
		table_loc = (uint32_t *)table->cpu_addr;
		table_loc += header.total_size;

		table_size = 0;

		/* Wait for UVD_STATUS busy before programming the instance. */
		MMSCH_V4_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCN, 0, regUVD_STATUS),
			~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY);

		cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);

		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
			/* PSP loaded the ucode: point cache0 at the TMR copy. */
			MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
				regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
				adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_lo);

			MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
				regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
				adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_hi);

			offset = 0;
			MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
				regUVD_VCPU_CACHE_OFFSET0), 0);
		} else {
			/* Driver-loaded ucode lives at the instance BO base. */
			MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
				regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
				lower_32_bits(adev->vcn.inst[i].gpu_addr));
			MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
				regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
				upper_32_bits(adev->vcn.inst[i].gpu_addr));
			offset = cache_size;
			MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
				regUVD_VCPU_CACHE_OFFSET0),
				AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
		}

		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
			regUVD_VCPU_CACHE_SIZE0),
			cache_size);

		/* Cache1: VCPU stack, placed directly after the firmware. */
		cache_addr = adev->vcn.inst[vcn_inst].gpu_addr + offset;
		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
			regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), lower_32_bits(cache_addr));
		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
			regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), upper_32_bits(cache_addr));
		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
			regUVD_VCPU_CACHE_OFFSET1), 0);
		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
			regUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE);

		/* Cache2: VCPU context, placed after the stack. */
		cache_addr = adev->vcn.inst[vcn_inst].gpu_addr + offset +
			AMDGPU_VCN_STACK_SIZE;

		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
			regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), lower_32_bits(cache_addr));

		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
			regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), upper_32_bits(cache_addr));

		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
			regUVD_VCPU_CACHE_OFFSET2), 0);

		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
			regUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE);

		/*
		 * Publish the unified encode ring through the fw_shared
		 * buffer so the firmware picks it up (RB_SETUP flag).
		 */
		fw_shared = adev->vcn.inst[vcn_inst].fw_shared.cpu_addr;
		rb_setup = &fw_shared->rb_setup;

		ring_enc = &adev->vcn.inst[vcn_inst].ring_enc[0];
		ring_enc->wptr = 0;
		rb_enc_addr = ring_enc->gpu_addr;

		rb_setup->is_rb_enabled_flags |= RB_ENABLED;
		rb_setup->rb_addr_lo = lower_32_bits(rb_enc_addr);
		rb_setup->rb_addr_hi = upper_32_bits(rb_enc_addr);
		rb_setup->rb_size = ring_enc->ring_size / 4;	/* size in dwords */
		fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_VF_RB_SETUP_FLAG);

		/* Non-cached region 0 = the fw_shared buffer itself. */
		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
			regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW),
			lower_32_bits(adev->vcn.inst[vcn_inst].fw_shared.gpu_addr));
		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
			regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH),
			upper_32_bits(adev->vcn.inst[vcn_inst].fw_shared.gpu_addr));
		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
			regUVD_VCPU_NONCACHE_SIZE0),
			AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared)));
		MMSCH_V4_0_INSERT_END();

		/* Finalize the header now that the table size is known. */
		header.vcn0.init_status = 0;
		header.vcn0.table_offset = header.total_size;
		header.vcn0.table_size = table_size;
		header.total_size += table_size;

		/* Send init table to mmsch */
		size = sizeof(struct mmsch_v4_0_3_init_header);
		table_loc = (uint32_t *)table->cpu_addr;
		memcpy((void *)table_loc, &header, size);

		/* Tell MMSCH where the table lives and how big it is. */
		ctx_addr = table->gpu_addr;
		WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_CTX_ADDR_LO, lower_32_bits(ctx_addr));
		WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_CTX_ADDR_HI, upper_32_bits(ctx_addr));

		tmp = RREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_VMID);
		tmp &= ~MMSCH_VF_VMID__VF_CTX_VMID_MASK;
		tmp |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT);
		WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_VMID, tmp);

		size = header.total_size;
		WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_CTX_SIZE, size);

		/* Clear the response register before kicking off init. */
		WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_MAILBOX_RESP, 0);

		param = 0x00000001;
		WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_MAILBOX_HOST, param);
		tmp = 0;
		timeout = 1000;
		resp = 0;
		expected = MMSCH_VF_MAILBOX_RESP__OK;
		/* Poll the mailbox every 10 us, up to 1000 us total. */
		while (resp != expected) {
			resp = RREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_MAILBOX_RESP);
			if (resp != 0)
				break;

			udelay(10);
			tmp = tmp + 10;
			if (tmp >= timeout) {
				DRM_ERROR("failed to init MMSCH. TIME-OUT after %d usec"\
					" waiting for regMMSCH_VF_MAILBOX_RESP "\
					"(expected=0x%08x, readback=0x%08x)\n",
					tmp, expected, resp);
				return -EBUSY;
			}
		}

		/*
		 * Cross-check the mailbox status against the init_status the
		 * MMSCH wrote back into the table header; log (but do not
		 * fail) on mismatch.
		 */
		enabled_vcn = amdgpu_vcn_is_disabled_vcn(adev, VCN_DECODE_RING, 0) ? 1 : 0;
		init_status = ((struct mmsch_v4_0_3_init_header *)(table_loc))->vcn0.init_status;
		if (resp != expected && resp != MMSCH_VF_MAILBOX_RESP__INCOMPLETE
			&& init_status != MMSCH_VF_ENGINE_STATUS__PASS) {
			DRM_ERROR("MMSCH init status is incorrect! readback=0x%08x, header init "\
				"status for VCN%x: 0x%x\n", resp, enabled_vcn, init_status);
		}
	}

	return 0;
}
1034+
8231035
/**
8241036
* vcn_v4_0_3_start - VCN start
8251037
*

0 commit comments

Comments
 (0)