Skip to content

Commit 221cadb

Browse files
Timur Kristóf and alexdeucher
authored and committed
drm/amdgpu/vce1: Ensure VCPU BO is in lower 32-bit address space (v3)
Based on research and ideas by Alexandre and Christian.

VCE1 actually executes its code from the VCPU BO. Due to various hardware limitations, the VCE1 requires the VCPU BO to be in the low 32-bit address range. However, VRAM is typically mapped at the high address range, which means the VCPU can't access VRAM through the FB aperture.

To solve this, we write a few page table entries to map the VCPU BO in the GART address range. And we make sure that the GART is located at the low address range. That way the VCE1 can access the VCPU BO.

v2:
- Adjust to v2 of the GART helper commit.
- Add empty line to multi-line comment.

v3:
- Instead of relying on gmc_v6 to set the GART space before GTT, add a new function amdgpu_vce_required_gart_pages() which is called from amdgpu_gtt_mgr_init() directly.

Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Co-developed-by: Alexandre Demers <alexandre.f.demers@gmail.com>
Signed-off-by: Alexandre Demers <alexandre.f.demers@gmail.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
1 parent baf75a0 commit 221cadb

4 files changed

Lines changed: 75 additions & 0 deletions

File tree

drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -284,6 +284,7 @@ int amdgpu_gtt_mgr_init(struct amdgpu_device *adev, uint64_t gtt_size)
284284
ttm_resource_manager_init(man, &adev->mman.bdev, gtt_size);
285285

286286
start = AMDGPU_GTT_MAX_TRANSFER_SIZE * AMDGPU_GTT_NUM_TRANSFER_WINDOWS;
287+
start += amdgpu_vce_required_gart_pages(adev);
287288
size = (adev->gmc.gart_size >> PAGE_SHIFT) - start;
288289
drm_mm_init(&mgr->mm, start, size);
289290
spin_lock_init(&mgr->lock);

drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -450,6 +450,24 @@ void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
450450
}
451451
}
452452

453+
/**
454+
* amdgpu_vce_required_gart_pages() - gets number of GART pages required by VCE
455+
*
456+
* @adev: amdgpu_device pointer
457+
*
458+
* Returns how many GART pages we need before GTT for the VCE IP block.
459+
* For VCE1, see vce_v1_0_ensure_vcpu_bo_32bit_addr for details.
460+
* For VCE2+, this is not needed so return zero.
461+
*/
462+
u32 amdgpu_vce_required_gart_pages(struct amdgpu_device *adev)
463+
{
464+
/* VCE IP block not added yet, so can't use amdgpu_ip_version */
465+
if (adev->family == AMDGPU_FAMILY_SI)
466+
return 512;
467+
468+
return 0;
469+
}
470+
453471
/**
454472
* amdgpu_vce_get_create_msg - generate a VCE create msg
455473
*

drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ int amdgpu_vce_entity_init(struct amdgpu_device *adev, struct amdgpu_ring *ring)
6161
int amdgpu_vce_suspend(struct amdgpu_device *adev);
6262
int amdgpu_vce_resume(struct amdgpu_device *adev);
6363
void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp);
64+
u32 amdgpu_vce_required_gart_pages(struct amdgpu_device *adev);
6465
int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, struct amdgpu_job *job,
6566
struct amdgpu_ib *ib);
6667
int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p,

drivers/gpu/drm/amd/amdgpu/vce_v1_0.c

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434

3535
#include "amdgpu.h"
3636
#include "amdgpu_vce.h"
37+
#include "amdgpu_gart.h"
3738
#include "sid.h"
3839
#include "vce_v1_0.h"
3940
#include "vce/vce_1_0_d.h"
@@ -46,6 +47,11 @@
4647
#define VCE_V1_0_DATA_SIZE (7808 * (AMDGPU_MAX_VCE_HANDLES + 1))
4748
#define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK 0x02
4849

50+
/*
 * Index of the first GART page used for the VCE1 VCPU BO mapping:
 * the page right after the GTT transfer windows.
 */
#define VCE_V1_0_GART_PAGE_START (AMDGPU_GTT_MAX_TRANSFER_SIZE * AMDGPU_GTT_NUM_TRANSFER_WINDOWS)

/* Byte offset of that page, relative to the start of the GART range. */
#define VCE_V1_0_GART_ADDR_START (VCE_V1_0_GART_PAGE_START * AMDGPU_GPU_PAGE_SIZE)
54+
4955
static void vce_v1_0_set_ring_funcs(struct amdgpu_device *adev);
5056
static void vce_v1_0_set_irq_funcs(struct amdgpu_device *adev);
5157

@@ -513,6 +519,49 @@ static int vce_v1_0_early_init(struct amdgpu_ip_block *ip_block)
513519
return 0;
514520
}
515521

522+
/**
523+
* vce_v1_0_ensure_vcpu_bo_32bit_addr() - ensure the VCPU BO has a 32-bit address
524+
*
525+
* @adev: amdgpu_device pointer
526+
*
527+
* Due to various hardware limitations, the VCE1 requires
528+
* the VCPU BO to be in the low 32 bit address range.
529+
* Ensure that the VCPU BO has a 32-bit GPU address,
530+
* or return an error code when that isn't possible.
531+
*
532+
* To accomodate that, we put GART to the LOW address range
533+
* and reserve some GART pages where we map the VCPU BO,
534+
* so that it gets a 32-bit address.
535+
*/
536+
static int vce_v1_0_ensure_vcpu_bo_32bit_addr(struct amdgpu_device *adev)
537+
{
538+
u64 gpu_addr = amdgpu_bo_gpu_offset(adev->vce.vcpu_bo);
539+
u64 bo_size = amdgpu_bo_size(adev->vce.vcpu_bo);
540+
u64 max_vcpu_bo_addr = 0xffffffff - bo_size;
541+
u64 num_pages = ALIGN(bo_size, AMDGPU_GPU_PAGE_SIZE) / AMDGPU_GPU_PAGE_SIZE;
542+
u64 pa = amdgpu_gmc_vram_pa(adev, adev->vce.vcpu_bo);
543+
u64 flags = AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE | AMDGPU_PTE_VALID;
544+
545+
/*
546+
* Check if the VCPU BO already has a 32-bit address.
547+
* Eg. if MC is configured to put VRAM in the low address range.
548+
*/
549+
if (gpu_addr <= max_vcpu_bo_addr)
550+
return 0;
551+
552+
/* Check if we can map the VCPU BO in GART to a 32-bit address. */
553+
if (adev->gmc.gart_start + VCE_V1_0_GART_ADDR_START > max_vcpu_bo_addr)
554+
return -EINVAL;
555+
556+
amdgpu_gart_map_vram_range(adev, pa, VCE_V1_0_GART_PAGE_START,
557+
num_pages, flags, adev->gart.ptr);
558+
adev->vce.gpu_addr = adev->gmc.gart_start + VCE_V1_0_GART_ADDR_START;
559+
if (adev->vce.gpu_addr > max_vcpu_bo_addr)
560+
return -EINVAL;
561+
562+
return 0;
563+
}
564+
516565
static int vce_v1_0_sw_init(struct amdgpu_ip_block *ip_block)
517566
{
518567
struct amdgpu_device *adev = ip_block->adev;
@@ -532,6 +581,9 @@ static int vce_v1_0_sw_init(struct amdgpu_ip_block *ip_block)
532581
if (r)
533582
return r;
534583
r = vce_v1_0_load_fw_signature(adev);
584+
if (r)
585+
return r;
586+
r = vce_v1_0_ensure_vcpu_bo_32bit_addr(adev);
535587
if (r)
536588
return r;
537589

@@ -647,6 +699,9 @@ static int vce_v1_0_resume(struct amdgpu_ip_block *ip_block)
647699
if (r)
648700
return r;
649701
r = vce_v1_0_load_fw_signature(adev);
702+
if (r)
703+
return r;
704+
r = vce_v1_0_ensure_vcpu_bo_32bit_addr(adev);
650705
if (r)
651706
return r;
652707

0 commit comments

Comments
 (0)