Skip to content

Commit 1f1c24d

Browse files
committed
Merge tag 'amd-drm-next-6.3-2023-01-13' of https://gitlab.freedesktop.org/agd5f/linux into drm-next
amd-drm-next-6.3-2023-01-13:

amdgpu:
- Fix possible segfault in failure case
- Rework FW requests to happen in early_init for all IPs so that we don't lose the sbios console if FW is missing
- PSR fixes
- Misc cleanups
- Unload fix
- SMU13 fixes

amdkfd:
- Fix for cleared VRAM BOs
- Fix cleanup if GPUVM creation fails
- Memory accounting fix
- Use resource_size rather than open coding it
- GC11 mGPU fix

radeon:
- Fix memory leak on shutdown

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230113225911.7776-1-alexander.deucher@amd.com
2 parents 45be204 + 0c2dece commit 1f1c24d

67 files changed

Lines changed: 997 additions & 1591 deletions

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -97,7 +97,7 @@ struct amdgpu_amdkfd_fence {
9797

9898
struct amdgpu_kfd_dev {
9999
struct kfd_dev *dev;
100-
uint64_t vram_used;
100+
int64_t vram_used;
101101
uint64_t vram_used_aligned;
102102
bool init_complete;
103103
struct work_struct reset_work;
@@ -271,9 +271,9 @@ int amdgpu_amdkfd_get_pcie_bandwidth_mbytes(struct amdgpu_device *adev, bool is_
271271
((struct drm_file *)(drm_priv))->driver_priv)->vm)
272272

273273
int amdgpu_amdkfd_gpuvm_set_vm_pasid(struct amdgpu_device *adev,
274-
struct file *filp, u32 pasid);
274+
struct amdgpu_vm *avm, u32 pasid);
275275
int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev,
276-
struct file *filp,
276+
struct amdgpu_vm *avm,
277277
void **process_info,
278278
struct dma_fence **ef);
279279
void amdgpu_amdkfd_gpuvm_release_process_vm(struct amdgpu_device *adev,

drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c

Lines changed: 10 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -1431,18 +1431,11 @@ static void amdgpu_amdkfd_gpuvm_unpin_bo(struct amdgpu_bo *bo)
14311431
}
14321432

14331433
int amdgpu_amdkfd_gpuvm_set_vm_pasid(struct amdgpu_device *adev,
1434-
struct file *filp, u32 pasid)
1434+
struct amdgpu_vm *avm, u32 pasid)
14351435

14361436
{
1437-
struct amdgpu_fpriv *drv_priv;
1438-
struct amdgpu_vm *avm;
14391437
int ret;
14401438

1441-
ret = amdgpu_file_to_fpriv(filp, &drv_priv);
1442-
if (ret)
1443-
return ret;
1444-
avm = &drv_priv->vm;
1445-
14461439
/* Free the original amdgpu allocated pasid,
14471440
* will be replaced with kfd allocated pasid.
14481441
*/
@@ -1459,19 +1452,12 @@ int amdgpu_amdkfd_gpuvm_set_vm_pasid(struct amdgpu_device *adev,
14591452
}
14601453

14611454
int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev,
1462-
struct file *filp,
1455+
struct amdgpu_vm *avm,
14631456
void **process_info,
14641457
struct dma_fence **ef)
14651458
{
1466-
struct amdgpu_fpriv *drv_priv;
1467-
struct amdgpu_vm *avm;
14681459
int ret;
14691460

1470-
ret = amdgpu_file_to_fpriv(filp, &drv_priv);
1471-
if (ret)
1472-
return ret;
1473-
avm = &drv_priv->vm;
1474-
14751461
/* Already a compute VM? */
14761462
if (avm->process_info)
14771463
return -EINVAL;
@@ -1613,6 +1599,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
16131599
struct amdgpu_bo *bo;
16141600
struct drm_gem_object *gobj = NULL;
16151601
u32 domain, alloc_domain;
1602+
uint64_t aligned_size;
16161603
u64 alloc_flags;
16171604
int ret;
16181605

@@ -1668,22 +1655,23 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
16681655
* the memory.
16691656
*/
16701657
if ((*mem)->aql_queue)
1671-
size = size >> 1;
1658+
size >>= 1;
1659+
aligned_size = PAGE_ALIGN(size);
16721660

16731661
(*mem)->alloc_flags = flags;
16741662

16751663
amdgpu_sync_create(&(*mem)->sync);
16761664

1677-
ret = amdgpu_amdkfd_reserve_mem_limit(adev, size, flags);
1665+
ret = amdgpu_amdkfd_reserve_mem_limit(adev, aligned_size, flags);
16781666
if (ret) {
16791667
pr_debug("Insufficient memory\n");
16801668
goto err_reserve_limit;
16811669
}
16821670

16831671
pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s\n",
1684-
va, size, domain_string(alloc_domain));
1672+
va, (*mem)->aql_queue ? size << 1 : size, domain_string(alloc_domain));
16851673

1686-
ret = amdgpu_gem_object_create(adev, size, 1, alloc_domain, alloc_flags,
1674+
ret = amdgpu_gem_object_create(adev, aligned_size, 1, alloc_domain, alloc_flags,
16871675
bo_type, NULL, &gobj);
16881676
if (ret) {
16891677
pr_debug("Failed to create BO on domain %s. ret %d\n",
@@ -1740,7 +1728,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
17401728
/* Don't unreserve system mem limit twice */
17411729
goto err_reserve_limit;
17421730
err_bo_create:
1743-
amdgpu_amdkfd_unreserve_mem_limit(adev, size, flags);
1731+
amdgpu_amdkfd_unreserve_mem_limit(adev, aligned_size, flags);
17441732
err_reserve_limit:
17451733
mutex_destroy(&(*mem)->lock);
17461734
if (gobj)
@@ -2100,7 +2088,7 @@ int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_device *adev, struct amdgpu_b
21002088
}
21012089

21022090
amdgpu_amdkfd_remove_eviction_fence(
2103-
bo, bo->kfd_bo->process_info->eviction_fence);
2091+
bo, bo->vm_bo->vm->process_info->eviction_fence);
21042092

21052093
amdgpu_bo_unreserve(bo);
21062094

drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c

Lines changed: 2 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -411,17 +411,10 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
411411
return -EINVAL;
412412
}
413413

414-
err = request_firmware(&adev->pm.fw, fw_name, adev->dev);
415-
if (err) {
416-
DRM_ERROR("Failed to request firmware\n");
417-
return err;
418-
}
419-
420-
err = amdgpu_ucode_validate(adev->pm.fw);
414+
err = amdgpu_ucode_request(adev, &adev->pm.fw, fw_name);
421415
if (err) {
422416
DRM_ERROR("Failed to load firmware \"%s\"", fw_name);
423-
release_firmware(adev->pm.fw);
424-
adev->pm.fw = NULL;
417+
amdgpu_ucode_release(&adev->pm.fw);
425418
return err;
426419
}
427420

drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

Lines changed: 11 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -36,6 +36,7 @@
3636
#include <generated/utsrelease.h>
3737
#include <linux/pci-p2pdma.h>
3838

39+
#include <drm/drm_aperture.h>
3940
#include <drm/drm_atomic_helper.h>
4041
#include <drm/drm_fb_helper.h>
4142
#include <drm/drm_probe_helper.h>
@@ -90,6 +91,8 @@ MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
9091
#define AMDGPU_MAX_RETRY_LIMIT 2
9192
#define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL)
9293

94+
static const struct drm_driver amdgpu_kms_driver;
95+
9396
const char *amdgpu_asic_name[] = {
9497
"TAHITI",
9598
"PITCAIRN",
@@ -1982,17 +1985,10 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
19821985
}
19831986

19841987
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
1985-
err = request_firmware(&adev->firmware.gpu_info_fw, fw_name, adev->dev);
1986-
if (err) {
1987-
dev_err(adev->dev,
1988-
"Failed to load gpu_info firmware \"%s\"\n",
1989-
fw_name);
1990-
goto out;
1991-
}
1992-
err = amdgpu_ucode_validate(adev->firmware.gpu_info_fw);
1988+
err = amdgpu_ucode_request(adev, &adev->firmware.gpu_info_fw, fw_name);
19931989
if (err) {
19941990
dev_err(adev->dev,
1995-
"Failed to validate gpu_info firmware \"%s\"\n",
1991+
"Failed to get gpu_info firmware \"%s\"\n",
19961992
fw_name);
19971993
goto out;
19981994
}
@@ -3688,6 +3684,11 @@ int amdgpu_device_init(struct amdgpu_device *adev,
36883684
if (r)
36893685
return r;
36903686

3687+
/* Get rid of things like offb */
3688+
r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, &amdgpu_kms_driver);
3689+
if (r)
3690+
return r;
3691+
36913692
/* Enable TMZ based on IP_VERSION */
36923693
amdgpu_gmc_tmz_set(adev);
36933694

@@ -4023,8 +4024,7 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev)
40234024

40244025
amdgpu_fence_driver_sw_fini(adev);
40254026
amdgpu_device_ip_fini(adev);
4026-
release_firmware(adev->firmware.gpu_info_fw);
4027-
adev->firmware.gpu_info_fw = NULL;
4027+
amdgpu_ucode_release(&adev->firmware.gpu_info_fw);
40284028
adev->accel_working = false;
40294029
dma_fence_put(rcu_dereference_protected(adev->gang_submit, true));
40304030

drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c

Lines changed: 0 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,6 @@
2323
*/
2424

2525
#include <drm/amdgpu_drm.h>
26-
#include <drm/drm_aperture.h>
2726
#include <drm/drm_drv.h>
2827
#include <drm/drm_fbdev_generic.h>
2928
#include <drm/drm_gem.h>
@@ -2124,11 +2123,6 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
21242123
}
21252124
#endif
21262125

2127-
/* Get rid of things like offb */
2128-
ret = drm_aperture_remove_conflicting_pci_framebuffers(pdev, &amdgpu_kms_driver);
2129-
if (ret)
2130-
return ret;
2131-
21322126
adev = devm_drm_dev_alloc(&pdev->dev, &amdgpu_kms_driver, typeof(*adev), ddev);
21332127
if (IS_ERR(adev))
21342128
return PTR_ERR(adev);

drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -62,10 +62,10 @@ static vm_fault_t amdgpu_gem_fault(struct vm_fault *vmf)
6262
goto unlock;
6363
}
6464

65-
ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot,
66-
TTM_BO_VM_NUM_PREFAULT);
65+
ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot,
66+
TTM_BO_VM_NUM_PREFAULT);
6767

68-
drm_dev_exit(idx);
68+
drm_dev_exit(idx);
6969
} else {
7070
ret = ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot);
7171
}

drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c

Lines changed: 59 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,8 @@
2121
*
2222
*/
2323

24+
#include <linux/firmware.h>
25+
2426
#include "amdgpu_mes.h"
2527
#include "amdgpu.h"
2628
#include "soc15_common.h"
@@ -1423,3 +1425,60 @@ int amdgpu_mes_self_test(struct amdgpu_device *adev)
14231425
kfree(vm);
14241426
return 0;
14251427
}
1428+
1429+
int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe)
1430+
{
1431+
const struct mes_firmware_header_v1_0 *mes_hdr;
1432+
struct amdgpu_firmware_info *info;
1433+
char ucode_prefix[30];
1434+
char fw_name[40];
1435+
int r;
1436+
1437+
amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
1438+
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mes%s.bin",
1439+
ucode_prefix,
1440+
pipe == AMDGPU_MES_SCHED_PIPE ? "" : "1");
1441+
r = amdgpu_ucode_request(adev, &adev->mes.fw[pipe], fw_name);
1442+
if (r)
1443+
goto out;
1444+
1445+
mes_hdr = (const struct mes_firmware_header_v1_0 *)
1446+
adev->mes.fw[pipe]->data;
1447+
adev->mes.uc_start_addr[pipe] =
1448+
le32_to_cpu(mes_hdr->mes_uc_start_addr_lo) |
1449+
((uint64_t)(le32_to_cpu(mes_hdr->mes_uc_start_addr_hi)) << 32);
1450+
adev->mes.data_start_addr[pipe] =
1451+
le32_to_cpu(mes_hdr->mes_data_start_addr_lo) |
1452+
((uint64_t)(le32_to_cpu(mes_hdr->mes_data_start_addr_hi)) << 32);
1453+
1454+
if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1455+
int ucode, ucode_data;
1456+
1457+
if (pipe == AMDGPU_MES_SCHED_PIPE) {
1458+
ucode = AMDGPU_UCODE_ID_CP_MES;
1459+
ucode_data = AMDGPU_UCODE_ID_CP_MES_DATA;
1460+
} else {
1461+
ucode = AMDGPU_UCODE_ID_CP_MES1;
1462+
ucode_data = AMDGPU_UCODE_ID_CP_MES1_DATA;
1463+
}
1464+
1465+
info = &adev->firmware.ucode[ucode];
1466+
info->ucode_id = ucode;
1467+
info->fw = adev->mes.fw[pipe];
1468+
adev->firmware.fw_size +=
1469+
ALIGN(le32_to_cpu(mes_hdr->mes_ucode_size_bytes),
1470+
PAGE_SIZE);
1471+
1472+
info = &adev->firmware.ucode[ucode_data];
1473+
info->ucode_id = ucode_data;
1474+
info->fw = adev->mes.fw[pipe];
1475+
adev->firmware.fw_size +=
1476+
ALIGN(le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes),
1477+
PAGE_SIZE);
1478+
}
1479+
1480+
return 0;
1481+
out:
1482+
amdgpu_ucode_release(&adev->mes.fw[pipe]);
1483+
return r;
1484+
}

drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -306,6 +306,7 @@ struct amdgpu_mes_funcs {
306306

307307
int amdgpu_mes_ctx_get_offs(struct amdgpu_ring *ring, unsigned int id_offs);
308308

309+
int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe);
309310
int amdgpu_mes_init(struct amdgpu_device *adev);
310311
void amdgpu_mes_fini(struct amdgpu_device *adev);
311312

drivers/gpu/drm/amd/amdgpu/amdgpu_object.c

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -470,8 +470,9 @@ static bool amdgpu_bo_validate_size(struct amdgpu_device *adev,
470470
return true;
471471

472472
fail:
473-
DRM_DEBUG("BO size %lu > total memory in domain: %llu\n", size,
474-
man->size);
473+
if (man)
474+
DRM_DEBUG("BO size %lu > total memory in domain: %llu\n", size,
475+
man->size);
475476
return false;
476477
}
477478

0 commit comments

Comments
 (0)