Skip to content

Commit 955ad0c

Browse files
committed
Merge tag 'amd-drm-next-5.18-2022-03-09' of https://gitlab.freedesktop.org/agd5f/linux into drm-next
amd-drm-next-5.18-2022-03-09:

amdgpu:
- Misc code cleanups
- Misc display fixes
- PSR display fixes
- More RAS cleanup
- Hotplug fix
- Bump minor version for hotplug tests
- SR-IOV fixes
- GC 10.3.7 updates
- Remove some firmwares which are no longer used
- Mode2 reset refactor
- Aldebaran fixes
- Add VCN fwlog feature for VCN debugging
- CS code cleanup
- Fix clang warning
- Fix CS clean up rebase breakage

amdkfd:
- SVM fixes
- SMI event fixes and cleanups
- vmid_pasid mapping fix for gfx10.3

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20220309224439.2178877-1-alexander.deucher@amd.com
2 parents 482d7b5 + 96a2f0f commit 955ad0c

84 files changed

Lines changed: 983 additions & 671 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

drivers/gpu/drm/amd/amdgpu/aldebaran.c

Lines changed: 50 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,17 @@
3131
#include "amdgpu_psp.h"
3232
#include "amdgpu_xgmi.h"
3333

34+
static bool aldebaran_is_mode2_default(struct amdgpu_reset_control *reset_ctl)
35+
{
36+
struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
37+
38+
if ((adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 2) &&
39+
adev->gmc.xgmi.connected_to_cpu))
40+
return true;
41+
42+
return false;
43+
}
44+
3445
static struct amdgpu_reset_handler *
3546
aldebaran_get_reset_handler(struct amdgpu_reset_control *reset_ctl,
3647
struct amdgpu_reset_context *reset_context)
@@ -48,7 +59,7 @@ aldebaran_get_reset_handler(struct amdgpu_reset_control *reset_ctl,
4859
}
4960
}
5061

51-
if (adev->gmc.xgmi.connected_to_cpu) {
62+
if (aldebaran_is_mode2_default(reset_ctl)) {
5263
list_for_each_entry(handler, &reset_ctl->reset_handlers,
5364
handler_list) {
5465
if (handler->reset_method == AMD_RESET_METHOD_MODE2) {
@@ -136,27 +147,39 @@ static int
136147
aldebaran_mode2_perform_reset(struct amdgpu_reset_control *reset_ctl,
137148
struct amdgpu_reset_context *reset_context)
138149
{
139-
struct amdgpu_device *tmp_adev = NULL;
140150
struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
151+
struct amdgpu_device *tmp_adev = NULL;
152+
struct list_head reset_device_list;
141153
int r = 0;
142154

143155
dev_dbg(adev->dev, "aldebaran perform hw reset\n");
144-
if (reset_context->hive == NULL) {
156+
if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 2) &&
157+
reset_context->hive == NULL) {
145158
/* Wrong context, return error */
146159
return -EINVAL;
147160
}
148161

149-
list_for_each_entry(tmp_adev, &reset_context->hive->device_list,
150-
gmc.xgmi.head) {
162+
INIT_LIST_HEAD(&reset_device_list);
163+
if (reset_context->hive) {
164+
list_for_each_entry (tmp_adev,
165+
&reset_context->hive->device_list,
166+
gmc.xgmi.head)
167+
list_add_tail(&tmp_adev->reset_list,
168+
&reset_device_list);
169+
} else {
170+
list_add_tail(&reset_context->reset_req_dev->reset_list,
171+
&reset_device_list);
172+
}
173+
174+
list_for_each_entry (tmp_adev, &reset_device_list, reset_list) {
151175
mutex_lock(&tmp_adev->reset_cntl->reset_lock);
152176
tmp_adev->reset_cntl->active_reset = AMD_RESET_METHOD_MODE2;
153177
}
154178
/*
155179
* Mode2 reset doesn't need any sync between nodes in XGMI hive, instead launch
156180
* them together so that they can be completed asynchronously on multiple nodes
157181
*/
158-
list_for_each_entry(tmp_adev, &reset_context->hive->device_list,
159-
gmc.xgmi.head) {
182+
list_for_each_entry (tmp_adev, &reset_device_list, reset_list) {
160183
/* For XGMI run all resets in parallel to speed up the process */
161184
if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
162185
if (!queue_work(system_unbound_wq,
@@ -174,9 +197,7 @@ aldebaran_mode2_perform_reset(struct amdgpu_reset_control *reset_ctl,
174197

175198
/* For XGMI wait for all resets to complete before proceed */
176199
if (!r) {
177-
list_for_each_entry(tmp_adev,
178-
&reset_context->hive->device_list,
179-
gmc.xgmi.head) {
200+
list_for_each_entry (tmp_adev, &reset_device_list, reset_list) {
180201
if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
181202
flush_work(&tmp_adev->reset_cntl->reset_work);
182203
r = tmp_adev->asic_reset_res;
@@ -186,8 +207,7 @@ aldebaran_mode2_perform_reset(struct amdgpu_reset_control *reset_ctl,
186207
}
187208
}
188209

189-
list_for_each_entry(tmp_adev, &reset_context->hive->device_list,
190-
gmc.xgmi.head) {
210+
list_for_each_entry (tmp_adev, &reset_device_list, reset_list) {
191211
mutex_unlock(&tmp_adev->reset_cntl->reset_lock);
192212
tmp_adev->reset_cntl->active_reset = AMD_RESET_METHOD_NONE;
193213
}
@@ -319,16 +339,30 @@ static int
319339
aldebaran_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl,
320340
struct amdgpu_reset_context *reset_context)
321341
{
322-
int r;
323342
struct amdgpu_device *tmp_adev = NULL;
343+
struct list_head reset_device_list;
344+
int r;
324345

325-
if (reset_context->hive == NULL) {
346+
if (reset_context->reset_req_dev->ip_versions[MP1_HWIP][0] ==
347+
IP_VERSION(13, 0, 2) &&
348+
reset_context->hive == NULL) {
326349
/* Wrong context, return error */
327350
return -EINVAL;
328351
}
329352

330-
list_for_each_entry(tmp_adev, &reset_context->hive->device_list,
331-
gmc.xgmi.head) {
353+
INIT_LIST_HEAD(&reset_device_list);
354+
if (reset_context->hive) {
355+
list_for_each_entry (tmp_adev,
356+
&reset_context->hive->device_list,
357+
gmc.xgmi.head)
358+
list_add_tail(&tmp_adev->reset_list,
359+
&reset_device_list);
360+
} else {
361+
list_add_tail(&reset_context->reset_req_dev->reset_list,
362+
&reset_device_list);
363+
}
364+
365+
list_for_each_entry (tmp_adev, &reset_device_list, reset_list) {
332366
dev_info(tmp_adev->dev,
333367
"GPU reset succeeded, trying to resume\n");
334368
r = aldebaran_mode2_restore_ip(tmp_adev);

drivers/gpu/drm/amd/amdgpu/amdgpu.h

Lines changed: 3 additions & 95 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,6 @@
6060
#include <drm/amdgpu_drm.h>
6161
#include <drm/drm_gem.h>
6262
#include <drm/drm_ioctl.h>
63-
#include <drm/gpu_scheduler.h>
6463

6564
#include <kgd_kfd_interface.h>
6665
#include "dm_pp_interface.h"
@@ -233,6 +232,9 @@ extern int amdgpu_cik_support;
233232
#endif
234233
extern int amdgpu_num_kcq;
235234

235+
#define AMDGPU_VCNFW_LOG_SIZE (32 * 1024)
236+
extern int amdgpu_vcnfw_log;
237+
236238
#define AMDGPU_VM_MAX_NUM_CTX 4096
237239
#define AMDGPU_SG_THRESHOLD (256*1024*1024)
238240
#define AMDGPU_DEFAULT_GTT_SIZE_MB 3072ULL /* 3GB by default */
@@ -274,9 +276,6 @@ extern int amdgpu_num_kcq;
274276
#define AMDGPU_SMARTSHIFT_MIN_BIAS (-100)
275277

276278
struct amdgpu_device;
277-
struct amdgpu_ib;
278-
struct amdgpu_cs_parser;
279-
struct amdgpu_job;
280279
struct amdgpu_irq_src;
281280
struct amdgpu_fpriv;
282281
struct amdgpu_bo_va_mapping;
@@ -464,20 +463,6 @@ struct amdgpu_flip_work {
464463
};
465464

466465

467-
/*
468-
* CP & rings.
469-
*/
470-
471-
struct amdgpu_ib {
472-
struct amdgpu_sa_bo *sa_bo;
473-
uint32_t length_dw;
474-
uint64_t gpu_addr;
475-
uint32_t *ptr;
476-
uint32_t flags;
477-
};
478-
479-
extern const struct drm_sched_backend_ops amdgpu_sched_ops;
480-
481466
/*
482467
* file private structure
483468
*/
@@ -493,79 +478,6 @@ struct amdgpu_fpriv {
493478

494479
int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv);
495480

496-
int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
497-
unsigned size,
498-
enum amdgpu_ib_pool_type pool,
499-
struct amdgpu_ib *ib);
500-
void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib,
501-
struct dma_fence *f);
502-
int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
503-
struct amdgpu_ib *ibs, struct amdgpu_job *job,
504-
struct dma_fence **f);
505-
int amdgpu_ib_pool_init(struct amdgpu_device *adev);
506-
void amdgpu_ib_pool_fini(struct amdgpu_device *adev);
507-
int amdgpu_ib_ring_tests(struct amdgpu_device *adev);
508-
509-
/*
510-
* CS.
511-
*/
512-
struct amdgpu_cs_chunk {
513-
uint32_t chunk_id;
514-
uint32_t length_dw;
515-
void *kdata;
516-
};
517-
518-
struct amdgpu_cs_post_dep {
519-
struct drm_syncobj *syncobj;
520-
struct dma_fence_chain *chain;
521-
u64 point;
522-
};
523-
524-
struct amdgpu_cs_parser {
525-
struct amdgpu_device *adev;
526-
struct drm_file *filp;
527-
struct amdgpu_ctx *ctx;
528-
529-
/* chunks */
530-
unsigned nchunks;
531-
struct amdgpu_cs_chunk *chunks;
532-
533-
/* scheduler job object */
534-
struct amdgpu_job *job;
535-
struct drm_sched_entity *entity;
536-
537-
/* buffer objects */
538-
struct ww_acquire_ctx ticket;
539-
struct amdgpu_bo_list *bo_list;
540-
struct amdgpu_mn *mn;
541-
struct amdgpu_bo_list_entry vm_pd;
542-
struct list_head validated;
543-
struct dma_fence *fence;
544-
uint64_t bytes_moved_threshold;
545-
uint64_t bytes_moved_vis_threshold;
546-
uint64_t bytes_moved;
547-
uint64_t bytes_moved_vis;
548-
549-
/* user fence */
550-
struct amdgpu_bo_list_entry uf_entry;
551-
552-
unsigned num_post_deps;
553-
struct amdgpu_cs_post_dep *post_deps;
554-
};
555-
556-
static inline u32 amdgpu_get_ib_value(struct amdgpu_cs_parser *p,
557-
uint32_t ib_idx, int idx)
558-
{
559-
return p->job->ibs[ib_idx].ptr[idx];
560-
}
561-
562-
static inline void amdgpu_set_ib_value(struct amdgpu_cs_parser *p,
563-
uint32_t ib_idx, int idx,
564-
uint32_t value)
565-
{
566-
p->job->ibs[ib_idx].ptr[idx] = value;
567-
}
568-
569481
/*
570482
* Writeback
571483
*/
@@ -1436,10 +1348,6 @@ static inline bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev) { retu
14361348
static inline bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev) { return false; }
14371349
#endif
14381350

1439-
int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
1440-
uint64_t addr, struct amdgpu_bo **bo,
1441-
struct amdgpu_bo_va_mapping **mapping);
1442-
14431351
#if defined(CONFIG_DRM_AMD_DC)
14441352
int amdgpu_dm_display_resume(struct amdgpu_device *adev );
14451353
#else

drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@
2626
#include "gc/gc_10_3_0_sh_mask.h"
2727
#include "oss/osssys_5_0_0_offset.h"
2828
#include "oss/osssys_5_0_0_sh_mask.h"
29+
#include "athub/athub_2_1_0_offset.h"
30+
#include "athub/athub_2_1_0_sh_mask.h"
2931
#include "soc15_common.h"
3032
#include "v10_structs.h"
3133
#include "nv.h"
@@ -606,6 +608,18 @@ static int wave_control_execute_v10_3(struct amdgpu_device *adev,
606608
return 0;
607609
}
608610

611+
static bool get_atc_vmid_pasid_mapping_info_v10_3(struct amdgpu_device *adev,
612+
uint8_t vmid, uint16_t *p_pasid)
613+
{
614+
uint32_t value;
615+
616+
value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
617+
+ vmid);
618+
*p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
619+
620+
return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
621+
}
622+
609623
static void set_vm_context_page_table_base_v10_3(struct amdgpu_device *adev,
610624
uint32_t vmid, uint64_t page_table_base)
611625
{
@@ -788,7 +802,7 @@ const struct kfd2kgd_calls gfx_v10_3_kfd2kgd = {
788802
.hqd_destroy = hqd_destroy_v10_3,
789803
.hqd_sdma_destroy = hqd_sdma_destroy_v10_3,
790804
.wave_control_execute = wave_control_execute_v10_3,
791-
.get_atc_vmid_pasid_mapping_info = NULL,
805+
.get_atc_vmid_pasid_mapping_info = get_atc_vmid_pasid_mapping_info_v10_3,
792806
.set_vm_context_page_table_base = set_vm_context_page_table_base_v10_3,
793807
.program_trap_handler_settings = program_trap_handler_settings_v10_3,
794808
#if 0

drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232

3333
#include <drm/amdgpu_drm.h>
3434
#include <drm/drm_syncobj.h>
35+
#include "amdgpu_cs.h"
3536
#include "amdgpu.h"
3637
#include "amdgpu_trace.h"
3738
#include "amdgpu_gmc.h"
@@ -782,12 +783,12 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
782783
memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
783784
amdgpu_bo_kunmap(aobj);
784785

785-
r = amdgpu_ring_parse_cs(ring, p, j);
786+
r = amdgpu_ring_parse_cs(ring, p, p->job, ib);
786787
if (r)
787788
return r;
788789
} else {
789790
ib->ptr = (uint32_t *)kptr;
790-
r = amdgpu_ring_patch_cs_in_place(ring, p, j);
791+
r = amdgpu_ring_patch_cs_in_place(ring, p, p->job, ib);
791792
amdgpu_bo_kunmap(aobj);
792793
if (r)
793794
return r;

0 commit comments

Comments (0)