Skip to content

Commit e3d0870

Browse files
amd-yahuilalexdeucher
authored andcommitted
drm/amdkfd: Support chain runlists of XNACK+/XNACK-
If the MEC firmware supports chaining runlists of XNACK+/XNACK- processes, set the SQ_CONFIG1 chicken bit and SET_RESOURCES bit 28. When the MEC/HWS supports it, KFD checks whether XNACK+ and XNACK- processes are mixed in the runlist. If they are, it enters over-subscription. Signed-off-by: Amber Lin <Amber.Lin@amd.com> Reviewed-by: Philip Yang <Philip.Yang@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
1 parent 9c16e15 commit e3d0870

5 files changed

Lines changed: 84 additions & 12 deletions

File tree

drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,9 @@
6262
*/
6363
#define AMDGPU_GMC_FAULT_TIMEOUT 5000ULL
6464

65+
/* XNACK flags */
66+
#define AMDGPU_GMC_XNACK_FLAG_CHAIN BIT(0)
67+
6568
struct firmware;
6669

6770
enum amdgpu_memory_partition {
@@ -301,6 +304,7 @@ struct amdgpu_gmc {
301304
struct amdgpu_xgmi xgmi;
302305
struct amdgpu_irq_src ecc_irq;
303306
int noretry;
307+
uint32_t xnack_flags;
304308

305309
uint32_t vmid0_page_table_block_size;
306310
uint32_t vmid0_page_table_depth;

drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1273,6 +1273,22 @@ static void gfx_v9_4_3_xcc_init_gds_vmid(struct amdgpu_device *adev, int xcc_id)
12731273
}
12741274
}
12751275

1276+
/* For ASICs that needs xnack chain and MEC version supports, set SG_CONFIG1
1277+
* DISABLE_XNACK_CHECK_IN_RETRY_DISABLE bit and inform KFD to set xnack_chain
1278+
* bit in SET_RESOURCES
1279+
*/
1280+
static void gfx_v9_4_3_xcc_init_sq(struct amdgpu_device *adev, int xcc_id)
1281+
{
1282+
uint32_t data;
1283+
1284+
if (!(adev->gmc.xnack_flags & AMDGPU_GMC_XNACK_FLAG_CHAIN))
1285+
return;
1286+
1287+
data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regSQ_CONFIG1);
1288+
data = REG_SET_FIELD(data, SQ_CONFIG1, DISABLE_XNACK_CHECK_IN_RETRY_DISABLE, 1);
1289+
WREG32_SOC15(GC, xcc_id, regSQ_CONFIG1, data);
1290+
}
1291+
12761292
static void gfx_v9_4_3_xcc_constants_init(struct amdgpu_device *adev,
12771293
int xcc_id)
12781294
{
@@ -1317,6 +1333,7 @@ static void gfx_v9_4_3_xcc_constants_init(struct amdgpu_device *adev,
13171333

13181334
gfx_v9_4_3_xcc_init_compute_vmid(adev, xcc_id);
13191335
gfx_v9_4_3_xcc_init_gds_vmid(adev, xcc_id);
1336+
gfx_v9_4_3_xcc_init_sq(adev, xcc_id);
13201337
}
13211338

13221339
static void gfx_v9_4_3_constants_init(struct amdgpu_device *adev)
@@ -1329,6 +1346,20 @@ static void gfx_v9_4_3_constants_init(struct amdgpu_device *adev)
13291346
adev->gfx.config.db_debug2 =
13301347
RREG32_SOC15(GC, GET_INST(GC, 0), regDB_DEBUG2);
13311348

1349+
switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
1350+
/* ToDo: GC 9.4.4 */
1351+
case IP_VERSION(9, 4, 3):
1352+
if (adev->gfx.mec_fw_version >= 184)
1353+
adev->gmc.xnack_flags |= AMDGPU_GMC_XNACK_FLAG_CHAIN;
1354+
break;
1355+
case IP_VERSION(9, 5, 0):
1356+
if (adev->gfx.mec_fw_version >= 23)
1357+
adev->gmc.xnack_flags |= AMDGPU_GMC_XNACK_FLAG_CHAIN;
1358+
break;
1359+
default:
1360+
break;
1361+
}
1362+
13321363
for (i = 0; i < num_xcc; i++)
13331364
gfx_v9_4_3_xcc_constants_init(adev, i);
13341365
}

drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c

Lines changed: 45 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
#define OVER_SUBSCRIPTION_PROCESS_COUNT (1 << 0)
3232
#define OVER_SUBSCRIPTION_COMPUTE_QUEUE_COUNT (1 << 1)
3333
#define OVER_SUBSCRIPTION_GWS_QUEUE_COUNT (1 << 2)
34+
#define OVER_SUBSCRIPTION_XNACK_CONFLICT (1 << 3)
3435

3536
static inline void inc_wptr(unsigned int *wptr, unsigned int increment_bytes,
3637
unsigned int buffer_size_bytes)
@@ -44,7 +45,8 @@ static inline void inc_wptr(unsigned int *wptr, unsigned int increment_bytes,
4445

4546
static void pm_calc_rlib_size(struct packet_manager *pm,
4647
unsigned int *rlib_size,
47-
int *over_subscription)
48+
int *over_subscription,
49+
int xnack_conflict)
4850
{
4951
unsigned int process_count, queue_count, compute_queue_count, gws_queue_count;
5052
unsigned int map_queue_size;
@@ -73,6 +75,8 @@ static void pm_calc_rlib_size(struct packet_manager *pm,
7375
*over_subscription |= OVER_SUBSCRIPTION_COMPUTE_QUEUE_COUNT;
7476
if (gws_queue_count > 1)
7577
*over_subscription |= OVER_SUBSCRIPTION_GWS_QUEUE_COUNT;
78+
if (xnack_conflict && (node->adev->gmc.xnack_flags & AMDGPU_GMC_XNACK_FLAG_CHAIN))
79+
*over_subscription |= OVER_SUBSCRIPTION_XNACK_CONFLICT;
7680

7781
if (*over_subscription)
7882
dev_dbg(dev, "Over subscribed runlist\n");
@@ -96,7 +100,8 @@ static int pm_allocate_runlist_ib(struct packet_manager *pm,
96100
unsigned int **rl_buffer,
97101
uint64_t *rl_gpu_buffer,
98102
unsigned int *rl_buffer_size,
99-
int *is_over_subscription)
103+
int *is_over_subscription,
104+
int xnack_conflict)
100105
{
101106
struct kfd_node *node = pm->dqm->dev;
102107
struct device *dev = node->adev->dev;
@@ -105,7 +110,8 @@ static int pm_allocate_runlist_ib(struct packet_manager *pm,
105110
if (WARN_ON(pm->allocated))
106111
return -EINVAL;
107112

108-
pm_calc_rlib_size(pm, rl_buffer_size, is_over_subscription);
113+
pm_calc_rlib_size(pm, rl_buffer_size, is_over_subscription,
114+
xnack_conflict);
109115

110116
mutex_lock(&pm->lock);
111117

@@ -142,11 +148,27 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
142148
struct queue *q;
143149
struct kernel_queue *kq;
144150
int is_over_subscription;
151+
int xnack_enabled = -1;
152+
bool xnack_conflict = 0;
145153

146154
rl_wptr = retval = processes_mapped = 0;
147155

156+
/* Check if processes set different xnack modes */
157+
list_for_each_entry(cur, queues, list) {
158+
qpd = cur->qpd;
159+
if (xnack_enabled < 0)
160+
/* First process */
161+
xnack_enabled = qpd->pqm->process->xnack_enabled;
162+
else if (qpd->pqm->process->xnack_enabled != xnack_enabled) {
163+
/* Found a process with a different xnack mode */
164+
xnack_conflict = 1;
165+
break;
166+
}
167+
}
168+
148169
retval = pm_allocate_runlist_ib(pm, &rl_buffer, rl_gpu_addr,
149-
&alloc_size_bytes, &is_over_subscription);
170+
&alloc_size_bytes, &is_over_subscription,
171+
xnack_conflict);
150172
if (retval)
151173
return retval;
152174

@@ -156,9 +178,13 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
156178
dev_dbg(dev, "Building runlist ib process count: %d queues count %d\n",
157179
pm->dqm->processes_count, pm->dqm->active_queue_count);
158180

181+
build_runlist_ib:
159182
/* build the run list ib packet */
160183
list_for_each_entry(cur, queues, list) {
161184
qpd = cur->qpd;
185+
/* group processes with the same xnack mode together */
186+
if (qpd->pqm->process->xnack_enabled != xnack_enabled)
187+
continue;
162188
/* build map process packet */
163189
if (processes_mapped >= pm->dqm->processes_count) {
164190
dev_dbg(dev, "Not enough space left in runlist IB\n");
@@ -215,18 +241,26 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
215241
alloc_size_bytes);
216242
}
217243
}
244+
if (xnack_conflict) {
245+
/* pick up processes with the other xnack mode */
246+
xnack_enabled = !xnack_enabled;
247+
xnack_conflict = 0;
248+
goto build_runlist_ib;
249+
}
218250

219251
dev_dbg(dev, "Finished map process and queues to runlist\n");
220252

221253
if (is_over_subscription) {
222254
if (!pm->is_over_subscription)
223-
dev_warn(dev, "Runlist is getting oversubscribed due to%s%s%s. Expect reduced ROCm performance.\n",
224-
is_over_subscription & OVER_SUBSCRIPTION_PROCESS_COUNT ?
225-
" too many processes." : "",
226-
is_over_subscription & OVER_SUBSCRIPTION_COMPUTE_QUEUE_COUNT ?
227-
" too many queues." : "",
228-
is_over_subscription & OVER_SUBSCRIPTION_GWS_QUEUE_COUNT ?
229-
" multiple processes using cooperative launch." : "");
255+
dev_warn(dev, "Runlist is getting oversubscribed due to%s%s%s%s. Expect reduced ROCm performance.\n",
256+
is_over_subscription & OVER_SUBSCRIPTION_PROCESS_COUNT ?
257+
" too many processes" : "",
258+
is_over_subscription & OVER_SUBSCRIPTION_COMPUTE_QUEUE_COUNT ?
259+
" too many queues" : "",
260+
is_over_subscription & OVER_SUBSCRIPTION_GWS_QUEUE_COUNT ?
261+
" multiple processes using cooperative launch" : "",
262+
is_over_subscription & OVER_SUBSCRIPTION_XNACK_CONFLICT ?
263+
" xnack on/off processes mixed on gfx9" : "");
230264

231265
retval = pm->pmf->runlist(pm, &rl_buffer[rl_wptr],
232266
*rl_gpu_addr,

drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,8 @@ static int pm_set_resources_v9(struct packet_manager *pm, uint32_t *buffer,
203203
queue_type__mes_set_resources__hsa_interface_queue_hiq;
204204
packet->bitfields2.vmid_mask = res->vmid_mask;
205205
packet->bitfields2.unmap_latency = KFD_UNMAP_LATENCY_MS / 100;
206+
if (pm->dqm->dev->adev->gmc.xnack_flags & AMDGPU_GMC_XNACK_FLAG_CHAIN)
207+
packet->bitfields2.enb_xnack_retry_disable_check = 1;
206208
packet->bitfields7.oac_mask = res->oac_mask;
207209
packet->bitfields8.gds_heap_base = res->gds_heap_base;
208210
packet->bitfields8.gds_heap_size = res->gds_heap_size;

drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,8 @@ struct pm4_mes_set_resources {
6363
struct {
6464
uint32_t vmid_mask:16;
6565
uint32_t unmap_latency:8;
66-
uint32_t reserved1:5;
66+
uint32_t reserved1:4;
67+
uint32_t enb_xnack_retry_disable_check:1;
6768
enum mes_set_resources_queue_type_enum queue_type:3;
6869
} bitfields2;
6970
uint32_t ordinal2;

0 commit comments

Comments
 (0)