Skip to content

Commit 97e3c6a

Browse files
mukjoshi authored and alexdeucher committed
drm/amdgpu: Store CU info from all XCCs for GFX v9.4.3
Currently, we store CU info only for a single XCC, assuming that it is the same for all XCCs. However, that may not be true. As a result, store CU info for all XCCs. This info is later used for CU masking.

Signed-off-by: Mukul Joshi <mukul.joshi@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
1 parent 2f06b27 commit 97e3c6a

14 files changed

Lines changed: 60 additions & 65 deletions

File tree

drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -478,7 +478,7 @@ void amdgpu_amdkfd_get_cu_info(struct amdgpu_device *adev, struct kfd_cu_info *c
478478
cu_info->cu_active_number = acu_info.number;
479479
cu_info->cu_ao_mask = acu_info.ao_cu_mask;
480480
memcpy(&cu_info->cu_bitmap[0], &acu_info.bitmap[0],
481-
sizeof(acu_info.bitmap));
481+
sizeof(cu_info->cu_bitmap));
482482
cu_info->num_shader_engines = adev->gfx.config.max_shader_engines;
483483
cu_info->num_shader_arrays_per_engine = adev->gfx.config.max_sh_per_se;
484484
cu_info->num_cu_per_sh = adev->gfx.config.max_cu_per_sh;

drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
#define AMDGPU_GFX_LBPW_DISABLED_MODE 0x00000008L
4444

4545
#define AMDGPU_MAX_GC_INSTANCES 8
46+
#define KGD_MAX_QUEUES 128
4647

4748
#define AMDGPU_MAX_GFX_QUEUES KGD_MAX_QUEUES
4849
#define AMDGPU_MAX_COMPUTE_QUEUES KGD_MAX_QUEUES
@@ -257,7 +258,7 @@ struct amdgpu_cu_info {
257258
uint32_t number;
258259
uint32_t ao_cu_mask;
259260
uint32_t ao_cu_bitmap[4][4];
260-
uint32_t bitmap[4][4];
261+
uint32_t bitmap[AMDGPU_MAX_GC_INSTANCES][4][4];
261262
};
262263

263264
struct amdgpu_gfx_ras {

drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -839,7 +839,7 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
839839
memcpy(&dev_info->cu_ao_bitmap[0], &adev->gfx.cu_info.ao_cu_bitmap[0],
840840
sizeof(adev->gfx.cu_info.ao_cu_bitmap));
841841
memcpy(&dev_info->cu_bitmap[0], &adev->gfx.cu_info.bitmap[0],
842-
sizeof(adev->gfx.cu_info.bitmap));
842+
sizeof(dev_info->cu_bitmap));
843843
dev_info->vram_type = adev->gmc.vram_type;
844844
dev_info->vram_bit_width = adev->gmc.vram_width;
845845
dev_info->vce_harvest_config = adev->vce.harvest_config;

drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9449,7 +9449,7 @@ static int gfx_v10_0_get_cu_info(struct amdgpu_device *adev,
94499449
gfx_v10_0_set_user_wgp_inactive_bitmap_per_sh(
94509450
adev, disable_masks[i * 2 + j]);
94519451
bitmap = gfx_v10_0_get_cu_active_bitmap_per_sh(adev);
9452-
cu_info->bitmap[i][j] = bitmap;
9452+
cu_info->bitmap[0][i][j] = bitmap;
94539453

94549454
for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
94559455
if (bitmap & mask) {

drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6368,7 +6368,7 @@ static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev,
63686368
* SE6: {SH0,SH1} --> {bitmap[2][2], bitmap[2][3]}
63696369
* SE7: {SH0,SH1} --> {bitmap[3][2], bitmap[3][3]}
63706370
*/
6371-
cu_info->bitmap[i % 4][j + (i / 4) * 2] = bitmap;
6371+
cu_info->bitmap[0][i % 4][j + (i / 4) * 2] = bitmap;
63726372

63736373
for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
63746374
if (bitmap & mask)

drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3577,7 +3577,7 @@ static void gfx_v6_0_get_cu_info(struct amdgpu_device *adev)
35773577
gfx_v6_0_set_user_cu_inactive_bitmap(
35783578
adev, disable_masks[i * 2 + j]);
35793579
bitmap = gfx_v6_0_get_cu_enabled(adev);
3580-
cu_info->bitmap[i][j] = bitmap;
3580+
cu_info->bitmap[0][i][j] = bitmap;
35813581

35823582
for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
35833583
if (bitmap & mask) {

drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5119,7 +5119,7 @@ static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev)
51195119
gfx_v7_0_set_user_cu_inactive_bitmap(
51205120
adev, disable_masks[i * 2 + j]);
51215121
bitmap = gfx_v7_0_get_cu_active_bitmap(adev);
5122-
cu_info->bitmap[i][j] = bitmap;
5122+
cu_info->bitmap[0][i][j] = bitmap;
51235123

51245124
for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
51255125
if (bitmap & mask) {

drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7121,7 +7121,7 @@ static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
71217121
gfx_v8_0_set_user_cu_inactive_bitmap(
71227122
adev, disable_masks[i * 2 + j]);
71237123
bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
7124-
cu_info->bitmap[i][j] = bitmap;
7124+
cu_info->bitmap[0][i][j] = bitmap;
71257125

71267126
for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
71277127
if (bitmap & mask) {

drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1499,7 +1499,7 @@ static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
14991499
amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
15001500

15011501
for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
1502-
if (cu_info->bitmap[i][j] & mask) {
1502+
if (cu_info->bitmap[0][i][j] & mask) {
15031503
if (counter == pg_always_on_cu_num)
15041504
WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
15051505
if (counter < always_on_cu_num)
@@ -7233,7 +7233,7 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
72337233
* SE6,SH0 --> bitmap[2][1]
72347234
* SE7,SH0 --> bitmap[3][1]
72357235
*/
7236-
cu_info->bitmap[i % 4][j + i / 4] = bitmap;
7236+
cu_info->bitmap[0][i % 4][j + i / 4] = bitmap;
72377237

72387238
for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
72397239
if (bitmap & mask) {

drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c

Lines changed: 34 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -4259,7 +4259,7 @@ static void gfx_v9_4_3_set_gds_init(struct amdgpu_device *adev)
42594259
}
42604260

42614261
static void gfx_v9_4_3_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
4262-
u32 bitmap)
4262+
u32 bitmap, int xcc_id)
42634263
{
42644264
u32 data;
42654265

@@ -4269,15 +4269,15 @@ static void gfx_v9_4_3_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
42694269
data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
42704270
data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
42714271

4272-
WREG32_SOC15(GC, GET_INST(GC, 0), regGC_USER_SHADER_ARRAY_CONFIG, data);
4272+
WREG32_SOC15(GC, GET_INST(GC, xcc_id), regGC_USER_SHADER_ARRAY_CONFIG, data);
42734273
}
42744274

4275-
static u32 gfx_v9_4_3_get_cu_active_bitmap(struct amdgpu_device *adev)
4275+
static u32 gfx_v9_4_3_get_cu_active_bitmap(struct amdgpu_device *adev, int xcc_id)
42764276
{
42774277
u32 data, mask;
42784278

4279-
data = RREG32_SOC15(GC, GET_INST(GC, 0), regCC_GC_SHADER_ARRAY_CONFIG);
4280-
data |= RREG32_SOC15(GC, GET_INST(GC, 0), regGC_USER_SHADER_ARRAY_CONFIG);
4279+
data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCC_GC_SHADER_ARRAY_CONFIG);
4280+
data |= RREG32_SOC15(GC, GET_INST(GC, xcc_id), regGC_USER_SHADER_ARRAY_CONFIG);
42814281

42824282
data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
42834283
data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
@@ -4290,7 +4290,7 @@ static u32 gfx_v9_4_3_get_cu_active_bitmap(struct amdgpu_device *adev)
42904290
static int gfx_v9_4_3_get_cu_info(struct amdgpu_device *adev,
42914291
struct amdgpu_cu_info *cu_info)
42924292
{
4293-
int i, j, k, counter, active_cu_number = 0;
4293+
int i, j, k, counter, xcc_id, active_cu_number = 0;
42944294
u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
42954295
unsigned disable_masks[4 * 4];
42964296

@@ -4309,46 +4309,38 @@ static int gfx_v9_4_3_get_cu_info(struct amdgpu_device *adev,
43094309
adev->gfx.config.max_sh_per_se);
43104310

43114311
mutex_lock(&adev->grbm_idx_mutex);
4312-
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
4313-
for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
4314-
mask = 1;
4315-
ao_bitmap = 0;
4316-
counter = 0;
4317-
gfx_v9_4_3_xcc_select_se_sh(adev, i, j, 0xffffffff, 0);
4318-
gfx_v9_4_3_set_user_cu_inactive_bitmap(
4319-
adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
4320-
bitmap = gfx_v9_4_3_get_cu_active_bitmap(adev);
4321-
4322-
/*
4323-
* The bitmap(and ao_cu_bitmap) in cu_info structure is
4324-
* 4x4 size array, and it's usually suitable for Vega
4325-
* ASICs which has 4*2 SE/SH layout.
4326-
* But for Arcturus, SE/SH layout is changed to 8*1.
4327-
* To mostly reduce the impact, we make it compatible
4328-
* with current bitmap array as below:
4329-
* SE4,SH0 --> bitmap[0][1]
4330-
* SE5,SH0 --> bitmap[1][1]
4331-
* SE6,SH0 --> bitmap[2][1]
4332-
* SE7,SH0 --> bitmap[3][1]
4333-
*/
4334-
cu_info->bitmap[i % 4][j + i / 4] = bitmap;
4335-
4336-
for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
4337-
if (bitmap & mask) {
4338-
if (counter < adev->gfx.config.max_cu_per_sh)
4339-
ao_bitmap |= mask;
4340-
counter++;
4312+
for (xcc_id = 0; xcc_id < NUM_XCC(adev->gfx.xcc_mask); xcc_id++) {
4313+
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
4314+
for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
4315+
mask = 1;
4316+
ao_bitmap = 0;
4317+
counter = 0;
4318+
gfx_v9_4_3_xcc_select_se_sh(adev, i, j, 0xffffffff, xcc_id);
4319+
gfx_v9_4_3_set_user_cu_inactive_bitmap(
4320+
adev,
4321+
disable_masks[i * adev->gfx.config.max_sh_per_se + j],
4322+
xcc_id);
4323+
bitmap = gfx_v9_4_3_get_cu_active_bitmap(adev, xcc_id);
4324+
4325+
cu_info->bitmap[xcc_id][i][j] = bitmap;
4326+
4327+
for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
4328+
if (bitmap & mask) {
4329+
if (counter < adev->gfx.config.max_cu_per_sh)
4330+
ao_bitmap |= mask;
4331+
counter++;
4332+
}
4333+
mask <<= 1;
43414334
}
4342-
mask <<= 1;
4335+
active_cu_number += counter;
4336+
if (i < 2 && j < 2)
4337+
ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
4338+
cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
43434339
}
4344-
active_cu_number += counter;
4345-
if (i < 2 && j < 2)
4346-
ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
4347-
cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
43484340
}
4341+
gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff,
4342+
xcc_id);
43494343
}
4350-
gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff,
4351-
0);
43524344
mutex_unlock(&adev->grbm_idx_mutex);
43534345

43544346
cu_info->number = active_cu_number;

0 commit comments

Comments
 (0)