Skip to content

Commit 0752e66

Browse files
mukjoshi authored and alexdeucher committed
drm/amdkfd: Update cache info reporting for GFX v9.4.3
Update cache info reporting in sysfs to report the correct number of CUs and associated cache information based on different spatial partitioning modes.

Signed-off-by: Mukul Joshi <mukul.joshi@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
1 parent 97e3c6a commit 0752e66

3 files changed

Lines changed: 51 additions & 37 deletions

File tree

drivers/gpu/drm/amd/amdkfd/kfd_crat.h

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -79,6 +79,10 @@ struct crat_header {
7979
#define CRAT_SUBTYPE_IOLINK_AFFINITY 5
8080
#define CRAT_SUBTYPE_MAX 6
8181

82+
/*
83+
* Do not change the value of CRAT_SIBLINGMAP_SIZE from 32
84+
* as it breaks the ABI.
85+
*/
8286
#define CRAT_SIBLINGMAP_SIZE 32
8387

8488
/*

drivers/gpu/drm/amd/amdkfd/kfd_topology.c

Lines changed: 46 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -1596,14 +1596,17 @@ static int fill_in_l1_pcache(struct kfd_cache_properties **props_ext,
15961596
static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext,
15971597
struct kfd_gpu_cache_info *pcache_info,
15981598
struct kfd_cu_info *cu_info,
1599-
int cache_type, unsigned int cu_processor_id)
1599+
int cache_type, unsigned int cu_processor_id,
1600+
struct kfd_node *knode)
16001601
{
16011602
unsigned int cu_sibling_map_mask;
16021603
int first_active_cu;
1603-
int i, j, k;
1604+
int i, j, k, xcc, start, end;
16041605
struct kfd_cache_properties *pcache = NULL;
16051606

1606-
cu_sibling_map_mask = cu_info->cu_bitmap[0][0][0];
1607+
start = ffs(knode->xcc_mask) - 1;
1608+
end = start + NUM_XCC(knode->xcc_mask);
1609+
cu_sibling_map_mask = cu_info->cu_bitmap[start][0][0];
16071610
cu_sibling_map_mask &=
16081611
((1 << pcache_info[cache_type].num_cu_shared) - 1);
16091612
first_active_cu = ffs(cu_sibling_map_mask);
@@ -1638,16 +1641,18 @@ static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext,
16381641
cu_sibling_map_mask = cu_sibling_map_mask >> (first_active_cu - 1);
16391642
k = 0;
16401643

1641-
for (i = 0; i < cu_info->num_shader_engines; i++) {
1642-
for (j = 0; j < cu_info->num_shader_arrays_per_engine; j++) {
1643-
pcache->sibling_map[k] = (uint8_t)(cu_sibling_map_mask & 0xFF);
1644-
pcache->sibling_map[k+1] = (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF);
1645-
pcache->sibling_map[k+2] = (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF);
1646-
pcache->sibling_map[k+3] = (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF);
1647-
k += 4;
1648-
1649-
cu_sibling_map_mask = cu_info->cu_bitmap[0][i % 4][j + i / 4];
1650-
cu_sibling_map_mask &= ((1 << pcache_info[cache_type].num_cu_shared) - 1);
1644+
for (xcc = start; xcc < end; xcc++) {
1645+
for (i = 0; i < cu_info->num_shader_engines; i++) {
1646+
for (j = 0; j < cu_info->num_shader_arrays_per_engine; j++) {
1647+
pcache->sibling_map[k] = (uint8_t)(cu_sibling_map_mask & 0xFF);
1648+
pcache->sibling_map[k+1] = (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF);
1649+
pcache->sibling_map[k+2] = (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF);
1650+
pcache->sibling_map[k+3] = (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF);
1651+
k += 4;
1652+
1653+
cu_sibling_map_mask = cu_info->cu_bitmap[xcc][i % 4][j + i / 4];
1654+
cu_sibling_map_mask &= ((1 << pcache_info[cache_type].num_cu_shared) - 1);
1655+
}
16511656
}
16521657
}
16531658
pcache->sibling_map_size = k;
@@ -1665,7 +1670,7 @@ static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext,
16651670
static void kfd_fill_cache_non_crat_info(struct kfd_topology_device *dev, struct kfd_node *kdev)
16661671
{
16671672
struct kfd_gpu_cache_info *pcache_info = NULL;
1668-
int i, j, k;
1673+
int i, j, k, xcc, start, end;
16691674
int ct = 0;
16701675
unsigned int cu_processor_id;
16711676
int ret;
@@ -1699,37 +1704,42 @@ static void kfd_fill_cache_non_crat_info(struct kfd_topology_device *dev, struct
16991704
* then it will consider only one CU from
17001705
* the shared unit
17011706
*/
1707+
start = ffs(kdev->xcc_mask) - 1;
1708+
end = start + NUM_XCC(kdev->xcc_mask);
1709+
17021710
for (ct = 0; ct < num_of_cache_types; ct++) {
17031711
cu_processor_id = gpu_processor_id;
17041712
if (pcache_info[ct].cache_level == 1) {
1705-
for (i = 0; i < pcu_info->num_shader_engines; i++) {
1706-
for (j = 0; j < pcu_info->num_shader_arrays_per_engine; j++) {
1707-
for (k = 0; k < pcu_info->num_cu_per_sh; k += pcache_info[ct].num_cu_shared) {
1708-
1709-
ret = fill_in_l1_pcache(&props_ext, pcache_info, pcu_info,
1710-
pcu_info->cu_bitmap[0][i % 4][j + i / 4], ct,
1711-
cu_processor_id, k);
1712-
1713-
if (ret < 0)
1714-
break;
1715-
1716-
if (!ret) {
1717-
num_of_entries++;
1718-
list_add_tail(&props_ext->list, &dev->cache_props);
1713+
for (xcc = start; xcc < end; xcc++) {
1714+
for (i = 0; i < pcu_info->num_shader_engines; i++) {
1715+
for (j = 0; j < pcu_info->num_shader_arrays_per_engine; j++) {
1716+
for (k = 0; k < pcu_info->num_cu_per_sh; k += pcache_info[ct].num_cu_shared) {
1717+
1718+
ret = fill_in_l1_pcache(&props_ext, pcache_info, pcu_info,
1719+
pcu_info->cu_bitmap[xcc][i % 4][j + i / 4], ct,
1720+
cu_processor_id, k);
1721+
1722+
if (ret < 0)
1723+
break;
1724+
1725+
if (!ret) {
1726+
num_of_entries++;
1727+
list_add_tail(&props_ext->list, &dev->cache_props);
1728+
}
1729+
1730+
/* Move to next CU block */
1731+
num_cu_shared = ((k + pcache_info[ct].num_cu_shared) <=
1732+
pcu_info->num_cu_per_sh) ?
1733+
pcache_info[ct].num_cu_shared :
1734+
(pcu_info->num_cu_per_sh - k);
1735+
cu_processor_id += num_cu_shared;
17191736
}
1720-
1721-
/* Move to next CU block */
1722-
num_cu_shared = ((k + pcache_info[ct].num_cu_shared) <=
1723-
pcu_info->num_cu_per_sh) ?
1724-
pcache_info[ct].num_cu_shared :
1725-
(pcu_info->num_cu_per_sh - k);
1726-
cu_processor_id += num_cu_shared;
17271737
}
17281738
}
17291739
}
17301740
} else {
17311741
ret = fill_in_l2_l3_pcache(&props_ext, pcache_info,
1732-
pcu_info, ct, cu_processor_id);
1742+
pcu_info, ct, cu_processor_id, kdev);
17331743

17341744
if (ret < 0)
17351745
break;

drivers/gpu/drm/amd/amdkfd/kfd_topology.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -89,7 +89,7 @@ struct kfd_mem_properties {
8989
struct attribute attr;
9090
};
9191

92-
#define CACHE_SIBLINGMAP_SIZE 64
92+
#define CACHE_SIBLINGMAP_SIZE 128
9393

9494
struct kfd_cache_properties {
9595
struct list_head list;

0 commit comments

Comments
 (0)