@@ -1596,14 +1596,17 @@ static int fill_in_l1_pcache(struct kfd_cache_properties **props_ext,
15961596static int fill_in_l2_l3_pcache (struct kfd_cache_properties * * props_ext ,
15971597 struct kfd_gpu_cache_info * pcache_info ,
15981598 struct kfd_cu_info * cu_info ,
1599- int cache_type , unsigned int cu_processor_id )
1599+ int cache_type , unsigned int cu_processor_id ,
1600+ struct kfd_node * knode )
16001601{
16011602 unsigned int cu_sibling_map_mask ;
16021603 int first_active_cu ;
1603- int i , j , k ;
1604+ int i , j , k , xcc , start , end ;
16041605 struct kfd_cache_properties * pcache = NULL ;
16051606
1606- cu_sibling_map_mask = cu_info -> cu_bitmap [0 ][0 ][0 ];
1607+ start = ffs (knode -> xcc_mask ) - 1 ;
1608+ end = start + NUM_XCC (knode -> xcc_mask );
1609+ cu_sibling_map_mask = cu_info -> cu_bitmap [start ][0 ][0 ];
16071610 cu_sibling_map_mask &=
16081611 ((1 << pcache_info [cache_type ].num_cu_shared ) - 1 );
16091612 first_active_cu = ffs (cu_sibling_map_mask );
@@ -1638,16 +1641,18 @@ static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext,
16381641 cu_sibling_map_mask = cu_sibling_map_mask >> (first_active_cu - 1 );
16391642 k = 0 ;
16401643
1641- for (i = 0 ; i < cu_info -> num_shader_engines ; i ++ ) {
1642- for (j = 0 ; j < cu_info -> num_shader_arrays_per_engine ; j ++ ) {
1643- pcache -> sibling_map [k ] = (uint8_t )(cu_sibling_map_mask & 0xFF );
1644- pcache -> sibling_map [k + 1 ] = (uint8_t )((cu_sibling_map_mask >> 8 ) & 0xFF );
1645- pcache -> sibling_map [k + 2 ] = (uint8_t )((cu_sibling_map_mask >> 16 ) & 0xFF );
1646- pcache -> sibling_map [k + 3 ] = (uint8_t )((cu_sibling_map_mask >> 24 ) & 0xFF );
1647- k += 4 ;
1648-
1649- cu_sibling_map_mask = cu_info -> cu_bitmap [0 ][i % 4 ][j + i / 4 ];
1650- cu_sibling_map_mask &= ((1 << pcache_info [cache_type ].num_cu_shared ) - 1 );
1644+ for (xcc = start ; xcc < end ; xcc ++ ) {
1645+ for (i = 0 ; i < cu_info -> num_shader_engines ; i ++ ) {
1646+ for (j = 0 ; j < cu_info -> num_shader_arrays_per_engine ; j ++ ) {
1647+ pcache -> sibling_map [k ] = (uint8_t )(cu_sibling_map_mask & 0xFF );
1648+ pcache -> sibling_map [k + 1 ] = (uint8_t )((cu_sibling_map_mask >> 8 ) & 0xFF );
1649+ pcache -> sibling_map [k + 2 ] = (uint8_t )((cu_sibling_map_mask >> 16 ) & 0xFF );
1650+ pcache -> sibling_map [k + 3 ] = (uint8_t )((cu_sibling_map_mask >> 24 ) & 0xFF );
1651+ k += 4 ;
1652+
1653+ cu_sibling_map_mask = cu_info -> cu_bitmap [xcc ][i % 4 ][j + i / 4 ];
1654+ cu_sibling_map_mask &= ((1 << pcache_info [cache_type ].num_cu_shared ) - 1 );
1655+ }
16511656 }
16521657 }
16531658 pcache -> sibling_map_size = k ;
@@ -1665,7 +1670,7 @@ static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext,
16651670static void kfd_fill_cache_non_crat_info (struct kfd_topology_device * dev , struct kfd_node * kdev )
16661671{
16671672 struct kfd_gpu_cache_info * pcache_info = NULL ;
1668- int i , j , k ;
1673+ int i , j , k , xcc , start , end ;
16691674 int ct = 0 ;
16701675 unsigned int cu_processor_id ;
16711676 int ret ;
@@ -1699,37 +1704,42 @@ static void kfd_fill_cache_non_crat_info(struct kfd_topology_device *dev, struct
16991704 * then it will consider only one CU from
17001705 * the shared unit
17011706 */
1707+ start = ffs (kdev -> xcc_mask ) - 1 ;
1708+ end = start + NUM_XCC (kdev -> xcc_mask );
1709+
17021710 for (ct = 0 ; ct < num_of_cache_types ; ct ++ ) {
17031711 cu_processor_id = gpu_processor_id ;
17041712 if (pcache_info [ct ].cache_level == 1 ) {
1705- for (i = 0 ; i < pcu_info -> num_shader_engines ; i ++ ) {
1706- for (j = 0 ; j < pcu_info -> num_shader_arrays_per_engine ; j ++ ) {
1707- for (k = 0 ; k < pcu_info -> num_cu_per_sh ; k += pcache_info [ct ].num_cu_shared ) {
1708-
1709- ret = fill_in_l1_pcache (& props_ext , pcache_info , pcu_info ,
1710- pcu_info -> cu_bitmap [0 ][i % 4 ][j + i / 4 ], ct ,
1711- cu_processor_id , k );
1712-
1713- if (ret < 0 )
1714- break ;
1715-
1716- if (!ret ) {
1717- num_of_entries ++ ;
1718- list_add_tail (& props_ext -> list , & dev -> cache_props );
1713+ for (xcc = start ; xcc < end ; xcc ++ ) {
1714+ for (i = 0 ; i < pcu_info -> num_shader_engines ; i ++ ) {
1715+ for (j = 0 ; j < pcu_info -> num_shader_arrays_per_engine ; j ++ ) {
1716+ for (k = 0 ; k < pcu_info -> num_cu_per_sh ; k += pcache_info [ct ].num_cu_shared ) {
1717+
1718+ ret = fill_in_l1_pcache (& props_ext , pcache_info , pcu_info ,
1719+ pcu_info -> cu_bitmap [xcc ][i % 4 ][j + i / 4 ], ct ,
1720+ cu_processor_id , k );
1721+
1722+ if (ret < 0 )
1723+ break ;
1724+
1725+ if (!ret ) {
1726+ num_of_entries ++ ;
1727+ list_add_tail (& props_ext -> list , & dev -> cache_props );
1728+ }
1729+
1730+ /* Move to next CU block */
1731+ num_cu_shared = ((k + pcache_info [ct ].num_cu_shared ) <=
1732+ pcu_info -> num_cu_per_sh ) ?
1733+ pcache_info [ct ].num_cu_shared :
1734+ (pcu_info -> num_cu_per_sh - k );
1735+ cu_processor_id += num_cu_shared ;
17191736 }
1720-
1721- /* Move to next CU block */
1722- num_cu_shared = ((k + pcache_info [ct ].num_cu_shared ) <=
1723- pcu_info -> num_cu_per_sh ) ?
1724- pcache_info [ct ].num_cu_shared :
1725- (pcu_info -> num_cu_per_sh - k );
1726- cu_processor_id += num_cu_shared ;
17271737 }
17281738 }
17291739 }
17301740 } else {
17311741 ret = fill_in_l2_l3_pcache (& props_ext , pcache_info ,
1732- pcu_info , ct , cu_processor_id );
1742+ pcu_info , ct , cu_processor_id , kdev );
17331743
17341744 if (ret < 0 )
17351745 break ;
0 commit comments