@@ -1426,12 +1426,47 @@ static int umc_get_cs_mode(int dimm, u8 ctrl, struct amd64_pvt *pvt)
14261426 return cs_mode ;
14271427}
14281428
1429+ static int __addr_mask_to_cs_size (u32 addr_mask_orig , unsigned int cs_mode ,
1430+ int csrow_nr , int dimm )
1431+ {
1432+ u32 msb , weight , num_zero_bits ;
1433+ u32 addr_mask_deinterleaved ;
1434+ int size = 0 ;
1435+
1436+ /*
1437+ * The number of zero bits in the mask is equal to the number of bits
1438+ * in a full mask minus the number of bits in the current mask.
1439+ *
1440+ * The MSB is the number of bits in the full mask because BIT[0] is
1441+ * always 0.
1442+ *
1443+ * In the special 3 Rank interleaving case, a single bit is flipped
1444+ * without swapping with the most significant bit. This can be handled
1445+ * by keeping the MSB where it is and ignoring the single zero bit.
1446+ */
1447+ msb = fls (addr_mask_orig ) - 1 ;
1448+ weight = hweight_long (addr_mask_orig );
1449+ num_zero_bits = msb - weight - !!(cs_mode & CS_3R_INTERLEAVE );
1450+
1451+ /* Take the number of zero bits off from the top of the mask. */
1452+ addr_mask_deinterleaved = GENMASK_ULL (msb - num_zero_bits , 1 );
1453+
1454+ edac_dbg (1 , "CS%d DIMM%d AddrMasks:\n" , csrow_nr , dimm );
1455+ edac_dbg (1 , " Original AddrMask: 0x%x\n" , addr_mask_orig );
1456+ edac_dbg (1 , " Deinterleaved AddrMask: 0x%x\n" , addr_mask_deinterleaved );
1457+
1458+ /* Register [31:1] = Address [39:9]. Size is in kBs here. */
1459+ size = (addr_mask_deinterleaved >> 2 ) + 1 ;
1460+
1461+ /* Return size in MBs. */
1462+ return size >> 10 ;
1463+ }
1464+
14291465static int umc_addr_mask_to_cs_size (struct amd64_pvt * pvt , u8 umc ,
14301466 unsigned int cs_mode , int csrow_nr )
14311467{
1432- u32 addr_mask_orig , addr_mask_deinterleaved ;
1433- u32 msb , weight , num_zero_bits ;
14341468 int cs_mask_nr = csrow_nr ;
1469+ u32 addr_mask_orig ;
14351470 int dimm , size = 0 ;
14361471
14371472 /* No Chip Selects are enabled. */
@@ -1475,33 +1510,7 @@ static int umc_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc,
14751510 else
14761511 addr_mask_orig = pvt -> csels [umc ].csmasks [cs_mask_nr ];
14771512
1478- /*
1479- * The number of zero bits in the mask is equal to the number of bits
1480- * in a full mask minus the number of bits in the current mask.
1481- *
1482- * The MSB is the number of bits in the full mask because BIT[0] is
1483- * always 0.
1484- *
1485- * In the special 3 Rank interleaving case, a single bit is flipped
1486- * without swapping with the most significant bit. This can be handled
1487- * by keeping the MSB where it is and ignoring the single zero bit.
1488- */
1489- msb = fls (addr_mask_orig ) - 1 ;
1490- weight = hweight_long (addr_mask_orig );
1491- num_zero_bits = msb - weight - !!(cs_mode & CS_3R_INTERLEAVE );
1492-
1493- /* Take the number of zero bits off from the top of the mask. */
1494- addr_mask_deinterleaved = GENMASK_ULL (msb - num_zero_bits , 1 );
1495-
1496- edac_dbg (1 , "CS%d DIMM%d AddrMasks:\n" , csrow_nr , dimm );
1497- edac_dbg (1 , " Original AddrMask: 0x%x\n" , addr_mask_orig );
1498- edac_dbg (1 , " Deinterleaved AddrMask: 0x%x\n" , addr_mask_deinterleaved );
1499-
1500- /* Register [31:1] = Address [39:9]. Size is in kBs here. */
1501- size = (addr_mask_deinterleaved >> 2 ) + 1 ;
1502-
1503- /* Return size in MBs. */
1504- return size >> 10 ;
1513+ return __addr_mask_to_cs_size (addr_mask_orig , cs_mode , csrow_nr , dimm );
15051514}
15061515
15071516static void umc_debug_display_dimm_sizes (struct amd64_pvt * pvt , u8 ctrl )
@@ -3675,6 +3684,221 @@ static int umc_hw_info_get(struct amd64_pvt *pvt)
36753684 return 0 ;
36763685}
36773686
3687+ /*
3688+ * The CPUs have one channel per UMC, so UMC number is equivalent to a
3689+ * channel number. The GPUs have 8 channels per UMC, so the UMC number no
3690+ * longer works as a channel number.
3691+ *
3692+ * The channel number within a GPU UMC is given in MCA_IPID[15:12].
3693+ * However, the IDs are split such that two UMC values go to one UMC, and
3694+ * the channel numbers are split in two groups of four.
3695+ *
3696+ * Refer to comment on gpu_get_umc_base().
3697+ *
3698+ * For example,
3699+ * UMC0 CH[3:0] = 0x0005[3:0]000
3700+ * UMC0 CH[7:4] = 0x0015[3:0]000
3701+ * UMC1 CH[3:0] = 0x0025[3:0]000
3702+ * UMC1 CH[7:4] = 0x0035[3:0]000
3703+ */
3704+ static void gpu_get_err_info (struct mce * m , struct err_info * err )
3705+ {
3706+ u8 ch = (m -> ipid & GENMASK (31 , 0 )) >> 20 ;
3707+ u8 phy = ((m -> ipid >> 12 ) & 0xf );
3708+
3709+ err -> channel = ch % 2 ? phy + 4 : phy ;
3710+ err -> csrow = phy ;
3711+ }
3712+
3713+ static int gpu_addr_mask_to_cs_size (struct amd64_pvt * pvt , u8 umc ,
3714+ unsigned int cs_mode , int csrow_nr )
3715+ {
3716+ u32 addr_mask_orig = pvt -> csels [umc ].csmasks [csrow_nr ];
3717+
3718+ return __addr_mask_to_cs_size (addr_mask_orig , cs_mode , csrow_nr , csrow_nr >> 1 );
3719+ }
3720+
3721+ static void gpu_debug_display_dimm_sizes (struct amd64_pvt * pvt , u8 ctrl )
3722+ {
3723+ int size , cs_mode , cs = 0 ;
3724+
3725+ edac_printk (KERN_DEBUG , EDAC_MC , "UMC%d chip selects:\n" , ctrl );
3726+
3727+ cs_mode = CS_EVEN_PRIMARY | CS_ODD_PRIMARY ;
3728+
3729+ for_each_chip_select (cs , ctrl , pvt ) {
3730+ size = gpu_addr_mask_to_cs_size (pvt , ctrl , cs_mode , cs );
3731+ amd64_info (EDAC_MC ": %d: %5dMB\n" , cs , size );
3732+ }
3733+ }
3734+
3735+ static void gpu_dump_misc_regs (struct amd64_pvt * pvt )
3736+ {
3737+ struct amd64_umc * umc ;
3738+ u32 i ;
3739+
3740+ for_each_umc (i ) {
3741+ umc = & pvt -> umc [i ];
3742+
3743+ edac_dbg (1 , "UMC%d UMC cfg: 0x%x\n" , i , umc -> umc_cfg );
3744+ edac_dbg (1 , "UMC%d SDP ctrl: 0x%x\n" , i , umc -> sdp_ctrl );
3745+ edac_dbg (1 , "UMC%d ECC ctrl: 0x%x\n" , i , umc -> ecc_ctrl );
3746+ edac_dbg (1 , "UMC%d All HBMs support ECC: yes\n" , i );
3747+
3748+ gpu_debug_display_dimm_sizes (pvt , i );
3749+ }
3750+ }
3751+
3752+ static u32 gpu_get_csrow_nr_pages (struct amd64_pvt * pvt , u8 dct , int csrow_nr )
3753+ {
3754+ u32 nr_pages ;
3755+ int cs_mode = CS_EVEN_PRIMARY | CS_ODD_PRIMARY ;
3756+
3757+ nr_pages = gpu_addr_mask_to_cs_size (pvt , dct , cs_mode , csrow_nr );
3758+ nr_pages <<= 20 - PAGE_SHIFT ;
3759+
3760+ edac_dbg (0 , "csrow: %d, channel: %d\n" , csrow_nr , dct );
3761+ edac_dbg (0 , "nr_pages/channel: %u\n" , nr_pages );
3762+
3763+ return nr_pages ;
3764+ }
3765+
3766+ static void gpu_init_csrows (struct mem_ctl_info * mci )
3767+ {
3768+ struct amd64_pvt * pvt = mci -> pvt_info ;
3769+ struct dimm_info * dimm ;
3770+ u8 umc , cs ;
3771+
3772+ for_each_umc (umc ) {
3773+ for_each_chip_select (cs , umc , pvt ) {
3774+ if (!csrow_enabled (cs , umc , pvt ))
3775+ continue ;
3776+
3777+ dimm = mci -> csrows [umc ]-> channels [cs ]-> dimm ;
3778+
3779+ edac_dbg (1 , "MC node: %d, csrow: %d\n" ,
3780+ pvt -> mc_node_id , cs );
3781+
3782+ dimm -> nr_pages = gpu_get_csrow_nr_pages (pvt , umc , cs );
3783+ dimm -> edac_mode = EDAC_SECDED ;
3784+ dimm -> mtype = MEM_HBM2 ;
3785+ dimm -> dtype = DEV_X16 ;
3786+ dimm -> grain = 64 ;
3787+ }
3788+ }
3789+ }
3790+
3791+ static void gpu_setup_mci_misc_attrs (struct mem_ctl_info * mci )
3792+ {
3793+ struct amd64_pvt * pvt = mci -> pvt_info ;
3794+
3795+ mci -> mtype_cap = MEM_FLAG_HBM2 ;
3796+ mci -> edac_ctl_cap = EDAC_FLAG_SECDED ;
3797+
3798+ mci -> edac_cap = EDAC_FLAG_EC ;
3799+ mci -> mod_name = EDAC_MOD_STR ;
3800+ mci -> ctl_name = pvt -> ctl_name ;
3801+ mci -> dev_name = pci_name (pvt -> F3 );
3802+ mci -> ctl_page_to_phys = NULL ;
3803+
3804+ gpu_init_csrows (mci );
3805+ }
3806+
3807+ /* ECC is enabled by default on GPU nodes */
3808+ static bool gpu_ecc_enabled (struct amd64_pvt * pvt )
3809+ {
3810+ return true;
3811+ }
3812+
3813+ static inline u32 gpu_get_umc_base (u8 umc , u8 channel )
3814+ {
3815+ /*
3816+ * On CPUs, there is one channel per UMC, so UMC numbering equals
3817+ * channel numbering. On GPUs, there are eight channels per UMC,
3818+ * so the channel numbering is different from UMC numbering.
3819+ *
3820+ * On CPU nodes channels are selected in 6th nibble
3821+ * UMC chY[3:0]= [(chY*2 + 1) : (chY*2)]50000;
3822+ *
3823+ * On GPU nodes channels are selected in 3rd nibble
3824+ * HBM chX[3:0]= [Y ]5X[3:0]000;
3825+ * HBM chX[7:4]= [Y+1]5X[3:0]000
3826+ */
3827+ umc *= 2 ;
3828+
3829+ if (channel >= 4 )
3830+ umc ++ ;
3831+
3832+ return 0x50000 + (umc << 20 ) + ((channel % 4 ) << 12 );
3833+ }
3834+
3835+ static void gpu_read_mc_regs (struct amd64_pvt * pvt )
3836+ {
3837+ u8 nid = pvt -> mc_node_id ;
3838+ struct amd64_umc * umc ;
3839+ u32 i , umc_base ;
3840+
3841+ /* Read registers from each UMC */
3842+ for_each_umc (i ) {
3843+ umc_base = gpu_get_umc_base (i , 0 );
3844+ umc = & pvt -> umc [i ];
3845+
3846+ amd_smn_read (nid , umc_base + UMCCH_UMC_CFG , & umc -> umc_cfg );
3847+ amd_smn_read (nid , umc_base + UMCCH_SDP_CTRL , & umc -> sdp_ctrl );
3848+ amd_smn_read (nid , umc_base + UMCCH_ECC_CTRL , & umc -> ecc_ctrl );
3849+ }
3850+ }
3851+
3852+ static void gpu_read_base_mask (struct amd64_pvt * pvt )
3853+ {
3854+ u32 base_reg , mask_reg ;
3855+ u32 * base , * mask ;
3856+ int umc , cs ;
3857+
3858+ for_each_umc (umc ) {
3859+ for_each_chip_select (cs , umc , pvt ) {
3860+ base_reg = gpu_get_umc_base (umc , cs ) + UMCCH_BASE_ADDR ;
3861+ base = & pvt -> csels [umc ].csbases [cs ];
3862+
3863+ if (!amd_smn_read (pvt -> mc_node_id , base_reg , base )) {
3864+ edac_dbg (0 , " DCSB%d[%d]=0x%08x reg: 0x%x\n" ,
3865+ umc , cs , * base , base_reg );
3866+ }
3867+
3868+ mask_reg = gpu_get_umc_base (umc , cs ) + UMCCH_ADDR_MASK ;
3869+ mask = & pvt -> csels [umc ].csmasks [cs ];
3870+
3871+ if (!amd_smn_read (pvt -> mc_node_id , mask_reg , mask )) {
3872+ edac_dbg (0 , " DCSM%d[%d]=0x%08x reg: 0x%x\n" ,
3873+ umc , cs , * mask , mask_reg );
3874+ }
3875+ }
3876+ }
3877+ }
3878+
3879+ static void gpu_prep_chip_selects (struct amd64_pvt * pvt )
3880+ {
3881+ int umc ;
3882+
3883+ for_each_umc (umc ) {
3884+ pvt -> csels [umc ].b_cnt = 8 ;
3885+ pvt -> csels [umc ].m_cnt = 8 ;
3886+ }
3887+ }
3888+
3889+ static int gpu_hw_info_get (struct amd64_pvt * pvt )
3890+ {
3891+ pvt -> umc = kcalloc (pvt -> max_mcs , sizeof (struct amd64_umc ), GFP_KERNEL );
3892+ if (!pvt -> umc )
3893+ return - ENOMEM ;
3894+
3895+ gpu_prep_chip_selects (pvt );
3896+ gpu_read_base_mask (pvt );
3897+ gpu_read_mc_regs (pvt );
3898+
3899+ return 0 ;
3900+ }
3901+
36783902static void hw_info_put (struct amd64_pvt * pvt )
36793903{
36803904 pci_dev_put (pvt -> F1 );
@@ -3690,6 +3914,14 @@ static struct low_ops umc_ops = {
36903914 .get_err_info = umc_get_err_info ,
36913915};
36923916
/* Family-specific low-level ops for heterogeneous (GPU) nodes. */
static struct low_ops gpu_ops = {
	.hw_info_get = gpu_hw_info_get,
	.ecc_enabled = gpu_ecc_enabled,
	.setup_mci_misc_attrs = gpu_setup_mci_misc_attrs,
	.dump_misc_regs = gpu_dump_misc_regs,
	.get_err_info = gpu_get_err_info,
};
3924+
36933925/* Use Family 16h versions for defaults and adjust as needed below. */
36943926static struct low_ops dct_ops = {
36953927 .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow ,
@@ -3813,6 +4045,16 @@ static int per_family_init(struct amd64_pvt *pvt)
38134045 case 0x20 ... 0x2f :
38144046 pvt -> ctl_name = "F19h_M20h" ;
38154047 break ;
4048+ case 0x30 ... 0x3f :
4049+ if (pvt -> F3 -> device == PCI_DEVICE_ID_AMD_MI200_DF_F3 ) {
4050+ pvt -> ctl_name = "MI200" ;
4051+ pvt -> max_mcs = 4 ;
4052+ pvt -> ops = & gpu_ops ;
4053+ } else {
4054+ pvt -> ctl_name = "F19h_M30h" ;
4055+ pvt -> max_mcs = 8 ;
4056+ }
4057+ break ;
38164058 case 0x50 ... 0x5f :
38174059 pvt -> ctl_name = "F19h_M50h" ;
38184060 break ;
@@ -3846,11 +4088,17 @@ static int init_one_instance(struct amd64_pvt *pvt)
38464088 struct edac_mc_layer layers [2 ];
38474089 int ret = - ENOMEM ;
38484090
4091+ /*
4092+ * For Heterogeneous family EDAC CHIP_SELECT and CHANNEL layers should
4093+ * be swapped to fit into the layers.
4094+ */
38494095 layers [0 ].type = EDAC_MC_LAYER_CHIP_SELECT ;
3850- layers [0 ].size = pvt -> csels [0 ].b_cnt ;
4096+ layers [0 ].size = (pvt -> F3 -> device == PCI_DEVICE_ID_AMD_MI200_DF_F3 ) ?
4097+ pvt -> max_mcs : pvt -> csels [0 ].b_cnt ;
38514098 layers [0 ].is_virt_csrow = true;
38524099 layers [1 ].type = EDAC_MC_LAYER_CHANNEL ;
3853- layers [1 ].size = pvt -> max_mcs ;
4100+ layers [1 ].size = (pvt -> F3 -> device == PCI_DEVICE_ID_AMD_MI200_DF_F3 ) ?
4101+ pvt -> csels [0 ].b_cnt : pvt -> max_mcs ;
38544102 layers [1 ].is_virt_csrow = false;
38554103
38564104 mci = edac_mc_alloc (pvt -> mc_node_id , ARRAY_SIZE (layers ), layers , 0 );
0 commit comments