Skip to content

Commit 1dee7f5

Browse files
committed
Merge tag 'edac_updates_for_v6.8' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras
Pull EDAC updates from Borislav Petkov:

 - The EDAC drivers part of the effort to make the ->remove() platform driver callback return void
 - Add support for AMD AI accelerators
 - Add support for a number of Intel SoCs: Alder Lake-N, Raptor Lake-P, Meteor Lake-{P,PS}
 - Random fixes and cleanups all over the place

* tag 'edac_updates_for_v6.8' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras: (39 commits)
  EDAC/skx_common: Filter out the invalid address
  EDAC, pnd2: Sort headers alphabetically
  EDAC, pnd2: Correct misleading error message in mk_region_mask()
  EDAC, pnd2: Apply bit macros and helpers where it makes sense
  EDAC, pnd2: Replace custom definition by one from sizes.h
  EDAC/igen6: Add Intel Meteor Lake-P SoCs support
  EDAC/igen6: Add Intel Meteor Lake-PS SoCs support
  EDAC/igen6: Add Intel Raptor Lake-P SoCs support
  EDAC/igen6: Add Intel Alder Lake-N SoCs support
  EDAC/igen6: Make get_mchbar() helper function
  EDAC/amd64: Add support for family 0x19, models 0x90-9f devices
  EDAC/mc: Add support for HBM3 memory type
  EDAC/{sb,i7core}_edac: Do not use a plain integer for a NULL pointer
  EDAC/armada_xp: Explicitly include correct DT includes
  EDAC/pci_sysfs: Use PCI_HEADER_TYPE_MASK instead of literals
  EDAC/thunderx: Fix possible out-of-bounds string access
  EDAC/fsl_ddr: Convert to platform remove callback returning void
  EDAC/zynqmp: Convert to platform remove callback returning void
  EDAC/xgene: Convert to platform remove callback returning void
  EDAC/ti: Convert to platform remove callback returning void
  ...
2 parents 5db8752 + 1e92af0 commit 1dee7f5

35 files changed

Lines changed: 331 additions & 182 deletions

drivers/edac/altera_edac.c

Lines changed: 6 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,7 @@
2222
#include <linux/of_platform.h>
2323
#include <linux/panic_notifier.h>
2424
#include <linux/platform_device.h>
25+
#include <linux/property.h>
2526
#include <linux/regmap.h>
2627
#include <linux/types.h>
2728
#include <linux/uaccess.h>
@@ -279,7 +280,6 @@ static int a10_unmask_irq(struct platform_device *pdev, u32 mask)
279280

280281
static int altr_sdram_probe(struct platform_device *pdev)
281282
{
282-
const struct of_device_id *id;
283283
struct edac_mc_layer layers[2];
284284
struct mem_ctl_info *mci;
285285
struct altr_sdram_mc_data *drvdata;
@@ -290,10 +290,6 @@ static int altr_sdram_probe(struct platform_device *pdev)
290290
int irq, irq2, res = 0;
291291
unsigned long mem_size, irqflags = 0;
292292

293-
id = of_match_device(altr_sdram_ctrl_of_match, &pdev->dev);
294-
if (!id)
295-
return -ENODEV;
296-
297293
/* Grab the register range from the sdr controller in device tree */
298294
mc_vbase = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
299295
"altr,sdr-syscon");
@@ -304,8 +300,7 @@ static int altr_sdram_probe(struct platform_device *pdev)
304300
}
305301

306302
/* Check specific dependencies for the module */
307-
priv = of_match_node(altr_sdram_ctrl_of_match,
308-
pdev->dev.of_node)->data;
303+
priv = device_get_match_data(&pdev->dev);
309304

310305
/* Validate the SDRAM controller has ECC enabled */
311306
if (regmap_read(mc_vbase, priv->ecc_ctrl_offset, &read_reg) ||
@@ -459,15 +454,13 @@ static int altr_sdram_probe(struct platform_device *pdev)
459454
return res;
460455
}
461456

462-
static int altr_sdram_remove(struct platform_device *pdev)
457+
static void altr_sdram_remove(struct platform_device *pdev)
463458
{
464459
struct mem_ctl_info *mci = platform_get_drvdata(pdev);
465460

466461
edac_mc_del_mc(&pdev->dev);
467462
edac_mc_free(mci);
468463
platform_set_drvdata(pdev, NULL);
469-
470-
return 0;
471464
}
472465

473466
/*
@@ -489,7 +482,7 @@ static const struct dev_pm_ops altr_sdram_pm_ops = {
489482

490483
static struct platform_driver altr_sdram_edac_driver = {
491484
.probe = altr_sdram_probe,
492-
.remove = altr_sdram_remove,
485+
.remove_new = altr_sdram_remove,
493486
.driver = {
494487
.name = "altr_sdram_edac",
495488
#ifdef CONFIG_PM
@@ -812,21 +805,19 @@ static int altr_edac_device_probe(struct platform_device *pdev)
812805
return res;
813806
}
814807

815-
static int altr_edac_device_remove(struct platform_device *pdev)
808+
static void altr_edac_device_remove(struct platform_device *pdev)
816809
{
817810
struct edac_device_ctl_info *dci = platform_get_drvdata(pdev);
818811
struct altr_edac_device_dev *drvdata = dci->pvt_info;
819812

820813
debugfs_remove_recursive(drvdata->debugfs_dir);
821814
edac_device_del_device(&pdev->dev);
822815
edac_device_free_ctl_info(dci);
823-
824-
return 0;
825816
}
826817

827818
static struct platform_driver altr_edac_device_driver = {
828819
.probe = altr_edac_device_probe,
829-
.remove = altr_edac_device_remove,
820+
.remove_new = altr_edac_device_remove,
830821
.driver = {
831822
.name = "altr_edac_device",
832823
.of_match_table = altr_edac_device_of_match,

drivers/edac/amd64_edac.c

Lines changed: 48 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -996,15 +996,23 @@ static struct local_node_map {
996996
#define LNTM_NODE_COUNT GENMASK(27, 16)
997997
#define LNTM_BASE_NODE_ID GENMASK(11, 0)
998998

999-
static int gpu_get_node_map(void)
999+
static int gpu_get_node_map(struct amd64_pvt *pvt)
10001000
{
10011001
struct pci_dev *pdev;
10021002
int ret;
10031003
u32 tmp;
10041004

10051005
/*
1006-
* Node ID 0 is reserved for CPUs.
1007-
* Therefore, a non-zero Node ID means we've already cached the values.
1006+
* Mapping of nodes from hardware-provided AMD Node ID to a
1007+
* Linux logical one is applicable for MI200 models. Therefore,
1008+
* return early for other heterogeneous systems.
1009+
*/
1010+
if (pvt->F3->device != PCI_DEVICE_ID_AMD_MI200_DF_F3)
1011+
return 0;
1012+
1013+
/*
1014+
* Node ID 0 is reserved for CPUs. Therefore, a non-zero Node ID
1015+
* means the values have been already cached.
10081016
*/
10091017
if (gpu_node_map.base_node_id)
10101018
return 0;
@@ -3851,7 +3859,7 @@ static void gpu_init_csrows(struct mem_ctl_info *mci)
38513859

38523860
dimm->nr_pages = gpu_get_csrow_nr_pages(pvt, umc, cs);
38533861
dimm->edac_mode = EDAC_SECDED;
3854-
dimm->mtype = MEM_HBM2;
3862+
dimm->mtype = pvt->dram_type;
38553863
dimm->dtype = DEV_X16;
38563864
dimm->grain = 64;
38573865
}
@@ -3880,7 +3888,7 @@ static bool gpu_ecc_enabled(struct amd64_pvt *pvt)
38803888
return true;
38813889
}
38823890

3883-
static inline u32 gpu_get_umc_base(u8 umc, u8 channel)
3891+
static inline u32 gpu_get_umc_base(struct amd64_pvt *pvt, u8 umc, u8 channel)
38843892
{
38853893
/*
38863894
* On CPUs, there is one channel per UMC, so UMC numbering equals
@@ -3893,13 +3901,16 @@ static inline u32 gpu_get_umc_base(u8 umc, u8 channel)
38933901
* On GPU nodes channels are selected in 3rd nibble
38943902
* HBM chX[3:0]= [Y ]5X[3:0]000;
38953903
* HBM chX[7:4]= [Y+1]5X[3:0]000
3904+
*
3905+
* On MI300 APU nodes, same as GPU nodes but channels are selected
3906+
* in the base address of 0x90000
38963907
*/
38973908
umc *= 2;
38983909

38993910
if (channel >= 4)
39003911
umc++;
39013912

3902-
return 0x50000 + (umc << 20) + ((channel % 4) << 12);
3913+
return pvt->gpu_umc_base + (umc << 20) + ((channel % 4) << 12);
39033914
}
39043915

39053916
static void gpu_read_mc_regs(struct amd64_pvt *pvt)
@@ -3910,7 +3921,7 @@ static void gpu_read_mc_regs(struct amd64_pvt *pvt)
39103921

39113922
/* Read registers from each UMC */
39123923
for_each_umc(i) {
3913-
umc_base = gpu_get_umc_base(i, 0);
3924+
umc_base = gpu_get_umc_base(pvt, i, 0);
39143925
umc = &pvt->umc[i];
39153926

39163927
amd_smn_read(nid, umc_base + UMCCH_UMC_CFG, &umc->umc_cfg);
@@ -3927,15 +3938,15 @@ static void gpu_read_base_mask(struct amd64_pvt *pvt)
39273938

39283939
for_each_umc(umc) {
39293940
for_each_chip_select(cs, umc, pvt) {
3930-
base_reg = gpu_get_umc_base(umc, cs) + UMCCH_BASE_ADDR;
3941+
base_reg = gpu_get_umc_base(pvt, umc, cs) + UMCCH_BASE_ADDR;
39313942
base = &pvt->csels[umc].csbases[cs];
39323943

39333944
if (!amd_smn_read(pvt->mc_node_id, base_reg, base)) {
39343945
edac_dbg(0, " DCSB%d[%d]=0x%08x reg: 0x%x\n",
39353946
umc, cs, *base, base_reg);
39363947
}
39373948

3938-
mask_reg = gpu_get_umc_base(umc, cs) + UMCCH_ADDR_MASK;
3949+
mask_reg = gpu_get_umc_base(pvt, umc, cs) + UMCCH_ADDR_MASK;
39393950
mask = &pvt->csels[umc].csmasks[cs];
39403951

39413952
if (!amd_smn_read(pvt->mc_node_id, mask_reg, mask)) {
@@ -3960,7 +3971,7 @@ static int gpu_hw_info_get(struct amd64_pvt *pvt)
39603971
{
39613972
int ret;
39623973

3963-
ret = gpu_get_node_map();
3974+
ret = gpu_get_node_map(pvt);
39643975
if (ret)
39653976
return ret;
39663977

@@ -4125,6 +4136,8 @@ static int per_family_init(struct amd64_pvt *pvt)
41254136
if (pvt->F3->device == PCI_DEVICE_ID_AMD_MI200_DF_F3) {
41264137
pvt->ctl_name = "MI200";
41274138
pvt->max_mcs = 4;
4139+
pvt->dram_type = MEM_HBM2;
4140+
pvt->gpu_umc_base = 0x50000;
41284141
pvt->ops = &gpu_ops;
41294142
} else {
41304143
pvt->ctl_name = "F19h_M30h";
@@ -4142,6 +4155,13 @@ static int per_family_init(struct amd64_pvt *pvt)
41424155
pvt->ctl_name = "F19h_M70h";
41434156
pvt->flags.zn_regs_v2 = 1;
41444157
break;
4158+
case 0x90 ... 0x9f:
4159+
pvt->ctl_name = "F19h_M90h";
4160+
pvt->max_mcs = 4;
4161+
pvt->dram_type = MEM_HBM3;
4162+
pvt->gpu_umc_base = 0x90000;
4163+
pvt->ops = &gpu_ops;
4164+
break;
41454165
case 0xa0 ... 0xaf:
41464166
pvt->ctl_name = "F19h_MA0h";
41474167
pvt->max_mcs = 12;
@@ -4180,23 +4200,33 @@ static const struct attribute_group *amd64_edac_attr_groups[] = {
41804200
NULL
41814201
};
41824202

4203+
/*
4204+
* For heterogeneous and APU models EDAC CHIP_SELECT and CHANNEL layers
4205+
* should be swapped to fit into the layers.
4206+
*/
4207+
static unsigned int get_layer_size(struct amd64_pvt *pvt, u8 layer)
4208+
{
4209+
bool is_gpu = (pvt->ops == &gpu_ops);
4210+
4211+
if (!layer)
4212+
return is_gpu ? pvt->max_mcs
4213+
: pvt->csels[0].b_cnt;
4214+
else
4215+
return is_gpu ? pvt->csels[0].b_cnt
4216+
: pvt->max_mcs;
4217+
}
4218+
41834219
static int init_one_instance(struct amd64_pvt *pvt)
41844220
{
41854221
struct mem_ctl_info *mci = NULL;
41864222
struct edac_mc_layer layers[2];
41874223
int ret = -ENOMEM;
41884224

4189-
/*
4190-
* For Heterogeneous family EDAC CHIP_SELECT and CHANNEL layers should
4191-
* be swapped to fit into the layers.
4192-
*/
41934225
layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
4194-
layers[0].size = (pvt->F3->device == PCI_DEVICE_ID_AMD_MI200_DF_F3) ?
4195-
pvt->max_mcs : pvt->csels[0].b_cnt;
4226+
layers[0].size = get_layer_size(pvt, 0);
41964227
layers[0].is_virt_csrow = true;
41974228
layers[1].type = EDAC_MC_LAYER_CHANNEL;
4198-
layers[1].size = (pvt->F3->device == PCI_DEVICE_ID_AMD_MI200_DF_F3) ?
4199-
pvt->csels[0].b_cnt : pvt->max_mcs;
4229+
layers[1].size = get_layer_size(pvt, 1);
42004230
layers[1].is_virt_csrow = false;
42014231

42024232
mci = edac_mc_alloc(pvt->mc_node_id, ARRAY_SIZE(layers), layers, 0);

drivers/edac/amd64_edac.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -362,6 +362,7 @@ struct amd64_pvt {
362362
u32 dct_sel_lo; /* DRAM Controller Select Low */
363363
u32 dct_sel_hi; /* DRAM Controller Select High */
364364
u32 online_spare; /* On-Line spare Reg */
365+
u32 gpu_umc_base; /* Base address used for channel selection on GPUs */
365366

366367
/* x4, x8, or x16 syndromes in use */
367368
u8 ecc_sym_sz;

drivers/edac/armada_xp_edac.c

Lines changed: 7 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,9 @@
55

66
#include <linux/kernel.h>
77
#include <linux/edac.h>
8-
#include <linux/of_platform.h>
8+
#include <linux/of.h>
9+
#include <linux/of_device.h>
10+
#include <linux/platform_device.h>
911

1012
#include <asm/hardware/cache-l2x0.h>
1113
#include <asm/hardware/cache-aurora-l2.h>
@@ -351,20 +353,18 @@ static int axp_mc_probe(struct platform_device *pdev)
351353
return 0;
352354
}
353355

354-
static int axp_mc_remove(struct platform_device *pdev)
356+
static void axp_mc_remove(struct platform_device *pdev)
355357
{
356358
struct mem_ctl_info *mci = platform_get_drvdata(pdev);
357359

358360
edac_mc_del_mc(&pdev->dev);
359361
edac_mc_free(mci);
360362
platform_set_drvdata(pdev, NULL);
361-
362-
return 0;
363363
}
364364

365365
static struct platform_driver axp_mc_driver = {
366366
.probe = axp_mc_probe,
367-
.remove = axp_mc_remove,
367+
.remove_new = axp_mc_remove,
368368
.driver = {
369369
.name = "armada_xp_mc_edac",
370370
.of_match_table = of_match_ptr(axp_mc_of_match),
@@ -564,7 +564,7 @@ static int aurora_l2_probe(struct platform_device *pdev)
564564
return 0;
565565
}
566566

567-
static int aurora_l2_remove(struct platform_device *pdev)
567+
static void aurora_l2_remove(struct platform_device *pdev)
568568
{
569569
struct edac_device_ctl_info *dci = platform_get_drvdata(pdev);
570570
#ifdef CONFIG_EDAC_DEBUG
@@ -575,13 +575,11 @@ static int aurora_l2_remove(struct platform_device *pdev)
575575
edac_device_del_device(&pdev->dev);
576576
edac_device_free_ctl_info(dci);
577577
platform_set_drvdata(pdev, NULL);
578-
579-
return 0;
580578
}
581579

582580
static struct platform_driver aurora_l2_driver = {
583581
.probe = aurora_l2_probe,
584-
.remove = aurora_l2_remove,
582+
.remove_new = aurora_l2_remove,
585583
.driver = {
586584
.name = "aurora_l2_edac",
587585
.of_match_table = of_match_ptr(aurora_l2_of_match),

drivers/edac/aspeed_edac.c

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -357,7 +357,7 @@ static int aspeed_probe(struct platform_device *pdev)
357357
}
358358

359359

360-
static int aspeed_remove(struct platform_device *pdev)
360+
static void aspeed_remove(struct platform_device *pdev)
361361
{
362362
struct mem_ctl_info *mci;
363363

@@ -369,8 +369,6 @@ static int aspeed_remove(struct platform_device *pdev)
369369
mci = edac_mc_del_mc(&pdev->dev);
370370
if (mci)
371371
edac_mc_free(mci);
372-
373-
return 0;
374372
}
375373

376374

@@ -389,7 +387,7 @@ static struct platform_driver aspeed_driver = {
389387
.of_match_table = aspeed_of_match
390388
},
391389
.probe = aspeed_probe,
392-
.remove = aspeed_remove
390+
.remove_new = aspeed_remove
393391
};
394392
module_platform_driver(aspeed_driver);
395393

drivers/edac/bluefield_edac.c

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -323,14 +323,12 @@ static int bluefield_edac_mc_probe(struct platform_device *pdev)
323323

324324
}
325325

326-
static int bluefield_edac_mc_remove(struct platform_device *pdev)
326+
static void bluefield_edac_mc_remove(struct platform_device *pdev)
327327
{
328328
struct mem_ctl_info *mci = platform_get_drvdata(pdev);
329329

330330
edac_mc_del_mc(&pdev->dev);
331331
edac_mc_free(mci);
332-
333-
return 0;
334332
}
335333

336334
static const struct acpi_device_id bluefield_mc_acpi_ids[] = {
@@ -346,7 +344,7 @@ static struct platform_driver bluefield_edac_mc_driver = {
346344
.acpi_match_table = bluefield_mc_acpi_ids,
347345
},
348346
.probe = bluefield_edac_mc_probe,
349-
.remove = bluefield_edac_mc_remove,
347+
.remove_new = bluefield_edac_mc_remove,
350348
};
351349

352350
module_platform_driver(bluefield_edac_mc_driver);

drivers/edac/cell_edac.c

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -234,20 +234,19 @@ static int cell_edac_probe(struct platform_device *pdev)
234234
return 0;
235235
}
236236

237-
static int cell_edac_remove(struct platform_device *pdev)
237+
static void cell_edac_remove(struct platform_device *pdev)
238238
{
239239
struct mem_ctl_info *mci = edac_mc_del_mc(&pdev->dev);
240240
if (mci)
241241
edac_mc_free(mci);
242-
return 0;
243242
}
244243

245244
static struct platform_driver cell_edac_driver = {
246245
.driver = {
247246
.name = "cbe-mic",
248247
},
249248
.probe = cell_edac_probe,
250-
.remove = cell_edac_remove,
249+
.remove_new = cell_edac_remove,
251250
};
252251

253252
static int __init cell_edac_init(void)

0 commit comments

Comments
 (0)