Skip to content

Commit dd7814b

Browse files
qzhuo2 authored and aegl committed
EDAC/i10nm: Make more configurations CPU model specific
The numbers of memory controllers per socket, channels per memory controller, DIMMs per channel, and the bus/device/function triples of the PCI devices used in i10nm_edac can be CPU model specific. Add new fields to the structure res_config for the above numbers and triples to make them CPU model specific.

Signed-off-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
Link: https://lore.kernel.org/all/20230113032802.41752-1-qiuxu.zhuo@intel.com
1 parent e4b2bc6 commit dd7814b

2 files changed

Lines changed: 121 additions & 42 deletions

File tree

drivers/edac/i10nm_base.c

Lines changed: 91 additions & 40 deletions
Original file line number | Diff line number | Diff line change
@@ -148,35 +148,47 @@ static void __enable_retry_rd_err_log(struct skx_imc *imc, int chan, bool enable
148148

149149
static void enable_retry_rd_err_log(bool enable)
150150
{
151+
int i, j, imc_num, chan_num;
151152
struct skx_imc *imc;
152153
struct skx_dev *d;
153-
int i, j;
154154

155155
edac_dbg(2, "\n");
156156

157-
list_for_each_entry(d, i10nm_edac_list, list)
158-
for (i = 0; i < I10NM_NUM_IMC; i++) {
157+
list_for_each_entry(d, i10nm_edac_list, list) {
158+
imc_num = res_cfg->ddr_imc_num;
159+
chan_num = res_cfg->ddr_chan_num;
160+
161+
for (i = 0; i < imc_num; i++) {
159162
imc = &d->imc[i];
160163
if (!imc->mbase)
161164
continue;
162165

163-
for (j = 0; j < I10NM_NUM_CHANNELS; j++) {
164-
if (imc->hbm_mc) {
165-
__enable_retry_rd_err_log(imc, j, enable,
166-
res_cfg->offsets_scrub_hbm0,
167-
res_cfg->offsets_demand_hbm0,
168-
NULL);
169-
__enable_retry_rd_err_log(imc, j, enable,
170-
res_cfg->offsets_scrub_hbm1,
171-
res_cfg->offsets_demand_hbm1,
172-
NULL);
173-
} else {
174-
__enable_retry_rd_err_log(imc, j, enable,
175-
res_cfg->offsets_scrub,
176-
res_cfg->offsets_demand,
177-
res_cfg->offsets_demand2);
178-
}
166+
for (j = 0; j < chan_num; j++)
167+
__enable_retry_rd_err_log(imc, j, enable,
168+
res_cfg->offsets_scrub,
169+
res_cfg->offsets_demand,
170+
res_cfg->offsets_demand2);
171+
}
172+
173+
imc_num += res_cfg->hbm_imc_num;
174+
chan_num = res_cfg->hbm_chan_num;
175+
176+
for (; i < imc_num; i++) {
177+
imc = &d->imc[i];
178+
if (!imc->mbase || !imc->hbm_mc)
179+
continue;
180+
181+
for (j = 0; j < chan_num; j++) {
182+
__enable_retry_rd_err_log(imc, j, enable,
183+
res_cfg->offsets_scrub_hbm0,
184+
res_cfg->offsets_demand_hbm0,
185+
NULL);
186+
__enable_retry_rd_err_log(imc, j, enable,
187+
res_cfg->offsets_scrub_hbm1,
188+
res_cfg->offsets_demand_hbm1,
189+
NULL);
179190
}
191+
}
180192
}
181193
}
182194

@@ -318,9 +330,9 @@ static bool i10nm_check_2lm(struct res_config *cfg)
318330
int i;
319331

320332
list_for_each_entry(d, i10nm_edac_list, list) {
321-
d->sad_all = pci_get_dev_wrapper(d->seg, d->bus[1],
322-
PCI_SLOT(cfg->sad_all_devfn),
323-
PCI_FUNC(cfg->sad_all_devfn));
333+
d->sad_all = pci_get_dev_wrapper(d->seg, d->bus[res_cfg->sad_all_bdf.bus],
334+
res_cfg->sad_all_bdf.dev,
335+
res_cfg->sad_all_bdf.fun);
324336
if (!d->sad_all)
325337
continue;
326338

@@ -444,11 +456,15 @@ static int i10nm_get_ddr_munits(void)
444456
u64 base;
445457

446458
list_for_each_entry(d, i10nm_edac_list, list) {
447-
d->util_all = pci_get_dev_wrapper(d->seg, d->bus[1], 29, 1);
459+
d->util_all = pci_get_dev_wrapper(d->seg, d->bus[res_cfg->util_all_bdf.bus],
460+
res_cfg->util_all_bdf.dev,
461+
res_cfg->util_all_bdf.fun);
448462
if (!d->util_all)
449463
return -ENODEV;
450464

451-
d->uracu = pci_get_dev_wrapper(d->seg, d->bus[0], 0, 1);
465+
d->uracu = pci_get_dev_wrapper(d->seg, d->bus[res_cfg->uracu_bdf.bus],
466+
res_cfg->uracu_bdf.dev,
467+
res_cfg->uracu_bdf.fun);
452468
if (!d->uracu)
453469
return -ENODEV;
454470

@@ -461,9 +477,10 @@ static int i10nm_get_ddr_munits(void)
461477
edac_dbg(2, "socket%d mmio base 0x%llx (reg 0x%x)\n",
462478
j++, base, reg);
463479

464-
for (i = 0; i < I10NM_NUM_DDR_IMC; i++) {
465-
mdev = pci_get_dev_wrapper(d->seg, d->bus[0],
466-
12 + i, 0);
480+
for (i = 0; i < res_cfg->ddr_imc_num; i++) {
481+
mdev = pci_get_dev_wrapper(d->seg, d->bus[res_cfg->ddr_mdev_bdf.bus],
482+
res_cfg->ddr_mdev_bdf.dev + i,
483+
res_cfg->ddr_mdev_bdf.fun);
467484
if (i == 0 && !mdev) {
468485
i10nm_printk(KERN_ERR, "No IMC found\n");
469486
return -ENODEV;
@@ -519,7 +536,9 @@ static int i10nm_get_hbm_munits(void)
519536
u64 base;
520537

521538
list_for_each_entry(d, i10nm_edac_list, list) {
522-
d->pcu_cr3 = pci_get_dev_wrapper(d->seg, d->bus[1], 30, 3);
539+
d->pcu_cr3 = pci_get_dev_wrapper(d->seg, d->bus[res_cfg->pcu_cr3_bdf.bus],
540+
res_cfg->pcu_cr3_bdf.dev,
541+
res_cfg->pcu_cr3_bdf.fun);
523542
if (!d->pcu_cr3)
524543
return -ENODEV;
525544

@@ -540,11 +559,13 @@ static int i10nm_get_hbm_munits(void)
540559
}
541560
base += I10NM_GET_HBM_IMC_MMIO_OFFSET(reg);
542561

543-
lmc = I10NM_NUM_DDR_IMC;
562+
lmc = res_cfg->ddr_imc_num;
563+
564+
for (i = 0; i < res_cfg->hbm_imc_num; i++) {
565+
mdev = pci_get_dev_wrapper(d->seg, d->bus[res_cfg->hbm_mdev_bdf.bus],
566+
res_cfg->hbm_mdev_bdf.dev + i / 4,
567+
res_cfg->hbm_mdev_bdf.fun + i % 4);
544568

545-
for (i = 0; i < I10NM_NUM_HBM_IMC; i++) {
546-
mdev = pci_get_dev_wrapper(d->seg, d->bus[0],
547-
12 + i / 4, 1 + i % 4);
548569
if (i == 0 && !mdev) {
549570
i10nm_printk(KERN_ERR, "No hbm mc found\n");
550571
return -ENODEV;
@@ -594,8 +615,16 @@ static struct res_config i10nm_cfg0 = {
594615
.type = I10NM,
595616
.decs_did = 0x3452,
596617
.busno_cfg_offset = 0xcc,
618+
.ddr_imc_num = 4,
619+
.ddr_chan_num = 2,
620+
.ddr_dimm_num = 2,
597621
.ddr_chan_mmio_sz = 0x4000,
598-
.sad_all_devfn = PCI_DEVFN(29, 0),
622+
.sad_all_bdf = {1, 29, 0},
623+
.pcu_cr3_bdf = {1, 30, 3},
624+
.util_all_bdf = {1, 29, 1},
625+
.uracu_bdf = {0, 0, 1},
626+
.ddr_mdev_bdf = {0, 12, 0},
627+
.hbm_mdev_bdf = {0, 12, 1},
599628
.sad_all_offset = 0x108,
600629
.offsets_scrub = offsets_scrub_icx,
601630
.offsets_demand = offsets_demand_icx,
@@ -605,8 +634,16 @@ static struct res_config i10nm_cfg1 = {
605634
.type = I10NM,
606635
.decs_did = 0x3452,
607636
.busno_cfg_offset = 0xd0,
637+
.ddr_imc_num = 4,
638+
.ddr_chan_num = 2,
639+
.ddr_dimm_num = 2,
608640
.ddr_chan_mmio_sz = 0x4000,
609-
.sad_all_devfn = PCI_DEVFN(29, 0),
641+
.sad_all_bdf = {1, 29, 0},
642+
.pcu_cr3_bdf = {1, 30, 3},
643+
.util_all_bdf = {1, 29, 1},
644+
.uracu_bdf = {0, 0, 1},
645+
.ddr_mdev_bdf = {0, 12, 0},
646+
.hbm_mdev_bdf = {0, 12, 1},
610647
.sad_all_offset = 0x108,
611648
.offsets_scrub = offsets_scrub_icx,
612649
.offsets_demand = offsets_demand_icx,
@@ -616,10 +653,21 @@ static struct res_config spr_cfg = {
616653
.type = SPR,
617654
.decs_did = 0x3252,
618655
.busno_cfg_offset = 0xd0,
656+
.ddr_imc_num = 4,
657+
.ddr_chan_num = 2,
658+
.ddr_dimm_num = 2,
659+
.hbm_imc_num = 16,
660+
.hbm_chan_num = 2,
661+
.hbm_dimm_num = 1,
619662
.ddr_chan_mmio_sz = 0x8000,
620663
.hbm_chan_mmio_sz = 0x4000,
621664
.support_ddr5 = true,
622-
.sad_all_devfn = PCI_DEVFN(10, 0),
665+
.sad_all_bdf = {1, 10, 0},
666+
.pcu_cr3_bdf = {1, 30, 3},
667+
.util_all_bdf = {1, 29, 1},
668+
.uracu_bdf = {0, 0, 1},
669+
.ddr_mdev_bdf = {0, 12, 0},
670+
.hbm_mdev_bdf = {0, 12, 1},
623671
.sad_all_offset = 0x300,
624672
.offsets_scrub = offsets_scrub_spr,
625673
.offsets_scrub_hbm0 = offsets_scrub_spr_hbm0,
@@ -753,6 +801,7 @@ static int __init i10nm_init(void)
753801
struct skx_dev *d;
754802
int rc, i, off[3] = {0xd0, 0xc8, 0xcc};
755803
u64 tolm, tohm;
804+
int imc_num;
756805

757806
edac_dbg(2, "\n");
758807

@@ -793,6 +842,8 @@ static int __init i10nm_init(void)
793842
if (i10nm_get_hbm_munits() && rc)
794843
goto fail;
795844

845+
imc_num = res_cfg->ddr_imc_num + res_cfg->hbm_imc_num;
846+
796847
list_for_each_entry(d, i10nm_edac_list, list) {
797848
rc = skx_get_src_id(d, 0xf8, &src_id);
798849
if (rc < 0)
@@ -803,7 +854,7 @@ static int __init i10nm_init(void)
803854
goto fail;
804855

805856
edac_dbg(2, "src_id = %d node_id = %d\n", src_id, node_id);
806-
for (i = 0; i < I10NM_NUM_IMC; i++) {
857+
for (i = 0; i < imc_num; i++) {
807858
if (!d->imc[i].mdev)
808859
continue;
809860

@@ -813,12 +864,12 @@ static int __init i10nm_init(void)
813864
d->imc[i].node_id = node_id;
814865
if (d->imc[i].hbm_mc) {
815866
d->imc[i].chan_mmio_sz = cfg->hbm_chan_mmio_sz;
816-
d->imc[i].num_channels = I10NM_NUM_HBM_CHANNELS;
817-
d->imc[i].num_dimms = I10NM_NUM_HBM_DIMMS;
867+
d->imc[i].num_channels = cfg->hbm_chan_num;
868+
d->imc[i].num_dimms = cfg->hbm_dimm_num;
818869
} else {
819870
d->imc[i].chan_mmio_sz = cfg->ddr_chan_mmio_sz;
820-
d->imc[i].num_channels = I10NM_NUM_DDR_CHANNELS;
821-
d->imc[i].num_dimms = I10NM_NUM_DDR_DIMMS;
871+
d->imc[i].num_channels = cfg->ddr_chan_num;
872+
d->imc[i].num_dimms = cfg->ddr_dimm_num;
822873
}
823874

824875
rc = skx_register_mci(&d->imc[i], d->imc[i].mdev,

drivers/edac/skx_common.h

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -173,19 +173,47 @@ struct decoded_addr {
173173
bool decoded_by_adxl;
174174
};
175175

176+
struct pci_bdf {
177+
u32 bus : 8;
178+
u32 dev : 5;
179+
u32 fun : 3;
180+
};
181+
176182
struct res_config {
177183
enum type type;
178184
/* Configuration agent device ID */
179185
unsigned int decs_did;
180186
/* Default bus number configuration register offset */
181187
int busno_cfg_offset;
188+
/* DDR memory controllers per socket */
189+
int ddr_imc_num;
190+
/* DDR channels per DDR memory controller */
191+
int ddr_chan_num;
192+
/* DDR DIMMs per DDR memory channel */
193+
int ddr_dimm_num;
182194
/* Per DDR channel memory-mapped I/O size */
183195
int ddr_chan_mmio_sz;
196+
/* HBM memory controllers per socket */
197+
int hbm_imc_num;
198+
/* HBM channels per HBM memory controller */
199+
int hbm_chan_num;
200+
/* HBM DIMMs per HBM memory channel */
201+
int hbm_dimm_num;
184202
/* Per HBM channel memory-mapped I/O size */
185203
int hbm_chan_mmio_sz;
186204
bool support_ddr5;
187-
/* SAD device number and function number */
188-
unsigned int sad_all_devfn;
205+
/* SAD device BDF */
206+
struct pci_bdf sad_all_bdf;
207+
/* PCU device BDF */
208+
struct pci_bdf pcu_cr3_bdf;
209+
/* UTIL device BDF */
210+
struct pci_bdf util_all_bdf;
211+
/* URACU device BDF */
212+
struct pci_bdf uracu_bdf;
213+
/* DDR mdev device BDF */
214+
struct pci_bdf ddr_mdev_bdf;
215+
/* HBM mdev device BDF */
216+
struct pci_bdf hbm_mdev_bdf;
189217
int sad_all_offset;
190218
/* Offsets of retry_rd_err_log registers */
191219
u32 *offsets_scrub;

0 commit comments

Comments
 (0)