Skip to content

Commit b3ece3a

Browse files
muralimk-amd authored and bp3tk0v committed
EDAC/amd64: Add get_err_info() to pvt->ops
GPU nodes will use a different method to determine the chip select and channel of an error. A function pointer should be used rather than introducing another branching condition.

Prepare for this by adding get_err_info() to pvt->ops. This function is only called from the modern code path, so a legacy function is not defined.

Make sure to call this after MCA_STATUS[SyndV] is checked, since the csrow value is found in MCA_SYND.

[ Yazen: rebased/reworked patch and reworded commit message. ]

Signed-off-by: Muralidhara M K <muralidhara.mk@amd.com>
Co-developed-by: Naveen Krishna Chatradhi <naveenkrishna.chatradhi@amd.com>
Signed-off-by: Naveen Krishna Chatradhi <naveenkrishna.chatradhi@amd.com>
Co-developed-by: Yazen Ghannam <yazen.ghannam@amd.com>
Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Link: https://lore.kernel.org/r/20230127170419.1824692-23-yazen.ghannam@amd.com
1 parent f6f3638 commit b3ece3a

2 files changed

Lines changed: 9 additions & 5 deletions

File tree

drivers/edac/amd64_edac.c

Lines changed: 8 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -2974,10 +2974,14 @@ static inline void decode_bus_error(int node_id, struct mce *m)
29742974
* Currently, we can derive the channel number by looking at the 6th nibble in
29752975
* the instance_id. For example, instance_id=0xYXXXXX where Y is the channel
29762976
* number.
2977+
*
2978+
* For DRAM ECC errors, the Chip Select number is given in bits [2:0] of
2979+
* the MCA_SYND[ErrorInformation] field.
29772980
*/
2978-
static int find_umc_channel(struct mce *m)
2981+
static void umc_get_err_info(struct mce *m, struct err_info *err)
29792982
{
2980-
return (m->ipid & GENMASK(31, 0)) >> 20;
2983+
err->channel = (m->ipid & GENMASK(31, 0)) >> 20;
2984+
err->csrow = m->synd & 0x7;
29812985
}
29822986

29832987
static void decode_umc_error(int node_id, struct mce *m)
@@ -2999,8 +3003,6 @@ static void decode_umc_error(int node_id, struct mce *m)
29993003
if (m->status & MCI_STATUS_DEFERRED)
30003004
ecc_type = 3;
30013005

3002-
err.channel = find_umc_channel(m);
3003-
30043006
if (!(m->status & MCI_STATUS_SYNDV)) {
30053007
err.err_code = ERR_SYND;
30063008
goto log_error;
@@ -3015,7 +3017,7 @@ static void decode_umc_error(int node_id, struct mce *m)
30153017
err.err_code = ERR_CHANNEL;
30163018
}
30173019

3018-
err.csrow = m->synd & 0x7;
3020+
pvt->ops->get_err_info(m, &err);
30193021

30203022
if (umc_normaddr_to_sysaddr(m->addr, pvt->mc_node_id, err.channel, &sys_addr)) {
30213023
err.err_code = ERR_NORM_ADDR;
@@ -3685,6 +3687,7 @@ static struct low_ops umc_ops = {
36853687
.ecc_enabled = umc_ecc_enabled,
36863688
.setup_mci_misc_attrs = umc_setup_mci_misc_attrs,
36873689
.dump_misc_regs = umc_dump_misc_regs,
3690+
.get_err_info = umc_get_err_info,
36883691
};
36893692

36903693
/* Use Family 16h versions for defaults and adjust as needed below. */

drivers/edac/amd64_edac.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -470,6 +470,7 @@ struct low_ops {
470470
bool (*ecc_enabled)(struct amd64_pvt *pvt);
471471
void (*setup_mci_misc_attrs)(struct mem_ctl_info *mci);
472472
void (*dump_misc_regs)(struct amd64_pvt *pvt);
473+
void (*get_err_info)(struct mce *m, struct err_info *err);
473474
};
474475

475476
int __amd64_read_pci_cfg_dword(struct pci_dev *pdev, int offset,

0 commit comments

Comments
 (0)