Skip to content

Commit 8d4ec3f

Browse files
jamesequinlan authored and bjorn-helgaas committed
PCI: brcmstb: Add panic/die handler to driver
Most PCIe HW returns 0xffffffff for failed reads on PCIe, but by default Broadcom's STB PCIe controller effects an abort. Some SoCs -- 7216 and its descendants -- have new HW that identifies error details.

Add a simple handler to print diagnostic info in case the PCIe controller was the cause of the abort. Unfortunately, an abort still occurs.

Read the error registers only when the PCIe bridge is active and the PCIe registers are accessible. Otherwise, a "die" event caused by something other than PCIe could cause an abort if the PCIe "die" handler tried to access registers when the bridge is off.

Example error output:

  brcm-pcie 8b20000.pcie: Error: Mem Acc: 32bit, read, @0x38000000
  brcm-pcie 8b20000.pcie: Type: TO=0 Abt=0 UnspReq=1 AccDsble=0 BadAddr=0

Signed-off-by: Jim Quinlan <james.quinlan@broadcom.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Florian Fainelli <florian.fainelli@broadcom.com>
Link: https://patch.msgid.link/20251029193616.3670003-3-james.quinlan@broadcom.com
1 parent a3f00f2 commit 8d4ec3f

1 file changed

Lines changed: 159 additions & 2 deletions

File tree

drivers/pci/controller/pcie-brcmstb.c

Lines changed: 159 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,15 +14,18 @@
1414
#include <linux/irqchip/chained_irq.h>
1515
#include <linux/irqchip/irq-msi-lib.h>
1616
#include <linux/irqdomain.h>
17+
#include <linux/kdebug.h>
1718
#include <linux/kernel.h>
1819
#include <linux/list.h>
1920
#include <linux/log2.h>
2021
#include <linux/module.h>
2122
#include <linux/msi.h>
23+
#include <linux/notifier.h>
2224
#include <linux/of_address.h>
2325
#include <linux/of_irq.h>
2426
#include <linux/of_pci.h>
2527
#include <linux/of_platform.h>
28+
#include <linux/panic_notifier.h>
2629
#include <linux/pci.h>
2730
#include <linux/pci-ecam.h>
2831
#include <linux/printk.h>
@@ -32,6 +35,7 @@
3235
#include <linux/slab.h>
3336
#include <linux/spinlock.h>
3437
#include <linux/string.h>
38+
#include <linux/string_choices.h>
3539
#include <linux/types.h>
3640

3741
#include "../pci.h"
@@ -155,8 +159,40 @@
155159
#define MSI_INT_MASK_SET 0x10
156160
#define MSI_INT_MASK_CLR 0x14
157161

162+
/*
 * Error report registers.
 *
 * Register offsets (relative to the controller's register base) and the
 * bit/field masks that brcm_pcie_dump_err() decodes when it reports an
 * outbound access error.
 */
163+
/*
 * NOTE(review): the TREAT register and its two values are not referenced in
 * the visible part of this change; presumably they select how config/memory
 * errors are handled -- confirm against the HW documentation.
 */
#define PCIE_OUTB_ERR_TREAT 0x6000
164+
#define PCIE_OUTB_ERR_TREAT_CONFIG 0x1
165+
#define PCIE_OUTB_ERR_TREAT_MEM 0x2
166+
/* Read before dumping; a value of zero means there is nothing to report */
#define PCIE_OUTB_ERR_VALID 0x6004
167+
/* Written with 1 once the error details have been read out */
#define PCIE_OUTB_ERR_CLEAR 0x6008
168+
/* Access info: error kind (config/memory), access width, direction, byte lanes */
#define PCIE_OUTB_ERR_ACC_INFO 0x600c
169+
#define PCIE_OUTB_ERR_ACC_INFO_CFG_ERR BIT(0)
170+
#define PCIE_OUTB_ERR_ACC_INFO_MEM_ERR BIT(1)
171+
#define PCIE_OUTB_ERR_ACC_INFO_TYPE_64 BIT(2)
172+
#define PCIE_OUTB_ERR_ACC_INFO_DIR_WRITE BIT(4)
173+
#define PCIE_OUTB_ERR_ACC_INFO_BYTE_LANES 0xff00
174+
/* Config access address; read only for config errors, split into bus/dev/func/reg */
#define PCIE_OUTB_ERR_ACC_ADDR 0x6010
175+
#define PCIE_OUTB_ERR_ACC_ADDR_BUS 0xff00000
176+
#define PCIE_OUTB_ERR_ACC_ADDR_DEV 0xf8000
177+
#define PCIE_OUTB_ERR_ACC_ADDR_FUNC 0x7000
178+
#define PCIE_OUTB_ERR_ACC_ADDR_REG 0xfff
179+
/* Cause bits for a config access error; each is printed as a 0/1 flag */
#define PCIE_OUTB_ERR_CFG_CAUSE 0x6014
180+
#define PCIE_OUTB_ERR_CFG_CAUSE_TIMEOUT BIT(6)
181+
#define PCIE_OUTB_ERR_CFG_CAUSE_ABORT BIT(5)
182+
#define PCIE_OUTB_ERR_CFG_CAUSE_UNSUPP_REQ BIT(4)
183+
#define PCIE_OUTB_ERR_CFG_CAUSE_ACC_TIMEOUT BIT(2)
184+
#define PCIE_OUTB_ERR_CFG_CAUSE_ACC_DISABLED BIT(1)
185+
#define PCIE_OUTB_ERR_CFG_CAUSE_ACC_64BIT BIT(0)
186+
/* Low/high 32-bit halves of the memory access address; combined into a u64 when printed */
#define PCIE_OUTB_ERR_MEM_ADDR_LO 0x6018
187+
#define PCIE_OUTB_ERR_MEM_ADDR_HI 0x601c
188+
/* Cause bits for a memory access error; each is printed as a 0/1 flag */
#define PCIE_OUTB_ERR_MEM_CAUSE 0x6020
189+
#define PCIE_OUTB_ERR_MEM_CAUSE_TIMEOUT BIT(6)
190+
#define PCIE_OUTB_ERR_MEM_CAUSE_ABORT BIT(5)
191+
#define PCIE_OUTB_ERR_MEM_CAUSE_UNSUPP_REQ BIT(4)
192+
#define PCIE_OUTB_ERR_MEM_CAUSE_ACC_DISABLED BIT(1)
193+
#define PCIE_OUTB_ERR_MEM_CAUSE_BAD_ADDR BIT(0)
194+
158195
#define PCIE_RGR1_SW_INIT_1_PERST_MASK 0x1
159-
#define PCIE_RGR1_SW_INIT_1_PERST_SHIFT 0x0
160196

161197
#define RGR1_SW_INIT_1_INIT_GENERIC_MASK 0x2
162198
#define RGR1_SW_INIT_1_INIT_GENERIC_SHIFT 0x1
@@ -305,6 +341,8 @@ struct brcm_pcie {
305341
bool ep_wakeup_capable;
306342
const struct pcie_cfg_data *cfg;
307343
bool bridge_in_reset;
344+
struct notifier_block die_notifier;
345+
struct notifier_block panic_notifier;
308346
spinlock_t bridge_lock;
309347
};
310348

@@ -1727,6 +1765,119 @@ static int brcm_pcie_resume_noirq(struct device *dev)
17271765
return ret;
17281766
}
17291767

1768+
/* Dump out PCIe errors on die or panic */
1769+
static int brcm_pcie_dump_err(struct brcm_pcie *pcie,
1770+
const char *type)
1771+
{
1772+
void __iomem *base = pcie->base;
1773+
int i, is_cfg_err, is_mem_err, lanes;
1774+
const char *width_str, *direction_str;
1775+
u32 info, cfg_addr, cfg_cause, mem_cause, lo, hi;
1776+
struct pci_host_bridge *bridge = pci_host_bridge_from_priv(pcie);
1777+
unsigned long flags;
1778+
char lanes_str[9];
1779+
1780+
spin_lock_irqsave(&pcie->bridge_lock, flags);
1781+
/* Don't access registers when the bridge is off */
1782+
if (pcie->bridge_in_reset || readl(base + PCIE_OUTB_ERR_VALID) == 0) {
1783+
spin_unlock_irqrestore(&pcie->bridge_lock, flags);
1784+
return NOTIFY_DONE;
1785+
}
1786+
1787+
/* Read all necessary registers so we can release the spinlock ASAP */
1788+
info = readl(base + PCIE_OUTB_ERR_ACC_INFO);
1789+
is_cfg_err = !!(info & PCIE_OUTB_ERR_ACC_INFO_CFG_ERR);
1790+
is_mem_err = !!(info & PCIE_OUTB_ERR_ACC_INFO_MEM_ERR);
1791+
if (is_cfg_err) {
1792+
cfg_addr = readl(base + PCIE_OUTB_ERR_ACC_ADDR);
1793+
cfg_cause = readl(base + PCIE_OUTB_ERR_CFG_CAUSE);
1794+
}
1795+
if (is_mem_err) {
1796+
mem_cause = readl(base + PCIE_OUTB_ERR_MEM_CAUSE);
1797+
lo = readl(base + PCIE_OUTB_ERR_MEM_ADDR_LO);
1798+
hi = readl(base + PCIE_OUTB_ERR_MEM_ADDR_HI);
1799+
}
1800+
/* We've got all of the info, clear the error */
1801+
writel(1, base + PCIE_OUTB_ERR_CLEAR);
1802+
spin_unlock_irqrestore(&pcie->bridge_lock, flags);
1803+
1804+
dev_err(pcie->dev, "reporting PCIe info which may be related to %s error\n",
1805+
type);
1806+
width_str = (info & PCIE_OUTB_ERR_ACC_INFO_TYPE_64) ? "64bit" : "32bit";
1807+
direction_str = str_read_write(!(info & PCIE_OUTB_ERR_ACC_INFO_DIR_WRITE));
1808+
lanes = FIELD_GET(PCIE_OUTB_ERR_ACC_INFO_BYTE_LANES, info);
1809+
for (i = 0, lanes_str[8] = 0; i < 8; i++)
1810+
lanes_str[i] = (lanes & (1 << i)) ? '1' : '0';
1811+
1812+
if (is_cfg_err) {
1813+
int bus = FIELD_GET(PCIE_OUTB_ERR_ACC_ADDR_BUS, cfg_addr);
1814+
int dev = FIELD_GET(PCIE_OUTB_ERR_ACC_ADDR_DEV, cfg_addr);
1815+
int func = FIELD_GET(PCIE_OUTB_ERR_ACC_ADDR_FUNC, cfg_addr);
1816+
int reg = FIELD_GET(PCIE_OUTB_ERR_ACC_ADDR_REG, cfg_addr);
1817+
1818+
dev_err(pcie->dev, "Error: CFG Acc, %s, %s (%04x:%02x:%02x.%d) reg=0x%x, lanes=%s\n",
1819+
width_str, direction_str, bridge->domain_nr, bus, dev,
1820+
func, reg, lanes_str);
1821+
dev_err(pcie->dev, " Type: TO=%d Abt=%d UnsupReq=%d AccTO=%d AccDsbld=%d Acc64bit=%d\n",
1822+
!!(cfg_cause & PCIE_OUTB_ERR_CFG_CAUSE_TIMEOUT),
1823+
!!(cfg_cause & PCIE_OUTB_ERR_CFG_CAUSE_ABORT),
1824+
!!(cfg_cause & PCIE_OUTB_ERR_CFG_CAUSE_UNSUPP_REQ),
1825+
!!(cfg_cause & PCIE_OUTB_ERR_CFG_CAUSE_ACC_TIMEOUT),
1826+
!!(cfg_cause & PCIE_OUTB_ERR_CFG_CAUSE_ACC_DISABLED),
1827+
!!(cfg_cause & PCIE_OUTB_ERR_CFG_CAUSE_ACC_64BIT));
1828+
}
1829+
1830+
if (is_mem_err) {
1831+
u64 addr = ((u64)hi << 32) | (u64)lo;
1832+
1833+
dev_err(pcie->dev, "Error: Mem Acc, %s, %s, @0x%llx, lanes=%s\n",
1834+
width_str, direction_str, addr, lanes_str);
1835+
dev_err(pcie->dev, " Type: TO=%d Abt=%d UnsupReq=%d AccDsble=%d BadAddr=%d\n",
1836+
!!(mem_cause & PCIE_OUTB_ERR_MEM_CAUSE_TIMEOUT),
1837+
!!(mem_cause & PCIE_OUTB_ERR_MEM_CAUSE_ABORT),
1838+
!!(mem_cause & PCIE_OUTB_ERR_MEM_CAUSE_UNSUPP_REQ),
1839+
!!(mem_cause & PCIE_OUTB_ERR_MEM_CAUSE_ACC_DISABLED),
1840+
!!(mem_cause & PCIE_OUTB_ERR_MEM_CAUSE_BAD_ADDR));
1841+
}
1842+
1843+
return NOTIFY_DONE;
1844+
}
1845+
1846+
static int brcm_pcie_die_notify_cb(struct notifier_block *self,
1847+
unsigned long v, void *p)
1848+
{
1849+
struct brcm_pcie *pcie =
1850+
container_of(self, struct brcm_pcie, die_notifier);
1851+
1852+
return brcm_pcie_dump_err(pcie, "Die");
1853+
}
1854+
1855+
static int brcm_pcie_panic_notify_cb(struct notifier_block *self,
1856+
unsigned long v, void *p)
1857+
{
1858+
struct brcm_pcie *pcie =
1859+
container_of(self, struct brcm_pcie, panic_notifier);
1860+
1861+
return brcm_pcie_dump_err(pcie, "Panic");
1862+
}
1863+
1864+
static void brcm_register_die_notifiers(struct brcm_pcie *pcie)
1865+
{
1866+
pcie->panic_notifier.notifier_call = brcm_pcie_panic_notify_cb;
1867+
atomic_notifier_chain_register(&panic_notifier_list,
1868+
&pcie->panic_notifier);
1869+
1870+
pcie->die_notifier.notifier_call = brcm_pcie_die_notify_cb;
1871+
register_die_notifier(&pcie->die_notifier);
1872+
}
1873+
1874+
static void brcm_unregister_die_notifiers(struct brcm_pcie *pcie)
1875+
{
1876+
unregister_die_notifier(&pcie->die_notifier);
1877+
atomic_notifier_chain_unregister(&panic_notifier_list,
1878+
&pcie->panic_notifier);
1879+
}
1880+
17301881
static void __brcm_pcie_remove(struct brcm_pcie *pcie)
17311882
{
17321883
brcm_msi_remove(pcie);
@@ -1745,6 +1896,9 @@ static void brcm_pcie_remove(struct platform_device *pdev)
17451896

17461897
pci_stop_root_bus(bridge->bus);
17471898
pci_remove_root_bus(bridge->bus);
1899+
if (pcie->cfg->has_err_report)
1900+
brcm_unregister_die_notifiers(pcie);
1901+
17481902
__brcm_pcie_remove(pcie);
17491903
}
17501904

@@ -1845,6 +1999,7 @@ static const struct pcie_cfg_data bcm7216_cfg = {
18451999
.bridge_sw_init_set = brcm_pcie_bridge_sw_init_set_7278,
18462000
.has_phy = true,
18472001
.num_inbound_wins = 3,
2002+
.has_err_report = true,
18482003
};
18492004

18502005
static const struct pcie_cfg_data bcm7712_cfg = {
@@ -2019,8 +2174,10 @@ static int brcm_pcie_probe(struct platform_device *pdev)
20192174
return ret;
20202175
}
20212176

2022-
if (pcie->cfg->has_err_report)
2177+
if (pcie->cfg->has_err_report) {
20232178
spin_lock_init(&pcie->bridge_lock);
2179+
brcm_register_die_notifiers(pcie);
2180+
}
20242181

20252182
return 0;
20262183

0 commit comments

Comments
 (0)