Skip to content

Commit 4ec6054

Browse files
niklas88 authored and Alexander Gordeev committed
s390/pci: Report PCI error recovery results via SCLP
Add a mechanism with which the status of PCI error recovery runs is reported to the platform. Together with the status, supply additional information that may aid in problem determination.

Reviewed-by: Halil Pasic <pasic@linux.ibm.com>
Signed-off-by: Niklas Schnelle <schnelle@linux.ibm.com>
Signed-off-by: Alexander Gordeev <agordeev@linux.ibm.com>
1 parent fac04ef commit 4ec6054

7 files changed

Lines changed: 178 additions & 38 deletions

File tree

arch/s390/include/asm/sclp.h

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,11 @@
1616
/* 24 + 16 * SCLP_MAX_CORES */
1717
#define EXT_SCCB_READ_CPU (3 * PAGE_SIZE)
1818

19+
#define SCLP_ERRNOTIFY_AQ_RESET 0
20+
#define SCLP_ERRNOTIFY_AQ_REPAIR 1
21+
#define SCLP_ERRNOTIFY_AQ_INFO_LOG 2
22+
#define SCLP_ERRNOTIFY_AQ_OPTICS_DATA 3
23+
1924
#ifndef __ASSEMBLY__
2025
#include <linux/uio.h>
2126
#include <asm/chpid.h>
@@ -111,6 +116,34 @@ struct sclp_info {
111116
};
112117
extern struct sclp_info sclp;
113118

119+
/*
 * Common 8-byte header that begins an SCCB; it is embedded as the first
 * member of struct err_notify_sccb. Packed so the fields are laid out
 * without padding.
 * NOTE(review): the exact meaning of each field (length units, function and
 * response code values) is defined by the SCLP interface and is not visible
 * here - confirm against the SCLP driver before relying on it.
 */
struct sccb_header {
120+
u16 length;
121+
u8 function_code;
122+
u8 control_mask[3];
123+
u16 response_code;
124+
} __packed;
125+
126+
/*
 * Common 6-byte header that begins an event buffer; it is embedded as the
 * first member of struct err_notify_evbuf. Packed so the fields are laid
 * out without padding.
 * NOTE(review): presumably 'length' covers the whole event buffer and 'type'
 * selects the event kind - confirm against the SCLP driver.
 */
struct evbuf_header {
127+
u16 length;
128+
u8 type;
129+
u8 flags;
130+
u16 _reserved;
131+
} __packed;
132+
133+
/*
 * Error notification event buffer: the generic event buffer header followed
 * by fixed error-notification fields and a variable-length payload.
 * NOTE(review): 'action' presumably carries one of the SCLP_ERRNOTIFY_AQ_*
 * qualifiers, and 'fh'/'fid' presumably identify the PCI function (handle
 * and ID) - confirm against sclp_pci_report().
 */
struct err_notify_evbuf {
134+
struct evbuf_header header;
135+
u8 action;
136+
u8 atype;
137+
u32 fh;
138+
u32 fid;
139+
/* Flexible array member: payload bytes that follow the fixed fields */
u8 data[];
140+
} __packed;
141+
142+
/*
 * Complete error notification SCCB: the SCCB header immediately followed by
 * one error notification event buffer (whose trailing data[] holds the
 * payload). Packed so both parts are contiguous.
 */
struct err_notify_sccb {
143+
struct sccb_header header;
144+
struct err_notify_evbuf evbuf;
145+
} __packed;
146+
114147
struct zpci_report_error_header {
115148
u8 version; /* Interface version byte */
116149
u8 action; /* Action qualifier byte

arch/s390/pci/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,6 @@
55

66
obj-$(CONFIG_PCI) += pci.o pci_irq.o pci_clp.o \
77
pci_event.o pci_debug.o pci_insn.o pci_mmio.o \
8-
pci_bus.o pci_kvm_hook.o
8+
pci_bus.o pci_kvm_hook.o pci_report.o
99
obj-$(CONFIG_PCI_IOV) += pci_iov.o
1010
obj-$(CONFIG_SYSFS) += pci_sysfs.o

arch/s390/pci/pci_event.c

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include <asm/sclp.h>
1717

1818
#include "pci_bus.h"
19+
#include "pci_report.h"
1920

2021
/* Content Code Description for PCI Function Error */
2122
struct zpci_ccdf_err {
@@ -169,6 +170,8 @@ static pci_ers_result_t zpci_event_do_reset(struct pci_dev *pdev,
169170
static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev)
170171
{
171172
pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT;
173+
struct zpci_dev *zdev = to_zpci(pdev);
174+
char *status_str = "success";
172175
struct pci_driver *driver;
173176

174177
/*
@@ -186,29 +189,37 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev)
186189
if (is_passed_through(pdev)) {
187190
pr_info("%s: Cannot be recovered in the host because it is a pass-through device\n",
188191
pci_name(pdev));
192+
status_str = "failed (pass-through)";
189193
goto out_unlock;
190194
}
191195

192196
driver = to_pci_driver(pdev->dev.driver);
193197
if (!is_driver_supported(driver)) {
194-
if (!driver)
198+
if (!driver) {
195199
pr_info("%s: Cannot be recovered because no driver is bound to the device\n",
196200
pci_name(pdev));
197-
else
201+
status_str = "failed (no driver)";
202+
} else {
198203
pr_info("%s: The %s driver bound to the device does not support error recovery\n",
199204
pci_name(pdev),
200205
driver->name);
206+
status_str = "failed (no driver support)";
207+
}
201208
goto out_unlock;
202209
}
203210

204211
ers_res = zpci_event_notify_error_detected(pdev, driver);
205-
if (ers_result_indicates_abort(ers_res))
212+
if (ers_result_indicates_abort(ers_res)) {
213+
status_str = "failed (abort on detection)";
206214
goto out_unlock;
215+
}
207216

208217
if (ers_res == PCI_ERS_RESULT_CAN_RECOVER) {
209218
ers_res = zpci_event_do_error_state_clear(pdev, driver);
210-
if (ers_result_indicates_abort(ers_res))
219+
if (ers_result_indicates_abort(ers_res)) {
220+
status_str = "failed (abort on MMIO enable)";
211221
goto out_unlock;
222+
}
212223
}
213224

214225
if (ers_res == PCI_ERS_RESULT_NEED_RESET)
@@ -217,6 +228,7 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev)
217228
if (ers_res != PCI_ERS_RESULT_RECOVERED) {
218229
pr_err("%s: Automatic recovery failed; operator intervention is required\n",
219230
pci_name(pdev));
231+
status_str = "failed (driver can't recover)";
220232
goto out_unlock;
221233
}
222234

@@ -225,6 +237,7 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev)
225237
driver->err_handler->resume(pdev);
226238
out_unlock:
227239
pci_dev_unlock(pdev);
240+
zpci_report_status(zdev, "recovery", status_str);
228241

229242
return ers_res;
230243
}

arch/s390/pci/pci_report.c

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
// SPDX-License-Identifier: GPL-2.0
2+
/*
3+
* Copyright IBM Corp. 2024
4+
*
5+
* Author(s):
6+
* Niklas Schnelle <schnelle@linux.ibm.com>
7+
*
8+
*/
9+
10+
#define KMSG_COMPONENT "zpci"
11+
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
12+
13+
#include <linux/kernel.h>
14+
#include <linux/sprintf.h>
15+
#include <linux/pci.h>
16+
17+
#include <asm/sclp.h>
18+
19+
#include "pci_report.h"
20+
21+
#define ZPCI_ERR_LOG_ID_KERNEL_REPORT 0x4714
22+
23+
/*
 * Payload of a kernel generated PCI status report: 16 bytes of fixed fields
 * followed by the human readable log text.
 */
struct zpci_report_error_data {
24+
/* Filled from ktime_get_clocktai_seconds() when the report is built */
u64 timestamp;
25+
/* Set to ZPCI_ERR_LOG_ID_KERNEL_REPORT for reports built in this file */
u64 err_log_id;
26+
/* Flexible array member holding the "key: value\n" text lines */
char log_data[];
27+
} __packed;
28+
29+
/*
 * ZPCI_REPORT_SIZE: bytes left in one page after a struct err_notify_sccb.
 * ZPCI_REPORT_DATA_SIZE: that amount minus the fixed fields of
 * struct zpci_report_error_data, i.e. the space used as the bound for the
 * log_data text.
 */
#define ZPCI_REPORT_SIZE (PAGE_SIZE - sizeof(struct err_notify_sccb))
30+
#define ZPCI_REPORT_DATA_SIZE (ZPCI_REPORT_SIZE - sizeof(struct zpci_report_error_data))
31+
32+
/*
 * A full report as handed to sclp_pci_report(): the generic report header
 * directly followed by the kernel report payload. zpci_report_status()
 * builds one of these inside a single zeroed page.
 */
struct zpci_report_error {
33+
struct zpci_report_error_header header;
34+
struct zpci_report_error_data data;
35+
} __packed;
36+
37+
/*
 * Translate a PCI channel state into the short name used in status reports.
 * Any state other than normal, frozen or permanent failure yields "invalid".
 */
static const char *zpci_state_str(pci_channel_state_t state)
{
	if (state == pci_channel_io_normal)
		return "normal";
	if (state == pci_channel_io_frozen)
		return "frozen";
	if (state == pci_channel_io_perm_failure)
		return "permanent-failure";

	return "invalid";
}
50+
51+
/**
52+
* zpci_report_status - Report the status of operations on a PCI device
53+
* @zdev: The PCI device for which to report status
54+
* @operation: A string representing the operation reported
55+
* @status: A string representing the status of the operation
56+
*
57+
* This function creates a human readable report about an operation such as
58+
* PCI device recovery and forwards this to the platform using the SCLP Write
59+
* Event Data mechanism. Besides the operation and status strings the report
60+
* also contains additional information about the device deemed useful for
61+
* debug such as the currently bound device driver, if any, and error state.
62+
*
63+
* Return: 0 on success an error code < 0 otherwise.
64+
*/
65+
int zpci_report_status(struct zpci_dev *zdev, const char *operation, const char *status)
66+
{
67+
struct zpci_report_error *report;
68+
struct pci_driver *driver = NULL;
69+
struct pci_dev *pdev = NULL;
70+
char *buf, *end;
71+
int ret;
72+
73+
if (!zdev || !zdev->zbus)
74+
return -ENODEV;
75+
76+
/* Protected virtualization hosts get nothing from us */
77+
if (prot_virt_guest)
78+
return -ENODATA;
79+
80+
report = (void *)get_zeroed_page(GFP_KERNEL);
81+
if (!report)
82+
return -ENOMEM;
83+
if (zdev->zbus->bus)
84+
pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn);
85+
if (pdev)
86+
driver = to_pci_driver(pdev->dev.driver);
87+
88+
buf = report->data.log_data;
89+
end = report->data.log_data + ZPCI_REPORT_DATA_SIZE;
90+
buf += scnprintf(buf, end - buf, "report: %s\n", operation);
91+
buf += scnprintf(buf, end - buf, "status: %s\n", status);
92+
buf += scnprintf(buf, end - buf, "state: %s\n",
93+
(pdev) ? zpci_state_str(pdev->error_state) : "n/a");
94+
buf += scnprintf(buf, end - buf, "driver: %s\n", (driver) ? driver->name : "n/a");
95+
96+
report->header.version = 1;
97+
report->header.action = SCLP_ERRNOTIFY_AQ_INFO_LOG;
98+
report->header.length = buf - (char *)&report->data;
99+
report->data.timestamp = ktime_get_clocktai_seconds();
100+
report->data.err_log_id = ZPCI_ERR_LOG_ID_KERNEL_REPORT;
101+
102+
ret = sclp_pci_report(&report->header, zdev->fh, zdev->fid);
103+
if (ret)
104+
pr_err("Reporting PCI status failed with code %d\n", ret);
105+
else
106+
pr_info("Reported PCI device status\n");
107+
108+
free_page((unsigned long)report);
109+
110+
return ret;
111+
}

arch/s390/pci/pci_report.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
/* SPDX-License-Identifier: GPL-2.0 */
2+
/*
3+
* Copyright IBM Corp. 2024
4+
*
5+
* Author(s):
6+
* Niklas Schnelle <schnelle@linux.ibm.com>
7+
*
8+
*/
9+
#ifndef __S390_PCI_REPORT_H
10+
#define __S390_PCI_REPORT_H
11+
12+
struct zpci_dev;
13+
14+
int zpci_report_status(struct zpci_dev *zdev, const char *operation, const char *status);
15+
16+
#endif /* __S390_PCI_REPORT_H */

drivers/s390/char/sclp.h

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -85,13 +85,6 @@ typedef unsigned int sclp_cmdw_t;
8585

8686
typedef u64 sccb_mask_t;
8787

88-
struct sccb_header {
89-
u16 length;
90-
u8 function_code;
91-
u8 control_mask[3];
92-
u16 response_code;
93-
} __attribute__((packed));
94-
9588
struct init_sccb {
9689
struct sccb_header header;
9790
u16 _reserved;
@@ -238,13 +231,6 @@ struct gds_vector {
238231
u16 gds_id;
239232
} __attribute__((packed));
240233

241-
struct evbuf_header {
242-
u16 length;
243-
u8 type;
244-
u8 flags;
245-
u16 _reserved;
246-
} __attribute__((packed));
247-
248234
struct sclp_req {
249235
struct list_head list; /* list_head for request queueing. */
250236
sclp_cmdw_t command; /* sclp command to execute */

drivers/s390/char/sclp_pci.c

Lines changed: 0 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -24,30 +24,11 @@
2424

2525
#define SCLP_ATYPE_PCI 2
2626

27-
#define SCLP_ERRNOTIFY_AQ_RESET 0
28-
#define SCLP_ERRNOTIFY_AQ_REPAIR 1
29-
#define SCLP_ERRNOTIFY_AQ_INFO_LOG 2
30-
#define SCLP_ERRNOTIFY_AQ_OPTICS_DATA 3
31-
3227
static DEFINE_MUTEX(sclp_pci_mutex);
3328
static struct sclp_register sclp_pci_event = {
3429
.send_mask = EVTYP_ERRNOTIFY_MASK,
3530
};
3631

37-
struct err_notify_evbuf {
38-
struct evbuf_header header;
39-
u8 action;
40-
u8 atype;
41-
u32 fh;
42-
u32 fid;
43-
u8 data[];
44-
} __packed;
45-
46-
struct err_notify_sccb {
47-
struct sccb_header header;
48-
struct err_notify_evbuf evbuf;
49-
} __packed;
50-
5132
struct pci_cfg_sccb {
5233
struct sccb_header header;
5334
u8 atype; /* adapter type */

0 commit comments

Comments
 (0)