|
9 | 9 | #include <cxlmem.h> |
10 | 10 | #include <cxl.h> |
11 | 11 | #include "core.h" |
| 12 | +#include "trace.h" |
12 | 13 |
|
13 | 14 | /** |
14 | 15 | * DOC: cxl core pci |
@@ -622,3 +623,117 @@ void read_cdat_data(struct cxl_port *port) |
622 | 623 | } |
623 | 624 | } |
624 | 625 | EXPORT_SYMBOL_NS_GPL(read_cdat_data, CXL); |
| 626 | + |
| 627 | +void cxl_cor_error_detected(struct pci_dev *pdev) |
| 628 | +{ |
| 629 | + struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); |
| 630 | + struct cxl_memdev *cxlmd = cxlds->cxlmd; |
| 631 | + struct device *dev = &cxlmd->dev; |
| 632 | + void __iomem *addr; |
| 633 | + u32 status; |
| 634 | + |
| 635 | + if (!cxlds->regs.ras) |
| 636 | + return; |
| 637 | + |
| 638 | + addr = cxlds->regs.ras + CXL_RAS_CORRECTABLE_STATUS_OFFSET; |
| 639 | + status = readl(addr); |
| 640 | + if (status & CXL_RAS_CORRECTABLE_STATUS_MASK) { |
| 641 | + writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr); |
| 642 | + trace_cxl_aer_correctable_error(dev, status); |
| 643 | + } |
| 644 | +} |
| 645 | +EXPORT_SYMBOL_NS_GPL(cxl_cor_error_detected, CXL); |
| 646 | + |
| 647 | +/* CXL spec rev3.0 8.2.4.16.1 */ |
| 648 | +static void header_log_copy(struct cxl_dev_state *cxlds, u32 *log) |
| 649 | +{ |
| 650 | + void __iomem *addr; |
| 651 | + u32 *log_addr; |
| 652 | + int i, log_u32_size = CXL_HEADERLOG_SIZE / sizeof(u32); |
| 653 | + |
| 654 | + addr = cxlds->regs.ras + CXL_RAS_HEADER_LOG_OFFSET; |
| 655 | + log_addr = log; |
| 656 | + |
| 657 | + for (i = 0; i < log_u32_size; i++) { |
| 658 | + *log_addr = readl(addr); |
| 659 | + log_addr++; |
| 660 | + addr += sizeof(u32); |
| 661 | + } |
| 662 | +} |
| 663 | + |
| 664 | +/* |
| 665 | + * Log the state of the RAS status registers and prepare them to log the |
| 666 | + * next error status. Return 1 if reset needed. |
| 667 | + */ |
| 668 | +static bool cxl_report_and_clear(struct cxl_dev_state *cxlds) |
| 669 | +{ |
| 670 | + struct cxl_memdev *cxlmd = cxlds->cxlmd; |
| 671 | + struct device *dev = &cxlmd->dev; |
| 672 | + u32 hl[CXL_HEADERLOG_SIZE_U32]; |
| 673 | + void __iomem *addr; |
| 674 | + u32 status; |
| 675 | + u32 fe; |
| 676 | + |
| 677 | + if (!cxlds->regs.ras) |
| 678 | + return false; |
| 679 | + |
| 680 | + addr = cxlds->regs.ras + CXL_RAS_UNCORRECTABLE_STATUS_OFFSET; |
| 681 | + status = readl(addr); |
| 682 | + if (!(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK)) |
| 683 | + return false; |
| 684 | + |
| 685 | + /* If multiple errors, log header points to first error from ctrl reg */ |
| 686 | + if (hweight32(status) > 1) { |
| 687 | + void __iomem *rcc_addr = |
| 688 | + cxlds->regs.ras + CXL_RAS_CAP_CONTROL_OFFSET; |
| 689 | + |
| 690 | + fe = BIT(FIELD_GET(CXL_RAS_CAP_CONTROL_FE_MASK, |
| 691 | + readl(rcc_addr))); |
| 692 | + } else { |
| 693 | + fe = status; |
| 694 | + } |
| 695 | + |
| 696 | + header_log_copy(cxlds, hl); |
| 697 | + trace_cxl_aer_uncorrectable_error(dev, status, fe, hl); |
| 698 | + writel(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK, addr); |
| 699 | + |
| 700 | + return true; |
| 701 | +} |
| 702 | + |
| 703 | +pci_ers_result_t cxl_error_detected(struct pci_dev *pdev, |
| 704 | + pci_channel_state_t state) |
| 705 | +{ |
| 706 | + struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); |
| 707 | + struct cxl_memdev *cxlmd = cxlds->cxlmd; |
| 708 | + struct device *dev = &cxlmd->dev; |
| 709 | + bool ue; |
| 710 | + |
| 711 | + /* |
| 712 | + * A frozen channel indicates an impending reset which is fatal to |
| 713 | + * CXL.mem operation, and will likely crash the system. On the off |
| 714 | + * chance the situation is recoverable dump the status of the RAS |
| 715 | + * capability registers and bounce the active state of the memdev. |
| 716 | + */ |
| 717 | + ue = cxl_report_and_clear(cxlds); |
| 718 | + |
| 719 | + switch (state) { |
| 720 | + case pci_channel_io_normal: |
| 721 | + if (ue) { |
| 722 | + device_release_driver(dev); |
| 723 | + return PCI_ERS_RESULT_NEED_RESET; |
| 724 | + } |
| 725 | + return PCI_ERS_RESULT_CAN_RECOVER; |
| 726 | + case pci_channel_io_frozen: |
| 727 | + dev_warn(&pdev->dev, |
| 728 | + "%s: frozen state error detected, disable CXL.mem\n", |
| 729 | + dev_name(dev)); |
| 730 | + device_release_driver(dev); |
| 731 | + return PCI_ERS_RESULT_NEED_RESET; |
| 732 | + case pci_channel_io_perm_failure: |
| 733 | + dev_warn(&pdev->dev, |
| 734 | + "failure state error detected, request disconnect\n"); |
| 735 | + return PCI_ERS_RESULT_DISCONNECT; |
| 736 | + } |
| 737 | + return PCI_ERS_RESULT_NEED_RESET; |
| 738 | +} |
| 739 | +EXPORT_SYMBOL_NS_GPL(cxl_error_detected, CXL); |
0 commit comments