@@ -410,6 +410,7 @@ struct aer_err_info {
unsigned int status; /* COR/UNCOR Error Status */
unsigned int mask; /* COR/UNCOR Error Mask */
+ unsigned int anfe_status; /* UNCOR Error Status for ANFE */
struct pcie_tlp_log tlp; /* TLP Header */
};
@@ -107,6 +107,12 @@ struct aer_stats {
PCI_ERR_ROOT_MULTI_COR_RCV | \
PCI_ERR_ROOT_MULTI_UNCOR_RCV)
+/* UNCOR Error Status bits that may be raised as Advisory Non-Fatal (ANFE) */
+#define AER_ERR_ANFE_UNC_MASK		(PCI_ERR_UNC_POISON_TLP |	\
+					 PCI_ERR_UNC_COMP_TIME |	\
+					 PCI_ERR_UNC_COMP_ABORT |	\
+					 PCI_ERR_UNC_UNX_COMP | PCI_ERR_UNC_UNSUP)
+
static int pcie_aer_disable;
static pci_ers_result_t aer_root_reset(struct pci_dev *dev);
@@ -1094,9 +1100,14 @@ static void pci_aer_handle_error(struct pci_dev *dev, struct aer_err_info *info)
* Correctable error does not need software intervention.
* No need to go through error recovery process.
*/
- if (aer)
+ if (aer) {
pci_write_config_dword(dev, aer + PCI_ERR_COR_STATUS,
info->status);
+ if (info->anfe_status)
+ pci_write_config_dword(dev,
+ aer + PCI_ERR_UNCOR_STATUS,
+ info->anfe_status);
+ }
if (pcie_aer_is_native(dev)) {
struct pci_driver *pdrv = dev->driver;
@@ -1196,6 +1207,53 @@ void aer_recover_queue(int domain, unsigned int bus, unsigned int devfn,
EXPORT_SYMBOL_GPL(aer_recover_queue);
#endif
+/*
+ * anfe_get_uc_status - record UNCOR status bits raised as Advisory Non-Fatal
+ * @dev: device whose AER capability is read
+ * @info: error info; anfe_status is written only when the bits are trusted
+ */
+static void anfe_get_uc_status(struct pci_dev *dev, struct aer_err_info *info)
+{
+	u32 uncor_mask, uncor_status, uncor_severity, anfe_status;
+	u16 device_status;
+	int aer = dev->aer_cap;
+
+	/*
+	 * Cache the UNCOR registers before checking for NFE in the Device
+	 * Status register, to avoid a race between the two reads.
+	 */
+	pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_STATUS, &uncor_status);
+	pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_MASK, &uncor_mask);
+	pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_SEVER, &uncor_severity);
+	/*
+	 * PCIe Base Specification r6.1, Section 6.2.3.2.4: an UNCOR error
+	 * raised as Advisory Non-Fatal has Non-Fatal severity (its bit in the
+	 * Uncorrectable Error Severity register is clear) and is one of:
+	 *	1. Poisoned TLP           (Section 6.2.3.2.4.3)
+	 *	2. Completion Timeout     (Section 6.2.3.2.4.4)
+	 *	3. Completer Abort        (Section 6.2.3.2.4.1)
+	 *	4. Unexpected Completion  (Section 6.2.3.2.4.5)
+	 *	5. Unsupported Request    (Section 6.2.3.2.4.1)
+	 */
+	anfe_status = uncor_status & ~uncor_mask & ~uncor_severity &
+		      AER_ERR_ANFE_UNC_MASK;
+
+	if (pcie_capability_read_word(dev, PCI_EXP_DEVSTA, &device_status))
+		return;
+	/*
+	 * Take the most conservative route: if a Non-Fatal error was
+	 * detected, do not assume any bit in uncor_status was set by ANFE.
+	 */
+	if (device_status & PCI_EXP_DEVSTA_NFED)
+		return;
+
+	/*
+	 * An ANFE that arrives after the reads above is not recorded in
+	 * info->anfe_status. It will be read out as NFE on a following
+	 * uncor_status read and processed by the NFE handler.
+	 */
+	info->anfe_status = anfe_status;
+}
+
/**
* aer_get_device_error_info - read error status from dev and store it to info
* @dev: pointer to the device expected to have a error record
@@ -1213,6 +1271,7 @@ int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info)
/* Must reset in this function */
info->status = 0;
+ info->anfe_status = 0;
info->tlp_header_valid = 0;
/* The device might not support AER */
@@ -1226,6 +1285,9 @@ int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info)
&info->mask);
if (!(info->status & ~info->mask))
return 0;
+
+ if (info->status & PCI_ERR_COR_ADV_NFAT)
+ anfe_get_uc_status(dev, info);
} else if (type == PCI_EXP_TYPE_ROOT_PORT ||
type == PCI_EXP_TYPE_RC_EC ||
type == PCI_EXP_TYPE_DOWNSTREAM ||