@@ -13,6 +13,12 @@
#include <opal-msg.h>
#include <timebase.h>
#include <timer.h>
+#include <errorlog.h>
+#include <chip.h>
+
+/*
+ * Error log descriptor for PHB freeze/fence events. It is looked up via
+ * e_info(OPAL_RC_PCI_PHB_FREEZE) when send_phb_freeze_event() creates the
+ * error log.
+ */
+DEFINE_LOG_ENTRY(OPAL_RC_PCI_PHB_FREEZE, OPAL_INPUT_OUTPUT_ERR_EVT,
+ OPAL_PCI, OPAL_IO_DEVICES, OPAL_UNRECOVERABLE_ERR_GENERAL,
+ OPAL_NA);
#define OPAL_PCICFG_ACCESS_READ(op, cb, type) \
static int64_t opal_pci_config_##op(uint64_t phb_id, \
@@ -58,6 +64,66 @@ OPAL_PCICFG_ACCESS_WRITE(write_byte, write8, uint8_t)
OPAL_PCICFG_ACCESS_WRITE(write_half_word, write16, uint16_t)
OPAL_PCICFG_ACCESS_WRITE(write_word, write32, uint32_t)
+/*
+ * Complete an EEH error log and commit it as a serviceable event (eSEL).
+ *
+ * @phb:         PHB the error was reported on; supplies the base FRU
+ *               details and the device-tree node used to find the chip id
+ * @buf:         error log being built; it is committed before returning
+ * @diag_buffer: optional PHB diag data, read as a
+ *               struct OpalIoPhbErrorCommon; may be NULL, in which case
+ *               no diag section is added
+ *
+ * Two FRU callouts are added: one for the PHB base (backplane) and one
+ * for the chip the PHB is associated with.
+ */
+static void send_eeh_serviceable_event(struct phb *phb, struct errorlog *buf,
+				       void *diag_buffer)
+{
+	const char *chip_loc, *chip_part, *chip_serial;
+	uint32_t chip;
+
+	log_mark_serviceable(buf);
+
+	/* FRU callout for the PHB base (backplane) */
+	log_add_callout_section(buf, phb->base_loc_code, phb->base_part_no,
+				phb->base_serial_no);
+
+	/* FRU callout for the chip this PHB is associated with */
+	chip = dt_get_chip_id(phb->dt_node);
+	chip_loc = chip_loc_code(chip);
+	chip_part = chip_part_number(chip);
+	chip_serial = chip_serial_number(chip);
+	log_add_callout_section(buf, chip_loc, chip_part, chip_serial);
+
+	if (diag_buffer) {
+		/*
+		 * Attach the PHB diag data in its own section. The number of
+		 * bytes appended comes from the big-endian len field of the
+		 * common header.
+		 */
+		struct OpalIoPhbErrorCommon *hdr = diag_buffer;
+		uint32_t diag_len = be32_to_cpu(hdr->len);
+
+		log_add_section(buf, OPAL_ELOG_SEC_DIAG);
+		log_append_data(buf, diag_buffer, diag_len);
+	}
+
+	log_commit(buf);
+}
+
+/*
+ * Build and send the error log (eSEL) for a PHB freeze/fence.
+ *
+ * @phb:         PHB on which the freeze/fence was flagged
+ * @diag_buffer: PHB diag data to attach to the log; passed through to
+ *               send_eeh_serviceable_event(), which accepts NULL
+ *
+ * PCI_EEH_ERR_LOG_SEND is cleared once the log has been handed off, so
+ * the same error is not reported twice. If the error log cannot be
+ * created, the flag is left set and nothing is sent.
+ */
+static void send_phb_freeze_event(struct phb *phb, void *diag_buffer)
+{
+	struct errorlog *buf;
+	struct pci_device *pd;
+	const char *loc = NULL;
+
+	buf = opal_elog_create(&e_info(OPAL_RC_PCI_PHB_FREEZE), 0);
+	if (!buf) {
+		prerror("Unable to send EEH error log (eSEL)\n");
+		return;
+	}
+
+	log_append_msg(buf, "PHB#%x Freeze/Fence detected!\n", phb->opal_id);
+
+	/*
+	 * Add slot location info of RootPort, taken from the first device
+	 * on the PHB's device list. list_top() returns NULL for an empty
+	 * list, whereas list_entry() on the list head itself would produce
+	 * a bogus pci_device pointer. Also skip the lookup when the device
+	 * has no device-tree node. In both cases loc stays NULL, the same
+	 * value used when the property is absent.
+	 */
+	pd = list_top(&phb->devices, struct pci_device, link);
+	if (pd && pd->dn)
+		loc = dt_prop_get_def(pd->dn, "ibm,slot-location-code", NULL);
+	log_add_callout_section(buf, loc, NULL, NULL);
+
+	send_eeh_serviceable_event(phb, buf, diag_buffer);
+	phb->flags &= ~PCI_EEH_ERR_LOG_SEND;
+}
+
static int64_t opal_pci_config_read_half_word_be(uint64_t phb_id,
uint64_t bus_dev_func,
uint64_t offset,
@@ -1000,6 +1066,10 @@ static int64_t opal_pci_get_phb_diag_data2(uint64_t phb_id,
return OPAL_UNSUPPORTED;
phb_lock(phb);
rc = phb->ops->get_diag_data2(phb, diag_buffer, diag_buffer_len);
+
+ /* Send an error log if required */
+ if (phb->flags & PCI_EEH_ERR_LOG_SEND)
+ send_phb_freeze_event(phb, diag_buffer);
phb_unlock(phb);
return rc;
@@ -68,6 +68,9 @@ static bool phb3_fenced(struct phb3 *p)
if (nfir & PPC_BIT(16)) {
p->flags |= PHB3_AIB_FENCED;
+ /* Mark flag to send an error log */
+ p->phb.flags |= PCI_EEH_ERR_LOG_SEND;
+
phb3_eeh_dump_regs(p, NULL);
return true;
}
@@ -2554,6 +2554,9 @@ static bool phb4_fenced(struct phb4 *p)
/* Mark ourselves fenced */
p->flags |= PHB4_AIB_FENCED;
+ /* Mark flag to send an error log */
+ p->phb.flags |= PCI_EEH_ERR_LOG_SEND;
+
PHBERR(p, "PHB Freeze/Fence detected !\n");
phb4_dump_pec_err_regs(p);
@@ -3448,6 +3451,7 @@ static int64_t phb4_creset(struct pci_slot *slot)
p->flags &= ~PHB4_AIB_FENCED;
p->flags &= ~PHB4_CAPP_RECOVERY;
p->flags &= ~PHB4_CFG_USE_ASB;
+
phb4_init_hw(p);
pci_slot_set_state(slot, PHB4_SLOT_CRESET_FRESET);
@@ -287,6 +287,7 @@ enum opal_reasoncode {
OPAL_RC_PCI_ADD_SLOT = OPAL_SRC_COMPONENT_PCI | 0x11,
OPAL_RC_PCI_SCAN = OPAL_SRC_COMPONENT_PCI | 0x12,
OPAL_RC_PCI_RESET_PHB = OPAL_SRC_COMPONENT_PCI | 0x10,
+ OPAL_RC_PCI_PHB_FREEZE = OPAL_SRC_COMPONENT_PCI | 0x13,
/* ATTN */
OPAL_RC_ATTN = OPAL_SRC_COMPONENT_ATTN | 0x10,
/* MEM_ERR */
@@ -341,6 +342,7 @@ enum opal_reasoncode {
};
#define OPAL_ELOG_SEC_DESC 0x44455343
+#define OPAL_ELOG_SEC_DIAG 0x44494147 /* For EEH diag data */
#define DEFINE_LOG_ENTRY(reason, type, id, subsys, \
severity, subtype) static struct opal_err_info err_##reason = \
@@ -384,6 +384,9 @@ struct phb {
/* Additional data the platform might need to attach */
void *platform_data;
+
+ uint32_t flags;
+#define PCI_EEH_ERR_LOG_SEND 0x1
};
static inline void phb_lock(struct phb *phb)
On an EEH error, send out an error log (eSEL) with hardware callouts. To avoid generating multiple events for the same error, use a bit flag in the generic PHB structure. Whenever an EEH freeze/fence is detected, the SEND error log bit is set. The error log includes FRU details and PHB diag data. This patch addresses full PHB fence events. Subsequent patches will address single-PE and multi-PE freezes/fences. As part of the FRU details, it will include the slot location of the RootPort, the I/O base location code and the processor chip FRU details, as below: | Callout Section | | | | Additional Sections : Disabled | | Callout Count : 3 | | | | Normal Hardware FRU | | Priority : Medium Priority | | Location Code : U78D2.001.RCH0060-P1-C2 | | | | Normal Hardware FRU | | Priority : Medium Priority | | Location Code : U78D2.001.RCH0060-P1 | | Part Number : 01EK968 | | Serial Number : Y230UF6C103M | | | | Normal Hardware FRU | | Priority : Medium Priority | | Location Code : U78D2.001.RCH0060-P1-C48 | | Part Number : 02CY253 | | Serial Number : YA1934460542 | | | Signed-off-by: Mahesh Salgaonkar <mahesh@linux.ibm.com> --- Change in v2: - Introduce new elog type OPAL_RC_PCI_PHB_FREEZE. --- core/pci-opal.c | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++++ hw/phb3.c | 3 ++ hw/phb4.c | 4 +++ include/errorlog.h | 2 + include/pci.h | 3 ++ 5 files changed, 82 insertions(+)