From patchwork Tue Jun 23 04:25:58 2015 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Benjamin Herrenschmidt X-Patchwork-Id: 487502 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from lists.ozlabs.org (lists.ozlabs.org [IPv6:2401:3900:2:1::3]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id B92A11401AF for ; Tue, 23 Jun 2015 14:27:01 +1000 (AEST) Received: from ozlabs.org (lists.ozlabs.org [IPv6:2401:3900:2:1::3]) by lists.ozlabs.org (Postfix) with ESMTP id 90DDF1A0F7B for ; Tue, 23 Jun 2015 14:27:01 +1000 (AEST) X-Original-To: skiboot@lists.ozlabs.org Delivered-To: skiboot@lists.ozlabs.org Received: from gate.crashing.org (gate.crashing.org [63.228.1.57]) (using TLSv1 with cipher DHE-RSA-AES256-SHA (256/256 bits)) (No client certificate requested) by lists.ozlabs.org (Postfix) with ESMTPS id 620401A0270 for ; Tue, 23 Jun 2015 14:26:25 +1000 (AEST) Received: from pasglop.ozlabs.ibm.com (localhost.localdomain [127.0.0.1]) by gate.crashing.org (8.14.1/8.13.8) with ESMTP id t5N4Q2mR029011; Mon, 22 Jun 2015 23:26:12 -0500 From: Benjamin Herrenschmidt To: skiboot@lists.ozlabs.org Date: Tue, 23 Jun 2015 14:25:58 +1000 Message-Id: <1435033560-9180-8-git-send-email-benh@kernel.crashing.org> X-Mailer: git-send-email 2.1.4 In-Reply-To: <1435033560-9180-1-git-send-email-benh@kernel.crashing.org> References: <1435033560-9180-1-git-send-email-benh@kernel.crashing.org> Subject: [Skiboot] [PATCH 08/10] fsi-master: More error handling and recovery X-BeenThere: skiboot@lists.ozlabs.org X-Mailman-Version: 2.1.20 Precedence: list List-Id: Mailing list for skiboot development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , MIME-Version: 1.0 Errors-To: skiboot-bounces+incoming=patchwork.ozlabs.org@lists.ozlabs.org Sender: "Skiboot" This 
adapts a bunch of code from HostBoot to better handle (and recover from) FSI Master errors. Signed-off-by: Benjamin Herrenschmidt --- hw/fsi-master.c | 477 +++++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 407 insertions(+), 70 deletions(-) diff --git a/hw/fsi-master.c b/hw/fsi-master.c index f2efd01..7c33e9e 100644 --- a/hw/fsi-master.c +++ b/hw/fsi-master.c @@ -53,22 +53,56 @@ OPB_STAT_ERRACK) #define PIB2OPB_REG_LSTAT 0x2 #define PIB2OPB_REG_RESET 0x4 +#define PIB2OPB_REG_cRSIC 0x5 +#define PIB2OPB_REG_cRSIM 0x6 +#define PIB2OPB_REG_cRSIS 0x7 +#define PIB2OPB_REG_hRSIC 0x8 +#define PIB2OPB_REG_hRSIM 0x9 +#define PIB2OPB_REG_hRSIS 0xA + +/* Low level errors from OPB contain the status in the bottom 32-bit + * and one of these in the top 32-bit + */ +#define OPB_ERR_XSCOM_ERR 0x100000000ull +#define OPB_ERR_TIMEOUT_ERR 0x200000000ull +#define OPB_ERR_BAD_OPB_ADDR 0x400000000ull /* * PIB2OPB 0 has 2 MFSIs, cMFSI and hMFSI, PIB2OPB 1 only * has cMFSI */ -#define cMFSI_OPB_PORT_BASE 0x40000 +#define cMFSI_OPB_PORTS_BASE 0x40000 #define cMFSI_OPB_REG_BASE 0x03000 -#define hMFSI_OPB_PORT_BASE 0x80000 +#define hMFSI_OPB_PORTS_BASE 0x80000 #define hMFSI_OPB_REG_BASE 0x03400 #define MFSI_OPB_PORT_STRIDE 0x08000 +/* MFSI control registers */ +#define MFSI_REG_MSTAP(__n) (0x0D0 + (__n) * 4) +#define MFSI_REG_MATRB0 0x1D8 +#define MFSI_REG_MDTRB0 0x1DC +#define MFSI_REG_MESRB0 0x1D0 +#define MFSI_REG_MAESP0 0x050 +#define MFSI_REG_MAEB 0x070 +#define MFSI_REG_MSCSB0 0x1D4 + +/* FSI Slave registers */ +#define FSI_SLAVE_REGS 0x000800 /**< FSI Slave Register */ +#define FSI_SMODE (FSI_SLAVE_REGS | 0x00) +#define FSI_SLBUS (FSI_SLAVE_REGS | 0x30) +#define FSI_SLRES (FSI_SLAVE_REGS | 0x34) + +#define FSI2PIB_ENGINE 0x001000 /**< FSI2PIB Engine (SCOM) */ +#define FSI2PIB_RESET (FSI2PIB_ENGINE | 0x18) +#define FSI2PIB_STATUS (FSI2PIB_ENGINE | 0x1C) +#define FSI2PIB_COMPMASK (FSI2PIB_ENGINE | 0x30) +#define FSI2PIB_TRUEMASK (FSI2PIB_ENGINE | 0x34) + struct 
mfsi { uint32_t chip_id; uint32_t unit; uint32_t xscom_base; - uint32_t port_base; + uint32_t ports_base; uint32_t reg_base; uint32_t err_bits; }; @@ -91,48 +125,7 @@ static struct lock fsi_lock = LOCK_UNLOCKED; /* We try up to 1.2ms for an OPB access */ #define MFSI_OPB_MAX_TRIES 1200 -static int64_t mfsi_pib2opb_reset(struct mfsi *mfsi) -{ - uint64_t stat; - int64_t rc; - - rc = xscom_write(mfsi->chip_id, - mfsi->xscom_base + PIB2OPB_REG_RESET, (1ul << 63)); - if (rc) { - mfsi_log(PR_ERR, mfsi, "XSCOM error %lld resetting PIB2OPB\n", rc); - return rc; - } - rc = xscom_write(mfsi->chip_id, - mfsi->xscom_base + PIB2OPB_REG_STAT, (1ul << 63)); - if (rc) { - mfsi_log(PR_ERR, mfsi, "XSCOM error %lld resetting status\n", rc); - return rc; - } - rc = xscom_read(mfsi->chip_id, - mfsi->xscom_base + PIB2OPB_REG_STAT, &stat); - if (rc) { - mfsi_log(PR_ERR, mfsi, "XSCOM error %lld reading status\n", rc); - return rc; - } - return 0; -} - - -static int64_t mfsi_handle_opb_error(struct mfsi *mfsi, uint32_t stat) -{ - mfsi_log(PR_ERR, mfsi, "MFSI: Error status=0x%08x (raw=0x%08x)\n", - stat & mfsi->err_bits, stat); - - /* For now, just reset the PIB2OPB on error. We should collect more - * info and look at the remote errors in the target as well but that - * will be for another day. - */ - mfsi_pib2opb_reset(mfsi); - - return OPAL_HARDWARE; -} - -static int64_t mfsi_opb_poll(struct mfsi *mfsi, uint32_t *read_data) +static uint64_t mfsi_opb_poll(struct mfsi *mfsi, uint32_t *read_data) { unsigned long retries = MFSI_OPB_MAX_TRIES; uint64_t sval; @@ -146,7 +139,7 @@ static int64_t mfsi_opb_poll(struct mfsi *mfsi, uint32_t *read_data) if (rc) { /* Do something here ? 
*/ mfsi_log(PR_ERR, mfsi, "XSCOM error %lld read OPB STAT\n", rc); - return rc; + return OPB_ERR_XSCOM_ERR; } mfsi_log(PR_INSANE, mfsi, " STAT=0x%16llx...\n", sval); @@ -156,16 +149,16 @@ static int64_t mfsi_opb_poll(struct mfsi *mfsi, uint32_t *read_data) if (!(stat & OPB_STAT_BUSY)) break; if (retries-- == 0) { - /* XXX What should we do here ? reset it ? */ + /* This isn't supposed to happen (HW timeout) */ mfsi_log(PR_ERR, mfsi, "OPB POLL timeout !\n"); - return OPAL_HARDWARE; + return OPB_ERR_TIMEOUT_ERR | (stat & mfsi->err_bits); } time_wait_us(1); } /* Did we have an error ? */ if (stat & mfsi->err_bits) - return mfsi_handle_opb_error(mfsi, stat); + return stat & mfsi->err_bits; if (read_data) { if (!(stat & OPB_STAT_READ_VALID)) { @@ -177,16 +170,16 @@ static int64_t mfsi_opb_poll(struct mfsi *mfsi, uint32_t *read_data) *read_data = sval & 0xffffffff; } - return OPAL_SUCCESS; + return 0; } -static int64_t mfsi_opb_read(struct mfsi *mfsi, uint32_t opb_addr, uint32_t *data) +static uint64_t mfsi_opb_read(struct mfsi *mfsi, uint32_t opb_addr, uint32_t *data) { uint64_t opb_cmd = OPB_CMD_READ | OPB_CMD_32BIT; int64_t rc; if (opb_addr > 0x00ffffff) - return OPAL_PARAMETER; + return OPB_ERR_BAD_OPB_ADDR; opb_cmd |= opb_addr; opb_cmd <<= 32; @@ -197,18 +190,18 @@ static int64_t mfsi_opb_read(struct mfsi *mfsi, uint32_t opb_addr, uint32_t *dat rc = xscom_write(mfsi->chip_id, mfsi->xscom_base + PIB2OPB_REG_CMD, opb_cmd); if (rc) { mfsi_log(PR_ERR, mfsi, "XSCOM error %lld writing OPB CMD\n", rc); - return rc; + return OPB_ERR_XSCOM_ERR; } return mfsi_opb_poll(mfsi, data); } -static int64_t mfsi_opb_write(struct mfsi *mfsi, uint32_t opb_addr, uint32_t data) +static uint64_t mfsi_opb_write(struct mfsi *mfsi, uint32_t opb_addr, uint32_t data) { uint64_t opb_cmd = OPB_CMD_WRITE | OPB_CMD_32BIT; int64_t rc; if (opb_addr > 0x00ffffff) - return OPAL_PARAMETER; + return OPB_ERR_BAD_OPB_ADDR; opb_cmd |= opb_addr; opb_cmd <<= 32; @@ -220,7 +213,7 @@ static int64_t 
mfsi_opb_write(struct mfsi *mfsi, uint32_t opb_addr, uint32_t dat rc = xscom_write(mfsi->chip_id, mfsi->xscom_base + PIB2OPB_REG_CMD, opb_cmd); if (rc) { mfsi_log(PR_ERR, mfsi, "XSCOM error %lld writing OPB CMD\n", rc); - return rc; + return OPB_ERR_XSCOM_ERR; } return mfsi_opb_poll(mfsi, NULL); } @@ -238,26 +231,366 @@ static struct mfsi *mfsi_get(uint32_t chip_id, uint32_t unit) return mfsi; } +static int64_t mfsi_reset_pib2opb(struct mfsi *mfsi) +{ + uint64_t stat; + int64_t rc; + + rc = xscom_write(mfsi->chip_id, + mfsi->xscom_base + PIB2OPB_REG_RESET, (1ul << 63)); + if (rc) { + mfsi_log(PR_ERR, mfsi, "XSCOM error %lld resetting PIB2OPB\n", rc); + return rc; + } + rc = xscom_write(mfsi->chip_id, + mfsi->xscom_base + PIB2OPB_REG_STAT, (1ul << 63)); + if (rc) { + mfsi_log(PR_ERR, mfsi, "XSCOM error %lld resetting status\n", rc); + return rc; + } + rc = xscom_read(mfsi->chip_id, + mfsi->xscom_base + PIB2OPB_REG_STAT, &stat); + if (rc) { + mfsi_log(PR_ERR, mfsi, "XSCOM error %lld reading status\n", rc); + return rc; + } + return 0; +} + + +static void mfsi_dump_pib2opb_state(struct mfsi *mfsi) +{ + uint64_t val; + + /* Dump a bunch of registers */ + if (xscom_read(mfsi->chip_id, mfsi->xscom_base + PIB2OPB_REG_CMD, &val)) + goto xscom_error; + mfsi_log(PR_ERR, mfsi, " PIB2OPB CMD = %016llx\n", val); + if (xscom_read(mfsi->chip_id, mfsi->xscom_base + PIB2OPB_REG_STAT, &val)) + goto xscom_error; + mfsi_log(PR_ERR, mfsi, " PIB2OPB STAT = %016llx\n", val); + if (xscom_read(mfsi->chip_id, mfsi->xscom_base + PIB2OPB_REG_LSTAT, &val)) + goto xscom_error; + mfsi_log(PR_ERR, mfsi, " PIB2OPB LSTAT = %016llx\n", val); + + if (mfsi->unit == MFSI_cMFSI0 || mfsi->unit == MFSI_cMFSI1) { + if (xscom_read(mfsi->chip_id, mfsi->xscom_base + PIB2OPB_REG_cRSIC, &val)) + goto xscom_error; + mfsi_log(PR_ERR, mfsi, " PIB2OPB cRSIC = %016llx\n", val); + if (xscom_read(mfsi->chip_id, mfsi->xscom_base + PIB2OPB_REG_cRSIM, &val)) + goto xscom_error; + mfsi_log(PR_ERR, mfsi, " PIB2OPB cRSIM = 
%016llx\n", val); + if (xscom_read(mfsi->chip_id, mfsi->xscom_base + PIB2OPB_REG_cRSIS, &val)) + goto xscom_error; + mfsi_log(PR_ERR, mfsi, " PIB2OPB cRSIS = %016llx\n", val); + } else if (mfsi->unit == MFSI_hMFSI0) { + if (xscom_read(mfsi->chip_id, mfsi->xscom_base + PIB2OPB_REG_hRSIC, &val)) + goto xscom_error; + mfsi_log(PR_ERR, mfsi, " PIB2OPB hRSIC = %016llx\n", val); + if (xscom_read(mfsi->chip_id, mfsi->xscom_base + PIB2OPB_REG_hRSIM, &val)) + goto xscom_error; + mfsi_log(PR_ERR, mfsi, " PIB2OPB hRSIM = %016llx\n", val); + if (xscom_read(mfsi->chip_id, mfsi->xscom_base + PIB2OPB_REG_hRSIS, &val)) + goto xscom_error; + mfsi_log(PR_ERR, mfsi, " PIB2OPB hRSIS = %016llx\n", val); + } + return; + xscom_error: + mfsi_log(PR_ERR, mfsi, "XSCOM error reading PIB2OPB registers\n"); +} + +static int64_t mfsi_dump_ctrl_regs(struct mfsi *mfsi) +{ + uint64_t opb_stat; + uint32_t i; + + /* List of registers to dump (from HB) */ + static uint32_t dump_regs[] = { + MFSI_REG_MATRB0, + MFSI_REG_MDTRB0, + MFSI_REG_MESRB0, + MFSI_REG_MAESP0, + MFSI_REG_MAEB, + MFSI_REG_MSCSB0, + }; + static const char * dump_regs_names[] = { + "MFSI_REG_MATRB0", + "MFSI_REG_MDTRB0", + "MFSI_REG_MESRB0", + "MFSI_REG_MAESP0", + "MFSI_REG_MAEB ", + "MFSI_REG_MSCSB0", + }; + for (i = 0; i < ARRAY_SIZE(dump_regs); i++) { + uint32_t val; + + opb_stat = mfsi_opb_read(mfsi, mfsi->reg_base + dump_regs[i], &val); + if (opb_stat) { + /* Error on dump, give up */ + mfsi_log(PR_ERR, mfsi, " OPB stat 0x%016llx dumping reg %x\n", + opb_stat, dump_regs[i]); + return OPAL_HARDWARE; + } + mfsi_log(PR_ERR, mfsi, " %s = %08x\n", dump_regs_names[i], val); + } + for (i = 0; i < 8; i++) { + uint32_t val; + + opb_stat = mfsi_opb_read(mfsi, mfsi->reg_base + MFSI_REG_MSTAP(i), &val); + if (opb_stat) { + /* Error on dump, give up */ + mfsi_log(PR_ERR, mfsi, " OPB stat 0x%016llx dumping reg %x\n", + opb_stat, MFSI_REG_MSTAP(i)); + return OPAL_HARDWARE; + } + mfsi_log(PR_ERR, mfsi, " MFSI_REG_MSTAP%d = %08x\n", i, val); + } 
+ return OPAL_SUCCESS; +} + +static int64_t mfsi_master_cleanup(struct mfsi *mfsi, uint32_t port) +{ + uint64_t opb_stat; + uint32_t port_base, compmask, truemask; + + /* Reset the bridge to clear up the residual errors */ + + /* bit0 = Bridge: General reset */ + opb_stat = mfsi_opb_write(mfsi, mfsi->reg_base + MFSI_REG_MESRB0, 0x80000000u); + if (opb_stat) { + mfsi_log(PR_ERR, mfsi, " OPB stat 0x%016llx writing reset to MESRB0\n", + opb_stat); + return OPAL_HARDWARE; + } + + /* Calculate base address of port */ + port_base = mfsi->ports_base + port * MFSI_OPB_PORT_STRIDE; + + /* Perform error reset on Centaur fsi slave: */ + /* write 0x4000000 to addr=834 */ + opb_stat = mfsi_opb_write(mfsi, port_base + FSI_SLRES, 0x04000000); + if (opb_stat) { + mfsi_log(PR_ERR, mfsi, + " OPB stat 0x%016llx writing reset to FSI slave\n", + opb_stat); + return OPAL_HARDWARE; + } + + /* Further step is to issue a PIB reset to the FSI2PIB engine + * in busy state, i.e. write arbitrary data to 101c + * (putcfam 1007) register of the previously failed FSI2PIB + * engine on Centaur. + * + * XXX BenH: Should that be done by the upper FSI XSCOM layer ? + */ + opb_stat = mfsi_opb_write(mfsi, port_base + FSI2PIB_STATUS, 0xFFFFFFFF); + if (opb_stat) { + mfsi_log(PR_ERR, mfsi, + " OPB stat 0x%016llx clearing FSI2PIB_STATUS\n", + opb_stat); + return OPAL_HARDWARE; + } + + /* Need to save/restore the true/comp masks or the FSP (PRD ?) will + * get annoyed + */ + opb_stat = mfsi_opb_read(mfsi, port_base + FSI2PIB_COMPMASK, &compmask); + if (opb_stat) { + mfsi_log(PR_ERR, mfsi, + " OPB stat 0x%016llx reading FSI2PIB_COMPMASK\n", + opb_stat); + return OPAL_HARDWARE; + } + opb_stat = mfsi_opb_read(mfsi, port_base + FSI2PIB_TRUEMASK, &truemask); + if (opb_stat) { + mfsi_log(PR_ERR, mfsi, + " OPB stat 0x%016llx reading FSI2PIB_TRUEMASK\n", + opb_stat); + return OPAL_HARDWARE; + } + + /* Then, write arbitrary data to 1018 (putcfam 1006) to + * reset any pending FSI2PIB errors. 
+ */ + opb_stat = mfsi_opb_write(mfsi, port_base + FSI2PIB_RESET, 0xFFFFFFFF); + if (opb_stat) { + mfsi_log(PR_ERR, mfsi, + " OPB stat 0x%016llx writing FSI2PIB_RESET\n", + opb_stat); + return OPAL_HARDWARE; + } + + /* Restore the true/comp masks */ + opb_stat = mfsi_opb_write(mfsi, port_base + FSI2PIB_COMPMASK, compmask); + if (opb_stat) { + mfsi_log(PR_ERR, mfsi, + " OPB stat 0x%016llx writing FSI2PIB_COMPMASK\n", + opb_stat); + return OPAL_HARDWARE; + } + opb_stat = mfsi_opb_write(mfsi, port_base + FSI2PIB_TRUEMASK, truemask); + if (opb_stat) { + mfsi_log(PR_ERR, mfsi, + " OPB stat 0x%016llx writing FSI2PIB_TRUEMASK\n", + opb_stat); + return OPAL_HARDWARE; + } + return OPAL_SUCCESS; +} + +static int64_t mfsi_analyse_fsi_error(struct mfsi *mfsi) +{ + uint64_t opb_stat; + uint32_t mesrb0; + + /* Most of the code below is adapted from HB. The main difference is + * that we don't gard + */ + + /* Read MESRB0 */ + opb_stat = mfsi_opb_read(mfsi, mfsi->reg_base + MFSI_REG_MESRB0, &mesrb0); + if (opb_stat) { + mfsi_log(PR_ERR, mfsi, " OPB stat 0x%016llx reading MESRB0\n", opb_stat); + return OPAL_HARDWARE; + } + mfsi_log(PR_ERR, mfsi, " MESRB0=%08x\n", mesrb0); + + /* bits 8:15 are internal parity errors in the master */ + if (mesrb0 & 0x00FF0000) { + mfsi_log(PR_ERR, mfsi, " Master parity error !\n"); + } else { + /* bits 0:3 are a specific error code */ + switch ((mesrb0 & 0xF0000000) >> 28 ) { + case 0x1: /* OPB error */ + case 0x2: /* Invalid state of OPB state machine */ + /* error is inside the OPB logic */ + mfsi_log(PR_ERR, mfsi, " OPB logic error !\n"); + break; + case 0x3: /* Port access error */ + /* probably some kind of code collision */ + /* could also be something weird in the chip */ + mfsi_log(PR_ERR, mfsi, " Port access error !\n"); + break; + case 0x4: /* ID mismatch */ + mfsi_log(PR_ERR, mfsi, " Port ID mismatch !\n"); + break; + case 0x6: /* port timeout error */ + mfsi_log(PR_ERR, mfsi, " Port timeout !\n"); + break; + case 0x7: /* master timeout 
error */ + mfsi_log(PR_ERR, mfsi, " Master timeout !\n"); + break; + case 0x9: /* Any error response from Slave */ + mfsi_log(PR_ERR, mfsi, " Slave error response !\n"); + break; + case 0xC: /* bridge parity error */ + mfsi_log(PR_ERR, mfsi, " Bridge parity error !\n"); + break; + case 0xB: /* protocol error */ + mfsi_log(PR_ERR, mfsi, " Protocol error !\n"); + break; + case 0x8: /* master CRC error */ + mfsi_log(PR_ERR, mfsi, " Master CRC error !\n"); + break; + case 0xA: /* Slave CRC error */ + mfsi_log(PR_ERR, mfsi, " Slave CRC error !\n"); + break; + default: + mfsi_log(PR_ERR, mfsi, " Unknown error !\n"); + break; + } + } + return OPAL_SUCCESS; +} + +static int64_t mfsi_handle_error(struct mfsi *mfsi, uint32_t port, + uint64_t opb_stat, uint32_t fsi_addr) +{ + int rc; + bool found_root_cause = false; + + mfsi_log(PR_ERR, mfsi, "Access error on port %d, stat=%012llx\n", + port, opb_stat); + + /* First handle stat codes we synthesized */ + if (opb_stat & OPB_ERR_XSCOM_ERR) + return OPAL_HARDWARE; + if (opb_stat & OPB_ERR_BAD_OPB_ADDR) + return OPAL_PARAMETER; + + /* Dump a bunch of registers from PIB2OPB and reset it */ + mfsi_dump_pib2opb_state(mfsi); + + /* Reset PIB2OPB */ + mfsi_reset_pib2opb(mfsi); + + /* This one is not supposed to happen but ... 
*/ + if (opb_stat & OPB_ERR_TIMEOUT_ERR) + return OPAL_HARDWARE; + + /* Dump some FSI control registers */ + rc = mfsi_dump_ctrl_regs(mfsi); + + /* If that failed, reset PIB2OPB again and return */ + if (rc) { + mfsi_dump_pib2opb_state(mfsi); + mfsi_reset_pib2opb(mfsi); + return OPAL_HARDWARE; + } + + /* Now check for known root causes (from HB) */ + + /* First check if it's a ctrl register access error and we got an OPB NACK, + * which means an out of bounds control reg + */ + if ((opb_stat & OPB_STAT_ERRACK) && + ((fsi_addr & ~0x2ffu) == mfsi->reg_base)) { + mfsi_log(PR_ERR, mfsi, " Error appears to be out of bounds reg %08x\n", + fsi_addr); + found_root_cause = true; + } + /* Else check for other OPB errors */ + else if (opb_stat & OPB_STAT_ERR_OPB) { + mfsi_log(PR_ERR, mfsi, " Error appears to be an OPB error\n"); + found_root_cause = true; + } + + /* Root cause not found, dig into FSI logic */ + if (!found_root_cause) { + rc = mfsi_analyse_fsi_error(mfsi); + if (rc) { + /* The analysis itself failed (non-zero rc), reset the PIB2OPB again */ + mfsi_reset_pib2opb(mfsi); + } + } + + /* Cleanup MFSI master */ + mfsi_master_cleanup(mfsi, port); + + return OPAL_HARDWARE; +} + int64_t mfsi_read(uint32_t chip, uint32_t unit, uint32_t port, uint32_t fsi_addr, uint32_t *data) { struct mfsi *mfsi = mfsi_get(chip, unit); uint32_t port_addr; - int64_t rc; + uint64_t opb_stat; + int64_t rc = OPAL_SUCCESS; - if (!mfsi) + if (!mfsi || port > 7) return OPAL_PARAMETER; lock(&fsi_lock); /* Calculate port address */ - port_addr = mfsi->port_base + port * MFSI_OPB_PORT_STRIDE; + port_addr = mfsi->ports_base + port * MFSI_OPB_PORT_STRIDE; port_addr += fsi_addr; /* Perform OPB access */ - rc = mfsi_opb_read(mfsi, port_addr, data); - - /* XXX Handle FSI level errors here */ + opb_stat = mfsi_opb_read(mfsi, port_addr, data); + if (opb_stat) + rc = mfsi_handle_error(mfsi, port, opb_stat, port_addr); unlock(&fsi_lock); @@ -269,18 +602,22 @@ int64_t mfsi_write(uint32_t chip, uint32_t unit, uint32_t port, { 
struct mfsi *mfsi = mfsi_get(chip, unit); uint32_t port_addr; - int64_t rc; + uint64_t opb_stat; + int64_t rc = OPAL_SUCCESS; + + if (!mfsi || port > 7) + return OPAL_PARAMETER; lock(&fsi_lock); /* Calculate port address */ - port_addr = mfsi->port_base + port * MFSI_OPB_PORT_STRIDE; + port_addr = mfsi->ports_base + port * MFSI_OPB_PORT_STRIDE; port_addr += fsi_addr; /* Perform OPB access */ - rc = mfsi_opb_write(mfsi, port_addr, data); - - /* XXX Handle FSI level errors here */ + opb_stat = mfsi_opb_write(mfsi, port_addr, data); + if (opb_stat) + rc = mfsi_handle_error(mfsi, port, opb_stat, port_addr); unlock(&fsi_lock); @@ -296,19 +633,19 @@ static void mfsi_add(struct proc_chip *chip, struct mfsi *mfsi, uint32_t unit) switch(unit) { case MFSI_cMFSI0: mfsi->xscom_base = PIB2OPB_MFSI0_ADDR; - mfsi->port_base = cMFSI_OPB_PORT_BASE; + mfsi->ports_base = cMFSI_OPB_PORTS_BASE; mfsi->reg_base = cMFSI_OPB_REG_BASE; mfsi->err_bits = OPB_STAT_ERR_BASE | OPB_STAT_ERR_CMFSI; break; case MFSI_cMFSI1: mfsi->xscom_base = PIB2OPB_MFSI1_ADDR; - mfsi->port_base = cMFSI_OPB_PORT_BASE; + mfsi->ports_base = cMFSI_OPB_PORTS_BASE; mfsi->reg_base = cMFSI_OPB_REG_BASE; mfsi->err_bits = OPB_STAT_ERR_BASE | OPB_STAT_ERR_CMFSI; break; case MFSI_hMFSI0: mfsi->xscom_base = PIB2OPB_MFSI0_ADDR; - mfsi->port_base = hMFSI_OPB_PORT_BASE; + mfsi->ports_base = hMFSI_OPB_PORTS_BASE; mfsi->reg_base = hMFSI_OPB_REG_BASE; mfsi->err_bits = OPB_STAT_ERR_BASE | OPB_STAT_ERR_HMFSI; break;