@@ -114,7 +114,7 @@
#include <nvram.h>
/* Enable this to disable error interrupts for debug purposes */
-#define DISABLE_ERR_INTS
+#undef DISABLE_ERR_INTS
static void phb4_init_hw(struct phb4 *p);
@@ -3511,6 +3511,33 @@ static void phb4_int_unmask_all(struct phb4 *p)
out_be64(p->regs + PHB_RXE_TCE_ERR_IRQ_ENABLE, 0x60510050c0000000ull);
}
+/*
+ * Mask the IRQ for every error bit that is currently set. This stops the
+ * PHB's ERR and INF interrupts from being re-fired before the kernel has
+ * had a chance to handle the underlying condition.
+ */
+static void phb4_int_mask_active(struct phb4 *p)
+{
+	/* Each IRQ enable (mask) reg is at offset 0x20 from its status reg */
+	static const uint64_t error_regs[] = {
+		PHB_ERR_STATUS,
+		PHB_TXE_ERR_STATUS,
+		PHB_RXE_ARB_ERR_STATUS,
+		PHB_RXE_MRG_ERR_STATUS,
+		PHB_RXE_TCE_ERR_STATUS
+	};
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(error_regs); i++) {
+		const uint64_t irq_en_reg = error_regs[i] + 0x20;
+		uint64_t stat, mask;
+
+		stat = phb4_read_reg(p, error_regs[i]);
+		mask = phb4_read_reg(p, irq_en_reg);
+		phb4_write_reg(p, irq_en_reg, mask & ~stat);
+	}
+}
+
static uint64_t phb4_get_pesta(struct phb4 *p, uint64_t pe_number)
{
uint64_t pesta;
@@ -3828,6 +3855,14 @@ static int64_t phb4_eeh_next_error(struct phb *phb,
*severity = OPAL_EEH_SEV_NO_ERROR;
phb4_set_err_pending(p, false);
}
+
+ /*
+ * Unmask all our error interrupts once all pending errors
+ * have been handled.
+ */
+ if (!phb4_err_pending(p))
+ phb4_int_unmask_all(p);
+
return OPAL_SUCCESS;
}
@@ -5588,6 +5623,9 @@ static void phb4_err_interrupt(struct irq_source *is, uint32_t isn)
PHBDBG(p, "Got interrupt 0x%08x\n", isn);
+ /* mask the interrupt conditions to prevent it from re-firing */
+ phb4_int_mask_active(p);
+
/* Update pending event */
opal_update_pending_evt(OPAL_EVENT_PCI_ERROR,
OPAL_EVENT_PCI_ERROR);
In PHB4 the PHB's error and informational interrupts were changed to behave more like actual LSIs. On PHB3 these interrupts would only be raised on a 0 -> 1 transition of an error status bit (i.e. they were rising edge triggered). On PHB4 the error interrupts are "true" LSIs and will be re-raised as long as the underlying error status bit is set. This causes a headache for us because OPAL's PHB error handling model requires Skiboot to preserve the state of the PHB (including errors) until the kernel is ready to handle the error. As a result we can't do anything in Skiboot to handle the interrupt condition and we need to mask the error internally. We can do this by clearing the relevant bits in the IRQ_ENABLE registers of the PHB. It's worth pointing out that we don't want to mask the interrupt by setting the Q bit in the XIVE ESBs. The ESBs are owned by the OS which may be masking and unmasking the interrupt for its own reasons (e.g. migrating IRQs). Skiboot modifying the ESB state could potentially cause problems and should be avoided. Cc: Cédric Le Goater <clg@kaod.org> Signed-off-by: Oliver O'Halloran <oohall@gmail.com> --- hw/phb4.c | 40 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-)