Message ID | 1641926750-27544-4-git-send-email-amhetre@nvidia.com |
---|---|
State | Changes Requested |
Headers | show |
Series | memory: tegra: Update mc interrupts | expand |
Cc in the future also Dmitry, because he was involved in these drivers quite a lot. On 11/01/2022 19:45, Ashish Mhetre wrote: > Add all mc-errors supported by T186. > Implement mc interrupt handling routine for T186. Here and in other patches - please use Tegra186 (and similar) to be consistent with existing upstream naming. > > Signed-off-by: Ashish Mhetre <amhetre@nvidia.com> > --- > drivers/memory/tegra/mc.h | 17 +++++++ > drivers/memory/tegra/tegra186.c | 100 ++++++++++++++++++++++++++++++++++++++++ > 2 files changed, 117 insertions(+) > > diff --git a/drivers/memory/tegra/mc.h b/drivers/memory/tegra/mc.h > index 2d4f495..7817492 100644 > --- a/drivers/memory/tegra/mc.h > +++ b/drivers/memory/tegra/mc.h > @@ -44,6 +44,15 @@ > #define MC_TIMING_CONTROL_DBG 0xf8 > #define MC_TIMING_CONTROL 0xfc > > +#define MC_ERR_VPR_STATUS 0x654 > +#define MC_ERR_VPR_ADR 0x658 > +#define MC_ERR_SEC_STATUS 0x67c > +#define MC_ERR_SEC_ADR 0x680 > +#define MC_ERR_MTS_STATUS 0x9b0 > +#define MC_ERR_MTS_ADR 0x9b4 > +#define MC_ERR_GENERALIZED_CARVEOUT_STATUS 0xc00 > +#define MC_ERR_GENERALIZED_CARVEOUT_ADR 0xc04 > + > #define MC_INT_DECERR_ROUTE_SANITY BIT(20) > #define MC_INT_WCAM_ERR BIT(19) > #define MC_INT_SCRUB_ECC_WR_ACK BIT(18) > @@ -159,6 +168,14 @@ extern const struct tegra_mc_ops tegra186_mc_ops; > extern const char * const tegra_mc_status_names[32]; > extern const char * const tegra_mc_error_names[8]; > > +struct tegra_mc_error { > + u32 int_bit; Where is it used (read)? > + const char *msg; > + u32 status_reg; > + u32 addr_reg; Please describe all these fields with kerneldoc. > + u32 addr_reg_hi; Looks unused. > +}; > + > /* > * These IDs are for internal use of Tegra ICC drivers. The ID numbers are > * chosen such that they don't conflict with the device-tree ICC node IDs. > diff --git a/drivers/memory/tegra/tegra186.c b/drivers/memory/tegra/tegra186.c > index 6766cc4..4f3ae71 100644 > --- a/drivers/memory/tegra/tegra186.c > +++ b/drivers/memory/tegra/tegra186.c > @@ -146,8 +146,107 @@ static void tegra186_mc_clear_interrupt(struct tegra_mc *mc) > mc_writel(mc, MC_INTSTATUS_CLEAR, MC_INTSTATUS); > } > > +static const struct tegra_mc_error int_mc_errors[] = { > + { > + .int_bit = MC_INT_DECERR_EMEM, > + .msg = "EMEM address decode error", > + .status_reg = MC_ERR_STATUS, > + .addr_reg = MC_ERR_ADR, > + }, > + { > + .int_bit = MC_INT_SECURITY_VIOLATION, > + .msg = "non secure access to secure region", > + .status_reg = MC_ERR_STATUS, > + .addr_reg = MC_ERR_ADR, > + }, > + { > + .int_bit = MC_INT_DECERR_VPR, > + .msg = "MC request violates VPR requirements", > + .status_reg = MC_ERR_VPR_STATUS, > + .addr_reg = MC_ERR_VPR_ADR, > + }, > + { > + .int_bit = MC_INT_SECERR_SEC, > + .msg = "MC request violated SEC carveout requirements", > + .status_reg = MC_ERR_SEC_STATUS, > + .addr_reg = MC_ERR_SEC_ADR, > + }, > + { > + .int_bit = MC_INT_DECERR_MTS, > + .msg = "MTS carveout access violation", > + .status_reg = MC_ERR_MTS_STATUS, > + .addr_reg = MC_ERR_MTS_ADR, > + }, > + { > + .int_bit = MC_INT_DECERR_GENERALIZED_CARVEOUT, > + .msg = "GSC access violation", > + .status_reg = MC_ERR_GENERALIZED_CARVEOUT_STATUS, > + .addr_reg = MC_ERR_GENERALIZED_CARVEOUT_ADR, > + }, > +}; > + > +static irqreturn_t tegra186_mc_handle_irq(int irq, void *data) > +{ > + struct tegra_mc *mc = data; > + unsigned long status; > + unsigned int bit; > + > + status = mc_readl(mc, MC_INTSTATUS) & mc->soc->intmask; > + if (!status) > + return IRQ_NONE; > + > + for_each_set_bit(bit, &status, 32) { Don't you need bitops.h for this? > + const char *error = int_mc_errors[bit].msg ?: "unknown"; int_mc_errors has size of 6, but it's index (the bit variable) can be 20 or even 32. Are you sure indices are used properly or maybe int_mc_errors missed initializer per-index? > + const char *client = "unknown"; > + const char *direction, *secure; > + phys_addr_t addr = 0; > + unsigned int i; > + u8 id; > + u32 value; Keep order in reversed xmas tree plus name it in a meaningful way. You read here several registers, so which one is value? Probably you meant status? > + > + value = mc_readl(mc, int_mc_errors[bit].status_reg); > + > +#ifdef CONFIG_PHYS_ADDR_T_64BIT > + if (mc->soc->num_address_bits > 32) { > + addr = ((value >> MC_ERR_STATUS_ADR_HI_SHIFT) & > + MC_ERR_STATUS_ADR_HI_MASK); > + addr <<= 32; > + } > +#endif > + addr |= mc_readl(mc, int_mc_errors[bit].addr_reg); > + > + if (value & MC_ERR_STATUS_RW) > + direction = "write"; > + else > + direction = "read"; > + > + if (value & MC_ERR_STATUS_SECURITY) > + secure = "secure "; > + else > + secure = ""; > + > + id = value & mc->soc->client_id_mask; > + > + for (i = 0; i < mc->soc->num_clients; i++) { > + if (mc->soc->clients[i].id == id) { > + client = mc->soc->clients[i].name; > + break; > + } > + } > + > + dev_err_ratelimited(mc->dev, "%s: %s%s @%pa: %s\n", > + client, secure, direction, &addr, error); > + } > + > + /* clear interrupts */ > + mc_writel(mc, status, MC_INTSTATUS);> + > + return IRQ_HANDLED; I don't think you are actually handling these errors as stated in commit message. You only log them. Please adjust the commit subject and message to mention the actual purpose/action of error handling. > +} > + > const struct tegra_mc_interrupt_ops tegra186_mc_interrupt_ops = { > .clear_interrupt = tegra186_mc_clear_interrupt, > + .handle_irq = tegra186_mc_handle_irq, > }; > > const struct tegra_mc_ops tegra186_mc_ops = { > @@ -886,6 +985,7 @@ const struct tegra_mc_soc tegra186_mc_soc = { > .num_clients = ARRAY_SIZE(tegra186_mc_clients), > .clients = tegra186_mc_clients, > .num_address_bits = 40, > + .client_id_mask = 0xff, > .intmask = MC_INT_WCAM_ERR | MC_INT_SCRUB_ECC_WR_ACK | > MC_INT_DECERR_GENERALIZED_CARVEOUT | MC_INT_DECERR_MTS | > MC_INT_SECERR_SEC | MC_INT_DECERR_VPR | > Best regards, Krzysztof
11.01.2022 21:45, Ashish Mhetre пишет: > Add all mc-errors supported by T186. > Implement mc interrupt handling routine for T186. > > Signed-off-by: Ashish Mhetre <amhetre@nvidia.com> > --- > drivers/memory/tegra/mc.h | 17 +++++++ > drivers/memory/tegra/tegra186.c | 100 ++++++++++++++++++++++++++++++++++++++++ > 2 files changed, 117 insertions(+) > > diff --git a/drivers/memory/tegra/mc.h b/drivers/memory/tegra/mc.h > index 2d4f495..7817492 100644 > --- a/drivers/memory/tegra/mc.h > +++ b/drivers/memory/tegra/mc.h > @@ -44,6 +44,15 @@ > #define MC_TIMING_CONTROL_DBG 0xf8 > #define MC_TIMING_CONTROL 0xfc > this empty line is unnecessary > +#define MC_ERR_VPR_STATUS 0x654 > +#define MC_ERR_VPR_ADR 0x658 > +#define MC_ERR_SEC_STATUS 0x67c > +#define MC_ERR_SEC_ADR 0x680 > +#define MC_ERR_MTS_STATUS 0x9b0 > +#define MC_ERR_MTS_ADR 0x9b4 > +#define MC_ERR_GENERALIZED_CARVEOUT_STATUS 0xc00 > +#define MC_ERR_GENERALIZED_CARVEOUT_ADR 0xc04 > + > #define MC_INT_DECERR_ROUTE_SANITY BIT(20) > #define MC_INT_WCAM_ERR BIT(19) > #define MC_INT_SCRUB_ECC_WR_ACK BIT(18) > @@ -159,6 +168,14 @@ extern const struct tegra_mc_ops tegra186_mc_ops; > extern const char * const tegra_mc_status_names[32]; > extern const char * const tegra_mc_error_names[8]; > > +struct tegra_mc_error { > + u32 int_bit; > + const char *msg; > + u32 status_reg; > + u32 addr_reg; > + u32 addr_reg_hi; > +}; > + > /* > * These IDs are for internal use of Tegra ICC drivers. The ID numbers are > * chosen such that they don't conflict with the device-tree ICC node IDs. > diff --git a/drivers/memory/tegra/tegra186.c b/drivers/memory/tegra/tegra186.c > index 6766cc4..4f3ae71 100644 > --- a/drivers/memory/tegra/tegra186.c > +++ b/drivers/memory/tegra/tegra186.c > @@ -146,8 +146,107 @@ static void tegra186_mc_clear_interrupt(struct tegra_mc *mc) > mc_writel(mc, MC_INTSTATUS_CLEAR, MC_INTSTATUS); > } > > +static const struct tegra_mc_error int_mc_errors[] = { > + { > + .int_bit = MC_INT_DECERR_EMEM, > + .msg = "EMEM address decode error", > + .status_reg = MC_ERR_STATUS, > + .addr_reg = MC_ERR_ADR, > + }, > + { > + .int_bit = MC_INT_SECURITY_VIOLATION, > + .msg = "non secure access to secure region", > + .status_reg = MC_ERR_STATUS, > + .addr_reg = MC_ERR_ADR, > + }, > + { > + .int_bit = MC_INT_DECERR_VPR, > + .msg = "MC request violates VPR requirements", > + .status_reg = MC_ERR_VPR_STATUS, > + .addr_reg = MC_ERR_VPR_ADR, > + }, I see that these VPR registers present on all SoCs starting with T124. It doesn't look like you need the separate IRQ handlers at all, instead please extend the common T30 handler. For example, you may add a switch-case statements to handle those T124+ specific bits differently. static irqreturn_t tegra30_mc_handle_irq(int irq, void *data) { ... switch (bit) { case MC_INT_DECERR_VPR: status_reg = MC_ERR_VPR_STATUS; addr_reg = MC_ERR_VPR_ADR; break; ... default: status_reg = MC_ERR_STATUS; addr_reg = MC_ERR_ADR; } value = mc_readl(mc, status_reg); ... value = mc_readl(mc, addr_reg);
11.01.2022 21:45, Ashish Mhetre пишет: > +static const struct tegra_mc_error int_mc_errors[] = { > + { > + .int_bit = MC_INT_DECERR_EMEM, > + .msg = "EMEM address decode error", > + .status_reg = MC_ERR_STATUS, > + .addr_reg = MC_ERR_ADR, > + }, > + { > + .int_bit = MC_INT_SECURITY_VIOLATION, > + .msg = "non secure access to secure region", > + .status_reg = MC_ERR_STATUS, > + .addr_reg = MC_ERR_ADR, > + }, > + { > + .int_bit = MC_INT_DECERR_VPR, > + .msg = "MC request violates VPR requirements", > + .status_reg = MC_ERR_VPR_STATUS, > + .addr_reg = MC_ERR_VPR_ADR, > + }, > + { > + .int_bit = MC_INT_SECERR_SEC, > + .msg = "MC request violated SEC carveout requirements", > + .status_reg = MC_ERR_SEC_STATUS, > + .addr_reg = MC_ERR_SEC_ADR, > + }, > + { > + .int_bit = MC_INT_DECERR_MTS, > + .msg = "MTS carveout access violation", > + .status_reg = MC_ERR_MTS_STATUS, > + .addr_reg = MC_ERR_MTS_ADR, > + }, > + { > + .int_bit = MC_INT_DECERR_GENERALIZED_CARVEOUT, > + .msg = "GSC access violation", > + .status_reg = MC_ERR_GENERALIZED_CARVEOUT_STATUS, > + .addr_reg = MC_ERR_GENERALIZED_CARVEOUT_ADR, > + }, > +}; > + > +static irqreturn_t tegra186_mc_handle_irq(int irq, void *data) > +{ > + struct tegra_mc *mc = data; > + unsigned long status; > + unsigned int bit; > + > + status = mc_readl(mc, MC_INTSTATUS) & mc->soc->intmask; > + if (!status) > + return IRQ_NONE; > + > + for_each_set_bit(bit, &status, 32) { > + const char *error = int_mc_errors[bit].msg ?: "unknown"; int_mc_errors[bit] isn't what you need and .int_bit is unused, which suggests that all this code doesn't work and was untested. Please don't send untested patches.
11.01.2022 21:45, Ashish Mhetre пишет: > #define MC_INT_DECERR_ROUTE_SANITY BIT(20) > #define MC_INT_WCAM_ERR BIT(19) > #define MC_INT_SCRUB_ECC_WR_ACK BIT(18) I don't see where these errors are handled in the code. Is documentation that explains these bits publicly available?
12.01.2022 14:22, Dmitry Osipenko пишет: > 11.01.2022 21:45, Ashish Mhetre пишет: >> #define MC_INT_DECERR_ROUTE_SANITY BIT(20) >> #define MC_INT_WCAM_ERR BIT(19) >> #define MC_INT_SCRUB_ECC_WR_ACK BIT(18) > > I don't see where these errors are handled in the code. Is documentation > that explains these bits publicly available? > MC_INT_SCRUB_ECC_WR_ACK shouldn't be a error.
On 1/12/2022 2:26 AM, Krzysztof Kozlowski wrote: > External email: Use caution opening links or attachments > > > Cc in the future also Dmitry, because he was involved in these drivers > quite a lot. > I'll make sure of that in future patches. > On 11/01/2022 19:45, Ashish Mhetre wrote: >> Add all mc-errors supported by T186. >> Implement mc interrupt handling routine for T186. > > Here and in other patches - please use Tegra186 (and similar) to be > consistent with existing upstream naming. > Okay, I'll update the naming. >> >> Signed-off-by: Ashish Mhetre <amhetre@nvidia.com> >> --- >> drivers/memory/tegra/mc.h | 17 +++++++ >> drivers/memory/tegra/tegra186.c | 100 ++++++++++++++++++++++++++++++++++++++++ >> 2 files changed, 117 insertions(+) >> >> diff --git a/drivers/memory/tegra/mc.h b/drivers/memory/tegra/mc.h >> index 2d4f495..7817492 100644 >> --- a/drivers/memory/tegra/mc.h >> +++ b/drivers/memory/tegra/mc.h >> @@ -44,6 +44,15 @@ >> #define MC_TIMING_CONTROL_DBG 0xf8 >> #define MC_TIMING_CONTROL 0xfc >> >> +#define MC_ERR_VPR_STATUS 0x654 >> +#define MC_ERR_VPR_ADR 0x658 >> +#define MC_ERR_SEC_STATUS 0x67c >> +#define MC_ERR_SEC_ADR 0x680 >> +#define MC_ERR_MTS_STATUS 0x9b0 >> +#define MC_ERR_MTS_ADR 0x9b4 >> +#define MC_ERR_GENERALIZED_CARVEOUT_STATUS 0xc00 >> +#define MC_ERR_GENERALIZED_CARVEOUT_ADR 0xc04 >> + >> #define MC_INT_DECERR_ROUTE_SANITY BIT(20) >> #define MC_INT_WCAM_ERR BIT(19) >> #define MC_INT_SCRUB_ECC_WR_ACK BIT(18) >> @@ -159,6 +168,14 @@ extern const struct tegra_mc_ops tegra186_mc_ops; >> extern const char * const tegra_mc_status_names[32]; >> extern const char * const tegra_mc_error_names[8]; >> >> +struct tegra_mc_error { >> + u32 int_bit; > > Where is it used (read)? > Oops, int_bit should be getting used in handle_irq for checking type of interrupt. I'll update it in next version. >> + const char *msg; >> + u32 status_reg; >> + u32 addr_reg; > > Please describe all these fields with kerneldoc. > >> + u32 addr_reg_hi; > > Looks unused. > It's getting used in patch 4. I'll update it in next version. >> +}; >> + >> /* >> * These IDs are for internal use of Tegra ICC drivers. The ID numbers are >> * chosen such that they don't conflict with the device-tree ICC node IDs. >> diff --git a/drivers/memory/tegra/tegra186.c b/drivers/memory/tegra/tegra186.c >> index 6766cc4..4f3ae71 100644 >> --- a/drivers/memory/tegra/tegra186.c >> +++ b/drivers/memory/tegra/tegra186.c >> @@ -146,8 +146,107 @@ static void tegra186_mc_clear_interrupt(struct tegra_mc *mc) >> mc_writel(mc, MC_INTSTATUS_CLEAR, MC_INTSTATUS); >> } >> >> +static const struct tegra_mc_error int_mc_errors[] = { >> + { >> + .int_bit = MC_INT_DECERR_EMEM, >> + .msg = "EMEM address decode error", >> + .status_reg = MC_ERR_STATUS, >> + .addr_reg = MC_ERR_ADR, >> + }, >> + { >> + .int_bit = MC_INT_SECURITY_VIOLATION, >> + .msg = "non secure access to secure region", >> + .status_reg = MC_ERR_STATUS, >> + .addr_reg = MC_ERR_ADR, >> + }, >> + { >> + .int_bit = MC_INT_DECERR_VPR, >> + .msg = "MC request violates VPR requirements", >> + .status_reg = MC_ERR_VPR_STATUS, >> + .addr_reg = MC_ERR_VPR_ADR, >> + }, >> + { >> + .int_bit = MC_INT_SECERR_SEC, >> + .msg = "MC request violated SEC carveout requirements", >> + .status_reg = MC_ERR_SEC_STATUS, >> + .addr_reg = MC_ERR_SEC_ADR, >> + }, >> + { >> + .int_bit = MC_INT_DECERR_MTS, >> + .msg = "MTS carveout access violation", >> + .status_reg = MC_ERR_MTS_STATUS, >> + .addr_reg = MC_ERR_MTS_ADR, >> + }, >> + { >> + .int_bit = MC_INT_DECERR_GENERALIZED_CARVEOUT, >> + .msg = "GSC access violation", >> + .status_reg = MC_ERR_GENERALIZED_CARVEOUT_STATUS, >> + .addr_reg = MC_ERR_GENERALIZED_CARVEOUT_ADR, >> + }, >> +}; >> + >> +static irqreturn_t tegra186_mc_handle_irq(int irq, void *data) >> +{ >> + struct tegra_mc *mc = data; >> + unsigned long status; >> + unsigned int bit; >> + >> + status = mc_readl(mc, MC_INTSTATUS) & mc->soc->intmask; >> + if (!status) >> + return IRQ_NONE; >> + >> + for_each_set_bit(bit, &status, 32) { > > Don't you need bitops.h for this? > It didn't throw any errors when I built it. bitops.h is mostly getting included from one of the existing includes. Probably of_device.h -> of.h -> bitops.h >> + const char *error = int_mc_errors[bit].msg ?: "unknown"; > > int_mc_errors has size of 6, but it's index (the bit variable) can be 20 > or even 32. Are you sure indices are used properly or maybe > int_mc_errors missed initializer per-index? > That's true, I have not implemented it correctly. I should have looped over int_mc_errors[] and should have checked .int_bit with bit and used index of that element. >> + const char *client = "unknown"; >> + const char *direction, *secure; >> + phys_addr_t addr = 0; >> + unsigned int i; >> + u8 id; >> + u32 value; > > Keep order in reversed xmas tree plus name it in a meaningful way. You > read here several registers, so which one is value? Probably you meant > status? > Yes, I meant status. But tegra30_mc_handle_irq() is implemented in similar way. So I went with same names. I'll update the names and order. >> + >> + value = mc_readl(mc, int_mc_errors[bit].status_reg); >> + >> +#ifdef CONFIG_PHYS_ADDR_T_64BIT >> + if (mc->soc->num_address_bits > 32) { >> + addr = ((value >> MC_ERR_STATUS_ADR_HI_SHIFT) & >> + MC_ERR_STATUS_ADR_HI_MASK); >> + addr <<= 32; >> + } >> +#endif >> + addr |= mc_readl(mc, int_mc_errors[bit].addr_reg); >> + >> + if (value & MC_ERR_STATUS_RW) >> + direction = "write"; >> + else >> + direction = "read"; >> + >> + if (value & MC_ERR_STATUS_SECURITY) >> + secure = "secure "; >> + else >> + secure = ""; >> + >> + id = value & mc->soc->client_id_mask; >> + >> + for (i = 0; i < mc->soc->num_clients; i++) { >> + if (mc->soc->clients[i].id == id) { >> + client = mc->soc->clients[i].name; >> + break; >> + } >> + } >> + >> + dev_err_ratelimited(mc->dev, "%s: %s%s @%pa: %s\n", >> + client, secure, direction, &addr, error); >> + } >> + >> + /* clear interrupts */ >> + mc_writel(mc, status, MC_INTSTATUS);> + >> + return IRQ_HANDLED; > > I don't think you are actually handling these errors as stated in commit > message. You only log them. Please adjust the commit subject and message > to mention the actual purpose/action of error handling. > Yes, I'll update the commit message. >> +} >> + >> const struct tegra_mc_interrupt_ops tegra186_mc_interrupt_ops = { >> .clear_interrupt = tegra186_mc_clear_interrupt, >> + .handle_irq = tegra186_mc_handle_irq, >> }; >> >> const struct tegra_mc_ops tegra186_mc_ops = { >> @@ -886,6 +985,7 @@ const struct tegra_mc_soc tegra186_mc_soc = { >> .num_clients = ARRAY_SIZE(tegra186_mc_clients), >> .clients = tegra186_mc_clients, >> .num_address_bits = 40, >> + .client_id_mask = 0xff, >> .intmask = MC_INT_WCAM_ERR | MC_INT_SCRUB_ECC_WR_ACK | >> MC_INT_DECERR_GENERALIZED_CARVEOUT | MC_INT_DECERR_MTS | >> MC_INT_SECERR_SEC | MC_INT_DECERR_VPR | >> > > > > Best regards, > Krzysztof
On 1/12/2022 4:31 PM, Dmitry Osipenko wrote: > External email: Use caution opening links or attachments > > > 11.01.2022 21:45, Ashish Mhetre пишет: >> Add all mc-errors supported by T186. >> Implement mc interrupt handling routine for T186. >> >> Signed-off-by: Ashish Mhetre <amhetre@nvidia.com> >> --- >> drivers/memory/tegra/mc.h | 17 +++++++ >> drivers/memory/tegra/tegra186.c | 100 ++++++++++++++++++++++++++++++++++++++++ >> 2 files changed, 117 insertions(+) >> >> diff --git a/drivers/memory/tegra/mc.h b/drivers/memory/tegra/mc.h >> index 2d4f495..7817492 100644 >> --- a/drivers/memory/tegra/mc.h >> +++ b/drivers/memory/tegra/mc.h >> @@ -44,6 +44,15 @@ >> #define MC_TIMING_CONTROL_DBG 0xf8 >> #define MC_TIMING_CONTROL 0xfc >> > > this empty line is unnecessary > I'll fix this in next version. >> +#define MC_ERR_VPR_STATUS 0x654 >> +#define MC_ERR_VPR_ADR 0x658 >> +#define MC_ERR_SEC_STATUS 0x67c >> +#define MC_ERR_SEC_ADR 0x680 >> +#define MC_ERR_MTS_STATUS 0x9b0 >> +#define MC_ERR_MTS_ADR 0x9b4 >> +#define MC_ERR_GENERALIZED_CARVEOUT_STATUS 0xc00 >> +#define MC_ERR_GENERALIZED_CARVEOUT_ADR 0xc04 >> + >> #define MC_INT_DECERR_ROUTE_SANITY BIT(20) >> #define MC_INT_WCAM_ERR BIT(19) >> #define MC_INT_SCRUB_ECC_WR_ACK BIT(18) >> @@ -159,6 +168,14 @@ extern const struct tegra_mc_ops tegra186_mc_ops; >> extern const char * const tegra_mc_status_names[32]; >> extern const char * const tegra_mc_error_names[8]; >> >> +struct tegra_mc_error { >> + u32 int_bit; >> + const char *msg; >> + u32 status_reg; >> + u32 addr_reg; >> + u32 addr_reg_hi; >> +}; >> + >> /* >> * These IDs are for internal use of Tegra ICC drivers. The ID numbers are >> * chosen such that they don't conflict with the device-tree ICC node IDs. >> diff --git a/drivers/memory/tegra/tegra186.c b/drivers/memory/tegra/tegra186.c >> index 6766cc4..4f3ae71 100644 >> --- a/drivers/memory/tegra/tegra186.c >> +++ b/drivers/memory/tegra/tegra186.c >> @@ -146,8 +146,107 @@ static void tegra186_mc_clear_interrupt(struct tegra_mc *mc) >> mc_writel(mc, MC_INTSTATUS_CLEAR, MC_INTSTATUS); >> } >> >> +static const struct tegra_mc_error int_mc_errors[] = { >> + { >> + .int_bit = MC_INT_DECERR_EMEM, >> + .msg = "EMEM address decode error", >> + .status_reg = MC_ERR_STATUS, >> + .addr_reg = MC_ERR_ADR, >> + }, >> + { >> + .int_bit = MC_INT_SECURITY_VIOLATION, >> + .msg = "non secure access to secure region", >> + .status_reg = MC_ERR_STATUS, >> + .addr_reg = MC_ERR_ADR, >> + }, >> + { >> + .int_bit = MC_INT_DECERR_VPR, >> + .msg = "MC request violates VPR requirements", >> + .status_reg = MC_ERR_VPR_STATUS, >> + .addr_reg = MC_ERR_VPR_ADR, >> + }, > > I see that these VPR registers present on all SoCs starting with T124. > It doesn't look like you need the separate IRQ handlers at all, instead > please extend the common T30 handler. For example, you may add a > switch-case statements to handle those T124+ specific bits differently. > > static irqreturn_t tegra30_mc_handle_irq(int irq, void *data) > { > ... > switch (bit) { > case MC_INT_DECERR_VPR: > status_reg = MC_ERR_VPR_STATUS; > addr_reg = MC_ERR_VPR_ADR; > break; > ... > default: > status_reg = MC_ERR_STATUS; > addr_reg = MC_ERR_ADR; > } > > value = mc_readl(mc, status_reg); > ... > > value = mc_readl(mc, addr_reg); Okay. I'll use same handler as Tegra30 with additional Tegra186 onward bits. Also, shall I change name of tegra30_mc_handle_irq() to tegra_mc_handle_irq() as we are using it across all Tegra SOCs ?
On 1/12/2022 4:32 PM, Dmitry Osipenko wrote: > External email: Use caution opening links or attachments > > > 11.01.2022 21:45, Ashish Mhetre пишет: >> +static const struct tegra_mc_error int_mc_errors[] = { >> + { >> + .int_bit = MC_INT_DECERR_EMEM, >> + .msg = "EMEM address decode error", >> + .status_reg = MC_ERR_STATUS, >> + .addr_reg = MC_ERR_ADR, >> + }, >> + { >> + .int_bit = MC_INT_SECURITY_VIOLATION, >> + .msg = "non secure access to secure region", >> + .status_reg = MC_ERR_STATUS, >> + .addr_reg = MC_ERR_ADR, >> + }, >> + { >> + .int_bit = MC_INT_DECERR_VPR, >> + .msg = "MC request violates VPR requirements", >> + .status_reg = MC_ERR_VPR_STATUS, >> + .addr_reg = MC_ERR_VPR_ADR, >> + }, >> + { >> + .int_bit = MC_INT_SECERR_SEC, >> + .msg = "MC request violated SEC carveout requirements", >> + .status_reg = MC_ERR_SEC_STATUS, >> + .addr_reg = MC_ERR_SEC_ADR, >> + }, >> + { >> + .int_bit = MC_INT_DECERR_MTS, >> + .msg = "MTS carveout access violation", >> + .status_reg = MC_ERR_MTS_STATUS, >> + .addr_reg = MC_ERR_MTS_ADR, >> + }, >> + { >> + .int_bit = MC_INT_DECERR_GENERALIZED_CARVEOUT, >> + .msg = "GSC access violation", >> + .status_reg = MC_ERR_GENERALIZED_CARVEOUT_STATUS, >> + .addr_reg = MC_ERR_GENERALIZED_CARVEOUT_ADR, >> + }, >> +}; >> + >> +static irqreturn_t tegra186_mc_handle_irq(int irq, void *data) >> +{ >> + struct tegra_mc *mc = data; >> + unsigned long status; >> + unsigned int bit; >> + >> + status = mc_readl(mc, MC_INTSTATUS) & mc->soc->intmask; >> + if (!status) >> + return IRQ_NONE; >> + >> + for_each_set_bit(bit, &status, 32) { >> + const char *error = int_mc_errors[bit].msg ?: "unknown"; > > int_mc_errors[bit] isn't what you need and .int_bit is unused, which > suggests that all this code doesn't work and was untested. Please don't > send untested patches. Yes, my bad. I will update this in next patch. Actually I made sure that the patches build without errors and also made sure that they does not break anything. As for reproducing each of these memory controller errors, I agree that I haven't done that.
On 1/12/2022 4:54 PM, Dmitry Osipenko wrote: > External email: Use caution opening links or attachments > > > 12.01.2022 14:22, Dmitry Osipenko пишет: >> 11.01.2022 21:45, Ashish Mhetre пишет: >>> #define MC_INT_DECERR_ROUTE_SANITY BIT(20) >>> #define MC_INT_WCAM_ERR BIT(19) >>> #define MC_INT_SCRUB_ECC_WR_ACK BIT(18) >> >> I don't see where these errors are handled in the code. Is documentation >> that explains these bits publicly available? >> MC_INT_DECERR_ROUTE_SANITY is supposed to be part a of Tegra194 interrupts. I have missed adding it and will update in next version. MC_INT_WCAM_ERR is a MC channel error and is not applicable in upstream. I'll remove it. These bits are defined in same documentation where other errors are defined. > > MC_INT_SCRUB_ECC_WR_ACK shouldn't be a error. Okay, I 'll remove it.
19.01.2022 11:53, Ashish Mhetre пишет: > > > On 1/12/2022 4:31 PM, Dmitry Osipenko wrote: >> External email: Use caution opening links or attachments >> >> >> 11.01.2022 21:45, Ashish Mhetre пишет: >>> Add all mc-errors supported by T186. >>> Implement mc interrupt handling routine for T186. >>> >>> Signed-off-by: Ashish Mhetre <amhetre@nvidia.com> >>> --- >>> drivers/memory/tegra/mc.h | 17 +++++++ >>> drivers/memory/tegra/tegra186.c | 100 >>> ++++++++++++++++++++++++++++++++++++++++ >>> 2 files changed, 117 insertions(+) >>> >>> diff --git a/drivers/memory/tegra/mc.h b/drivers/memory/tegra/mc.h >>> index 2d4f495..7817492 100644 >>> --- a/drivers/memory/tegra/mc.h >>> +++ b/drivers/memory/tegra/mc.h >>> @@ -44,6 +44,15 @@ >>> #define MC_TIMING_CONTROL_DBG 0xf8 >>> #define MC_TIMING_CONTROL 0xfc >>> >> >> this empty line is unnecessary >> > I'll fix this in next version. > >>> +#define MC_ERR_VPR_STATUS 0x654 >>> +#define MC_ERR_VPR_ADR 0x658 >>> +#define MC_ERR_SEC_STATUS 0x67c >>> +#define MC_ERR_SEC_ADR 0x680 >>> +#define MC_ERR_MTS_STATUS 0x9b0 >>> +#define MC_ERR_MTS_ADR 0x9b4 >>> +#define MC_ERR_GENERALIZED_CARVEOUT_STATUS 0xc00 >>> +#define MC_ERR_GENERALIZED_CARVEOUT_ADR 0xc04 >>> + >>> #define MC_INT_DECERR_ROUTE_SANITY BIT(20) >>> #define MC_INT_WCAM_ERR BIT(19) >>> #define MC_INT_SCRUB_ECC_WR_ACK BIT(18) >>> @@ -159,6 +168,14 @@ extern const struct tegra_mc_ops tegra186_mc_ops; >>> extern const char * const tegra_mc_status_names[32]; >>> extern const char * const tegra_mc_error_names[8]; >>> >>> +struct tegra_mc_error { >>> + u32 int_bit; >>> + const char *msg; >>> + u32 status_reg; >>> + u32 addr_reg; >>> + u32 addr_reg_hi; >>> +}; >>> + >>> /* >>> * These IDs are for internal use of Tegra ICC drivers. The ID >>> numbers are >>> * chosen such that they don't conflict with the device-tree ICC >>> node IDs. >>> diff --git a/drivers/memory/tegra/tegra186.c >>> b/drivers/memory/tegra/tegra186.c >>> index 6766cc4..4f3ae71 100644 >>> --- a/drivers/memory/tegra/tegra186.c >>> +++ b/drivers/memory/tegra/tegra186.c >>> @@ -146,8 +146,107 @@ static void tegra186_mc_clear_interrupt(struct >>> tegra_mc *mc) >>> mc_writel(mc, MC_INTSTATUS_CLEAR, MC_INTSTATUS); >>> } >>> >>> +static const struct tegra_mc_error int_mc_errors[] = { >>> + { >>> + .int_bit = MC_INT_DECERR_EMEM, >>> + .msg = "EMEM address decode error", >>> + .status_reg = MC_ERR_STATUS, >>> + .addr_reg = MC_ERR_ADR, >>> + }, >>> + { >>> + .int_bit = MC_INT_SECURITY_VIOLATION, >>> + .msg = "non secure access to secure region", >>> + .status_reg = MC_ERR_STATUS, >>> + .addr_reg = MC_ERR_ADR, >>> + }, >>> + { >>> + .int_bit = MC_INT_DECERR_VPR, >>> + .msg = "MC request violates VPR requirements", >>> + .status_reg = MC_ERR_VPR_STATUS, >>> + .addr_reg = MC_ERR_VPR_ADR, >>> + }, >> >> I see that these VPR registers present on all SoCs starting with T124. >> It doesn't look like you need the separate IRQ handlers at all, instead >> please extend the common T30 handler. For example, you may add a >> switch-case statements to handle those T124+ specific bits differently. >> >> static irqreturn_t tegra30_mc_handle_irq(int irq, void *data) >> { >> ... >> switch (bit) { >> case MC_INT_DECERR_VPR: >> status_reg = MC_ERR_VPR_STATUS; >> addr_reg = MC_ERR_VPR_ADR; >> break; >> ... >> default: >> status_reg = MC_ERR_STATUS; >> addr_reg = MC_ERR_ADR; >> } >> >> value = mc_readl(mc, status_reg); >> ... >> >> value = mc_readl(mc, addr_reg); > Okay. I'll use same handler as Tegra30 with additional Tegra186 onward > bits. > Also, shall I change name of tegra30_mc_handle_irq() to > tegra_mc_handle_irq() as we are using it across all Tegra SOCs ? T20 won't use it, no need to change the name.
19.01.2022 12:09, Ashish Mhetre пишет: > > > On 1/12/2022 4:54 PM, Dmitry Osipenko wrote: >> External email: Use caution opening links or attachments >> >> >> 12.01.2022 14:22, Dmitry Osipenko пишет: >>> 11.01.2022 21:45, Ashish Mhetre пишет: >>>> #define MC_INT_DECERR_ROUTE_SANITY BIT(20) >>>> #define MC_INT_WCAM_ERR BIT(19) >>>> #define MC_INT_SCRUB_ECC_WR_ACK BIT(18) >>> >>> I don't see where these errors are handled in the code. Is documentation >>> that explains these bits publicly available? >>> > MC_INT_DECERR_ROUTE_SANITY is supposed to be part a of Tegra194 > interrupts. I have missed adding it and will update in next version. > MC_INT_WCAM_ERR is a MC channel error and is not applicable in upstream. > I'll remove it. > These bits are defined in same documentation where other errors are > defined. Is this documentation publicly available? I only have access to the public TRM that can be downloaded from the NVIDIA website and there are no register sets there for newer SoCs, AFAICS. If TRM was updated and now contains registers, then I'll try to get the latest version.
diff --git a/drivers/memory/tegra/mc.h b/drivers/memory/tegra/mc.h index 2d4f495..7817492 100644 --- a/drivers/memory/tegra/mc.h +++ b/drivers/memory/tegra/mc.h @@ -44,6 +44,15 @@ #define MC_TIMING_CONTROL_DBG 0xf8 #define MC_TIMING_CONTROL 0xfc +#define MC_ERR_VPR_STATUS 0x654 +#define MC_ERR_VPR_ADR 0x658 +#define MC_ERR_SEC_STATUS 0x67c +#define MC_ERR_SEC_ADR 0x680 +#define MC_ERR_MTS_STATUS 0x9b0 +#define MC_ERR_MTS_ADR 0x9b4 +#define MC_ERR_GENERALIZED_CARVEOUT_STATUS 0xc00 +#define MC_ERR_GENERALIZED_CARVEOUT_ADR 0xc04 + #define MC_INT_DECERR_ROUTE_SANITY BIT(20) #define MC_INT_WCAM_ERR BIT(19) #define MC_INT_SCRUB_ECC_WR_ACK BIT(18) @@ -159,6 +168,14 @@ extern const struct tegra_mc_ops tegra186_mc_ops; extern const char * const tegra_mc_status_names[32]; extern const char * const tegra_mc_error_names[8]; +struct tegra_mc_error { + u32 int_bit; + const char *msg; + u32 status_reg; + u32 addr_reg; + u32 addr_reg_hi; +}; + /* * These IDs are for internal use of Tegra ICC drivers. The ID numbers are * chosen such that they don't conflict with the device-tree ICC node IDs. diff --git a/drivers/memory/tegra/tegra186.c b/drivers/memory/tegra/tegra186.c index 6766cc4..4f3ae71 100644 --- a/drivers/memory/tegra/tegra186.c +++ b/drivers/memory/tegra/tegra186.c @@ -146,8 +146,107 @@ static void tegra186_mc_clear_interrupt(struct tegra_mc *mc) mc_writel(mc, MC_INTSTATUS_CLEAR, MC_INTSTATUS); } +static const struct tegra_mc_error int_mc_errors[] = { + { + .int_bit = MC_INT_DECERR_EMEM, + .msg = "EMEM address decode error", + .status_reg = MC_ERR_STATUS, + .addr_reg = MC_ERR_ADR, + }, + { + .int_bit = MC_INT_SECURITY_VIOLATION, + .msg = "non secure access to secure region", + .status_reg = MC_ERR_STATUS, + .addr_reg = MC_ERR_ADR, + }, + { + .int_bit = MC_INT_DECERR_VPR, + .msg = "MC request violates VPR requirements", + .status_reg = MC_ERR_VPR_STATUS, + .addr_reg = MC_ERR_VPR_ADR, + }, + { + .int_bit = MC_INT_SECERR_SEC, + .msg = "MC request violated SEC carveout requirements", + .status_reg = MC_ERR_SEC_STATUS, + .addr_reg = MC_ERR_SEC_ADR, + }, + { + .int_bit = MC_INT_DECERR_MTS, + .msg = "MTS carveout access violation", + .status_reg = MC_ERR_MTS_STATUS, + .addr_reg = MC_ERR_MTS_ADR, + }, + { + .int_bit = MC_INT_DECERR_GENERALIZED_CARVEOUT, + .msg = "GSC access violation", + .status_reg = MC_ERR_GENERALIZED_CARVEOUT_STATUS, + .addr_reg = MC_ERR_GENERALIZED_CARVEOUT_ADR, + }, +}; + +static irqreturn_t tegra186_mc_handle_irq(int irq, void *data) +{ + struct tegra_mc *mc = data; + unsigned long status; + unsigned int bit; + + status = mc_readl(mc, MC_INTSTATUS) & mc->soc->intmask; + if (!status) + return IRQ_NONE; + + for_each_set_bit(bit, &status, 32) { + const char *error = int_mc_errors[bit].msg ?: "unknown"; + const char *client = "unknown"; + const char *direction, *secure; + phys_addr_t addr = 0; + unsigned int i; + u8 id; + u32 value; + + value = mc_readl(mc, int_mc_errors[bit].status_reg); + +#ifdef CONFIG_PHYS_ADDR_T_64BIT + if (mc->soc->num_address_bits > 32) { + addr = ((value >> MC_ERR_STATUS_ADR_HI_SHIFT) & + MC_ERR_STATUS_ADR_HI_MASK); + addr <<= 32; + } +#endif + addr |= mc_readl(mc, int_mc_errors[bit].addr_reg); + + if (value & MC_ERR_STATUS_RW) + direction = "write"; + else + direction = "read"; + + if (value & MC_ERR_STATUS_SECURITY) + secure = "secure "; + else + secure = ""; + + id = value & mc->soc->client_id_mask; + + for (i = 0; i < mc->soc->num_clients; i++) { + if (mc->soc->clients[i].id == id) { + client = mc->soc->clients[i].name; + break; + } + } + + dev_err_ratelimited(mc->dev, "%s: %s%s @%pa: %s\n", + client, secure, direction, &addr, error); + } + + /* clear interrupts */ + mc_writel(mc, status, MC_INTSTATUS); + + return IRQ_HANDLED; +} + const struct tegra_mc_interrupt_ops tegra186_mc_interrupt_ops = { .clear_interrupt = tegra186_mc_clear_interrupt, + .handle_irq = tegra186_mc_handle_irq, }; const struct tegra_mc_ops tegra186_mc_ops = { @@ -886,6 +985,7 @@ const struct tegra_mc_soc tegra186_mc_soc = { .num_clients = ARRAY_SIZE(tegra186_mc_clients), .clients = tegra186_mc_clients, .num_address_bits = 40, + .client_id_mask = 0xff, .intmask = MC_INT_WCAM_ERR | MC_INT_SCRUB_ECC_WR_ACK | MC_INT_DECERR_GENERALIZED_CARVEOUT | MC_INT_DECERR_MTS | MC_INT_SECERR_SEC | MC_INT_DECERR_VPR |
Add all mc-errors supported by T186. Implement mc interrupt handling routine for T186. Signed-off-by: Ashish Mhetre <amhetre@nvidia.com> --- drivers/memory/tegra/mc.h | 17 +++++++ drivers/memory/tegra/tegra186.c | 100 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 117 insertions(+)