diff mbox series

[V3] memory: tegra: Add MC error logging on tegra186 onward

Message ID 1642763962-32129-1-git-send-email-amhetre@nvidia.com
State Changes Requested
Headers show
Series [V3] memory: tegra: Add MC error logging on tegra186 onward | expand

Commit Message

Ashish Mhetre Jan. 21, 2022, 11:19 a.m. UTC
Remove static from tegra30_mc_handle_irq and use it as interrupt handler
for MC interrupts on tegra186, tegra194 and tegra234 to log the errors.
Add error specific MC status and address register bits and use them on
tegra186, tegra194 and tegra234.
Add error logging for generalized carveout interrupt on tegra186, tegra194
and tegra234.
Add error logging for route sanity interrupt on tegra194 an tegra234.
Add register for higher bits of error address which is available on
tegra194 and tegra234.
Add a boolean variable 'has_addr_hi_reg' in tegra_mc_soc struture which
will be true if soc has register for higher bits of memory controller
error address. Set it true for tegra194 and tegra234.

Signed-off-by: Ashish Mhetre <amhetre@nvidia.com>
---
Changes in v3:
- Removed unnecessary ifdefs
- Grouped newly added MC registers with existing MC registers
- Removed unnecessary initialization of variables
- Updated code to use newly added field 'has_addr_hi_reg' instead of ifdefs

Changes in v2:
- Updated patch subject and commit message
- Removed separate irq handlers
- Updated tegra30_mc_handle_irq to be used for Tegra186 onwards as well

 drivers/memory/tegra/mc.c       | 57 +++++++++++++++++++++++++++++++++--------
 drivers/memory/tegra/mc.h       | 16 +++++++++++-
 drivers/memory/tegra/tegra186.c |  7 +++++
 drivers/memory/tegra/tegra194.c |  6 +++++
 drivers/memory/tegra/tegra234.c |  6 +++++
 include/soc/tegra/mc.h          |  1 +
 6 files changed, 81 insertions(+), 12 deletions(-)

Comments

Dmitry Osipenko Jan. 21, 2022, 12:31 p.m. UTC | #1
...
> @@ -529,12 +536,44 @@ static irqreturn_t tegra30_mc_handle_irq(int irq, void *data)
>  		u8 id, type;
>  		u32 value;
>  
> -		value = mc_readl(mc, MC_ERR_STATUS);
> +		switch (bit) {

Again, I see that the code wasn't tested :/ Shouldn't be too difficult
to create memory-read errors to check that at least basics work
properly. Please always test your changes next time.

So it must be "switch(BIT(bit))" here, please write it like this:

u32 intmask = BIT(bit);
...
	switch(intmask) {

> +		case MC_INT_DECERR_VPR:
> +			status_reg = MC_ERR_VPR_STATUS;
> +			addr_reg = MC_ERR_VPR_ADR;
> +			break;

Please add newline after every "break;" of every case. This will make
code easier to read a tad.

> +		case MC_INT_SECERR_SEC:
> +			status_reg = MC_ERR_SEC_STATUS;
> +			addr_reg = MC_ERR_SEC_ADR;
> +			break;
> +		case MC_INT_DECERR_MTS:
> +			status_reg = MC_ERR_MTS_STATUS;
> +			addr_reg = MC_ERR_MTS_ADR;
> +			break;
> +		case MC_INT_DECERR_GENERALIZED_CARVEOUT:
> +			status_reg = MC_ERR_GENERALIZED_CARVEOUT_STATUS;
> +			addr_reg = MC_ERR_GENERALIZED_CARVEOUT_ADR;
> +			break;
> +		case MC_INT_DECERR_ROUTE_SANITY:
> +			status_reg = MC_ERR_ROUTE_SANITY_STATUS;
> +			addr_reg = MC_ERR_ROUTE_SANITY_ADR;
> +			break;
> +		default:
> +			status_reg = MC_ERR_STATUS;
> +			addr_reg = MC_ERR_ADR;

Add newline here too.

> +			if (mc->soc->has_addr_hi_reg)
> +				addr_hi_reg = MC_ERR_ADR_HI;
> +			break;
> +		}
...
Note that you could use "git format-patch -v4 ..." instead of manually
changing the [PATCH] prefix.
kernel test robot Jan. 21, 2022, 6:40 p.m. UTC | #2
Hi Ashish,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on tegra/for-next]
[also build test WARNING on next-20220121]
[cannot apply to v5.16]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Ashish-Mhetre/memory-tegra-Add-MC-error-logging-on-tegra186-onward/20220121-192115
base:   https://git.kernel.org/pub/scm/linux/kernel/git/tegra/linux.git for-next
config: arc-randconfig-r043-20220121 (https://download.01.org/0day-ci/archive/20220122/202201220256.pOvahFTK-lkp@intel.com/config)
compiler: arceb-elf-gcc (GCC) 11.2.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/0day-ci/linux/commit/c76ed3ccfbb800c6a32b27d87b2d5464ebdf1918
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Ashish-Mhetre/memory-tegra-Add-MC-error-logging-on-tegra186-onward/20220121-192115
        git checkout c76ed3ccfbb800c6a32b27d87b2d5464ebdf1918
        # save the config file to linux build tree
        mkdir build_dir
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-11.2.0 make.cross O=build_dir ARCH=arc SHELL=/bin/bash

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

   drivers/memory/tegra/mc.c: In function 'tegra30_mc_handle_irq':
>> drivers/memory/tegra/mc.c:530:21: warning: variable 'addr_hi_reg' set but not used [-Wunused-but-set-variable]
     530 |                 u32 addr_hi_reg = 0, status_reg, addr_reg;
         |                     ^~~~~~~~~~~


vim +/addr_hi_reg +530 drivers/memory/tegra/mc.c

   516	
   517	irqreturn_t tegra30_mc_handle_irq(int irq, void *data)
   518	{
   519		struct tegra_mc *mc = data;
   520		unsigned long status;
   521		unsigned int bit;
   522	
   523		/* mask all interrupts to avoid flooding */
   524		status = mc_readl(mc, MC_INTSTATUS) & mc->soc->intmask;
   525		if (!status)
   526			return IRQ_NONE;
   527	
   528		for_each_set_bit(bit, &status, 32) {
   529			const char *error = tegra_mc_status_names[bit] ?: "unknown";
 > 530			u32 addr_hi_reg = 0, status_reg, addr_reg;
   531			const char *client = "unknown", *desc;
   532			const char *direction, *secure;
   533			phys_addr_t addr = 0;
   534			unsigned int i;
   535			char perm[7];
   536			u8 id, type;
   537			u32 value;
   538	
   539			switch (bit) {
   540			case MC_INT_DECERR_VPR:
   541				status_reg = MC_ERR_VPR_STATUS;
   542				addr_reg = MC_ERR_VPR_ADR;
   543				break;
   544			case MC_INT_SECERR_SEC:
   545				status_reg = MC_ERR_SEC_STATUS;
   546				addr_reg = MC_ERR_SEC_ADR;
   547				break;
   548			case MC_INT_DECERR_MTS:
   549				status_reg = MC_ERR_MTS_STATUS;
   550				addr_reg = MC_ERR_MTS_ADR;
   551				break;
   552			case MC_INT_DECERR_GENERALIZED_CARVEOUT:
   553				status_reg = MC_ERR_GENERALIZED_CARVEOUT_STATUS;
   554				addr_reg = MC_ERR_GENERALIZED_CARVEOUT_ADR;
   555				break;
   556			case MC_INT_DECERR_ROUTE_SANITY:
   557				status_reg = MC_ERR_ROUTE_SANITY_STATUS;
   558				addr_reg = MC_ERR_ROUTE_SANITY_ADR;
   559				break;
   560			default:
   561				status_reg = MC_ERR_STATUS;
   562				addr_reg = MC_ERR_ADR;
   563				if (mc->soc->has_addr_hi_reg)
   564					addr_hi_reg = MC_ERR_ADR_HI;
   565				break;
   566			}
   567	
   568			value = mc_readl(mc, status_reg);
   569	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
Krzysztof Kozlowski Jan. 21, 2022, 6:49 p.m. UTC | #3
On 21/01/2022 13:31, Dmitry Osipenko wrote:
> ...
>> @@ -529,12 +536,44 @@ static irqreturn_t tegra30_mc_handle_irq(int irq, void *data)
>>  		u8 id, type;
>>  		u32 value;
>>  
>> -		value = mc_readl(mc, MC_ERR_STATUS);
>> +		switch (bit) {
> 
> Again, I see that the code wasn't tested :/ Shouldn't be too difficult
> to create memory-read errors to check that at least basics work
> properly. Please always test your changes next time.
> 
> So it must be "switch(BIT(bit))" here, please write it like this:
> 
> u32 intmask = BIT(bit);
> ...
> 	switch(intmask) {
> 

Also, please build your changes with W=1... It's the second try of
sending un-tested and not-working code. This time also with a compiler
warning. This looks very bad :(

For big companies with a lot of engineers, like nVidia, it is useful if
some internal review happens. It is a nice way to offload community
reviewers which are - like maintainers - a scarce resource. Doing
internal review is not a requirement, but helps to find such mistakes
earlier, before using the community. It is simply nice to us.

Best regards,
Krzysztof
Krzysztof Kozlowski Jan. 21, 2022, 6:51 p.m. UTC | #4
On 21/01/2022 19:49, Krzysztof Kozlowski wrote:
> On 21/01/2022 13:31, Dmitry Osipenko wrote:
>> ...
>>> @@ -529,12 +536,44 @@ static irqreturn_t tegra30_mc_handle_irq(int irq, void *data)
>>>  		u8 id, type;
>>>  		u32 value;
>>>  
>>> -		value = mc_readl(mc, MC_ERR_STATUS);
>>> +		switch (bit) {
>>
>> Again, I see that the code wasn't tested :/ Shouldn't be too difficult
>> to create memory-read errors to check that at least basics work
>> properly. Please always test your changes next time.
>>
>> So it must be "switch(BIT(bit))" here, please write it like this:
>>
>> u32 intmask = BIT(bit);
>> ...
>> 	switch(intmask) {
>>
> 
> Also, please build your changes with W=1... It's the second try of
> sending un-tested and not-working code. This time also with a compiler
> warning. This looks very bad :(

I am afraid this might be taken too literally and W=1 build will replace
other required steps, so let me be explicit:
We not only expect to compile it but also compile with W=1, run sparse,
smatch and coccicheck. Then also test.

> 
> For big companies with a lot of engineers, like nVidia, it is useful if
> some internal review happens. It is a nice way to offload community
> reviewers which are - like maintainers - a scarce resource. Doing
> internal review is not a requirement, but helps to find such mistakes
> earlier, before using the community. It is simply nice to us.
> 

Best regards,
Krzysztof
diff mbox series

Patch

diff --git a/drivers/memory/tegra/mc.c b/drivers/memory/tegra/mc.c
index bf3abb6..5ebe675 100644
--- a/drivers/memory/tegra/mc.c
+++ b/drivers/memory/tegra/mc.c
@@ -508,7 +508,13 @@  int tegra30_mc_probe(struct tegra_mc *mc)
 	return 0;
 }
 
-static irqreturn_t tegra30_mc_handle_irq(int irq, void *data)
+const struct tegra_mc_ops tegra30_mc_ops = {
+	.probe = tegra30_mc_probe,
+	.handle_irq = tegra30_mc_handle_irq,
+};
+#endif
+
+irqreturn_t tegra30_mc_handle_irq(int irq, void *data)
 {
 	struct tegra_mc *mc = data;
 	unsigned long status;
@@ -521,6 +527,7 @@  static irqreturn_t tegra30_mc_handle_irq(int irq, void *data)
 
 	for_each_set_bit(bit, &status, 32) {
 		const char *error = tegra_mc_status_names[bit] ?: "unknown";
+		u32 addr_hi_reg = 0, status_reg, addr_reg;
 		const char *client = "unknown", *desc;
 		const char *direction, *secure;
 		phys_addr_t addr = 0;
@@ -529,12 +536,44 @@  static irqreturn_t tegra30_mc_handle_irq(int irq, void *data)
 		u8 id, type;
 		u32 value;
 
-		value = mc_readl(mc, MC_ERR_STATUS);
+		switch (bit) {
+		case MC_INT_DECERR_VPR:
+			status_reg = MC_ERR_VPR_STATUS;
+			addr_reg = MC_ERR_VPR_ADR;
+			break;
+		case MC_INT_SECERR_SEC:
+			status_reg = MC_ERR_SEC_STATUS;
+			addr_reg = MC_ERR_SEC_ADR;
+			break;
+		case MC_INT_DECERR_MTS:
+			status_reg = MC_ERR_MTS_STATUS;
+			addr_reg = MC_ERR_MTS_ADR;
+			break;
+		case MC_INT_DECERR_GENERALIZED_CARVEOUT:
+			status_reg = MC_ERR_GENERALIZED_CARVEOUT_STATUS;
+			addr_reg = MC_ERR_GENERALIZED_CARVEOUT_ADR;
+			break;
+		case MC_INT_DECERR_ROUTE_SANITY:
+			status_reg = MC_ERR_ROUTE_SANITY_STATUS;
+			addr_reg = MC_ERR_ROUTE_SANITY_ADR;
+			break;
+		default:
+			status_reg = MC_ERR_STATUS;
+			addr_reg = MC_ERR_ADR;
+			if (mc->soc->has_addr_hi_reg)
+				addr_hi_reg = MC_ERR_ADR_HI;
+			break;
+		}
+
+		value = mc_readl(mc, status_reg);
 
 #ifdef CONFIG_PHYS_ADDR_T_64BIT
 		if (mc->soc->num_address_bits > 32) {
-			addr = ((value >> MC_ERR_STATUS_ADR_HI_SHIFT) &
-				MC_ERR_STATUS_ADR_HI_MASK);
+			if (addr_hi_reg)
+				addr = mc_readl(mc, addr_hi_reg);
+			else
+				addr = ((value >> MC_ERR_STATUS_ADR_HI_SHIFT) &
+					MC_ERR_STATUS_ADR_HI_MASK);
 			addr <<= 32;
 		}
 #endif
@@ -591,7 +630,7 @@  static irqreturn_t tegra30_mc_handle_irq(int irq, void *data)
 			break;
 		}
 
-		value = mc_readl(mc, MC_ERR_ADR);
+		value = mc_readl(mc, addr_reg);
 		addr |= value;
 
 		dev_err_ratelimited(mc->dev, "%s: %s%s @%pa: %s (%s%s)\n",
@@ -605,12 +644,6 @@  static irqreturn_t tegra30_mc_handle_irq(int irq, void *data)
 	return IRQ_HANDLED;
 }
 
-const struct tegra_mc_ops tegra30_mc_ops = {
-	.probe = tegra30_mc_probe,
-	.handle_irq = tegra30_mc_handle_irq,
-};
-#endif
-
 const char *const tegra_mc_status_names[32] = {
 	[ 1] = "External interrupt",
 	[ 6] = "EMEM address decode error",
@@ -622,6 +655,8 @@  const char *const tegra_mc_status_names[32] = {
 	[12] = "VPR violation",
 	[13] = "Secure carveout violation",
 	[16] = "MTS carveout violation",
+	[17] = "Generalized carveout violation",
+	[20] = "Route Sanity error",
 };
 
 const char *const tegra_mc_error_names[8] = {
diff --git a/drivers/memory/tegra/mc.h b/drivers/memory/tegra/mc.h
index 062886e..47d2163 100644
--- a/drivers/memory/tegra/mc.h
+++ b/drivers/memory/tegra/mc.h
@@ -43,7 +43,20 @@ 
 #define MC_EMEM_ARB_OVERRIDE				0xe8
 #define MC_TIMING_CONTROL_DBG				0xf8
 #define MC_TIMING_CONTROL				0xfc
-
+#define MC_ERR_VPR_STATUS				0x654
+#define MC_ERR_VPR_ADR					0x658
+#define MC_ERR_SEC_STATUS				0x67c
+#define MC_ERR_SEC_ADR					0x680
+#define MC_ERR_MTS_STATUS				0x9b0
+#define MC_ERR_MTS_ADR					0x9b4
+#define MC_ERR_ROUTE_SANITY_STATUS			0x9c0
+#define MC_ERR_ROUTE_SANITY_ADR				0x9c4
+#define MC_ERR_GENERALIZED_CARVEOUT_STATUS		0xc00
+#define MC_ERR_GENERALIZED_CARVEOUT_ADR			0xc04
+#define MC_ERR_ADR_HI					0x11fc
+
+#define MC_INT_DECERR_ROUTE_SANITY			BIT(20)
+#define MC_INT_DECERR_GENERALIZED_CARVEOUT		BIT(17)
 #define MC_INT_DECERR_MTS				BIT(16)
 #define MC_INT_SECERR_SEC				BIT(13)
 #define MC_INT_DECERR_VPR				BIT(12)
@@ -156,6 +169,7 @@  extern const struct tegra_mc_ops tegra30_mc_ops;
 extern const struct tegra_mc_ops tegra186_mc_ops;
 #endif
 
+irqreturn_t tegra30_mc_handle_irq(int irq, void *data);
 extern const char * const tegra_mc_status_names[32];
 extern const char * const tegra_mc_error_names[8];
 
diff --git a/drivers/memory/tegra/tegra186.c b/drivers/memory/tegra/tegra186.c
index 3d15388..a619e6c 100644
--- a/drivers/memory/tegra/tegra186.c
+++ b/drivers/memory/tegra/tegra186.c
@@ -16,6 +16,8 @@ 
 #include <dt-bindings/memory/tegra186-mc.h>
 #endif
 
+#include "mc.h"
+
 #define MC_SID_STREAMID_OVERRIDE_MASK GENMASK(7, 0)
 #define MC_SID_STREAMID_SECURITY_WRITE_ACCESS_DISABLED BIT(16)
 #define MC_SID_STREAMID_SECURITY_OVERRIDE BIT(8)
@@ -144,6 +146,7 @@  const struct tegra_mc_ops tegra186_mc_ops = {
 	.remove = tegra186_mc_remove,
 	.resume = tegra186_mc_resume,
 	.probe_device = tegra186_mc_probe_device,
+	.handle_irq = tegra30_mc_handle_irq,
 };
 
 #if defined(CONFIG_ARCH_TEGRA_186_SOC)
@@ -875,6 +878,10 @@  const struct tegra_mc_soc tegra186_mc_soc = {
 	.num_clients = ARRAY_SIZE(tegra186_mc_clients),
 	.clients = tegra186_mc_clients,
 	.num_address_bits = 40,
+	.client_id_mask = 0xff,
+	.intmask = MC_INT_DECERR_GENERALIZED_CARVEOUT | MC_INT_DECERR_MTS |
+		   MC_INT_SECERR_SEC | MC_INT_DECERR_VPR |
+		   MC_INT_SECURITY_VIOLATION | MC_INT_DECERR_EMEM,
 	.ops = &tegra186_mc_ops,
 };
 #endif
diff --git a/drivers/memory/tegra/tegra194.c b/drivers/memory/tegra/tegra194.c
index cab998b..2765830 100644
--- a/drivers/memory/tegra/tegra194.c
+++ b/drivers/memory/tegra/tegra194.c
@@ -1347,5 +1347,11 @@  const struct tegra_mc_soc tegra194_mc_soc = {
 	.num_clients = ARRAY_SIZE(tegra194_mc_clients),
 	.clients = tegra194_mc_clients,
 	.num_address_bits = 40,
+	.client_id_mask = 0xff,
+	.intmask = MC_INT_DECERR_ROUTE_SANITY |
+		   MC_INT_DECERR_GENERALIZED_CARVEOUT | MC_INT_DECERR_MTS |
+		   MC_INT_SECERR_SEC | MC_INT_DECERR_VPR |
+		   MC_INT_SECURITY_VIOLATION | MC_INT_DECERR_EMEM,
+	.has_addr_hi_reg = true,
 	.ops = &tegra186_mc_ops,
 };
diff --git a/drivers/memory/tegra/tegra234.c b/drivers/memory/tegra/tegra234.c
index 45efc51..2497b82 100644
--- a/drivers/memory/tegra/tegra234.c
+++ b/drivers/memory/tegra/tegra234.c
@@ -77,5 +77,11 @@  const struct tegra_mc_soc tegra234_mc_soc = {
 	.num_clients = ARRAY_SIZE(tegra234_mc_clients),
 	.clients = tegra234_mc_clients,
 	.num_address_bits = 40,
+	.client_id_mask = 0xff,
+	.intmask = MC_INT_DECERR_ROUTE_SANITY |
+		   MC_INT_DECERR_GENERALIZED_CARVEOUT | MC_INT_DECERR_MTS |
+		   MC_INT_SECERR_SEC | MC_INT_DECERR_VPR |
+		   MC_INT_SECURITY_VIOLATION | MC_INT_DECERR_EMEM,
+	.has_addr_hi_reg = true,
 	.ops = &tegra186_mc_ops,
 };
diff --git a/include/soc/tegra/mc.h b/include/soc/tegra/mc.h
index 1066b11..8ee0ae4 100644
--- a/include/soc/tegra/mc.h
+++ b/include/soc/tegra/mc.h
@@ -198,6 +198,7 @@  struct tegra_mc_soc {
 	const struct tegra_smmu_soc *smmu;
 
 	u32 intmask;
+	bool has_addr_hi_reg;
 
 	const struct tegra_mc_reset_ops *reset_ops;
 	const struct tegra_mc_reset *resets;