diff mbox series

[09/13] PCI: aardvark: Implement re-issuing config requests on CRS response

Message ID 20211001195856.10081-10-kabel@kernel.org
State New
Headers show
Series PCI: aardvark controller fixes | expand

Commit Message

Marek Behún Oct. 1, 2021, 7:58 p.m. UTC
From: Pali Rohár <pali@kernel.org>

Commit 43f5c77bcbd2 ("PCI: aardvark: Fix reporting CRS value") fixed
handling of CRS response and when CRSSVE flag was not enabled it marked CRS
response as failed transaction (due to simplicity).

But pci-aardvark.c driver is already waiting up to the PIO_RETRY_CNT count
for PIO config response and so we can with a small change implement
re-issuing of config requests as described in PCIe base specification.

This change implements re-issuing of config requests when response is CRS.
Set upper bound of wait cycles to around PIO_RETRY_CNT, afterwards the
transaction is marked as failed and an all-ones value is returned as
before.

We do this by returning appropriate error codes from function
advk_pcie_check_pio_status(). On CRS we return -EAGAIN and caller then
reissues transaction.

Fixes: 43f5c77bcbd2 ("PCI: aardvark: Fix reporting CRS value")
Signed-off-by: Pali Rohár <pali@kernel.org>
Reviewed-by: Marek Behún <kabel@kernel.org>
Signed-off-by: Marek Behún <kabel@kernel.org>
---
 drivers/pci/controller/pci-aardvark.c | 67 +++++++++++++++++----------
 1 file changed, 43 insertions(+), 24 deletions(-)

Comments

Bjorn Helgaas Oct. 2, 2021, 4:35 p.m. UTC | #1
On Fri, Oct 01, 2021 at 09:58:52PM +0200, Marek Behún wrote:
> From: Pali Rohár <pali@kernel.org>
> 
> Commit 43f5c77bcbd2 ("PCI: aardvark: Fix reporting CRS value") fixed
> handling of CRS response and when CRSSVE flag was not enabled it marked CRS
> response as failed transaction (due to simplicity).
> 
> But pci-aardvark.c driver is already waiting up to the PIO_RETRY_CNT count
> for PIO config response and so we can with a small change implement
> re-issuing of config requests as described in PCIe base specification.
> 
> This change implements re-issuing of config requests when response is CRS.
> Set upper bound of wait cycles to around PIO_RETRY_CNT, afterwards the
> transaction is marked as failed and an all-ones value is returned as
> before.

Does this fix a problem?

> We do this by returning appropriate error codes from function
> advk_pcie_check_pio_status(). On CRS we return -EAGAIN and caller then
> reissues transaction.
> 
> Fixes: 43f5c77bcbd2 ("PCI: aardvark: Fix reporting CRS value")
> Signed-off-by: Pali Rohár <pali@kernel.org>
> Reviewed-by: Marek Behún <kabel@kernel.org>
> Signed-off-by: Marek Behún <kabel@kernel.org>
> ---
>  drivers/pci/controller/pci-aardvark.c | 67 +++++++++++++++++----------
>  1 file changed, 43 insertions(+), 24 deletions(-)
> 
> diff --git a/drivers/pci/controller/pci-aardvark.c b/drivers/pci/controller/pci-aardvark.c
> index 51d2955d9cca..7b9870d0b81f 100644
> --- a/drivers/pci/controller/pci-aardvark.c
> +++ b/drivers/pci/controller/pci-aardvark.c
> @@ -603,6 +603,7 @@ static int advk_pcie_check_pio_status(struct advk_pcie *pcie, bool allow_crs, u3
>  	u32 reg;
>  	unsigned int status;
>  	char *strcomp_status, *str_posted;
> +	int ret;
>  
>  	reg = advk_readl(pcie, PIO_STAT);
>  	status = (reg & PIO_COMPLETION_STATUS_MASK) >>
> @@ -627,6 +628,7 @@ static int advk_pcie_check_pio_status(struct advk_pcie *pcie, bool allow_crs, u3
>  	case PIO_COMPLETION_STATUS_OK:
>  		if (reg & PIO_ERR_STATUS) {
>  			strcomp_status = "COMP_ERR";
> +			ret = -EFAULT;
>  			break;
>  		}
>  		/* Get the read result */
> @@ -634,9 +636,11 @@ static int advk_pcie_check_pio_status(struct advk_pcie *pcie, bool allow_crs, u3
>  			*val = advk_readl(pcie, PIO_RD_DATA);
>  		/* No error */
>  		strcomp_status = NULL;
> +		ret = 0;
>  		break;
>  	case PIO_COMPLETION_STATUS_UR:
>  		strcomp_status = "UR";
> +		ret = -EOPNOTSUPP;
>  		break;
>  	case PIO_COMPLETION_STATUS_CRS:
>  		if (allow_crs && val) {
> @@ -654,6 +658,7 @@ static int advk_pcie_check_pio_status(struct advk_pcie *pcie, bool allow_crs, u3
>  			 */
>  			*val = CFG_RD_CRS_VAL;
>  			strcomp_status = NULL;
> +			ret = 0;
>  			break;
>  		}
>  		/* PCIe r4.0, sec 2.3.2, says:
> @@ -669,21 +674,24 @@ static int advk_pcie_check_pio_status(struct advk_pcie *pcie, bool allow_crs, u3
>  		 * Request and taking appropriate action, e.g., complete the
>  		 * Request to the host as a failed transaction.
>  		 *
> -		 * To simplify implementation do not re-issue the Configuration
> -		 * Request and complete the Request as a failed transaction.
> +		 * So return -EAGAIN and caller (pci-aardvark.c driver) will
> +		 * re-issue request again up to the PIO_RETRY_CNT retries.
>  		 */
>  		strcomp_status = "CRS";
> +		ret = -EAGAIN;
>  		break;
>  	case PIO_COMPLETION_STATUS_CA:
>  		strcomp_status = "CA";
> +		ret = -ECANCELED;
>  		break;
>  	default:
>  		strcomp_status = "Unknown";
> +		ret = -EINVAL;
>  		break;
>  	}
>  
>  	if (!strcomp_status)
> -		return 0;
> +		return ret;
>  
>  	if (reg & PIO_NON_POSTED_REQ)
>  		str_posted = "Non-posted";
> @@ -693,7 +701,7 @@ static int advk_pcie_check_pio_status(struct advk_pcie *pcie, bool allow_crs, u3
>  	dev_dbg(dev, "%s PIO Response Status: %s, %#x @ %#x\n",
>  		str_posted, strcomp_status, reg, advk_readl(pcie, PIO_ADDR_LS));
>  
> -	return -EFAULT;
> +	return ret;
>  }
>  
>  static int advk_pcie_wait_pio(struct advk_pcie *pcie)
> @@ -701,13 +709,13 @@ static int advk_pcie_wait_pio(struct advk_pcie *pcie)
>  	struct device *dev = &pcie->pdev->dev;
>  	int i;
>  
> -	for (i = 0; i < PIO_RETRY_CNT; i++) {
> +	for (i = 1; i <= PIO_RETRY_CNT; i++) {
>  		u32 start, isr;
>  
>  		start = advk_readl(pcie, PIO_START);
>  		isr = advk_readl(pcie, PIO_ISR);
>  		if (!start && isr)
> -			return 0;
> +			return i;
>  		udelay(PIO_RETRY_DELAY);
>  	}
>  
> @@ -898,6 +906,7 @@ static int advk_pcie_rd_conf(struct pci_bus *bus, u32 devfn,
>  			     int where, int size, u32 *val)
>  {
>  	struct advk_pcie *pcie = bus->sysdata;
> +	int retry_count;
>  	bool allow_crs;
>  	u32 reg;
>  	int ret;
> @@ -940,16 +949,22 @@ static int advk_pcie_rd_conf(struct pci_bus *bus, u32 devfn,
>  	/* Program the data strobe */
>  	advk_writel(pcie, 0xf, PIO_WR_DATA_STRB);
>  
> -	/* Clear PIO DONE ISR and start the transfer */
> -	advk_writel(pcie, 1, PIO_ISR);
> -	advk_writel(pcie, 1, PIO_START);
> +	retry_count = 0;
> +	do {
> +		/* Clear PIO DONE ISR and start the transfer */
> +		advk_writel(pcie, 1, PIO_ISR);
> +		advk_writel(pcie, 1, PIO_START);
>  
> -	ret = advk_pcie_wait_pio(pcie);
> -	if (ret < 0)
> -		goto try_crs;
> +		ret = advk_pcie_wait_pio(pcie);
> +		if (ret < 0)
> +			goto try_crs;
> +
> +		retry_count += ret;
> +
> +		/* Check PIO status and get the read result */
> +		ret = advk_pcie_check_pio_status(pcie, allow_crs, val);
> +	} while (ret == -EAGAIN && retry_count < PIO_RETRY_CNT);
>  
> -	/* Check PIO status and get the read result */
> -	ret = advk_pcie_check_pio_status(pcie, allow_crs, val);
>  	if (ret < 0)
>  		goto fail;
>  
> @@ -981,6 +996,7 @@ static int advk_pcie_wr_conf(struct pci_bus *bus, u32 devfn,
>  	struct advk_pcie *pcie = bus->sysdata;
>  	u32 reg;
>  	u32 data_strobe = 0x0;
> +	int retry_count;
>  	int offset;
>  	int ret;
>  
> @@ -1022,19 +1038,22 @@ static int advk_pcie_wr_conf(struct pci_bus *bus, u32 devfn,
>  	/* Program the data strobe */
>  	advk_writel(pcie, data_strobe, PIO_WR_DATA_STRB);
>  
> -	/* Clear PIO DONE ISR and start the transfer */
> -	advk_writel(pcie, 1, PIO_ISR);
> -	advk_writel(pcie, 1, PIO_START);
> +	retry_count = 0;
> +	do {
> +		/* Clear PIO DONE ISR and start the transfer */
> +		advk_writel(pcie, 1, PIO_ISR);
> +		advk_writel(pcie, 1, PIO_START);
>  
> -	ret = advk_pcie_wait_pio(pcie);
> -	if (ret < 0)
> -		return PCIBIOS_SET_FAILED;
> +		ret = advk_pcie_wait_pio(pcie);
> +		if (ret < 0)
> +			return PCIBIOS_SET_FAILED;
>  
> -	ret = advk_pcie_check_pio_status(pcie, false, NULL);
> -	if (ret < 0)
> -		return PCIBIOS_SET_FAILED;
> +		retry_count += ret;
>  
> -	return PCIBIOS_SUCCESSFUL;
> +		ret = advk_pcie_check_pio_status(pcie, false, NULL);
> +	} while (ret == -EAGAIN && retry_count < PIO_RETRY_CNT);
> +
> +	return ret < 0 ? PCIBIOS_SET_FAILED : PCIBIOS_SUCCESSFUL;
>  }
>  
>  static struct pci_ops advk_pcie_ops = {
> -- 
> 2.32.0
>
Marek Behún Oct. 4, 2021, 7:21 a.m. UTC | #2
On Sat, 2 Oct 2021 11:35:19 -0500
Bjorn Helgaas <helgaas@kernel.org> wrote:

> On Fri, Oct 01, 2021 at 09:58:52PM +0200, Marek Behún wrote:
> > From: Pali Rohár <pali@kernel.org>
> > 
> > Commit 43f5c77bcbd2 ("PCI: aardvark: Fix reporting CRS value") fixed
> > handling of CRS response and when CRSSVE flag was not enabled it marked CRS
> > response as failed transaction (due to simplicity).
> > 
> > But pci-aardvark.c driver is already waiting up to the PIO_RETRY_CNT count
> > for PIO config response and so we can with a small change implement
> > re-issuing of config requests as described in PCIe base specification.
> > 
> > This change implements re-issuing of config requests when response is CRS.
> > Set upper bound of wait cycles to around PIO_RETRY_CNT, afterwards the
> > transaction is marked as failed and an all-ones value is returned as
> > before.  
> 
> Does this fix a problem?

Hello Bjorn,

Pali has suspicions that certain Marvell WiFi cards may need this [1]
to work, but we do not have them so we cannot test this.

I guess you think this should be considered a new feature instead of a
fix, so that the Fixes tag should be removed, yes? Pali was of the
opinion that this patch "fixes" the driver to conform more to the PCIe
specification, that is why we added the Fixes tag.

Anyway if you think this should be considered a new feature, this patch
can be skipped. The following patches apply even without it.

Marek

[1]
https://lore.kernel.org/linux-wireless/CAHp75Vd5iCLELx8s+Zvcj8ufd2bN6CK26soDMkZyC1CwMO2Qeg@mail.gmail.com/
Lorenzo Pieralisi Oct. 4, 2021, 8:53 a.m. UTC | #3
On Mon, Oct 04, 2021 at 09:21:48AM +0200, Marek Behún wrote:
> On Sat, 2 Oct 2021 11:35:19 -0500
> Bjorn Helgaas <helgaas@kernel.org> wrote:
> 
> > On Fri, Oct 01, 2021 at 09:58:52PM +0200, Marek Behún wrote:
> > > From: Pali Rohár <pali@kernel.org>
> > > 
> > > Commit 43f5c77bcbd2 ("PCI: aardvark: Fix reporting CRS value") fixed
> > > handling of CRS response and when CRSSVE flag was not enabled it marked CRS
> > > response as failed transaction (due to simplicity).
> > > 
> > > But pci-aardvark.c driver is already waiting up to the PIO_RETRY_CNT count
> > > for PIO config response and so we can with a small change implement
> > > re-issuing of config requests as described in PCIe base specification.
> > > 
> > > This change implements re-issuing of config requests when response is CRS.
> > > Set upper bound of wait cycles to around PIO_RETRY_CNT, afterwards the
> > > transaction is marked as failed and an all-ones value is returned as
> > > before.  
> > 
> > Does this fix a problem?
> 
> Hello Bjorn,
> 
> Pali has suspisions that certain Marvell WiFi cards may need this [1]
> to work, but we do not have them so we cannot test this.
> 
> I guess you think this should be considered a new feature instead of a
> fix, so that the Fixes tag should be removed, yes? Pali was of the
> opinion that this patch "fixes" the driver to conform more to the PCIe
> specification, that is why we added the Fixes tag.
> 
> Anyway if you think this should be considered a new feature, this patch
> can be skipped. The following patches apply even without it.

I do not think we should apply to the mainline a fix that can't be
tested sorry, I will skip this patch.

Thanks,
Lorenzo

> Marek
> 
> [1]
> https://lore.kernel.org/linux-wireless/CAHp75Vd5iCLELx8s+Zvcj8ufd2bN6CK26soDMkZyC1CwMO2Qeg@mail.gmail.com/
Marek Behún Oct. 4, 2021, 10:19 a.m. UTC | #4
On Mon, 4 Oct 2021 09:53:35 +0100
Lorenzo Pieralisi <lorenzo.pieralisi@arm.com> wrote:

> On Mon, Oct 04, 2021 at 09:21:48AM +0200, Marek Behún wrote:
> > On Sat, 2 Oct 2021 11:35:19 -0500
> > Bjorn Helgaas <helgaas@kernel.org> wrote:
> >   
> > > On Fri, Oct 01, 2021 at 09:58:52PM +0200, Marek Behún wrote:  
> > > > From: Pali Rohár <pali@kernel.org>
> > > > 
> > > > Commit 43f5c77bcbd2 ("PCI: aardvark: Fix reporting CRS value") fixed
> > > > handling of CRS response and when CRSSVE flag was not enabled it marked CRS
> > > > response as failed transaction (due to simplicity).
> > > > 
> > > > But pci-aardvark.c driver is already waiting up to the PIO_RETRY_CNT count
> > > > for PIO config response and so we can with a small change implement
> > > > re-issuing of config requests as described in PCIe base specification.
> > > > 
> > > > This change implements re-issuing of config requests when response is CRS.
> > > > Set upper bound of wait cycles to around PIO_RETRY_CNT, afterwards the
> > > > transaction is marked as failed and an all-ones value is returned as
> > > > before.    
> > > 
> > > Does this fix a problem?  
> > 
> > Hello Bjorn,
> > 
> > Pali has suspisions that certain Marvell WiFi cards may need this [1]
> > to work, but we do not have them so we cannot test this.
> > 
> > I guess you think this should be considered a new feature instead of a
> > fix, so that the Fixes tag should be removed, yes? Pali was of the
> > opinion that this patch "fixes" the driver to conform more to the PCIe
> > specification, that is why we added the Fixes tag.
> > 
> > Anyway if you think this should be considered a new feature, this patch
> > can be skipped. The following patches apply even without it.  
> 
> I do not think we should apply to the mainline a fix that can't be
> tested sorry, I will skip this patch.

Lorenzo,

my explanation was incorrect.

The functionality added by this patch _is_ tested and correctly does a
retry: it was done by simulating a CRS reply.

We just don't know whether there are real cards used by users that
need this functionality (the mentioned Marvell card may be such a card).

Marek
Bjorn Helgaas Oct. 5, 2021, 7:28 p.m. UTC | #5
On Mon, Oct 04, 2021 at 12:19:38PM +0200, Marek Behún wrote:
> On Mon, 4 Oct 2021 09:53:35 +0100
> Lorenzo Pieralisi <lorenzo.pieralisi@arm.com> wrote:
> > On Mon, Oct 04, 2021 at 09:21:48AM +0200, Marek Behún wrote:
> > > On Sat, 2 Oct 2021 11:35:19 -0500
> > > Bjorn Helgaas <helgaas@kernel.org> wrote:
> > >   
> > > > On Fri, Oct 01, 2021 at 09:58:52PM +0200, Marek Behún wrote:  
> > > > > From: Pali Rohár <pali@kernel.org>
> > > > > 
> > > > > Commit 43f5c77bcbd2 ("PCI: aardvark: Fix reporting CRS value") fixed
> > > > > handling of CRS response and when CRSSVE flag was not enabled it marked CRS
> > > > > response as failed transaction (due to simplicity).
> > > > > 
> > > > > But pci-aardvark.c driver is already waiting up to the PIO_RETRY_CNT count
> > > > > for PIO config response and so we can with a small change implement
> > > > > re-issuing of config requests as described in PCIe base specification.
> > > > > 
> > > > > This change implements re-issuing of config requests when response is CRS.
> > > > > Set upper bound of wait cycles to around PIO_RETRY_CNT, afterwards the
> > > > > transaction is marked as failed and an all-ones value is returned as
> > > > > before.    
> > > > 
> > > > Does this fix a problem?  
> > > 
> > > Hello Bjorn,
> > > 
> > > Pali has suspisions that certain Marvell WiFi cards may need this [1]
> > > to work, but we do not have them so we cannot test this.
> > > 
> > > I guess you think this should be considered a new feature instead of a
> > > fix, so that the Fixes tag should be removed, yes? Pali was of the
> > > opinion that this patch "fixes" the driver to conform more to the PCIe
> > > specification, that is why we added the Fixes tag.
> > > 
> > > Anyway if you think this should be considered a new feature, this patch
> > > can be skipped. The following patches apply even without it.  
> > 
> > I do not think we should apply to the mainline a fix that can't be
> > tested sorry, I will skip this patch.
> 
> Lorenzo,
> 
> my explanation was incorrect.
> 
> The functionality added by this patch _is_ tested and correctly does a
> retry: it was done by simulating a CRS reply.
> 
> We just don't know whether there are real cards used by users that
> need this functionality (the mentioned Marvell card may be such a card).

My claim is that the spec allows root complexes that retry zero times,
so we must assume such a root complex exists and we cannot rely on any
retries.  If such a root complex exists, this patch might fix a
problem, but only for aardvark.  It would be better to fix the problem
in a way that works for all PCIe controllers.

I'm playing devil's advocate here, and it's quite possible that I'm
interpreting the spec incorrectly.  Maybe the Marvell card is a way to
test this in the real world.

Bjorn
Marek Behún Oct. 5, 2021, 10:45 p.m. UTC | #6
On Tue, 5 Oct 2021 14:28:26 -0500
Bjorn Helgaas <helgaas@kernel.org> wrote:

> My claim is that the spec allows root complexes that retry zero times,
> so we must assume such a root complex exists and we cannot rely on any
> retries.  If such a root complex exists, this patch might fix a
> problem, but only for aardvark.  It would be better to fix the problem
> in a way that works for all PCIe controllers.
> 
> I'm playing devil's advocate here, and it's quite possible that I'm
> interpreting the spec incorrectly.  Maybe the Marvell card is a way to
> test this in the real world.
> 
> Bjorn

Hello Bjorn,

this is what I understand from Pali's explanation; please correct me if 
something is wrong:

- If HW supports CRSSVE bit, OS can ask HW to switch from HW-retry to 
SW-retry mode by setting this CRSSVE bit.

- If HW does not support CRSSVE bit, it means that HW supports only 
HW-retry.

- By default CRSSVE is disabled, and it is optional, so HW is required 
to support HW-retry.

- Linux' PCI core supports handling CRSSVE in probe.c: when HW says it 
supports it, PCI core enables it and retries on 0xffff0001 in function 
pci_bus_wait_crs().

- Aardvark controller violates specification: it does not support 
HW-retry even if it is mandatory. Pali is solving this in his patch by 
doing the retry in the driver when CRSSVE is disabled. He is able to do 
this because he gets the information about CRS from another channel 
(another register).

- You are talking about wanting to implement an abstraction for what 
Pali's patch does in PCI core, so that if CRSSVE is not set and someone 
reads PCI_VENDOR_ID, you want to make PCI core doing this retry.

Am I correct here?

This could be done by changing Pali's patch so that instead of 
retrying, the pci_ops->read() method would instead return a value 
indicating that a retry should be done (this would be a new value, 
PCIBIOS_CRS), and then in access.c in the pci_bus_read_config_dword() 
(and pci_user_read_config_dword()), if the pci_ops->read() method 
returns this PCIBIOS_CRS value, the function will retry reading the 
register.

Is this what you mean?

It would make sense to do this, if there are other controllers where 
HW-retry does not work and instead informs about it via side-channel 
even when CRSSVE is disabled.

Marek

PS: Btw, looking at the code, why do we use these PCIBIOS_* macros? And 
then sometimes convert them to error codes with pcibios_err_to_errno()? 
Is this some legacy thing? Should this be converted to errno?
Lorenzo Pieralisi Oct. 6, 2021, 4:29 p.m. UTC | #7
On Tue, Oct 05, 2021 at 02:28:26PM -0500, Bjorn Helgaas wrote:
> On Mon, Oct 04, 2021 at 12:19:38PM +0200, Marek Behún wrote:
> > On Mon, 4 Oct 2021 09:53:35 +0100
> > Lorenzo Pieralisi <lorenzo.pieralisi@arm.com> wrote:
> > > On Mon, Oct 04, 2021 at 09:21:48AM +0200, Marek Behún wrote:
> > > > On Sat, 2 Oct 2021 11:35:19 -0500
> > > > Bjorn Helgaas <helgaas@kernel.org> wrote:
> > > >   
> > > > > On Fri, Oct 01, 2021 at 09:58:52PM +0200, Marek Behún wrote:  
> > > > > > From: Pali Rohár <pali@kernel.org>
> > > > > > 
> > > > > > Commit 43f5c77bcbd2 ("PCI: aardvark: Fix reporting CRS value") fixed
> > > > > > handling of CRS response and when CRSSVE flag was not enabled it marked CRS
> > > > > > response as failed transaction (due to simplicity).
> > > > > > 
> > > > > > But pci-aardvark.c driver is already waiting up to the PIO_RETRY_CNT count
> > > > > > for PIO config response and so we can with a small change implement
> > > > > > re-issuing of config requests as described in PCIe base specification.
> > > > > > 
> > > > > > This change implements re-issuing of config requests when response is CRS.
> > > > > > Set upper bound of wait cycles to around PIO_RETRY_CNT, afterwards the
> > > > > > transaction is marked as failed and an all-ones value is returned as
> > > > > > before.    
> > > > > 
> > > > > Does this fix a problem?  
> > > > 
> > > > Hello Bjorn,
> > > > 
> > > > Pali has suspisions that certain Marvell WiFi cards may need this [1]
> > > > to work, but we do not have them so we cannot test this.
> > > > 
> > > > I guess you think this should be considered a new feature instead of a
> > > > fix, so that the Fixes tag should be removed, yes? Pali was of the
> > > > opinion that this patch "fixes" the driver to conform more to the PCIe
> > > > specification, that is why we added the Fixes tag.
> > > > 
> > > > Anyway if you think this should be considered a new feature, this patch
> > > > can be skipped. The following patches apply even without it.  
> > > 
> > > I do not think we should apply to the mainline a fix that can't be
> > > tested sorry, I will skip this patch.
> > 
> > Lorenzo,
> > 
> > my explanation was incorrect.
> > 
> > The functionality added by this patch _is_ tested and correctly does a
> > retry: it was done by simulating a CRS reply.
> > 
> > We just don't know whether there are real cards used by users that
> > need this functionality (the mentioned Marvell card may be such a card).
> 
> My claim is that the spec allows root complexes that retry zero times,
> so we must assume such a root complex exists and we cannot rely on any
> retries.  If such a root complex exists, this patch might fix a
> problem, but only for aardvark.  It would be better to fix the problem
> in a way that works for all PCIe controllers.

We need a way for those PCI controllers to communicate to SW that
they actually received a CRS completion (and that they don't retry
in HW).

By implementing the logic in the aardvark controller that platform
information is there so to the best of my knowledge this patch
is sound.

I assume that the HW retry is in the specs because there is no
architected way if CRS Software Visibility is not enabled/present to
report CRS completion in an architected PCI manner but I just
don't know the entire background behind this.

Lorenzo

> I'm playing devil's advocate here, and it's quite possible that I'm
> interpreting the spec incorrectly.  Maybe the Marvell card is a way to
> test this in the real world.
Bjorn Helgaas Oct. 6, 2021, 8:13 p.m. UTC | #8
On Wed, Oct 06, 2021 at 05:29:34PM +0100, Lorenzo Pieralisi wrote:
> On Tue, Oct 05, 2021 at 02:28:26PM -0500, Bjorn Helgaas wrote:
> > On Mon, Oct 04, 2021 at 12:19:38PM +0200, Marek Behún wrote:
> > > On Mon, 4 Oct 2021 09:53:35 +0100
> > > Lorenzo Pieralisi <lorenzo.pieralisi@arm.com> wrote:
> > > > On Mon, Oct 04, 2021 at 09:21:48AM +0200, Marek Behún wrote:
> > > > > On Sat, 2 Oct 2021 11:35:19 -0500
> > > > > Bjorn Helgaas <helgaas@kernel.org> wrote:
> > > > >   
> > > > > > On Fri, Oct 01, 2021 at 09:58:52PM +0200, Marek Behún wrote:  
> > > > > > > From: Pali Rohár <pali@kernel.org>
> > > > > > > 
> > > > > > > Commit 43f5c77bcbd2 ("PCI: aardvark: Fix reporting CRS value") fixed
> > > > > > > handling of CRS response and when CRSSVE flag was not enabled it marked CRS
> > > > > > > response as failed transaction (due to simplicity).
> > > > > > > 
> > > > > > > But pci-aardvark.c driver is already waiting up to the PIO_RETRY_CNT count
> > > > > > > for PIO config response and so we can with a small change implement
> > > > > > > re-issuing of config requests as described in PCIe base specification.
> > > > > > > 
> > > > > > > This change implements re-issuing of config requests when response is CRS.
> > > > > > > Set upper bound of wait cycles to around PIO_RETRY_CNT, afterwards the
> > > > > > > transaction is marked as failed and an all-ones value is returned as
> > > > > > > before.    
> > > > > > 
> > > > > > Does this fix a problem?  
> > > > > 
> > > > > Hello Bjorn,
> > > > > 
> > > > > Pali has suspisions that certain Marvell WiFi cards may need this [1]
> > > > > to work, but we do not have them so we cannot test this.
> > > > > 
> > > > > I guess you think this should be considered a new feature instead of a
> > > > > fix, so that the Fixes tag should be removed, yes? Pali was of the
> > > > > opinion that this patch "fixes" the driver to conform more to the PCIe
> > > > > specification, that is why we added the Fixes tag.
> > > > > 
> > > > > Anyway if you think this should be considered a new feature, this patch
> > > > > can be skipped. The following patches apply even without it.  
> > > > 
> > > > I do not think we should apply to the mainline a fix that can't be
> > > > tested sorry, I will skip this patch.
> > > 
> > > Lorenzo,
> > > 
> > > my explanation was incorrect.
> > > 
> > > The functionality added by this patch _is_ tested and correctly does a
> > > retry: it was done by simulating a CRS reply.
> > > 
> > > We just don't know whether there are real cards used by users that
> > > need this functionality (the mentioned Marvell card may be such a card).
> > 
> > My claim is that the spec allows root complexes that retry zero times,
> > so we must assume such a root complex exists and we cannot rely on any
> > retries.  If such a root complex exists, this patch might fix a
> > problem, but only for aardvark.  It would be better to fix the problem
> > in a way that works for all PCIe controllers.
> 
> We need a way for those PCI controllers to communicate to SW that
> they actually received a CRS completion (and that they don't retry
> in HW).

AFAICT this would be controller-dependent.  I think some controllers
have registers that control the number of retries, but of course
that's completely controller-dependent, too.

> By implementing the logic in the aardvark controller that platform
> information is there so to the best of my knowledge this patch
> is sound.
> 
> I assume that the HW retry is in the specs because there is no
> architected way if CRS Software Visibility is not enabled/present to
> report CRS completion in an architected PCI manner but I just
> don't know the entire background behind this.

I assume an error bit would be set in the Status or Secondary Status
register when a PCIe transaction fails.  I'm not sure anybody *looks*
at those, though.

This still looks like a PCI controller band-aid for a device or driver
problem that will likely still exist on other platforms.

Bjorn
Lorenzo Pieralisi Oct. 7, 2021, 11:51 a.m. UTC | #9
On Wed, Oct 06, 2021 at 03:13:05PM -0500, Bjorn Helgaas wrote:

[...]

> > We need a way for those PCI controllers to communicate to SW that
> > they actually received a CRS completion (and that they don't retry
> > in HW).
> 
> AFAICT this would be controller-dependent.  I think some controllers
> have registers that control the number of retries, but of course
> that's completely controller-dependent, too.
> 
> > By implementing the logic in the aardvark controller that platform
> > information is there so to the best of my knowledge this patch
> > is sound.
> > 
> > I assume that the HW retry is in the specs because there is no
> > architected way if CRS Software Visibility is not enabled/present to
> > report CRS completion in an architected PCI manner but I just
> > don't know the entire background behind this.
> 
> I assume an error bit would be set in the Status or Secondary Status
> register when a PCIe transaction fails.  I'm not sure anybody *looks*
> at those, though.
> 
> This still looks like a PCI controller band-aid for a device or driver
> problem that will likely still exist on other platforms.

Yes that's true. I believe we can merge this patch (?) but it would
also be good if Pali/Marek have time to test on other HW and
maybe generalize the concept.

Lorenzo
Marek Behún Oct. 7, 2021, 12:36 p.m. UTC | #10
On Thu, 7 Oct 2021 12:51:57 +0100
Lorenzo Pieralisi <lorenzo.pieralisi@arm.com> wrote:

> On Wed, Oct 06, 2021 at 03:13:05PM -0500, Bjorn Helgaas wrote:
> 
> [...]
> 
> > > We need a way for those PCI controllers to communicate to SW that
> > > they actually received a CRS completion (and that they don't retry
> > > in HW).  
> > 
> > AFAICT this would be controller-dependent.  I think some controllers
> > have registers that control the number of retries, but of course
> > that's completely controller-dependent, too.
> >   
> > > By implementing the logic in the aardvark controller that platform
> > > information is there so to the best of my knowledge this patch
> > > is sound.
> > > 
> > > I assume that the HW retry is in the specs because there is no
> > > architected way if CRS Software Visibility is not enabled/present to
> > > report CRS completion in an architected PCI manner but I just
> > > don't know the entire background behind this.  
> > 
> > I assume an error bit would be set in the Status or Secondary Status
> > register when a PCIe transaction fails.  I'm not sure anybody *looks*
> > at those, though.
> > 
> > This still looks like a PCI controller band-aid for a device or driver
> > problem that will likely still exist on other platforms.  
> 
> Yes that's true. I believe we can merge this patch (?) but it would
> also be good if Pali/Marek have time to test on other HW and
> maybe generalize the concept.

We are willing to try to implement a generic API if you propose how
such an API should look (at least some hints).

But let's merge this into aardvark for now, since the generic case will
take a non-trivial time to implement.

Marek
Bjorn Helgaas Oct. 7, 2021, 7:25 p.m. UTC | #11
On Thu, Oct 07, 2021 at 02:36:25PM +0200, Marek Behún wrote:
> On Thu, 7 Oct 2021 12:51:57 +0100
> Lorenzo Pieralisi <lorenzo.pieralisi@arm.com> wrote:
> 
> > On Wed, Oct 06, 2021 at 03:13:05PM -0500, Bjorn Helgaas wrote:
> > 
> > [...]
> > 
> > > > We need a way for those PCI controllers to communicate to SW that
> > > > they actually received a CRS completion (and that they don't retry
> > > > in HW).  
> > > 
> > > AFAICT this would be controller-dependent.  I think some controllers
> > > have registers that control the number of retries, but of course
> > > that's completely controller-dependent, too.
> > >   
> > > > By implementing the logic in the aardvark controller that platform
> > > > information is there so to the best of my knowledge this patch
> > > > is sound.
> > > > 
> > > > I assume that the HW retry is in the specs because there is no
> > > > architected way if CRS Software Visibility is not enabled/present to
> > > > report CRS completion in an architected PCI manner but I just
> > > > don't know the entire background behind this.  
> > > 
> > > I assume an error bit would be set in the Status or Secondary Status
> > > register when a PCIe transaction fails.  I'm not sure anybody *looks*
> > > at those, though.
> > > 
> > > This still looks like a PCI controller band-aid for a device or driver
> > > problem that will likely still exist on other platforms.  
> > 
> > Yes that's true. I believe we can merge this patch (?) but it would
> > also be good if Pali/Marek have time to test on other HW and
> > maybe generalize the concept.
> 
> We are willing to try to implement a generic API if you propose should
> such API look (at least some hints).

That's the whole point -- CRS SV *is* the generic way for controllers
to report CRS completions to software.  If the controller doesn't
support CRS SV, I don't think a generic API is possible.

Or maybe I don't understand the sort of generic API you have in mind.
What sort of information do you envision the API might provide?

Bjorn
Pali Rohár Oct. 11, 2021, 6:15 p.m. UTC | #12
On Wednesday 06 October 2021 00:45:31 Marek Behún wrote:
> PS: Btw, looking at the code, why do we use these PCIBIOS_* macros? And 
> then sometimes convert them to error codes with pcibios_err_to_errno()? 
> Is this some legacy thing? Should this be converted to errno?

Hello! I would like to bring up Marek's PS again. Do you know why config
read and write functions return these PCIBIOS_* macros and not standard
linux errno codes?
Bjorn Helgaas Oct. 11, 2021, 8:58 p.m. UTC | #13
On Mon, Oct 11, 2021 at 08:15:53PM +0200, Pali Rohár wrote:
> On Wednesday 06 October 2021 00:45:31 Marek Behún wrote:
> > PS: Btw, looking at the code, why do we use these PCIBIOS_* macros? And 
> > then sometimes convert them to error codes with pcibios_err_to_errno()? 
> > Is this some legacy thing? Should this be converted to errno?
> 
> Hello! I would like to remind this Marek's PS. Do you know why config
> read and write functions return these PCIBIOS_* macros and not standard
> linux errno codes?

I don't know.  The PCIBIOS_SUCCESSFUL..PCIBIOS_BUFFER_TOO_SMALL codes
are actually still in the latest PCI Firmware Spec, r3.3, of
1/20/2021, sec 2.9, but are clearly x86-specific.

I think it would be nicer if their usage were confined to arch/x86,
but they've leaked out somewhat.
diff mbox series

Patch

diff --git a/drivers/pci/controller/pci-aardvark.c b/drivers/pci/controller/pci-aardvark.c
index 51d2955d9cca..7b9870d0b81f 100644
--- a/drivers/pci/controller/pci-aardvark.c
+++ b/drivers/pci/controller/pci-aardvark.c
@@ -603,6 +603,7 @@  static int advk_pcie_check_pio_status(struct advk_pcie *pcie, bool allow_crs, u3
 	u32 reg;
 	unsigned int status;
 	char *strcomp_status, *str_posted;
+	int ret;
 
 	reg = advk_readl(pcie, PIO_STAT);
 	status = (reg & PIO_COMPLETION_STATUS_MASK) >>
@@ -627,6 +628,7 @@  static int advk_pcie_check_pio_status(struct advk_pcie *pcie, bool allow_crs, u3
 	case PIO_COMPLETION_STATUS_OK:
 		if (reg & PIO_ERR_STATUS) {
 			strcomp_status = "COMP_ERR";
+			ret = -EFAULT;
 			break;
 		}
 		/* Get the read result */
@@ -634,9 +636,11 @@  static int advk_pcie_check_pio_status(struct advk_pcie *pcie, bool allow_crs, u3
 			*val = advk_readl(pcie, PIO_RD_DATA);
 		/* No error */
 		strcomp_status = NULL;
+		ret = 0;
 		break;
 	case PIO_COMPLETION_STATUS_UR:
 		strcomp_status = "UR";
+		ret = -EOPNOTSUPP;
 		break;
 	case PIO_COMPLETION_STATUS_CRS:
 		if (allow_crs && val) {
@@ -654,6 +658,7 @@  static int advk_pcie_check_pio_status(struct advk_pcie *pcie, bool allow_crs, u3
 			 */
 			*val = CFG_RD_CRS_VAL;
 			strcomp_status = NULL;
+			ret = 0;
 			break;
 		}
 		/* PCIe r4.0, sec 2.3.2, says:
@@ -669,21 +674,24 @@  static int advk_pcie_check_pio_status(struct advk_pcie *pcie, bool allow_crs, u3
 		 * Request and taking appropriate action, e.g., complete the
 		 * Request to the host as a failed transaction.
 		 *
-		 * To simplify implementation do not re-issue the Configuration
-		 * Request and complete the Request as a failed transaction.
+		 * So return -EAGAIN and caller (pci-aardvark.c driver) will
+		 * re-issue request again up to the PIO_RETRY_CNT retries.
 		 */
 		strcomp_status = "CRS";
+		ret = -EAGAIN;
 		break;
 	case PIO_COMPLETION_STATUS_CA:
 		strcomp_status = "CA";
+		ret = -ECANCELED;
 		break;
 	default:
 		strcomp_status = "Unknown";
+		ret = -EINVAL;
 		break;
 	}
 
 	if (!strcomp_status)
-		return 0;
+		return ret;
 
 	if (reg & PIO_NON_POSTED_REQ)
 		str_posted = "Non-posted";
@@ -693,7 +701,7 @@  static int advk_pcie_check_pio_status(struct advk_pcie *pcie, bool allow_crs, u3
 	dev_dbg(dev, "%s PIO Response Status: %s, %#x @ %#x\n",
 		str_posted, strcomp_status, reg, advk_readl(pcie, PIO_ADDR_LS));
 
-	return -EFAULT;
+	return ret;
 }
 
 static int advk_pcie_wait_pio(struct advk_pcie *pcie)
@@ -701,13 +709,13 @@  static int advk_pcie_wait_pio(struct advk_pcie *pcie)
 	struct device *dev = &pcie->pdev->dev;
 	int i;
 
-	for (i = 0; i < PIO_RETRY_CNT; i++) {
+	for (i = 1; i <= PIO_RETRY_CNT; i++) {
 		u32 start, isr;
 
 		start = advk_readl(pcie, PIO_START);
 		isr = advk_readl(pcie, PIO_ISR);
 		if (!start && isr)
-			return 0;
+			return i;
 		udelay(PIO_RETRY_DELAY);
 	}
 
@@ -898,6 +906,7 @@  static int advk_pcie_rd_conf(struct pci_bus *bus, u32 devfn,
 			     int where, int size, u32 *val)
 {
 	struct advk_pcie *pcie = bus->sysdata;
+	int retry_count;
 	bool allow_crs;
 	u32 reg;
 	int ret;
@@ -940,16 +949,22 @@  static int advk_pcie_rd_conf(struct pci_bus *bus, u32 devfn,
 	/* Program the data strobe */
 	advk_writel(pcie, 0xf, PIO_WR_DATA_STRB);
 
-	/* Clear PIO DONE ISR and start the transfer */
-	advk_writel(pcie, 1, PIO_ISR);
-	advk_writel(pcie, 1, PIO_START);
+	retry_count = 0;
+	do {
+		/* Clear PIO DONE ISR and start the transfer */
+		advk_writel(pcie, 1, PIO_ISR);
+		advk_writel(pcie, 1, PIO_START);
 
-	ret = advk_pcie_wait_pio(pcie);
-	if (ret < 0)
-		goto try_crs;
+		ret = advk_pcie_wait_pio(pcie);
+		if (ret < 0)
+			goto try_crs;
+
+		retry_count += ret;
+
+		/* Check PIO status and get the read result */
+		ret = advk_pcie_check_pio_status(pcie, allow_crs, val);
+	} while (ret == -EAGAIN && retry_count < PIO_RETRY_CNT);
 
-	/* Check PIO status and get the read result */
-	ret = advk_pcie_check_pio_status(pcie, allow_crs, val);
 	if (ret < 0)
 		goto fail;
 
@@ -981,6 +996,7 @@  static int advk_pcie_wr_conf(struct pci_bus *bus, u32 devfn,
 	struct advk_pcie *pcie = bus->sysdata;
 	u32 reg;
 	u32 data_strobe = 0x0;
+	int retry_count;
 	int offset;
 	int ret;
 
@@ -1022,19 +1038,22 @@  static int advk_pcie_wr_conf(struct pci_bus *bus, u32 devfn,
 	/* Program the data strobe */
 	advk_writel(pcie, data_strobe, PIO_WR_DATA_STRB);
 
-	/* Clear PIO DONE ISR and start the transfer */
-	advk_writel(pcie, 1, PIO_ISR);
-	advk_writel(pcie, 1, PIO_START);
+	retry_count = 0;
+	do {
+		/* Clear PIO DONE ISR and start the transfer */
+		advk_writel(pcie, 1, PIO_ISR);
+		advk_writel(pcie, 1, PIO_START);
 
-	ret = advk_pcie_wait_pio(pcie);
-	if (ret < 0)
-		return PCIBIOS_SET_FAILED;
+		ret = advk_pcie_wait_pio(pcie);
+		if (ret < 0)
+			return PCIBIOS_SET_FAILED;
 
-	ret = advk_pcie_check_pio_status(pcie, false, NULL);
-	if (ret < 0)
-		return PCIBIOS_SET_FAILED;
+		retry_count += ret;
 
-	return PCIBIOS_SUCCESSFUL;
+		ret = advk_pcie_check_pio_status(pcie, false, NULL);
+	} while (ret == -EAGAIN && retry_count < PIO_RETRY_CNT);
+
+	return ret < 0 ? PCIBIOS_SET_FAILED : PCIBIOS_SUCCESSFUL;
 }
 
 static struct pci_ops advk_pcie_ops = {