diff mbox

[V6,3/5] PCI: thunder-pem: Allow to probe PEM-specific register range for ACPI case

Message ID 1473449047-10499-4-git-send-email-tn@semihalf.com
State Changes Requested
Headers show

Commit Message

Tomasz Nowicki Sept. 9, 2016, 7:24 p.m. UTC
thunder-pem driver stands for being ACPI based PCI host controller.
However, there is no standard way to describe its PEM-specific register
ranges in ACPI tables. Thus we add thunder_pem_init() ACPI extension
to obtain hardcoded addresses from static resource array.
Although it is not pretty, it prevents from creating standard mechanism to
handle similar cases in future.

Signed-off-by: Tomasz Nowicki <tn@semihalf.com>
---
 drivers/pci/host/pci-thunder-pem.c | 61 ++++++++++++++++++++++++++++++--------
 1 file changed, 48 insertions(+), 13 deletions(-)

Comments

Bjorn Helgaas Sept. 19, 2016, 6:09 p.m. UTC | #1
On Fri, Sep 09, 2016 at 09:24:05PM +0200, Tomasz Nowicki wrote:
> thunder-pem driver stands for being ACPI based PCI host controller.
> However, there is no standard way to describe its PEM-specific register
> ranges in ACPI tables. Thus we add thunder_pem_init() ACPI extension
> to obtain hardcoded addresses from static resource array.
> Although it is not pretty, it prevents from creating standard mechanism to
> handle similar cases in future.
> 
> Signed-off-by: Tomasz Nowicki <tn@semihalf.com>
> ---
>  drivers/pci/host/pci-thunder-pem.c | 61 ++++++++++++++++++++++++++++++--------
>  1 file changed, 48 insertions(+), 13 deletions(-)
> 
> diff --git a/drivers/pci/host/pci-thunder-pem.c b/drivers/pci/host/pci-thunder-pem.c
> index 6abaf80..b048761 100644
> --- a/drivers/pci/host/pci-thunder-pem.c
> +++ b/drivers/pci/host/pci-thunder-pem.c
> @@ -18,6 +18,7 @@
>  #include <linux/init.h>
>  #include <linux/of_address.h>
>  #include <linux/of_pci.h>
> +#include <linux/pci-acpi.h>
>  #include <linux/pci-ecam.h>
>  #include <linux/platform_device.h>
>  
> @@ -284,6 +285,40 @@ static int thunder_pem_config_write(struct pci_bus *bus, unsigned int devfn,
>  	return pci_generic_config_write(bus, devfn, where, size, val);
>  }
>  
> +#ifdef CONFIG_ACPI
> +static struct resource thunder_pem_reg_res[] = {
> +	[4] = DEFINE_RES_MEM(0x87e0c0000000UL, SZ_16M),
> +	[5] = DEFINE_RES_MEM(0x87e0c1000000UL, SZ_16M),
> +	[6] = DEFINE_RES_MEM(0x87e0c2000000UL, SZ_16M),
> +	[7] = DEFINE_RES_MEM(0x87e0c3000000UL, SZ_16M),
> +	[8] = DEFINE_RES_MEM(0x87e0c4000000UL, SZ_16M),
> +	[9] = DEFINE_RES_MEM(0x87e0c5000000UL, SZ_16M),
> +	[14] = DEFINE_RES_MEM(0x97e0c0000000UL, SZ_16M),
> +	[15] = DEFINE_RES_MEM(0x97e0c1000000UL, SZ_16M),
> +	[16] = DEFINE_RES_MEM(0x97e0c2000000UL, SZ_16M),
> +	[17] = DEFINE_RES_MEM(0x97e0c3000000UL, SZ_16M),
> +	[18] = DEFINE_RES_MEM(0x97e0c4000000UL, SZ_16M),
> +	[19] = DEFINE_RES_MEM(0x97e0c5000000UL, SZ_16M),

1) The "correct" way to discover the resources consumed by an ACPI
   device is to use the _CRS method.  I know there are some issues
   there for bridges (not the fault of ThunderX!) because there's not
   a good way to distinguish windows from resources consumed directly
   by the bridge.

   But we should either do this correctly, or include a comment about
   why we're doing it wrong, so we don't give the impression that this
   is the right way to do it.

   I seem to recall some discussion about why we're doing it this way,
   but I don't remember the details.  It'd be nice to include a
   summary here.

2) This is a little weird because here we define the resource size as
   16MB, in the OF case we get the resource size from OF, in either
   case we ioremap 64K of it, and then as far as I can tell, we only
   ever access PEM_CFG_WR and PEM_CFG_RD, at offsets 0x28 and 0x30
   into the space.

   If the hardware actually decodes the entire 16MB, we should ioremap
   the whole 16MB.  (Strictly speaking, drivers only need to ioremap
   the parts they're using, but in this case nobody claims the entire
   resource because of deficiencies in the ACPI and OF cores, so the
   driver should ioremap the entire thing to help prevent conflicts
   with other devices.)

   It'd be nice if we didn't have the 64KB magic number.  I think
   using devm_ioremap_resource() would be nice.

> +};
> +
> +static struct resource *thunder_pem_acpi_res(struct pci_config_window *cfg)
> +{
> +	struct acpi_device *adev = to_acpi_device(cfg->parent);
> +	struct acpi_pci_root *root = acpi_driver_data(adev);
> +
> +	if ((root->segment >= 4 && root->segment <= 9) ||
> +	    (root->segment >= 14 && root->segment <= 19))
> +		return &thunder_pem_reg_res[root->segment];
> +
> +	return NULL;
> +}
> +#else
> +static struct resource *thunder_pem_acpi_res(struct pci_config_window *cfg)
> +{
> +	return NULL;
> +}
> +#endif
> +
>  static int thunder_pem_init(struct pci_config_window *cfg)
>  {
>  	struct device *dev = cfg->parent;
> @@ -292,24 +327,24 @@ static int thunder_pem_init(struct pci_config_window *cfg)
>  	struct thunder_pem_pci *pem_pci;
>  	struct platform_device *pdev;
>  
> -	/* Only OF support for now */
> -	if (!dev->of_node)
> -		return -EINVAL;
> -
>  	pem_pci = devm_kzalloc(dev, sizeof(*pem_pci), GFP_KERNEL);
>  	if (!pem_pci)
>  		return -ENOMEM;
>  
> -	pdev = to_platform_device(dev);
> -
> -	/*
> -	 * The second register range is the PEM bridge to the PCIe
> -	 * bus.  It has a different config access method than those
> -	 * devices behind the bridge.
> -	 */
> -	res_pem = platform_get_resource(pdev, IORESOURCE_MEM, 1);
> +	if (acpi_disabled) {
> +		pdev = to_platform_device(dev);
> +
> +		/*
> +		 * The second register range is the PEM bridge to the PCIe
> +		 * bus.  It has a different config access method than those
> +		 * devices behind the bridge.
> +		 */
> +		res_pem = platform_get_resource(pdev, IORESOURCE_MEM, 1);
> +	} else {
> +		res_pem = thunder_pem_acpi_res(cfg);
> +	}
>  	if (!res_pem) {
> -		dev_err(dev, "missing \"reg[1]\"property\n");
> +		dev_err(dev, "missing configuration region\n");
>  		return -EINVAL;
>  	}
>  
> -- 
> 1.9.1
> 
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Tomasz Nowicki Sept. 20, 2016, 7:23 a.m. UTC | #2
On 19.09.2016 20:09, Bjorn Helgaas wrote:
> On Fri, Sep 09, 2016 at 09:24:05PM +0200, Tomasz Nowicki wrote:
>> thunder-pem driver stands for being ACPI based PCI host controller.
>> However, there is no standard way to describe its PEM-specific register
>> ranges in ACPI tables. Thus we add thunder_pem_init() ACPI extension
>> to obtain hardcoded addresses from static resource array.
>> Although it is not pretty, it prevents from creating standard mechanism to
>> handle similar cases in future.
>>
>> Signed-off-by: Tomasz Nowicki <tn@semihalf.com>
>> ---
>>  drivers/pci/host/pci-thunder-pem.c | 61 ++++++++++++++++++++++++++++++--------
>>  1 file changed, 48 insertions(+), 13 deletions(-)
>>
>> diff --git a/drivers/pci/host/pci-thunder-pem.c b/drivers/pci/host/pci-thunder-pem.c
>> index 6abaf80..b048761 100644
>> --- a/drivers/pci/host/pci-thunder-pem.c
>> +++ b/drivers/pci/host/pci-thunder-pem.c
>> @@ -18,6 +18,7 @@
>>  #include <linux/init.h>
>>  #include <linux/of_address.h>
>>  #include <linux/of_pci.h>
>> +#include <linux/pci-acpi.h>
>>  #include <linux/pci-ecam.h>
>>  #include <linux/platform_device.h>
>>
>> @@ -284,6 +285,40 @@ static int thunder_pem_config_write(struct pci_bus *bus, unsigned int devfn,
>>  	return pci_generic_config_write(bus, devfn, where, size, val);
>>  }
>>
>> +#ifdef CONFIG_ACPI
>> +static struct resource thunder_pem_reg_res[] = {
>> +	[4] = DEFINE_RES_MEM(0x87e0c0000000UL, SZ_16M),
>> +	[5] = DEFINE_RES_MEM(0x87e0c1000000UL, SZ_16M),
>> +	[6] = DEFINE_RES_MEM(0x87e0c2000000UL, SZ_16M),
>> +	[7] = DEFINE_RES_MEM(0x87e0c3000000UL, SZ_16M),
>> +	[8] = DEFINE_RES_MEM(0x87e0c4000000UL, SZ_16M),
>> +	[9] = DEFINE_RES_MEM(0x87e0c5000000UL, SZ_16M),
>> +	[14] = DEFINE_RES_MEM(0x97e0c0000000UL, SZ_16M),
>> +	[15] = DEFINE_RES_MEM(0x97e0c1000000UL, SZ_16M),
>> +	[16] = DEFINE_RES_MEM(0x97e0c2000000UL, SZ_16M),
>> +	[17] = DEFINE_RES_MEM(0x97e0c3000000UL, SZ_16M),
>> +	[18] = DEFINE_RES_MEM(0x97e0c4000000UL, SZ_16M),
>> +	[19] = DEFINE_RES_MEM(0x97e0c5000000UL, SZ_16M),
>
> 1) The "correct" way to discover the resources consumed by an ACPI
>    device is to use the _CRS method.  I know there are some issues
>    there for bridges (not the fault of ThunderX!) because there's not
>    a good way to distinguish windows from resources consumed directly
>    by the bridge.
>
>    But we should either do this correctly, or include a comment about
>    why we're doing it wrong, so we don't give the impression that this
>    is the right way to do it.
>
>    I seem to recall some discussion about why we're doing it this way,
>    but I don't remember the details.  It'd be nice to include a
>    summary here.

OK I will. The reason why we cannot use _CRS for this case is that 
CONSUMER flag was not use consistently for the bridge so far.

>
> 2) This is a little weird because here we define the resource size as
>    16MB, in the OF case we get the resource size from OF, in either
>    case we ioremap 64K of it, and then as far as I can tell, we only
>    ever access PEM_CFG_WR and PEM_CFG_RD, at offsets 0x28 and 0x30
>    into the space.
>
>    If the hardware actually decodes the entire 16MB, we should ioremap
>    the whole 16MB.  (Strictly speaking, drivers only need to ioremap
>    the parts they're using, but in this case nobody claims the entire
>    resource because of deficiencies in the ACPI and OF cores, so the
>    driver should ioremap the entire thing to help prevent conflicts
>    with other devices.)
>
>    It'd be nice if we didn't have the 64KB magic number.  I think
>    using devm_ioremap_resource() would be nice.

I agree.

David, is there anything which prevents us from using 
devm_ioremap_resource() here with SZ_16M size?

Tomasz
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Bjorn Helgaas Sept. 20, 2016, 1:33 p.m. UTC | #3
[+cc Rafael (maybe already cc'd; I didn't recognize rafael@kernel.org, Duc]

On Tue, Sep 20, 2016 at 09:23:21AM +0200, Tomasz Nowicki wrote:
> On 19.09.2016 20:09, Bjorn Helgaas wrote:
> >On Fri, Sep 09, 2016 at 09:24:05PM +0200, Tomasz Nowicki wrote:
> >>thunder-pem driver stands for being ACPI based PCI host controller.
> >>However, there is no standard way to describe its PEM-specific register
> >>ranges in ACPI tables. Thus we add thunder_pem_init() ACPI extension
> >>to obtain hardcoded addresses from static resource array.
> >>Although it is not pretty, it prevents from creating standard mechanism to
> >>handle similar cases in future.
> >>
> >>Signed-off-by: Tomasz Nowicki <tn@semihalf.com>
> >>---
> >> drivers/pci/host/pci-thunder-pem.c | 61 ++++++++++++++++++++++++++++++--------
> >> 1 file changed, 48 insertions(+), 13 deletions(-)
> >>
> >>diff --git a/drivers/pci/host/pci-thunder-pem.c b/drivers/pci/host/pci-thunder-pem.c
> >>index 6abaf80..b048761 100644
> >>--- a/drivers/pci/host/pci-thunder-pem.c
> >>+++ b/drivers/pci/host/pci-thunder-pem.c
> >>@@ -18,6 +18,7 @@
> >> #include <linux/init.h>
> >> #include <linux/of_address.h>
> >> #include <linux/of_pci.h>
> >>+#include <linux/pci-acpi.h>
> >> #include <linux/pci-ecam.h>
> >> #include <linux/platform_device.h>
> >>
> >>@@ -284,6 +285,40 @@ static int thunder_pem_config_write(struct pci_bus *bus, unsigned int devfn,
> >> 	return pci_generic_config_write(bus, devfn, where, size, val);
> >> }
> >>
> >>+#ifdef CONFIG_ACPI
> >>+static struct resource thunder_pem_reg_res[] = {
> >>+	[4] = DEFINE_RES_MEM(0x87e0c0000000UL, SZ_16M),
> >>+	[5] = DEFINE_RES_MEM(0x87e0c1000000UL, SZ_16M),
> >>+	[6] = DEFINE_RES_MEM(0x87e0c2000000UL, SZ_16M),
> >>+	[7] = DEFINE_RES_MEM(0x87e0c3000000UL, SZ_16M),
> >>+	[8] = DEFINE_RES_MEM(0x87e0c4000000UL, SZ_16M),
> >>+	[9] = DEFINE_RES_MEM(0x87e0c5000000UL, SZ_16M),
> >>+	[14] = DEFINE_RES_MEM(0x97e0c0000000UL, SZ_16M),
> >>+	[15] = DEFINE_RES_MEM(0x97e0c1000000UL, SZ_16M),
> >>+	[16] = DEFINE_RES_MEM(0x97e0c2000000UL, SZ_16M),
> >>+	[17] = DEFINE_RES_MEM(0x97e0c3000000UL, SZ_16M),
> >>+	[18] = DEFINE_RES_MEM(0x97e0c4000000UL, SZ_16M),
> >>+	[19] = DEFINE_RES_MEM(0x97e0c5000000UL, SZ_16M),
> >
> >1) The "correct" way to discover the resources consumed by an ACPI
> >   device is to use the _CRS method.  I know there are some issues
> >   there for bridges (not the fault of ThunderX!) because there's not
> >   a good way to distinguish windows from resources consumed directly
> >   by the bridge.
> >
> >   But we should either do this correctly, or include a comment about
> >   why we're doing it wrong, so we don't give the impression that this
> >   is the right way to do it.
> >
> >   I seem to recall some discussion about why we're doing it this way,
> >   but I don't remember the details.  It'd be nice to include a
> >   summary here.
> 
> OK I will. The reason why we cannot use _CRS for this case is that
> CONSUMER flag was not use consistently for the bridge so far.

Yes, I'm aware of that problem, but hard-coding resources into drivers
is just a disaster.  The PCI and ACPI cores need generic ways to learn
what resources are consumed by devices.  For PCI devices, that's done
with BARs.  For ACPI devices, it's done with _CRS.  Without generic
resource discovery, we can't manage resources reliably at the system
level [1].

You have a PNP0A03/PNP0A08 device for the PCI host bridge.  Because of
the BIOS bugs in CONSUMER flag usage, we assume everything in its _CRS
is a window and not consumed by the bridge itself.  What if you added
a companion ACPI device with a _CRS that contained the bridge
resources?  Then you'd have some driver ugliness to find that device,
but at least the ACPI core could tell what resources were in use.

Maybe Rafael has a better idea?

Bjorn

[1] I know the ACPI core currently doesn't actually *do* anything with
_CRS.  But I think it *should*, and someday it might, so I want to
preserve the principle of using _CRS to document all the resources.
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ard Biesheuvel Sept. 20, 2016, 1:40 p.m. UTC | #4
On 20 September 2016 at 14:33, Bjorn Helgaas <helgaas@kernel.org> wrote:
> [+cc Rafael (maybe already cc'd; I didn't recognize rafael@kernel.org, Duc]
>
> On Tue, Sep 20, 2016 at 09:23:21AM +0200, Tomasz Nowicki wrote:
>> On 19.09.2016 20:09, Bjorn Helgaas wrote:
>> >On Fri, Sep 09, 2016 at 09:24:05PM +0200, Tomasz Nowicki wrote:
>> >>thunder-pem driver stands for being ACPI based PCI host controller.
>> >>However, there is no standard way to describe its PEM-specific register
>> >>ranges in ACPI tables. Thus we add thunder_pem_init() ACPI extension
>> >>to obtain hardcoded addresses from static resource array.
>> >>Although it is not pretty, it prevents from creating standard mechanism to
>> >>handle similar cases in future.
>> >>
>> >>Signed-off-by: Tomasz Nowicki <tn@semihalf.com>
>> >>---
>> >> drivers/pci/host/pci-thunder-pem.c | 61 ++++++++++++++++++++++++++++++--------
>> >> 1 file changed, 48 insertions(+), 13 deletions(-)
>> >>
>> >>diff --git a/drivers/pci/host/pci-thunder-pem.c b/drivers/pci/host/pci-thunder-pem.c
>> >>index 6abaf80..b048761 100644
>> >>--- a/drivers/pci/host/pci-thunder-pem.c
>> >>+++ b/drivers/pci/host/pci-thunder-pem.c
>> >>@@ -18,6 +18,7 @@
>> >> #include <linux/init.h>
>> >> #include <linux/of_address.h>
>> >> #include <linux/of_pci.h>
>> >>+#include <linux/pci-acpi.h>
>> >> #include <linux/pci-ecam.h>
>> >> #include <linux/platform_device.h>
>> >>
>> >>@@ -284,6 +285,40 @@ static int thunder_pem_config_write(struct pci_bus *bus, unsigned int devfn,
>> >>    return pci_generic_config_write(bus, devfn, where, size, val);
>> >> }
>> >>
>> >>+#ifdef CONFIG_ACPI
>> >>+static struct resource thunder_pem_reg_res[] = {
>> >>+   [4] = DEFINE_RES_MEM(0x87e0c0000000UL, SZ_16M),
>> >>+   [5] = DEFINE_RES_MEM(0x87e0c1000000UL, SZ_16M),
>> >>+   [6] = DEFINE_RES_MEM(0x87e0c2000000UL, SZ_16M),
>> >>+   [7] = DEFINE_RES_MEM(0x87e0c3000000UL, SZ_16M),
>> >>+   [8] = DEFINE_RES_MEM(0x87e0c4000000UL, SZ_16M),
>> >>+   [9] = DEFINE_RES_MEM(0x87e0c5000000UL, SZ_16M),
>> >>+   [14] = DEFINE_RES_MEM(0x97e0c0000000UL, SZ_16M),
>> >>+   [15] = DEFINE_RES_MEM(0x97e0c1000000UL, SZ_16M),
>> >>+   [16] = DEFINE_RES_MEM(0x97e0c2000000UL, SZ_16M),
>> >>+   [17] = DEFINE_RES_MEM(0x97e0c3000000UL, SZ_16M),
>> >>+   [18] = DEFINE_RES_MEM(0x97e0c4000000UL, SZ_16M),
>> >>+   [19] = DEFINE_RES_MEM(0x97e0c5000000UL, SZ_16M),
>> >
>> >1) The "correct" way to discover the resources consumed by an ACPI
>> >   device is to use the _CRS method.  I know there are some issues
>> >   there for bridges (not the fault of ThunderX!) because there's not
>> >   a good way to distinguish windows from resources consumed directly
>> >   by the bridge.
>> >
>> >   But we should either do this correctly, or include a comment about
>> >   why we're doing it wrong, so we don't give the impression that this
>> >   is the right way to do it.
>> >
>> >   I seem to recall some discussion about why we're doing it this way,
>> >   but I don't remember the details.  It'd be nice to include a
>> >   summary here.
>>
>> OK I will. The reason why we cannot use _CRS for this case is that
>> CONSUMER flag was not use consistently for the bridge so far.
>
> Yes, I'm aware of that problem, but hard-coding resources into drivers
> is just a disaster.  The PCI and ACPI cores need generic ways to learn
> what resources are consumed by devices.  For PCI devices, that's done
> with BARs.  For ACPI devices, it's done with _CRS.  Without generic
> resource discovery, we can't manage resources reliably at the system
> level [1].
>
> You have a PNP0A03/PNP0A08 device for the PCI host bridge.  Because of
> the BIOS bugs in CONSUMER flag usage, we assume everything in its _CRS
> is a window and not consumed by the bridge itself.  What if you added
> a companion ACPI device with a _CRS that contained the bridge
> resources?  Then you'd have some driver ugliness to find that device,
> but at least the ACPI core could tell what resources were in use.
>
> Maybe Rafael has a better idea?
>

In the discussions leading up to this, we tried very hard to make this
arm64/acpi quirks mechanism just as flexible as we need it to be to
cover the current crop of incompatible hardware, but not more so.
Going forward, we intend to require all arm64/acpi hardware to be spec
compliant, and so any parametrization beyond what is required for the
currently known broken hardware is only going to make it easier for
others to ship with tweaked ACPI descriptions so that an existing
quirk is triggered for hardware that it was not intended for. It also
implies that we have to deal with the ACPI descriptions as they were
shipped with the current hardware.

That does not mean, of course, that we should use bare constants
rather than symbolic ones, but anything beyond that exceeds the
desired scope of quirks handling.
Bjorn Helgaas Sept. 20, 2016, 2:05 p.m. UTC | #5
Hi Ard,

On Tue, Sep 20, 2016 at 02:40:13PM +0100, Ard Biesheuvel wrote:
> On 20 September 2016 at 14:33, Bjorn Helgaas <helgaas@kernel.org> wrote:
> > [+cc Rafael (maybe already cc'd; I didn't recognize rafael@kernel.org, Duc]
> >
> > On Tue, Sep 20, 2016 at 09:23:21AM +0200, Tomasz Nowicki wrote:
> >> On 19.09.2016 20:09, Bjorn Helgaas wrote:
> >> >On Fri, Sep 09, 2016 at 09:24:05PM +0200, Tomasz Nowicki wrote:
> >> >>thunder-pem driver stands for being ACPI based PCI host controller.
> >> >>However, there is no standard way to describe its PEM-specific register
> >> >>ranges in ACPI tables. Thus we add thunder_pem_init() ACPI extension
> >> >>to obtain hardcoded addresses from static resource array.
> >> >>Although it is not pretty, it prevents from creating standard mechanism to
> >> >>handle similar cases in future.
> >> >>
> >> >>Signed-off-by: Tomasz Nowicki <tn@semihalf.com>
> >> >>---
> >> >> drivers/pci/host/pci-thunder-pem.c | 61 ++++++++++++++++++++++++++++++--------
> >> >> 1 file changed, 48 insertions(+), 13 deletions(-)
> >> >>
> >> >>diff --git a/drivers/pci/host/pci-thunder-pem.c b/drivers/pci/host/pci-thunder-pem.c
> >> >>index 6abaf80..b048761 100644
> >> >>--- a/drivers/pci/host/pci-thunder-pem.c
> >> >>+++ b/drivers/pci/host/pci-thunder-pem.c
> >> >>@@ -18,6 +18,7 @@
> >> >> #include <linux/init.h>
> >> >> #include <linux/of_address.h>
> >> >> #include <linux/of_pci.h>
> >> >>+#include <linux/pci-acpi.h>
> >> >> #include <linux/pci-ecam.h>
> >> >> #include <linux/platform_device.h>
> >> >>
> >> >>@@ -284,6 +285,40 @@ static int thunder_pem_config_write(struct pci_bus *bus, unsigned int devfn,
> >> >>    return pci_generic_config_write(bus, devfn, where, size, val);
> >> >> }
> >> >>
> >> >>+#ifdef CONFIG_ACPI
> >> >>+static struct resource thunder_pem_reg_res[] = {
> >> >>+   [4] = DEFINE_RES_MEM(0x87e0c0000000UL, SZ_16M),
> >> >>+   [5] = DEFINE_RES_MEM(0x87e0c1000000UL, SZ_16M),
> >> >>+   [6] = DEFINE_RES_MEM(0x87e0c2000000UL, SZ_16M),
> >> >>+   [7] = DEFINE_RES_MEM(0x87e0c3000000UL, SZ_16M),
> >> >>+   [8] = DEFINE_RES_MEM(0x87e0c4000000UL, SZ_16M),
> >> >>+   [9] = DEFINE_RES_MEM(0x87e0c5000000UL, SZ_16M),
> >> >>+   [14] = DEFINE_RES_MEM(0x97e0c0000000UL, SZ_16M),
> >> >>+   [15] = DEFINE_RES_MEM(0x97e0c1000000UL, SZ_16M),
> >> >>+   [16] = DEFINE_RES_MEM(0x97e0c2000000UL, SZ_16M),
> >> >>+   [17] = DEFINE_RES_MEM(0x97e0c3000000UL, SZ_16M),
> >> >>+   [18] = DEFINE_RES_MEM(0x97e0c4000000UL, SZ_16M),
> >> >>+   [19] = DEFINE_RES_MEM(0x97e0c5000000UL, SZ_16M),
> >> >
> >> >1) The "correct" way to discover the resources consumed by an ACPI
> >> >   device is to use the _CRS method.  I know there are some issues
> >> >   there for bridges (not the fault of ThunderX!) because there's not
> >> >   a good way to distinguish windows from resources consumed directly
> >> >   by the bridge.
> >> >
> >> >   But we should either do this correctly, or include a comment about
> >> >   why we're doing it wrong, so we don't give the impression that this
> >> >   is the right way to do it.
> >> >
> >> >   I seem to recall some discussion about why we're doing it this way,
> >> >   but I don't remember the details.  It'd be nice to include a
> >> >   summary here.
> >>
> >> OK I will. The reason why we cannot use _CRS for this case is that
> >> CONSUMER flag was not use consistently for the bridge so far.
> >
> > Yes, I'm aware of that problem, but hard-coding resources into drivers
> > is just a disaster.  The PCI and ACPI cores need generic ways to learn
> > what resources are consumed by devices.  For PCI devices, that's done
> > with BARs.  For ACPI devices, it's done with _CRS.  Without generic
> > resource discovery, we can't manage resources reliably at the system
> > level [1].
> >
> > You have a PNP0A03/PNP0A08 device for the PCI host bridge.  Because of
> > the BIOS bugs in CONSUMER flag usage, we assume everything in its _CRS
> > is a window and not consumed by the bridge itself.  What if you added
> > a companion ACPI device with a _CRS that contained the bridge
> > resources?  Then you'd have some driver ugliness to find that device,
> > but at least the ACPI core could tell what resources were in use.
> >
> > Maybe Rafael has a better idea?
> 
> In the discussions leading up to this, we tried very hard to make this
> arm64/acpi quirks mechanism just as flexible as we need it to be to
> cover the current crop of incompatible hardware, but not more so.
> Going forward, we intend to require all arm64/acpi hardware to be spec
> compliant, and so any parametrization beyond what is required for the
> currently known broken hardware is only going to make it easier for
> others to ship with tweaked ACPI descriptions so that an existing
> quirk is triggered for hardware that it was not intended for. It also
> implies that we have to deal with the ACPI descriptions as they were
> shipped with the current hardware.
> 
> That does not mean, of course, that we should use bare constants
> rather than symbolic ones, but anything beyond that exceeds the
> desired scope of quirks handling.

Symbolic vs bare constants is the least of my worries.  I'm pretty
happy with the current quirk implementation.  It's pretty simple and
straightforward.

Apparently you shipped broken firmware that doesn't accurately
describe system resource usage.  Presumably that firmware could be
updated, but maybe it's worthwhile to work around it in the kernel,
depending on where it got shipped.

I'd like to step back and come up with some understanding of how
non-broken firmware *should* deal with this issue.  Then, if we *do*
work around this particular broken firmware in the kernel, it would be
nice to do it in a way that fits in with that understanding.

For example, if a companion ACPI device is the preferred solution, an
ACPI quirk could fabricate a device with the required resources.  That
would address the problem closer to the source and make it more likely
that the rest of the system will work correctly: /proc/iomem could
make sense, things that look at _CRS generically would work (e.g,
/sys/, an admittedly hypothetical "lsacpi", etc.)

Hard-coding stuff in drivers is a point solution that doesn't provide
any guidance for future platforms and makes it likely that the hack
will get copied into even more drivers.

Bjorn
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ard Biesheuvel Sept. 20, 2016, 3:09 p.m. UTC | #6
On 20 September 2016 at 15:05, Bjorn Helgaas <helgaas@kernel.org> wrote:
> Hi Ard,
>
> On Tue, Sep 20, 2016 at 02:40:13PM +0100, Ard Biesheuvel wrote:
>> On 20 September 2016 at 14:33, Bjorn Helgaas <helgaas@kernel.org> wrote:
>> > [+cc Rafael (maybe already cc'd; I didn't recognize rafael@kernel.org, Duc]
>> >
>> > On Tue, Sep 20, 2016 at 09:23:21AM +0200, Tomasz Nowicki wrote:
>> >> On 19.09.2016 20:09, Bjorn Helgaas wrote:
>> >> >On Fri, Sep 09, 2016 at 09:24:05PM +0200, Tomasz Nowicki wrote:
>> >> >>thunder-pem driver stands for being ACPI based PCI host controller.
>> >> >>However, there is no standard way to describe its PEM-specific register
>> >> >>ranges in ACPI tables. Thus we add thunder_pem_init() ACPI extension
>> >> >>to obtain hardcoded addresses from static resource array.
>> >> >>Although it is not pretty, it prevents from creating standard mechanism to
>> >> >>handle similar cases in future.
>> >> >>
>> >> >>Signed-off-by: Tomasz Nowicki <tn@semihalf.com>
>> >> >>---
>> >> >> drivers/pci/host/pci-thunder-pem.c | 61 ++++++++++++++++++++++++++++++--------
>> >> >> 1 file changed, 48 insertions(+), 13 deletions(-)
>> >> >>
>> >> >>diff --git a/drivers/pci/host/pci-thunder-pem.c b/drivers/pci/host/pci-thunder-pem.c
>> >> >>index 6abaf80..b048761 100644
>> >> >>--- a/drivers/pci/host/pci-thunder-pem.c
>> >> >>+++ b/drivers/pci/host/pci-thunder-pem.c
>> >> >>@@ -18,6 +18,7 @@
>> >> >> #include <linux/init.h>
>> >> >> #include <linux/of_address.h>
>> >> >> #include <linux/of_pci.h>
>> >> >>+#include <linux/pci-acpi.h>
>> >> >> #include <linux/pci-ecam.h>
>> >> >> #include <linux/platform_device.h>
>> >> >>
>> >> >>@@ -284,6 +285,40 @@ static int thunder_pem_config_write(struct pci_bus *bus, unsigned int devfn,
>> >> >>    return pci_generic_config_write(bus, devfn, where, size, val);
>> >> >> }
>> >> >>
>> >> >>+#ifdef CONFIG_ACPI
>> >> >>+static struct resource thunder_pem_reg_res[] = {
>> >> >>+   [4] = DEFINE_RES_MEM(0x87e0c0000000UL, SZ_16M),
>> >> >>+   [5] = DEFINE_RES_MEM(0x87e0c1000000UL, SZ_16M),
>> >> >>+   [6] = DEFINE_RES_MEM(0x87e0c2000000UL, SZ_16M),
>> >> >>+   [7] = DEFINE_RES_MEM(0x87e0c3000000UL, SZ_16M),
>> >> >>+   [8] = DEFINE_RES_MEM(0x87e0c4000000UL, SZ_16M),
>> >> >>+   [9] = DEFINE_RES_MEM(0x87e0c5000000UL, SZ_16M),
>> >> >>+   [14] = DEFINE_RES_MEM(0x97e0c0000000UL, SZ_16M),
>> >> >>+   [15] = DEFINE_RES_MEM(0x97e0c1000000UL, SZ_16M),
>> >> >>+   [16] = DEFINE_RES_MEM(0x97e0c2000000UL, SZ_16M),
>> >> >>+   [17] = DEFINE_RES_MEM(0x97e0c3000000UL, SZ_16M),
>> >> >>+   [18] = DEFINE_RES_MEM(0x97e0c4000000UL, SZ_16M),
>> >> >>+   [19] = DEFINE_RES_MEM(0x97e0c5000000UL, SZ_16M),
>> >> >
>> >> >1) The "correct" way to discover the resources consumed by an ACPI
>> >> >   device is to use the _CRS method.  I know there are some issues
>> >> >   there for bridges (not the fault of ThunderX!) because there's not
>> >> >   a good way to distinguish windows from resources consumed directly
>> >> >   by the bridge.
>> >> >
>> >> >   But we should either do this correctly, or include a comment about
>> >> >   why we're doing it wrong, so we don't give the impression that this
>> >> >   is the right way to do it.
>> >> >
>> >> >   I seem to recall some discussion about why we're doing it this way,
>> >> >   but I don't remember the details.  It'd be nice to include a
>> >> >   summary here.
>> >>
>> >> OK I will. The reason why we cannot use _CRS for this case is that
>> >> CONSUMER flag was not use consistently for the bridge so far.
>> >
>> > Yes, I'm aware of that problem, but hard-coding resources into drivers
>> > is just a disaster.  The PCI and ACPI cores need generic ways to learn
>> > what resources are consumed by devices.  For PCI devices, that's done
>> > with BARs.  For ACPI devices, it's done with _CRS.  Without generic
>> > resource discovery, we can't manage resources reliably at the system
>> > level [1].
>> >
>> > You have a PNP0A03/PNP0A08 device for the PCI host bridge.  Because of
>> > the BIOS bugs in CONSUMER flag usage, we assume everything in its _CRS
>> > is a window and not consumed by the bridge itself.  What if you added
>> > a companion ACPI device with a _CRS that contained the bridge
>> > resources?  Then you'd have some driver ugliness to find that device,
>> > but at least the ACPI core could tell what resources were in use.
>> >
>> > Maybe Rafael has a better idea?
>>
>> In the discussions leading up to this, we tried very hard to make this
>> arm64/acpi quirks mechanism just as flexible as we need it to be to
>> cover the current crop of incompatible hardware, but not more so.
>> Going forward, we intend to require all arm64/acpi hardware to be spec
>> compliant, and so any parametrization beyond what is required for the
>> currently known broken hardware is only going to make it easier for
>> others to ship with tweaked ACPI descriptions so that an existing
>> quirk is triggered for hardware that it was not intended for. It also
>> implies that we have to deal with the ACPI descriptions as they were
>> shipped with the current hardware.
>>
>> That does not mean, of course, that we should use bare constants
>> rather than symbolic ones, but anything beyond that exceeds the
>> desired scope of quirks handling.
>
> Symbolic vs bare constants is the least of my worries.  I'm pretty
> happy with the current quirk implementation.  It's pretty simple and
> straightforward.
>

OK, good to know that we are on the right track here.

> Apparently you shipped broken firmware that doesn't accurately
> describe system resource usage.  Presumably that firmware could be
> updated, but maybe it's worthwhile to work around it in the kernel,
> depending on where it got shipped.
>

None of these platforms can be fixed entirely in software, and given
that we will not be adding quirks for new broken hardware, we should
ask ourselves whether having two versions of a quirk, i.e., one for
broken hardware + currently shipping firmware, and one for the same
broken hardware with fixed firmware is really an improvement over what
has been proposed here.

> I'd like to step back and come up with some understanding of how
> non-broken firmware *should* deal with this issue.  Then, if we *do*
> work around this particular broken firmware in the kernel, it would be
> nice to do it in a way that fits in with that understanding.
>
> For example, if a companion ACPI device is the preferred solution, an
> ACPI quirk could fabricate a device with the required resources.  That
> would address the problem closer to the source and make it more likely
> that the rest of the system will work correctly: /proc/iomem could
> make sense, things that look at _CRS generically would work (e.g,
> /sys/, an admittedly hypothetical "lsacpi", etc.)
>
> Hard-coding stuff in drivers is a point solution that doesn't provide
> any guidance for future platforms and makes it likely that the hack
> will get copied into even more drivers.
>

OK, I see. But the guidance for future platforms should be 'do not
rely on quirks', and what I am arguing here is that the more we polish
up this code and make it clean and reusable, the more likely it is
that will end up getting abused by new broken hardware that we set out
to reject entirely in the first place.

So of course, if the quirk involves claiming resources, let's make
sure that this occurs in the cleanest and most compliant way possible.
But any factoring/reuse concerns other than for the current crop of
broken hardware should be avoided imo.
Bjorn Helgaas Sept. 20, 2016, 7:17 p.m. UTC | #7
On Tue, Sep 20, 2016 at 04:09:25PM +0100, Ard Biesheuvel wrote:
> On 20 September 2016 at 15:05, Bjorn Helgaas <helgaas@kernel.org> wrote:
> > Hi Ard,
> >
> > On Tue, Sep 20, 2016 at 02:40:13PM +0100, Ard Biesheuvel wrote:
> >> On 20 September 2016 at 14:33, Bjorn Helgaas <helgaas@kernel.org> wrote:
> >> > [+cc Rafael (maybe already cc'd; I didn't recognize rafael@kernel.org, Duc]
> >> >
> >> > On Tue, Sep 20, 2016 at 09:23:21AM +0200, Tomasz Nowicki wrote:
> >> >> On 19.09.2016 20:09, Bjorn Helgaas wrote:
> >> >> >On Fri, Sep 09, 2016 at 09:24:05PM +0200, Tomasz Nowicki wrote:
> >> >> >>thunder-pem driver stands for being ACPI based PCI host controller.
> >> >> >>However, there is no standard way to describe its PEM-specific register
> >> >> >>ranges in ACPI tables. Thus we add thunder_pem_init() ACPI extension
> >> >> >>to obtain hardcoded addresses from static resource array.
> >> >> >>Although it is not pretty, it prevents from creating standard mechanism to
> >> >> >>handle similar cases in future.
> >> >> >>
> >> >> >>Signed-off-by: Tomasz Nowicki <tn@semihalf.com>
> >> >> >>---
> >> >> >> drivers/pci/host/pci-thunder-pem.c | 61 ++++++++++++++++++++++++++++++--------
> >> >> >> 1 file changed, 48 insertions(+), 13 deletions(-)
> >> >> >>
> >> >> >>diff --git a/drivers/pci/host/pci-thunder-pem.c b/drivers/pci/host/pci-thunder-pem.c
> >> >> >>index 6abaf80..b048761 100644
> >> >> >>--- a/drivers/pci/host/pci-thunder-pem.c
> >> >> >>+++ b/drivers/pci/host/pci-thunder-pem.c
> >> >> >>@@ -18,6 +18,7 @@
> >> >> >> #include <linux/init.h>
> >> >> >> #include <linux/of_address.h>
> >> >> >> #include <linux/of_pci.h>
> >> >> >>+#include <linux/pci-acpi.h>
> >> >> >> #include <linux/pci-ecam.h>
> >> >> >> #include <linux/platform_device.h>
> >> >> >>
> >> >> >>@@ -284,6 +285,40 @@ static int thunder_pem_config_write(struct pci_bus *bus, unsigned int devfn,
> >> >> >>    return pci_generic_config_write(bus, devfn, where, size, val);
> >> >> >> }
> >> >> >>
> >> >> >>+#ifdef CONFIG_ACPI
> >> >> >>+static struct resource thunder_pem_reg_res[] = {
> >> >> >>+   [4] = DEFINE_RES_MEM(0x87e0c0000000UL, SZ_16M),
> >> >> >>+   [5] = DEFINE_RES_MEM(0x87e0c1000000UL, SZ_16M),
> >> >> >>+   [6] = DEFINE_RES_MEM(0x87e0c2000000UL, SZ_16M),
> >> >> >>+   [7] = DEFINE_RES_MEM(0x87e0c3000000UL, SZ_16M),
> >> >> >>+   [8] = DEFINE_RES_MEM(0x87e0c4000000UL, SZ_16M),
> >> >> >>+   [9] = DEFINE_RES_MEM(0x87e0c5000000UL, SZ_16M),
> >> >> >>+   [14] = DEFINE_RES_MEM(0x97e0c0000000UL, SZ_16M),
> >> >> >>+   [15] = DEFINE_RES_MEM(0x97e0c1000000UL, SZ_16M),
> >> >> >>+   [16] = DEFINE_RES_MEM(0x97e0c2000000UL, SZ_16M),
> >> >> >>+   [17] = DEFINE_RES_MEM(0x97e0c3000000UL, SZ_16M),
> >> >> >>+   [18] = DEFINE_RES_MEM(0x97e0c4000000UL, SZ_16M),
> >> >> >>+   [19] = DEFINE_RES_MEM(0x97e0c5000000UL, SZ_16M),
> >> >> >
> >> >> >1) The "correct" way to discover the resources consumed by an ACPI
> >> >> >   device is to use the _CRS method.  I know there are some issues
> >> >> >   there for bridges (not the fault of ThunderX!) because there's not
> >> >> >   a good way to distinguish windows from resources consumed directly
> >> >> >   by the bridge.
> >> >> >
> >> >> >   But we should either do this correctly, or include a comment about
> >> >> >   why we're doing it wrong, so we don't give the impression that this
> >> >> >   is the right way to do it.
> >> >> >
> >> >> >   I seem to recall some discussion about why we're doing it this way,
> >> >> >   but I don't remember the details.  It'd be nice to include a
> >> >> >   summary here.
> >> >>
> >> >> OK I will. The reason why we cannot use _CRS for this case is that
> >> >> CONSUMER flag was not use consistently for the bridge so far.
> >> >
> >> > Yes, I'm aware of that problem, but hard-coding resources into drivers
> >> > is just a disaster.  The PCI and ACPI cores need generic ways to learn
> >> > what resources are consumed by devices.  For PCI devices, that's done
> >> > with BARs.  For ACPI devices, it's done with _CRS.  Without generic
> >> > resource discovery, we can't manage resources reliably at the system
> >> > level [1].
> >> >
> >> > You have a PNP0A03/PNP0A08 device for the PCI host bridge.  Because of
> >> > the BIOS bugs in CONSUMER flag usage, we assume everything in its _CRS
> >> > is a window and not consumed by the bridge itself.  What if you added
> >> > a companion ACPI device with a _CRS that contained the bridge
> >> > resources?  Then you'd have some driver ugliness to find that device,
> >> > but at least the ACPI core could tell what resources were in use.
> >> >
> >> > Maybe Rafael has a better idea?
> >>
> >> In the discussions leading up to this, we tried very hard to make this
> >> arm64/acpi quirks mechanism just as flexible as we need it to be to
> >> cover the current crop of incompatible hardware, but not more so.
> >> Going forward, we intend to require all arm64/acpi hardware to be spec
> >> compliant, and so any parametrization beyond what is required for the
> >> currently known broken hardware is only going to make it easier for
> >> others to ship with tweaked ACPI descriptions so that an existing
> >> quirk is triggered for hardware that it was not intended for. It also
> >> implies that we have to deal with the ACPI descriptions as they were
> >> shipped with the current hardware.
> >>
> >> That does not mean, of course, that we should use bare constants
> >> rather than symbolic ones, but anything beyond that exceeds the
> >> desired scope of quirks handling.
> >
> > Symbolic vs bare constants is the least of my worries.  I'm pretty
> > happy with the current quirk implementation.  It's pretty simple and
> > straightforward.
> >
> 
> OK, good to know that we are on the right track here.
> 
> > Apparently you shipped broken firmware that doesn't accurately
> > describe system resource usage.  Presumably that firmware could be
> > updated, but maybe it's worthwhile to work around it in the kernel,
> > depending on where it got shipped.
> >
> 
> None of these platforms can be fixed entirely in software, and given
> that we will not be adding quirks for new broken hardware, we should
> ask ourselves whether having two versions of a quirk, i.e., one for
> broken hardware + currently shipping firmware, and one for the same
> broken hardware with fixed firmware is really an improvement over what
> has been proposed here.

We're talking about two completely different types of quirks:

  1) MCFG quirks to use memory-mapped config space that doesn't quite
     conform to the ECAM model in the PCIe spec, and

  2) Some yet-to-be-determined method to describe address space
     consumed by a bridge.

The first two patches of this series are a nice implementation for 1).
The third patch (ThunderX-specific) is one possibility for 2), but I
don't like it because there's no way for generic software like the
ACPI core to discover these resources.

> > I'd like to step back and come up with some understanding of how
> > non-broken firmware *should* deal with this issue.  Then, if we *do*
> > work around this particular broken firmware in the kernel, it would be
> > nice to do it in a way that fits in with that understanding.
> >
> > For example, if a companion ACPI device is the preferred solution, an
> > ACPI quirk could fabricate a device with the required resources.  That
> > would address the problem closer to the source and make it more likely
> > that the rest of the system will work correctly: /proc/iomem could
> > make sense, things that look at _CRS generically would work (e.g,
> > /sys/, an admittedly hypothetical "lsacpi", etc.)
> >
> > Hard-coding stuff in drivers is a point solution that doesn't provide
> > any guidance for future platforms and makes it likely that the hack
> > will get copied into even more drivers.
> >
> 
> OK, I see. But the guidance for future platforms should be 'do not
> rely on quirks', and what I am arguing here is that the more we polish
> up this code and make it clean and reusable, the more likely it is
> that will end up getting abused by new broken hardware that we set out
> to reject entirely in the first place.
> 
> So of course, if the quirk involves claiming resources, let's make
> sure that this occurs in the cleanest and most compliant way possible.
> But any factoring/reuse concerns other than for the current crop of
> broken hardware should be avoided imo.

If future hardware is completely ECAM-compliant and we don't need any
more MCFG quirks, that would be great.

But we'll still need to describe that memory-mapped config space
somewhere.  If that's done with PNP0C02 or similar devices (as is done
on my x86 laptop), we'd be all set.

If we need to work around firmware in the field that doesn't do that,
one possibility is a PNP quirk along the lines of
quirk_amd_mmconfig_area().

Bjorn
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Lorenzo Pieralisi Sept. 21, 2016, 2:05 p.m. UTC | #8
On Tue, Sep 20, 2016 at 02:17:44PM -0500, Bjorn Helgaas wrote:
> On Tue, Sep 20, 2016 at 04:09:25PM +0100, Ard Biesheuvel wrote:

[...]

> > None of these platforms can be fixed entirely in software, and given
> > that we will not be adding quirks for new broken hardware, we should
> > ask ourselves whether having two versions of a quirk, i.e., one for
> > broken hardware + currently shipping firmware, and one for the same
> > broken hardware with fixed firmware is really an improvement over what
> > has been proposed here.
> 
> We're talking about two completely different types of quirks:
> 
>   1) MCFG quirks to use memory-mapped config space that doesn't quite
>      conform to the ECAM model in the PCIe spec, and
> 
>   2) Some yet-to-be-determined method to describe address space
>      consumed by a bridge.
> 
> The first two patches of this series are a nice implementation for 1).
> The third patch (ThunderX-specific) is one possibility for 2), but I
> don't like it because there's no way for generic software like the
> ACPI core to discover these resources.

Ok, so basically this means that to implement (2) we need to assign
some sort of _HID to these quirky PCI bridges (so that we know what
device they represent and we can retrieve their _CRS). I take from
this discussion that the goal is to make sure that all non-config
resources have to be declared through _CRS device objects, which is
fine but that requires a FW update (unless we can fabricate ACPI
devices and corresponding _CRS in the kernel whenever we match a
given MCFG table signature).

We discussed this already and I think we should make a decision:

http://lists.infradead.org/pipermail/linux-arm-kernel/2016-March/414722.html

> > > I'd like to step back and come up with some understanding of how
> > > non-broken firmware *should* deal with this issue.  Then, if we *do*
> > > work around this particular broken firmware in the kernel, it would be
> > > nice to do it in a way that fits in with that understanding.
> > >
> > > For example, if a companion ACPI device is the preferred solution, an
> > > ACPI quirk could fabricate a device with the required resources.  That
> > > would address the problem closer to the source and make it more likely
> > > that the rest of the system will work correctly: /proc/iomem could
> > > make sense, things that look at _CRS generically would work (e.g,
> > > /sys/, an admittedly hypothetical "lsacpi", etc.)
> > >
> > > Hard-coding stuff in drivers is a point solution that doesn't provide
> > > any guidance for future platforms and makes it likely that the hack
> > > will get copied into even more drivers.
> > >
> > 
> > OK, I see. But the guidance for future platforms should be 'do not
> > rely on quirks', and what I am arguing here is that the more we polish
> > up this code and make it clean and reusable, the more likely it is
> > that will end up getting abused by new broken hardware that we set out
> > to reject entirely in the first place.
> > 
> > So of course, if the quirk involves claiming resources, let's make
> > sure that this occurs in the cleanest and most compliant way possible.
> > But any factoring/reuse concerns other than for the current crop of
> > broken hardware should be avoided imo.
> 
> If future hardware is completely ECAM-compliant and we don't need any
> more MCFG quirks, that would be great.

Yes.

> But we'll still need to describe that memory-mapped config space
> somewhere.  If that's done with PNP0C02 or similar devices (as is done
> on my x86 laptop), we'd be all set.

I am not sure I understand what you mean here. Are you referring
to MCFG regions reported as PNP0c02 resources through its _CRS ?

IIUC PNP0C02 is a reservation mechanism, but it does not help us
associate its _CRS to a specific PCI host bridge instance, right ?

> If we need to work around firmware in the field that doesn't do that,
> one possibility is a PNP quirk along the lines of
> quirk_amd_mmconfig_area().

You mean matching PNP0C01/PNP0c02 and create a resource (that has to
hardcoded in a static array in the kernel anyway, there is no way to
retrieve it otherwise) in the corresponding PNP quirk handler ?

And it is not a given we can match against PNP0c01/PNP0c02.

So it looks like the only solution is allocating an _HID for each
host bridge that is not ECAM compliant to add resources to its _CRS
(unless the MCFG quirk does not need any additional data/resource,
eg "use different set of PCI accessorsi 32-bit vs byte-access").

For FW that is immutable I really do not see what we can do apart
from hardcoding the non-config resources (consumed by a bridge),
somehow.

Thanks,
Lorenzo
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Gabriele Paoloni Sept. 21, 2016, 2:10 p.m. UTC | #9
Hi Bjorn

[...]


> 
> If future hardware is completely ECAM-compliant and we don't need any
> more MCFG quirks, that would be great.
> 
> But we'll still need to describe that memory-mapped config space
> somewhere.  If that's done with PNP0C02 or similar devices (as is done
> on my x86 laptop), we'd be all set.
> 
> If we need to work around firmware in the field that doesn't do that,
> one possibility is a PNP quirk along the lines of
> quirk_amd_mmconfig_area().

So, if my understanding is correct, for platforms that have not been
shipped yet you propose to use PNP0C02 in the ACPI table in order to
declare a motherboard reserved resource whereas for shipped platforms
you propose to have a quirk along pnp_fixups in order to track the
resource usage even if values are hardcoded...correct?

Before Tomasz came up with this patchset we had a call between the vendors
involved in this PCI quirks saga and other guys from Linaro and ARM.

Lorenzo summarized the outcome as in the following link
http://lkml.iu.edu/hypermail/linux/kernel/1606.2/03344.html

Since this quirks mechanism has been discussed for quite a long time now
IMHO it would be good to have a last call including also you (Bjorn) so
that we can all agree on what to do and we avoid changing our drivers again
and again...

What do you think?

Thanks

Gab

> 
> Bjorn
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Bjorn Helgaas Sept. 21, 2016, 6:04 p.m. UTC | #10
On Wed, Sep 21, 2016 at 03:05:49PM +0100, Lorenzo Pieralisi wrote:
> On Tue, Sep 20, 2016 at 02:17:44PM -0500, Bjorn Helgaas wrote:
> > On Tue, Sep 20, 2016 at 04:09:25PM +0100, Ard Biesheuvel wrote:
> 
> [...]
> 
> > > None of these platforms can be fixed entirely in software, and given
> > > that we will not be adding quirks for new broken hardware, we should
> > > ask ourselves whether having two versions of a quirk, i.e., one for
> > > broken hardware + currently shipping firmware, and one for the same
> > > broken hardware with fixed firmware is really an improvement over what
> > > has been proposed here.
> > 
> > We're talking about two completely different types of quirks:
> > 
> >   1) MCFG quirks to use memory-mapped config space that doesn't quite
> >      conform to the ECAM model in the PCIe spec, and
> > 
> >   2) Some yet-to-be-determined method to describe address space
> >      consumed by a bridge.
> > 
> > The first two patches of this series are a nice implementation for 1).
> > The third patch (ThunderX-specific) is one possibility for 2), but I
> > don't like it because there's no way for generic software like the
> > ACPI core to discover these resources.
> 
> Ok, so basically this means that to implement (2) we need to assign
> some sort of _HID to these quirky PCI bridges (so that we know what
> device they represent and we can retrieve their _CRS). I take from
> this discussion that the goal is to make sure that all non-config
> resources have to be declared through _CRS device objects, which is
> fine but that requires a FW update (unless we can fabricate ACPI
> devices and corresponding _CRS in the kernel whenever we match a
> given MCFG table signature).

All resources consumed by ACPI devices should be declared through
_CRS.  If you want to fabricate ACPI devices or _CRS via kernel
quirks, that's fine with me.  This could be triggered via MCFG
signature, DMI info, host bridge _HID, etc.

> We discussed this already and I think we should make a decision:
> 
> http://lists.infradead.org/pipermail/linux-arm-kernel/2016-March/414722.html
> 
> > > > I'd like to step back and come up with some understanding of how
> > > > non-broken firmware *should* deal with this issue.  Then, if we *do*
> > > > work around this particular broken firmware in the kernel, it would be
> > > > nice to do it in a way that fits in with that understanding.
> > > >
> > > > For example, if a companion ACPI device is the preferred solution, an
> > > > ACPI quirk could fabricate a device with the required resources.  That
> > > > would address the problem closer to the source and make it more likely
> > > > that the rest of the system will work correctly: /proc/iomem could
> > > > make sense, things that look at _CRS generically would work (e.g,
> > > > /sys/, an admittedly hypothetical "lsacpi", etc.)
> > > >
> > > > Hard-coding stuff in drivers is a point solution that doesn't provide
> > > > any guidance for future platforms and makes it likely that the hack
> > > > will get copied into even more drivers.
> > > >
> > > 
> > > OK, I see. But the guidance for future platforms should be 'do not
> > > rely on quirks', and what I am arguing here is that the more we polish
> > > up this code and make it clean and reusable, the more likely it is
> > > that will end up getting abused by new broken hardware that we set out
> > > to reject entirely in the first place.
> > > 
> > > So of course, if the quirk involves claiming resources, let's make
> > > sure that this occurs in the cleanest and most compliant way possible.
> > > But any factoring/reuse concerns other than for the current crop of
> > > broken hardware should be avoided imo.
> > 
> > If future hardware is completely ECAM-compliant and we don't need any
> > more MCFG quirks, that would be great.
> 
> Yes.
> 
> > But we'll still need to describe that memory-mapped config space
> > somewhere.  If that's done with PNP0C02 or similar devices (as is done
> > on my x86 laptop), we'd be all set.
> 
> I am not sure I understand what you mean here. Are you referring
> to MCFG regions reported as PNP0c02 resources through its _CRS ?

Yes.  PCI Firmware Spec r3.0, Table 4-2, note 2 says address ranges
reported via MCFG or _CBA should be reserved by _CRS of a PNP0C02
device.

> IIUC PNP0C02 is a reservation mechanism, but it does not help us
> associate its _CRS to a specific PCI host bridge instance, right ?

Gab proposed a hierarchy that *would* associate a PNP0C02 device with
a PCI bridge:

  Device (PCI1) {
    Name (_HID, "HISI0080") // PCI Express Root Bridge
    Name (_CID, "PNP0A03") // Compatible PCI Root Bridge
    Method (_CRS, 0, Serialized) { // Root complex resources (windows) }
    Device (RES0) {
      Name (_HID, "HISI0081") // HiSi PCIe RC config base address
      Name (_CID, "PNP0C02")  // Motherboard reserved resource
      Name (_CRS, ResourceTemplate () { ... }
    }
  }

That's a possibility.  The PCI Firmware Spec suggests putting RES0 at
the root (under \_SB), but I don't know why.

Putting it at the root means we couldn't generically associate it with
a bridge, although I could imagine something like this:

  Device (RES1) {
    Name (_HID, "HISI0081") // HiSi PCIe RC config base address
    Name (_CID, "PNP0C02")  // Motherboard reserved resource
    Name (_CRS, ResourceTemplate () { ... }
    Method (BRDG) { "PCI1" }  // hand-wavy ASL
  }
  Device (PCI1) {
    Name (_HID, "HISI0080") // PCI Express Root Bridge
    Name (_CID, "PNP0A03") // Compatible PCI Root Bridge
    Method (_CRS, 0, Serialized) { // Root complex resources (windows) }
  }

Where you could search PNP0C02 devices for a cookie that matched the
host bridge.

> > If we need to work around firmware in the field that doesn't do that,
> > one possibility is a PNP quirk along the lines of
> > quirk_amd_mmconfig_area().
> 
> You mean matching PNP0C01/PNP0c02 and create a resource (that has to
> hardcoded in a static array in the kernel anyway, there is no way to
> retrieve it otherwise) in the corresponding PNP quirk handler ?

Right.  On some hardware we can read the resource out of a
device-specific register, as we do in quirk_intel_mch().  But if
that's not possible, it would have to be hard-coded.

> And it is not a given we can match against PNP0c01/PNP0c02.
> 
> So it looks like the only solution is allocating an _HID for each
> host bridge that is not ECAM compliant to add resources to its _CRS
> (unless the MCFG quirk does not need any additional data/resource,
> eg "use different set of PCI accessorsi 32-bit vs byte-access").

It doesn't matter whether it's ECAM-compliant or not.  Any
memory-mapped config space should be reported via some device's _CRS.

The existing x86 practice is to use PNP0C02 devices for this purpose,
and I think we should just follow that practice.

> For FW that is immutable I really do not see what we can do apart
> from hardcoding the non-config resources (consumed by a bridge),
> somehow.

Right.  Well, I assume you mean we should hard-code "non-window
resources consumed directly by a bridge".  If firmware in the field is
broken, we should work around it, and that may mean hard-coding some
resources.

My point is that the hard-coding should not be buried in a driver
where it's invisible to the rest of the kernel.  If we hard-code it in
a quirk that adds _CRS entries, then the kernel will work just like it
would if the firmware had been correct in the first place.  The
resource will appear in /sys/devices/pnp*/*/resources and /proc/iomem,
and if we ever used _SRS to assign or move ACPI devices, we would know
to avoid the bridge resource.

Bjorn
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Duc Dang Sept. 21, 2016, 6:58 p.m. UTC | #11
On Wed, Sep 21, 2016 at 11:04 AM, Bjorn Helgaas <helgaas@kernel.org> wrote:
> On Wed, Sep 21, 2016 at 03:05:49PM +0100, Lorenzo Pieralisi wrote:
>> On Tue, Sep 20, 2016 at 02:17:44PM -0500, Bjorn Helgaas wrote:
>> > On Tue, Sep 20, 2016 at 04:09:25PM +0100, Ard Biesheuvel wrote:
>>
>> [...]
>>
>> > > None of these platforms can be fixed entirely in software, and given
>> > > that we will not be adding quirks for new broken hardware, we should
>> > > ask ourselves whether having two versions of a quirk, i.e., one for
>> > > broken hardware + currently shipping firmware, and one for the same
>> > > broken hardware with fixed firmware is really an improvement over what
>> > > has been proposed here.
>> >
>> > We're talking about two completely different types of quirks:
>> >
>> >   1) MCFG quirks to use memory-mapped config space that doesn't quite
>> >      conform to the ECAM model in the PCIe spec, and
>> >
>> >   2) Some yet-to-be-determined method to describe address space
>> >      consumed by a bridge.
>> >
>> > The first two patches of this series are a nice implementation for 1).
>> > The third patch (ThunderX-specific) is one possibility for 2), but I
>> > don't like it because there's no way for generic software like the
>> > ACPI core to discover these resources.
>>
>> Ok, so basically this means that to implement (2) we need to assign
>> some sort of _HID to these quirky PCI bridges (so that we know what
>> device they represent and we can retrieve their _CRS). I take from
>> this discussion that the goal is to make sure that all non-config
>> resources have to be declared through _CRS device objects, which is
>> fine but that requires a FW update (unless we can fabricate ACPI
>> devices and corresponding _CRS in the kernel whenever we match a
>> given MCFG table signature).
>
> All resources consumed by ACPI devices should be declared through
> _CRS.  If you want to fabricate ACPI devices or _CRS via kernel
> quirks, that's fine with me.  This could be triggered via MCFG
> signature, DMI info, host bridge _HID, etc.
>
>> We discussed this already and I think we should make a decision:
>>
>> http://lists.infradead.org/pipermail/linux-arm-kernel/2016-March/414722.html
>>
>> > > > I'd like to step back and come up with some understanding of how
>> > > > non-broken firmware *should* deal with this issue.  Then, if we *do*
>> > > > work around this particular broken firmware in the kernel, it would be
>> > > > nice to do it in a way that fits in with that understanding.
>> > > >
>> > > > For example, if a companion ACPI device is the preferred solution, an
>> > > > ACPI quirk could fabricate a device with the required resources.  That
>> > > > would address the problem closer to the source and make it more likely
>> > > > that the rest of the system will work correctly: /proc/iomem could
>> > > > make sense, things that look at _CRS generically would work (e.g,
>> > > > /sys/, an admittedly hypothetical "lsacpi", etc.)
>> > > >
>> > > > Hard-coding stuff in drivers is a point solution that doesn't provide
>> > > > any guidance for future platforms and makes it likely that the hack
>> > > > will get copied into even more drivers.
>> > > >
>> > >
>> > > OK, I see. But the guidance for future platforms should be 'do not
>> > > rely on quirks', and what I am arguing here is that the more we polish
>> > > up this code and make it clean and reusable, the more likely it is
>> > > that will end up getting abused by new broken hardware that we set out
>> > > to reject entirely in the first place.
>> > >
>> > > So of course, if the quirk involves claiming resources, let's make
>> > > sure that this occurs in the cleanest and most compliant way possible.
>> > > But any factoring/reuse concerns other than for the current crop of
>> > > broken hardware should be avoided imo.
>> >
>> > If future hardware is completely ECAM-compliant and we don't need any
>> > more MCFG quirks, that would be great.
>>
>> Yes.
>>
>> > But we'll still need to describe that memory-mapped config space
>> > somewhere.  If that's done with PNP0C02 or similar devices (as is done
>> > on my x86 laptop), we'd be all set.
>>
>> I am not sure I understand what you mean here. Are you referring
>> to MCFG regions reported as PNP0c02 resources through its _CRS ?
>
> Yes.  PCI Firmware Spec r3.0, Table 4-2, note 2 says address ranges
> reported via MCFG or _CBA should be reserved by _CRS of a PNP0C02
> device.
>
>> IIUC PNP0C02 is a reservation mechanism, but it does not help us
>> associate its _CRS to a specific PCI host bridge instance, right ?
>
> Gab proposed a hierarchy that *would* associate a PNP0C02 device with
> a PCI bridge:
>
>   Device (PCI1) {
>     Name (_HID, "HISI0080") // PCI Express Root Bridge
>     Name (_CID, "PNP0A03") // Compatible PCI Root Bridge
>     Method (_CRS, 0, Serialized) { // Root complex resources (windows) }
>     Device (RES0) {
>       Name (_HID, "HISI0081") // HiSi PCIe RC config base address
>       Name (_CID, "PNP0C02")  // Motherboard reserved resource
>       Name (_CRS, ResourceTemplate () { ... }
>     }
>   }
>
> That's a possibility.  The PCI Firmware Spec suggests putting RES0 at
> the root (under \_SB), but I don't know why.
>
> Putting it at the root means we couldn't generically associate it with
> a bridge, although I could imagine something like this:
>
>   Device (RES1) {
>     Name (_HID, "HISI0081") // HiSi PCIe RC config base address
>     Name (_CID, "PNP0C02")  // Motherboard reserved resource
>     Name (_CRS, ResourceTemplate () { ... }
>     Method (BRDG) { "PCI1" }  // hand-wavy ASL
>   }
>   Device (PCI1) {
>     Name (_HID, "HISI0080") // PCI Express Root Bridge
>     Name (_CID, "PNP0A03") // Compatible PCI Root Bridge
>     Method (_CRS, 0, Serialized) { // Root complex resources (windows) }
>   }
>
> Where you could search PNP0C02 devices for a cookie that matched the
> host bridge.
>
>> > If we need to work around firmware in the field that doesn't do that,
>> > one possibility is a PNP quirk along the lines of
>> > quirk_amd_mmconfig_area().
>>
>> You mean matching PNP0C01/PNP0c02 and create a resource (that has to
>> hardcoded in a static array in the kernel anyway, there is no way to
>> retrieve it otherwise) in the corresponding PNP quirk handler ?
>
> Right.  On some hardware we can read the resource out of a
> device-specific register, as we do in quirk_intel_mch().  But if
> that's not possible, it would have to be hard-coded.
>
>> And it is not a given we can match against PNP0c01/PNP0c02.
>>
>> So it looks like the only solution is allocating an _HID for each
>> host bridge that is not ECAM compliant to add resources to its _CRS
>> (unless the MCFG quirk does not need any additional data/resource,
>> eg "use different set of PCI accessorsi 32-bit vs byte-access").
>
> It doesn't matter whether it's ECAM-compliant or not.  Any
> memory-mapped config space should be reported via some device's _CRS.
>
> The existing x86 practice is to use PNP0C02 devices for this purpose,
> and I think we should just follow that practice.
>
>> For FW that is immutable I really do not see what we can do apart
>> from hardcoding the non-config resources (consumed by a bridge),
>> somehow.
>
> Right.  Well, I assume you mean we should hard-code "non-window
> resources consumed directly by a bridge".  If firmware in the field is
> broken, we should work around it, and that may mean hard-coding some
> resources.
>
> My point is that the hard-coding should not be buried in a driver
> where it's invisible to the rest of the kernel.  If we hard-code it in
> a quirk that adds _CRS entries, then the kernel will work just like it
> would if the firmware had been correct in the first place.  The
> resource will appear in /sys/devices/pnp*/*/resources and /proc/iomem,
> and if we ever used _SRS to assign or move ACPI devices, we would know
> to avoid the bridge resource.

Hi Bjorn,

Are you suggesting to add code similar to functions in
linux/drivers/pnp/quirks.c to declare/attach the additional resource
that the host need to have when the resource is not in MCFG table?

>
> Bjorn
Regards,
Duc Dang.
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Bjorn Helgaas Sept. 21, 2016, 6:59 p.m. UTC | #12
On Wed, Sep 21, 2016 at 02:10:55PM +0000, Gabriele Paoloni wrote:
> Hi Bjorn
> 
> [...]
> 
> 
> > 
> > If future hardware is completely ECAM-compliant and we don't need any
> > more MCFG quirks, that would be great.
> > 
> > But we'll still need to describe that memory-mapped config space
> > somewhere.  If that's done with PNP0C02 or similar devices (as is done
> > on my x86 laptop), we'd be all set.
> > 
> > If we need to work around firmware in the field that doesn't do that,
> > one possibility is a PNP quirk along the lines of
> > quirk_amd_mmconfig_area().
> 
> So, if my understanding is correct, for platforms that have not been
> shipped yet you propose to use PNP0C02 in the ACPI table in order to
> declare a motherboard reserved resource whereas for shipped platforms
> you propose to have a quirk along pnp_fixups in order to track the
> resource usage even if values are hardcoded...correct?

Yes.  I'm open to alternate proposals, but x86 uses PNP0C02, and
following existing practice seems reasonable.

> Before Tomasz came up with this patchset we had a call between the vendors
> involved in this PCI quirks saga and other guys from Linaro and ARM.
> 
> Lorenzo summarized the outcome as in the following link
> http://lkml.iu.edu/hypermail/linux/kernel/1606.2/03344.html
> 
> Since this quirks mechanism has been discussed for quite a long time now
> IMHO it would be good to have a last call including also you (Bjorn) so
> that we can all agree on what to do and we avoid changing our drivers again
> and again...

I think we're converging pretty fast.  As far as I'm concerned, the
v6 ECAM quirks implementation is perfect.  The only remaining issue is
reporting the ECAM resources, and I haven't seen objections to using
PNP0C02 + PNP quirks for broken firmware.

There is the question of how or whether to associate a PNP0A03 PCI
bridge with resources from a different PNP0C02 device, but that's not
super important.  If the hard-coded resources appear both in a quirk
and in the PCI bridge driver, it's ugly but not the end of the world.
We've still achieved the objective of avoiding landmines in the
address space.

Bjorn
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Bjorn Helgaas Sept. 21, 2016, 7:18 p.m. UTC | #13
On Wed, Sep 21, 2016 at 11:58:22AM -0700, Duc Dang wrote:
> On Wed, Sep 21, 2016 at 11:04 AM, Bjorn Helgaas <helgaas@kernel.org> wrote:
> > On Wed, Sep 21, 2016 at 03:05:49PM +0100, Lorenzo Pieralisi wrote:

> > The existing x86 practice is to use PNP0C02 devices for this purpose,
> > and I think we should just follow that practice.
> >
> > ...
> >
> > My point is that the hard-coding should not be buried in a driver
> > where it's invisible to the rest of the kernel.  If we hard-code it in
> > a quirk that adds _CRS entries, then the kernel will work just like it
> > would if the firmware had been correct in the first place.  The
> > resource will appear in /sys/devices/pnp*/*/resources and /proc/iomem,
> > and if we ever used _SRS to assign or move ACPI devices, we would know
> > to avoid the bridge resource.
> 
> Are you suggesting to add code similar to functions in
> linux/drivers/pnp/quirks.c to declare/attach the additional resource
> that the host need to have when the resource is not in MCFG table?

Yes, but what I'm suggesting is actually a little stronger.  This has
nothing to do with whether a resource is in the MCFG table or not.

I'm suggesting ACPI firmware should always describe the resource.  If the
firmware is defective and doesn't describe it, we should add a quirk in
pnp/quirks.c to add a resource for it.

Bjorn
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Lorenzo Pieralisi Sept. 22, 2016, 9:49 a.m. UTC | #14
On Wed, Sep 21, 2016 at 01:04:57PM -0500, Bjorn Helgaas wrote:
> On Wed, Sep 21, 2016 at 03:05:49PM +0100, Lorenzo Pieralisi wrote:
> > On Tue, Sep 20, 2016 at 02:17:44PM -0500, Bjorn Helgaas wrote:
> > > On Tue, Sep 20, 2016 at 04:09:25PM +0100, Ard Biesheuvel wrote:
> > 
> > [...]
> > 
> > > > None of these platforms can be fixed entirely in software, and given
> > > > that we will not be adding quirks for new broken hardware, we should
> > > > ask ourselves whether having two versions of a quirk, i.e., one for
> > > > broken hardware + currently shipping firmware, and one for the same
> > > > broken hardware with fixed firmware is really an improvement over what
> > > > has been proposed here.
> > > 
> > > We're talking about two completely different types of quirks:
> > > 
> > >   1) MCFG quirks to use memory-mapped config space that doesn't quite
> > >      conform to the ECAM model in the PCIe spec, and
> > > 
> > >   2) Some yet-to-be-determined method to describe address space
> > >      consumed by a bridge.
> > > 
> > > The first two patches of this series are a nice implementation for 1).
> > > The third patch (ThunderX-specific) is one possibility for 2), but I
> > > don't like it because there's no way for generic software like the
> > > ACPI core to discover these resources.
> > 
> > Ok, so basically this means that to implement (2) we need to assign
> > some sort of _HID to these quirky PCI bridges (so that we know what
> > device they represent and we can retrieve their _CRS). I take from
> > this discussion that the goal is to make sure that all non-config
> > resources have to be declared through _CRS device objects, which is
> > fine but that requires a FW update (unless we can fabricate ACPI
> > devices and corresponding _CRS in the kernel whenever we match a
> > given MCFG table signature).
> 
> All resources consumed by ACPI devices should be declared through
> _CRS.  If you want to fabricate ACPI devices or _CRS via kernel
> quirks, that's fine with me.  This could be triggered via MCFG
> signature, DMI info, host bridge _HID, etc.

I think the PNP quirk approach + PNP0c02 resource put forward by Gab
is enough.

> > We discussed this already and I think we should make a decision:
> > 
> > http://lists.infradead.org/pipermail/linux-arm-kernel/2016-March/414722.html
> > 
> > > > > I'd like to step back and come up with some understanding of how
> > > > > non-broken firmware *should* deal with this issue.  Then, if we *do*
> > > > > work around this particular broken firmware in the kernel, it would be
> > > > > nice to do it in a way that fits in with that understanding.
> > > > >
> > > > > For example, if a companion ACPI device is the preferred solution, an
> > > > > ACPI quirk could fabricate a device with the required resources.  That
> > > > > would address the problem closer to the source and make it more likely
> > > > > that the rest of the system will work correctly: /proc/iomem could
> > > > > make sense, things that look at _CRS generically would work (e.g,
> > > > > /sys/, an admittedly hypothetical "lsacpi", etc.)
> > > > >
> > > > > Hard-coding stuff in drivers is a point solution that doesn't provide
> > > > > any guidance for future platforms and makes it likely that the hack
> > > > > will get copied into even more drivers.
> > > > >
> > > > 
> > > > OK, I see. But the guidance for future platforms should be 'do not
> > > > rely on quirks', and what I am arguing here is that the more we polish
> > > > up this code and make it clean and reusable, the more likely it is
> > > > that will end up getting abused by new broken hardware that we set out
> > > > to reject entirely in the first place.
> > > > 
> > > > So of course, if the quirk involves claiming resources, let's make
> > > > sure that this occurs in the cleanest and most compliant way possible.
> > > > But any factoring/reuse concerns other than for the current crop of
> > > > broken hardware should be avoided imo.
> > > 
> > > If future hardware is completely ECAM-compliant and we don't need any
> > > more MCFG quirks, that would be great.
> > 
> > Yes.
> > 
> > > But we'll still need to describe that memory-mapped config space
> > > somewhere.  If that's done with PNP0C02 or similar devices (as is done
> > > on my x86 laptop), we'd be all set.
> > 
> > I am not sure I understand what you mean here. Are you referring
> > to MCFG regions reported as PNP0c02 resources through its _CRS ?
> 
> Yes.  PCI Firmware Spec r3.0, Table 4-2, note 2 says address ranges
> reported via MCFG or _CBA should be reserved by _CRS of a PNP0C02
> device.

Ok, that's agreed. It goes without saying that since you are quoting
the PCI spec, if FW fails to report MCFG regions in a PNP0c02 device
_CRS I will consider that a FW bug.

> > IIUC PNP0C02 is a reservation mechanism, but it does not help us
> > associate its _CRS to a specific PCI host bridge instance, right ?
> 
> Gab proposed a hierarchy that *would* associate a PNP0C02 device with
> a PCI bridge:
> 
>   Device (PCI1) {
>     Name (_HID, "HISI0080") // PCI Express Root Bridge
>     Name (_CID, "PNP0A03") // Compatible PCI Root Bridge
>     Method (_CRS, 0, Serialized) { // Root complex resources (windows) }
>     Device (RES0) {
>       Name (_HID, "HISI0081") // HiSi PCIe RC config base address
>       Name (_CID, "PNP0C02")  // Motherboard reserved resource
>       Name (_CRS, ResourceTemplate () { ... }
>     }
>   }
> 
> That's a possibility.  The PCI Firmware Spec suggests putting RES0 at
> the root (under \_SB), but I don't know why.
> 
> Putting it at the root means we couldn't generically associate it with
> a bridge, although I could imagine something like this:
> 
>   Device (RES1) {
>     Name (_HID, "HISI0081") // HiSi PCIe RC config base address
>     Name (_CID, "PNP0C02")  // Motherboard reserved resource
>     Name (_CRS, ResourceTemplate () { ... }
>     Method (BRDG) { "PCI1" }  // hand-wavy ASL
>   }
>   Device (PCI1) {
>     Name (_HID, "HISI0080") // PCI Express Root Bridge
>     Name (_CID, "PNP0A03") // Compatible PCI Root Bridge
>     Method (_CRS, 0, Serialized) { // Root complex resources (windows) }
>   }
> 
> Where you could search PNP0C02 devices for a cookie that matched the
> host bridge.o

Ok, I am fine with both and I think we are converging, but the way
to solve this problem has to be uniform for all ARM partners (and
not only ARM). Two points here:

1) Adding a device/subdevice allows people to add a _CRS reporting the
   non-window bridge resources. Fine. It also allows people to chuck in
   there all sorts of _DSD properties to describe their PCI host bridge
   as it is done with DT properties (those _DSD can contain eg clocks
   etc.), this may be tempting (so that they can reuse the same DT
   driver and do not have to update their firmware) but I want to be
   clear here: that must not happen. So, a subdevice with a _CRS to
   report resources, yes, but it will stop there.
2) It is unclear to me how to formalize the above. People should not
   write FW by reading the PCI mailing list, so these guidelines have to
   be written, somehow. I do not want to standardize quirks, I want
   to prevent random ACPI table content, which is different.
   Should I report this to the ACPI spec working group ? If we do
   not do that everyone will go solve this problem as they deem fit.

[...]

> > For FW that is immutable I really do not see what we can do apart
> > from hardcoding the non-config resources (consumed by a bridge),
> > somehow.
> 
> Right.  Well, I assume you mean we should hard-code "non-window
> resources consumed directly by a bridge".  If firmware in the field is
> broken, we should work around it, and that may mean hard-coding some
> resources.
> 
> My point is that the hard-coding should not be buried in a driver
> where it's invisible to the rest of the kernel.  If we hard-code it in
> a quirk that adds _CRS entries, then the kernel will work just like it
> would if the firmware had been correct in the first place.  The
> resource will appear in /sys/devices/pnp*/*/resources and /proc/iomem,
> and if we ever used _SRS to assign or move ACPI devices, we would know
> to avoid the bridge resource.

We are in complete agreement here.

Thanks,
Lorenzo
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Gabriele Paoloni Sept. 22, 2016, 11:10 a.m. UTC | #15
Hi Lorenzo, Bjorn

> -----Original Message-----
> From: Lorenzo Pieralisi [mailto:lorenzo.pieralisi@arm.com]
> Sent: 22 September 2016 10:50
> To: Bjorn Helgaas
> Cc: Ard Biesheuvel; Tomasz Nowicki; David Daney; Will Deacon; Catalin
> Marinas; Rafael Wysocki; Arnd Bergmann; Hanjun Guo; Sinan Kaya;
> Jayachandran C; Christopher Covington; Duc Dang; Robert Richter; Marcin
> Wojtas; Liviu Dudau; Wangyijing; Mark Salter; linux-
> pci@vger.kernel.org; linux-arm-kernel@lists.infradead.org; Linaro ACPI
> Mailman List; Jon Masters; Andrea Gallo; Jeremy Linton; liudongdong
> (C); Gabriele Paoloni; Jeff Hugo; linux-acpi@vger.kernel.org; linux-
> kernel@vger.kernel.org; Rafael J. Wysocki
> Subject: Re: [PATCH V6 3/5] PCI: thunder-pem: Allow to probe PEM-
> specific register range for ACPI case
> 
> On Wed, Sep 21, 2016 at 01:04:57PM -0500, Bjorn Helgaas wrote:
> > On Wed, Sep 21, 2016 at 03:05:49PM +0100, Lorenzo Pieralisi wrote:
> > > On Tue, Sep 20, 2016 at 02:17:44PM -0500, Bjorn Helgaas wrote:
> > > > On Tue, Sep 20, 2016 at 04:09:25PM +0100, Ard Biesheuvel wrote:
> > >
> > > [...]
> > >
> > > > > None of these platforms can be fixed entirely in software, and
> given
> > > > > that we will not be adding quirks for new broken hardware, we
> should
> > > > > ask ourselves whether having two versions of a quirk, i.e., one
> for
> > > > > broken hardware + currently shipping firmware, and one for the
> same
> > > > > broken hardware with fixed firmware is really an improvement
> over what
> > > > > has been proposed here.
> > > >
> > > > We're talking about two completely different types of quirks:
> > > >
> > > >   1) MCFG quirks to use memory-mapped config space that doesn't
> quite
> > > >      conform to the ECAM model in the PCIe spec, and
> > > >
> > > >   2) Some yet-to-be-determined method to describe address space
> > > >      consumed by a bridge.
> > > >
> > > > The first two patches of this series are a nice implementation
> for 1).
> > > > The third patch (ThunderX-specific) is one possibility for 2),
> but I
> > > > don't like it because there's no way for generic software like
> the
> > > > ACPI core to discover these resources.
> > >
> > > Ok, so basically this means that to implement (2) we need to assign
> > > some sort of _HID to these quirky PCI bridges (so that we know what
> > > device they represent and we can retrieve their _CRS). I take from
> > > this discussion that the goal is to make sure that all non-config
> > > resources have to be declared through _CRS device objects, which is
> > > fine but that requires a FW update (unless we can fabricate ACPI
> > > devices and corresponding _CRS in the kernel whenever we match a
> > > given MCFG table signature).
> >
> > All resources consumed by ACPI devices should be declared through
> > _CRS.  If you want to fabricate ACPI devices or _CRS via kernel
> > quirks, that's fine with me.  This could be triggered via MCFG
> > signature, DMI info, host bridge _HID, etc.
> 
> I think the PNP quirk approach + PNP0c02 resource put forward by Gab
> is enough.

Great thanks as we take a final decision I will ask Dogndgong to submit
another RFC based on this approach

> 
> > > We discussed this already and I think we should make a decision:
> > >
> > > http://lists.infradead.org/pipermail/linux-arm-kernel/2016-
> March/414722.html
> > >
> > > > > > I'd like to step back and come up with some understanding of
> how
> > > > > > non-broken firmware *should* deal with this issue.  Then, if
> we *do*
> > > > > > work around this particular broken firmware in the kernel, it
> would be
> > > > > > nice to do it in a way that fits in with that understanding.
> > > > > >
> > > > > > For example, if a companion ACPI device is the preferred
> solution, an
> > > > > > ACPI quirk could fabricate a device with the required
> resources.  That
> > > > > > would address the problem closer to the source and make it
> more likely
> > > > > > that the rest of the system will work correctly: /proc/iomem
> could
> > > > > > make sense, things that look at _CRS generically would work
> (e.g,
> > > > > > /sys/, an admittedly hypothetical "lsacpi", etc.)
> > > > > >
> > > > > > Hard-coding stuff in drivers is a point solution that doesn't
> provide
> > > > > > any guidance for future platforms and makes it likely that
> the hack
> > > > > > will get copied into even more drivers.
> > > > > >
> > > > >
> > > > > OK, I see. But the guidance for future platforms should be 'do
> not
> > > > > rely on quirks', and what I am arguing here is that the more we
> polish
> > > > > up this code and make it clean and reusable, the more likely it
> is
> > > > > that will end up getting abused by new broken hardware that we
> set out
> > > > > to reject entirely in the first place.
> > > > >
> > > > > So of course, if the quirk involves claiming resources, let's
> make
> > > > > sure that this occurs in the cleanest and most compliant way
> possible.
> > > > > But any factoring/reuse concerns other than for the current
> crop of
> > > > > broken hardware should be avoided imo.
> > > >
> > > > If future hardware is completely ECAM-compliant and we don't need
> any
> > > > more MCFG quirks, that would be great.
> > >
> > > Yes.
> > >
> > > > But we'll still need to describe that memory-mapped config space
> > > > somewhere.  If that's done with PNP0C02 or similar devices (as is
> done
> > > > on my x86 laptop), we'd be all set.
> > >
> > > I am not sure I understand what you mean here. Are you referring
> > > to MCFG regions reported as PNP0c02 resources through its _CRS ?
> >
> > Yes.  PCI Firmware Spec r3.0, Table 4-2, note 2 says address ranges
> > reported via MCFG or _CBA should be reserved by _CRS of a PNP0C02
> > device.
> 
> Ok, that's agreed. It goes without saying that since you are quoting
> the PCI spec, if FW fails to report MCFG regions in a PNP0c02 device
> _CRS I will consider that a FW bug.
> 
> > > IIUC PNP0C02 is a reservation mechanism, but it does not help us
> > > associate its _CRS to a specific PCI host bridge instance, right ?
> >
> > Gab proposed a hierarchy that *would* associate a PNP0C02 device with
> > a PCI bridge:
> >
> >   Device (PCI1) {
> >     Name (_HID, "HISI0080") // PCI Express Root Bridge
> >     Name (_CID, "PNP0A03") // Compatible PCI Root Bridge
> >     Method (_CRS, 0, Serialized) { // Root complex resources
> (windows) }
> >     Device (RES0) {
> >       Name (_HID, "HISI0081") // HiSi PCIe RC config base address
> >       Name (_CID, "PNP0C02")  // Motherboard reserved resource
> >       Name (_CRS, ResourceTemplate () { ... }
> >     }
> >   }
> >
> > That's a possibility.  The PCI Firmware Spec suggests putting RES0 at
> > the root (under \_SB), but I don't know why.
> >
> > Putting it at the root means we couldn't generically associate it
> with
> > a bridge, although I could imagine something like this:
> >
> >   Device (RES1) {
> >     Name (_HID, "HISI0081") // HiSi PCIe RC config base address
> >     Name (_CID, "PNP0C02")  // Motherboard reserved resource
> >     Name (_CRS, ResourceTemplate () { ... }
> >     Method (BRDG) { "PCI1" }  // hand-wavy ASL
> >   }
> >   Device (PCI1) {
> >     Name (_HID, "HISI0080") // PCI Express Root Bridge
> >     Name (_CID, "PNP0A03") // Compatible PCI Root Bridge
> >     Method (_CRS, 0, Serialized) { // Root complex resources
> (windows) }
> >   }
> >
> > Where you could search PNP0C02 devices for a cookie that matched the
> > host bridge.o
> 
> Ok, I am fine with both and I think we are converging, but the way
> to solve this problem has to be uniform for all ARM partners (and
> not only ARM). Two points here:
> 
> 1) Adding a device/subdevice allows people to add a _CRS reporting the
>    non-window bridge resources. Fine. It also allows people to chuck in
>    there all sorts of _DSD properties to describe their PCI host bridge
>    as it is done with DT properties (those _DSD can contain eg clocks
>    etc.), this may be tempting (so that they can reuse the same DT
>    driver and do not have to update their firmware) but I want to be
>    clear here: that must not happen. So, a subdevice with a _CRS to
>    report resources, yes, but it will stop there.
> 2) It is unclear to me how to formalize the above. People should not
>    write FW by reading the PCI mailing list, so these guidelines have
> to
>    be written, somehow. I do not want to standardize quirks, I want
>    to prevent random ACPI table content, which is different.
>    Should I report this to the ACPI spec working group ? If we do
>    not do that everyone will go solve this problem as they deem fit.
> 

Do we really need to formalize this?

As we discussed in the Linaro call at the moment we have few vendors
that need quirks and we want to avoid promoting/accepting quirks for
the future.

At the time of the call I think we decided to informally accept a set
of quirks for the current platforms and reject any other quirk coming
after a certain date/kernel version (this to be decided).

I am not sure if there is a way to document/formalize a temporary
exception from the rule...

Thanks

Gab 


> [...]
> 
> > > For FW that is immutable I really do not see what we can do apart
> > > from hardcoding the non-config resources (consumed by a bridge),
> > > somehow.
> >
> > Right.  Well, I assume you mean we should hard-code "non-window
> > resources consumed directly by a bridge".  If firmware in the field
> is
> > broken, we should work around it, and that may mean hard-coding some
> > resources.
> >
> > My point is that the hard-coding should not be buried in a driver
> > where it's invisible to the rest of the kernel.  If we hard-code it
> in
> > a quirk that adds _CRS entries, then the kernel will work just like
> it
> > would if the firmware had been correct in the first place.  The
> > resource will appear in /sys/devices/pnp*/*/resources and
> /proc/iomem,
> > and if we ever used _SRS to assign or move ACPI devices, we would
> know
> > to avoid the bridge resource.
> 
> We are in complete agreement here.
> 
> Thanks,
> Lorenzo
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Gabriele Paoloni Sept. 22, 2016, 11:12 a.m. UTC | #16
Hi Bjorn

> -----Original Message-----
> From: Bjorn Helgaas [mailto:helgaas@kernel.org]
> Sent: 21 September 2016 19:59
> To: Gabriele Paoloni
> Cc: Ard Biesheuvel; Tomasz Nowicki; David Daney; Will Deacon; Catalin
> Marinas; Rafael Wysocki; Lorenzo Pieralisi; Arnd Bergmann; Hanjun Guo;
> Sinan Kaya; Jayachandran C; Christopher Covington; Duc Dang; Robert
> Richter; Marcin Wojtas; Liviu Dudau; Wangyijing; Mark Salter; linux-
> pci@vger.kernel.org; linux-arm-kernel@lists.infradead.org; Linaro ACPI
> Mailman List; Jon Masters; Andrea Gallo; Jeremy Linton; liudongdong
> (C); Jeff Hugo; linux-acpi@vger.kernel.org; linux-
> kernel@vger.kernel.org; Rafael J. Wysocki
> Subject: Re: [PATCH V6 3/5] PCI: thunder-pem: Allow to probe PEM-
> specific register range for ACPI case
> 
> On Wed, Sep 21, 2016 at 02:10:55PM +0000, Gabriele Paoloni wrote:
> > Hi Bjorn
> >
> > [...]
> >
> >
> > >
> > > If future hardware is completely ECAM-compliant and we don't need
> any
> > > more MCFG quirks, that would be great.
> > >
> > > But we'll still need to describe that memory-mapped config space
> > > somewhere.  If that's done with PNP0C02 or similar devices (as is
> done
> > > on my x86 laptop), we'd be all set.
> > >
> > > If we need to work around firmware in the field that doesn't do
> that,
> > > one possibility is a PNP quirk along the lines of
> > > quirk_amd_mmconfig_area().
> >
> > So, if my understanding is correct, for platforms that have not been
> > shipped yet you propose to use PNP0C02 in the ACPI table in order to
> > declare a motherboard reserved resource whereas for shipped platforms
> > you propose to have a quirk along pnp_fixups in order to track the
> > resource usage even if values are hardcoded...correct?
> 
> Yes.  I'm open to alternate proposals, but x86 uses PNP0C02, and
> following existing practice seems reasonable.
> 
> > Before Tomasz came up with this patchset we had a call between the
> vendors
> > involved in this PCI quirks saga and other guys from Linaro and ARM.
> >
> > Lorenzo summarized the outcome as in the following link
> > http://lkml.iu.edu/hypermail/linux/kernel/1606.2/03344.html
> >
> > Since this quirks mechanism has been discussed for quite a long time
> now
> > IMHO it would be good to have a last call including also you (Bjorn)
> so
> > that we can all agree on what to do and we avoid changing our drivers
> again
> > and again...
> 
> I think we're converging pretty fast.  As far as I'm concerned, the
> v6 ECAM quirks implementation is perfect.  The only remaining issue is
> reporting the ECAM resources, and I haven't seen objections to using
> PNP0C02 + PNP quirks for broken firmware.
> 
> There is the question of how or whether to associate a PNP0A03 PCI
> bridge with resources from a different PNP0C02 device, but that's not
> super important.  If the hard-coded resources appear both in a quirk
> and in the PCI bridge driver, it's ugly but not the end of the world.
> We've still achieved the objective of avoiding landmines in the
> address space.

Ok got it many thanks

Gab

> 
> Bjorn
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Lorenzo Pieralisi Sept. 22, 2016, 12:44 p.m. UTC | #17
On Thu, Sep 22, 2016 at 11:10:13AM +0000, Gabriele Paoloni wrote:
> Hi Lorenzo, Bjorn
> 
> > -----Original Message-----
> > From: Lorenzo Pieralisi [mailto:lorenzo.pieralisi@arm.com]
> > Sent: 22 September 2016 10:50
> > To: Bjorn Helgaas
> > Cc: Ard Biesheuvel; Tomasz Nowicki; David Daney; Will Deacon; Catalin
> > Marinas; Rafael Wysocki; Arnd Bergmann; Hanjun Guo; Sinan Kaya;
> > Jayachandran C; Christopher Covington; Duc Dang; Robert Richter; Marcin
> > Wojtas; Liviu Dudau; Wangyijing; Mark Salter; linux-
> > pci@vger.kernel.org; linux-arm-kernel@lists.infradead.org; Linaro ACPI
> > Mailman List; Jon Masters; Andrea Gallo; Jeremy Linton; liudongdong
> > (C); Gabriele Paoloni; Jeff Hugo; linux-acpi@vger.kernel.org; linux-
> > kernel@vger.kernel.org; Rafael J. Wysocki
> > Subject: Re: [PATCH V6 3/5] PCI: thunder-pem: Allow to probe PEM-
> > specific register range for ACPI case
> > 
> > On Wed, Sep 21, 2016 at 01:04:57PM -0500, Bjorn Helgaas wrote:
> > > On Wed, Sep 21, 2016 at 03:05:49PM +0100, Lorenzo Pieralisi wrote:
> > > > On Tue, Sep 20, 2016 at 02:17:44PM -0500, Bjorn Helgaas wrote:
> > > > > On Tue, Sep 20, 2016 at 04:09:25PM +0100, Ard Biesheuvel wrote:
> > > >
> > > > [...]
> > > >
> > > > > > None of these platforms can be fixed entirely in software, and
> > given
> > > > > > that we will not be adding quirks for new broken hardware, we
> > should
> > > > > > ask ourselves whether having two versions of a quirk, i.e., one
> > for
> > > > > > broken hardware + currently shipping firmware, and one for the
> > same
> > > > > > broken hardware with fixed firmware is really an improvement
> > over what
> > > > > > has been proposed here.
> > > > >
> > > > > We're talking about two completely different types of quirks:
> > > > >
> > > > >   1) MCFG quirks to use memory-mapped config space that doesn't
> > quite
> > > > >      conform to the ECAM model in the PCIe spec, and
> > > > >
> > > > >   2) Some yet-to-be-determined method to describe address space
> > > > >      consumed by a bridge.
> > > > >
> > > > > The first two patches of this series are a nice implementation
> > for 1).
> > > > > The third patch (ThunderX-specific) is one possibility for 2),
> > but I
> > > > > don't like it because there's no way for generic software like
> > the
> > > > > ACPI core to discover these resources.
> > > >
> > > > Ok, so basically this means that to implement (2) we need to assign
> > > > some sort of _HID to these quirky PCI bridges (so that we know what
> > > > device they represent and we can retrieve their _CRS). I take from
> > > > this discussion that the goal is to make sure that all non-config
> > > > resources have to be declared through _CRS device objects, which is
> > > > fine but that requires a FW update (unless we can fabricate ACPI
> > > > devices and corresponding _CRS in the kernel whenever we match a
> > > > given MCFG table signature).
> > >
> > > All resources consumed by ACPI devices should be declared through
> > > _CRS.  If you want to fabricate ACPI devices or _CRS via kernel
> > > quirks, that's fine with me.  This could be triggered via MCFG
> > > signature, DMI info, host bridge _HID, etc.
> > 
> > I think the PNP quirk approach + PNP0c02 resource put forward by Gab
> > is enough.
> 
> Great thanks as we take a final decision I will ask Dogndgong to submit
> another RFC based on this approach
> 
> > 
> > > > We discussed this already and I think we should make a decision:
> > > >
> > > > http://lists.infradead.org/pipermail/linux-arm-kernel/2016-
> > March/414722.html
> > > >
> > > > > > > I'd like to step back and come up with some understanding of
> > how
> > > > > > > non-broken firmware *should* deal with this issue.  Then, if
> > we *do*
> > > > > > > work around this particular broken firmware in the kernel, it
> > would be
> > > > > > > nice to do it in a way that fits in with that understanding.
> > > > > > >
> > > > > > > For example, if a companion ACPI device is the preferred
> > solution, an
> > > > > > > ACPI quirk could fabricate a device with the required
> > resources.  That
> > > > > > > would address the problem closer to the source and make it
> > more likely
> > > > > > > that the rest of the system will work correctly: /proc/iomem
> > could
> > > > > > > make sense, things that look at _CRS generically would work
> > (e.g,
> > > > > > > /sys/, an admittedly hypothetical "lsacpi", etc.)
> > > > > > >
> > > > > > > Hard-coding stuff in drivers is a point solution that doesn't
> > provide
> > > > > > > any guidance for future platforms and makes it likely that
> > the hack
> > > > > > > will get copied into even more drivers.
> > > > > > >
> > > > > >
> > > > > > OK, I see. But the guidance for future platforms should be 'do
> > not
> > > > > > rely on quirks', and what I am arguing here is that the more we
> > polish
> > > > > > up this code and make it clean and reusable, the more likely it
> > is
> > > > > > that will end up getting abused by new broken hardware that we
> > set out
> > > > > > to reject entirely in the first place.
> > > > > >
> > > > > > So of course, if the quirk involves claiming resources, let's
> > make
> > > > > > sure that this occurs in the cleanest and most compliant way
> > possible.
> > > > > > But any factoring/reuse concerns other than for the current
> > crop of
> > > > > > broken hardware should be avoided imo.
> > > > >
> > > > > If future hardware is completely ECAM-compliant and we don't need
> > any
> > > > > more MCFG quirks, that would be great.
> > > >
> > > > Yes.
> > > >
> > > > > But we'll still need to describe that memory-mapped config space
> > > > > somewhere.  If that's done with PNP0C02 or similar devices (as is
> > done
> > > > > on my x86 laptop), we'd be all set.
> > > >
> > > > I am not sure I understand what you mean here. Are you referring
> > > > to MCFG regions reported as PNP0c02 resources through its _CRS ?
> > >
> > > Yes.  PCI Firmware Spec r3.0, Table 4-2, note 2 says address ranges
> > > reported via MCFG or _CBA should be reserved by _CRS of a PNP0C02
> > > device.
> > 
> > Ok, that's agreed. It goes without saying that since you are quoting
> > the PCI spec, if FW fails to report MCFG regions in a PNP0c02 device
> > _CRS I will consider that a FW bug.
> > 
> > > > IIUC PNP0C02 is a reservation mechanism, but it does not help us
> > > > associate its _CRS to a specific PCI host bridge instance, right ?
> > >
> > > Gab proposed a hierarchy that *would* associate a PNP0C02 device with
> > > a PCI bridge:
> > >
> > >   Device (PCI1) {
> > >     Name (_HID, "HISI0080") // PCI Express Root Bridge
> > >     Name (_CID, "PNP0A03") // Compatible PCI Root Bridge
> > >     Method (_CRS, 0, Serialized) { // Root complex resources
> > (windows) }
> > >     Device (RES0) {
> > >       Name (_HID, "HISI0081") // HiSi PCIe RC config base address
> > >       Name (_CID, "PNP0C02")  // Motherboard reserved resource
> > >       Name (_CRS, ResourceTemplate () { ... }
> > >     }
> > >   }
> > >
> > > That's a possibility.  The PCI Firmware Spec suggests putting RES0 at
> > > the root (under \_SB), but I don't know why.
> > >
> > > Putting it at the root means we couldn't generically associate it
> > with
> > > a bridge, although I could imagine something like this:
> > >
> > >   Device (RES1) {
> > >     Name (_HID, "HISI0081") // HiSi PCIe RC config base address
> > >     Name (_CID, "PNP0C02")  // Motherboard reserved resource
> > >     Name (_CRS, ResourceTemplate () { ... }
> > >     Method (BRDG) { "PCI1" }  // hand-wavy ASL
> > >   }
> > >   Device (PCI1) {
> > >     Name (_HID, "HISI0080") // PCI Express Root Bridge
> > >     Name (_CID, "PNP0A03") // Compatible PCI Root Bridge
> > >     Method (_CRS, 0, Serialized) { // Root complex resources
> > (windows) }
> > >   }
> > >
> > > Where you could search PNP0C02 devices for a cookie that matched the
> > > host bridge.o
> > 
> > Ok, I am fine with both and I think we are converging, but the way
> > to solve this problem has to be uniform for all ARM partners (and
> > not only ARM). Two points here:
> > 
> > 1) Adding a device/subdevice allows people to add a _CRS reporting the
> >    non-window bridge resources. Fine. It also allows people to chuck in
> >    there all sorts of _DSD properties to describe their PCI host bridge
> >    as it is done with DT properties (those _DSD can contain eg clocks
> >    etc.), this may be tempting (so that they can reuse the same DT
> >    driver and do not have to update their firmware) but I want to be
> >    clear here: that must not happen. So, a subdevice with a _CRS to
> >    report resources, yes, but it will stop there.
> > 2) It is unclear to me how to formalize the above. People should not
> >    write FW by reading the PCI mailing list, so these guidelines have
> > to
> >    be written, somehow. I do not want to standardize quirks, I want
> >    to prevent random ACPI table content, which is different.
> >    Should I report this to the ACPI spec working group ? If we do
> >    not do that everyone will go solve this problem as they deem fit.
> > 
> 
> Do we really need to formalize this?
> 
> As we discussed in the Linaro call at the moment we have few vendors
> that need quirks and we want to avoid promoting/accepting quirks for
> the future.
> 
> At the time of the call I think we decided to informally accept a set
> of quirks for the current platforms and reject any other quirk coming
> after a certain date/kernel version (this to be decided).
> 
> I am not sure if there is a way to document/formalize a temporary
> exception from the rule...

- (1) will be enforced.
- We do not know whether PNP0c02 can be used in non-root devices _CRS
- Are we sure (given that we are implementing this to make sure we are
  able to validate resources) that it is valid to have a subdevice with
  a _CRS whose resources are not contained in its parent _CRS address
  space (because that's exactly the case for these quirks) ?

That's what I mean by formalizing, I want to know how PNP0c02 should
be used. We all want platforms with quirks to be enabled asap but only
if we stick to the ACPI specifications. On top of that, with the
bindings above, the kernel would end up creating a platform device for
the "fake" device with a _CRS approach, which is questionable.

Lorenzo

> 
> Thanks
> 
> Gab 
> 
> 
> > [...]
> > 
> > > > For FW that is immutable I really do not see what we can do apart
> > > > from hardcoding the non-config resources (consumed by a bridge),
> > > > somehow.
> > >
> > > Right.  Well, I assume you mean we should hard-code "non-window
> > > resources consumed directly by a bridge".  If firmware in the field
> > is
> > > broken, we should work around it, and that may mean hard-coding some
> > > resources.
> > >
> > > My point is that the hard-coding should not be buried in a driver
> > > where it's invisible to the rest of the kernel.  If we hard-code it
> > in
> > > a quirk that adds _CRS entries, then the kernel will work just like
> > it
> > > would if the firmware had been correct in the first place.  The
> > > resource will appear in /sys/devices/pnp*/*/resources and
> > /proc/iomem,
> > > and if we ever used _SRS to assign or move ACPI devices, we would
> > know
> > > to avoid the bridge resource.
> > 
> > We are in complete agreement here.
> > 
> > Thanks,
> > Lorenzo
> 
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Christopher Covington Sept. 22, 2016, 2:20 p.m. UTC | #18
On 09/22/2016 05:49 AM, Lorenzo Pieralisi wrote:
> On Wed, Sep 21, 2016 at 01:04:57PM -0500, Bjorn Helgaas wrote:
>> On Wed, Sep 21, 2016 at 03:05:49PM +0100, Lorenzo Pieralisi wrote:
>>> On Tue, Sep 20, 2016 at 02:17:44PM -0500, Bjorn Helgaas wrote:
>>>> On Tue, Sep 20, 2016 at 04:09:25PM +0100, Ard Biesheuvel wrote:
>>>
>>> [...]
>>>
>>>>> None of these platforms can be fixed entirely in software, and given
>>>>> that we will not be adding quirks for new broken hardware, we should
>>>>> ask ourselves whether having two versions of a quirk, i.e., one for
>>>>> broken hardware + currently shipping firmware, and one for the same
>>>>> broken hardware with fixed firmware is really an improvement over what
>>>>> has been proposed here.
>>>>
>>>> We're talking about two completely different types of quirks:
>>>>
>>>>   1) MCFG quirks to use memory-mapped config space that doesn't quite
>>>>      conform to the ECAM model in the PCIe spec, and
>>>>
>>>>   2) Some yet-to-be-determined method to describe address space
>>>>      consumed by a bridge.
>>>>
>>>> The first two patches of this series are a nice implementation for 1).
>>>> The third patch (ThunderX-specific) is one possibility for 2), but I
>>>> don't like it because there's no way for generic software like the
>>>> ACPI core to discover these resources.
>>>
>>> Ok, so basically this means that to implement (2) we need to assign
>>> some sort of _HID to these quirky PCI bridges (so that we know what
>>> device they represent and we can retrieve their _CRS). I take from
>>> this discussion that the goal is to make sure that all non-config
>>> resources have to be declared through _CRS device objects, which is
>>> fine but that requires a FW update (unless we can fabricate ACPI
>>> devices and corresponding _CRS in the kernel whenever we match a
>>> given MCFG table signature).
>>
>> All resources consumed by ACPI devices should be declared through
>> _CRS.  If you want to fabricate ACPI devices or _CRS via kernel
>> quirks, that's fine with me.  This could be triggered via MCFG
>> signature, DMI info, host bridge _HID, etc.
> 
> I think the PNP quirk approach + PNP0c02 resource put forward by Gab
> is enough.
> 
>>> We discussed this already and I think we should make a decision:
>>>
>>> http://lists.infradead.org/pipermail/linux-arm-kernel/2016-March/414722.html
>>>
>>>>>> I'd like to step back and come up with some understanding of how
>>>>>> non-broken firmware *should* deal with this issue.  Then, if we *do*
>>>>>> work around this particular broken firmware in the kernel, it would be
>>>>>> nice to do it in a way that fits in with that understanding.
>>>>>>
>>>>>> For example, if a companion ACPI device is the preferred solution, an
>>>>>> ACPI quirk could fabricate a device with the required resources.  That
>>>>>> would address the problem closer to the source and make it more likely
>>>>>> that the rest of the system will work correctly: /proc/iomem could
>>>>>> make sense, things that look at _CRS generically would work (e.g,
>>>>>> /sys/, an admittedly hypothetical "lsacpi", etc.)
>>>>>>
>>>>>> Hard-coding stuff in drivers is a point solution that doesn't provide
>>>>>> any guidance for future platforms and makes it likely that the hack
>>>>>> will get copied into even more drivers.
>>>>>>
>>>>>
>>>>> OK, I see. But the guidance for future platforms should be 'do not
>>>>> rely on quirks', and what I am arguing here is that the more we polish
>>>>> up this code and make it clean and reusable, the more likely it is
>>>>> that will end up getting abused by new broken hardware that we set out
>>>>> to reject entirely in the first place.
>>>>>
>>>>> So of course, if the quirk involves claiming resources, let's make
>>>>> sure that this occurs in the cleanest and most compliant way possible.
>>>>> But any factoring/reuse concerns other than for the current crop of
>>>>> broken hardware should be avoided imo.
>>>>
>>>> If future hardware is completely ECAM-compliant and we don't need any
>>>> more MCFG quirks, that would be great.
>>>
>>> Yes.
>>>
>>>> But we'll still need to describe that memory-mapped config space
>>>> somewhere.  If that's done with PNP0C02 or similar devices (as is done
>>>> on my x86 laptop), we'd be all set.
>>>
>>> I am not sure I understand what you mean here. Are you referring
>>> to MCFG regions reported as PNP0c02 resources through its _CRS ?
>>
>> Yes.  PCI Firmware Spec r3.0, Table 4-2, note 2 says address ranges
>> reported via MCFG or _CBA should be reserved by _CRS of a PNP0C02
>> device.
> 
> Ok, that's agreed. It goes without saying that since you are quoting
> the PCI spec, if FW fails to report MCFG regions in a PNP0c02 device
> _CRS I will consider that a FW bug.
> 
>>> IIUC PNP0C02 is a reservation mechanism, but it does not help us
>>> associate its _CRS to a specific PCI host bridge instance, right ?
>>
>> Gab proposed a hierarchy that *would* associate a PNP0C02 device with
>> a PCI bridge:
>>
>>   Device (PCI1) {
>>     Name (_HID, "HISI0080") // PCI Express Root Bridge
>>     Name (_CID, "PNP0A03") // Compatible PCI Root Bridge
>>     Method (_CRS, 0, Serialized) { // Root complex resources (windows) }
>>     Device (RES0) {
>>       Name (_HID, "HISI0081") // HiSi PCIe RC config base address
>>       Name (_CID, "PNP0C02")  // Motherboard reserved resource
>>       Name (_CRS, ResourceTemplate () { ... }
>>     }
>>   }
>>
>> That's a possibility.  The PCI Firmware Spec suggests putting RES0 at
>> the root (under \_SB), but I don't know why.
>>
>> Putting it at the root means we couldn't generically associate it with
>> a bridge, although I could imagine something like this:
>>
>>   Device (RES1) {
>>     Name (_HID, "HISI0081") // HiSi PCIe RC config base address
>>     Name (_CID, "PNP0C02")  // Motherboard reserved resource
>>     Name (_CRS, ResourceTemplate () { ... }
>>     Method (BRDG) { "PCI1" }  // hand-wavy ASL
>>   }
>>   Device (PCI1) {
>>     Name (_HID, "HISI0080") // PCI Express Root Bridge
>>     Name (_CID, "PNP0A03") // Compatible PCI Root Bridge
>>     Method (_CRS, 0, Serialized) { // Root complex resources (windows) }
>>   }
>>
>> Where you could search PNP0C02 devices for a cookie that matched the
>> host bridge.o
> 
> Ok, I am fine with both and I think we are converging, but the way
> to solve this problem has to be uniform for all ARM partners (and
> not only ARM). Two points here:
> 
> 1) Adding a device/subdevice allows people to add a _CRS reporting the
>    non-window bridge resources. Fine. It also allows people to chuck in
>    there all sorts of _DSD properties to describe their PCI host bridge
>    as it is done with DT properties (those _DSD can contain eg clocks
>    etc.), this may be tempting (so that they can reuse the same DT
>    driver and do not have to update their firmware) but I want to be
>    clear here: that must not happen. So, a subdevice with a _CRS to
>    report resources, yes, but it will stop there.
> 2) It is unclear to me how to formalize the above. People should not
>    write FW by reading the PCI mailing list, so these guidelines have to
>    be written, somehow. I do not want to standardize quirks, I want
>    to prevent random ACPI table content, which is different.
>    Should I report this to the ACPI spec working group ? If we do
>    not do that everyone will go solve this problem as they deem fit.

Could you add some checks to fwts?

Cov
Bjorn Helgaas Sept. 22, 2016, 6:31 p.m. UTC | #19
On Thu, Sep 22, 2016 at 01:44:46PM +0100, Lorenzo Pieralisi wrote:
> On Thu, Sep 22, 2016 at 11:10:13AM +0000, Gabriele Paoloni wrote:
> > Hi Lorenzo, Bjorn
> > 
> > > -----Original Message-----
> > > From: Lorenzo Pieralisi [mailto:lorenzo.pieralisi@arm.com]
> > > Sent: 22 September 2016 10:50
> > > To: Bjorn Helgaas
> > > Cc: Ard Biesheuvel; Tomasz Nowicki; David Daney; Will Deacon; Catalin
> > > Marinas; Rafael Wysocki; Arnd Bergmann; Hanjun Guo; Sinan Kaya;
> > > Jayachandran C; Christopher Covington; Duc Dang; Robert Richter; Marcin
> > > Wojtas; Liviu Dudau; Wangyijing; Mark Salter; linux-
> > > pci@vger.kernel.org; linux-arm-kernel@lists.infradead.org; Linaro ACPI
> > > Mailman List; Jon Masters; Andrea Gallo; Jeremy Linton; liudongdong
> > > (C); Gabriele Paoloni; Jeff Hugo; linux-acpi@vger.kernel.org; linux-
> > > kernel@vger.kernel.org; Rafael J. Wysocki
> > > Subject: Re: [PATCH V6 3/5] PCI: thunder-pem: Allow to probe PEM-
> > > specific register range for ACPI case
> > > 
> > > On Wed, Sep 21, 2016 at 01:04:57PM -0500, Bjorn Helgaas wrote:
> > > > On Wed, Sep 21, 2016 at 03:05:49PM +0100, Lorenzo Pieralisi wrote:
> > > > > On Tue, Sep 20, 2016 at 02:17:44PM -0500, Bjorn Helgaas wrote:
> > > > > > On Tue, Sep 20, 2016 at 04:09:25PM +0100, Ard Biesheuvel wrote:
> > > > >
> > > > > [...]
> > > > >
> > > > > > > None of these platforms can be fixed entirely in software, and
> > > given
> > > > > > > that we will not be adding quirks for new broken hardware, we
> > > should
> > > > > > > ask ourselves whether having two versions of a quirk, i.e., one
> > > for
> > > > > > > broken hardware + currently shipping firmware, and one for the
> > > same
> > > > > > > broken hardware with fixed firmware is really an improvement
> > > over what
> > > > > > > has been proposed here.
> > > > > >
> > > > > > We're talking about two completely different types of quirks:
> > > > > >
> > > > > >   1) MCFG quirks to use memory-mapped config space that doesn't
> > > quite
> > > > > >      conform to the ECAM model in the PCIe spec, and
> > > > > >
> > > > > >   2) Some yet-to-be-determined method to describe address space
> > > > > >      consumed by a bridge.
> > > > > >
> > > > > > The first two patches of this series are a nice implementation
> > > for 1).
> > > > > > The third patch (ThunderX-specific) is one possibility for 2),
> > > but I
> > > > > > don't like it because there's no way for generic software like
> > > the
> > > > > > ACPI core to discover these resources.
> > > > >
> > > > > Ok, so basically this means that to implement (2) we need to assign
> > > > > some sort of _HID to these quirky PCI bridges (so that we know what
> > > > > device they represent and we can retrieve their _CRS). I take from
> > > > > this discussion that the goal is to make sure that all non-config
> > > > > resources have to be declared through _CRS device objects, which is
> > > > > fine but that requires a FW update (unless we can fabricate ACPI
> > > > > devices and corresponding _CRS in the kernel whenever we match a
> > > > > given MCFG table signature).
> > > >
> > > > All resources consumed by ACPI devices should be declared through
> > > > _CRS.  If you want to fabricate ACPI devices or _CRS via kernel
> > > > quirks, that's fine with me.  This could be triggered via MCFG
> > > > signature, DMI info, host bridge _HID, etc.
> > > 
> > > I think the PNP quirk approach + PNP0c02 resource put forward by Gab
> > > is enough.
> > 
> > Great thanks as we take a final decision I will ask Dogndgong to submit
> > another RFC based on this approach
> > 
> > > 
> > > > > We discussed this already and I think we should make a decision:
> > > > >
> > > > > http://lists.infradead.org/pipermail/linux-arm-kernel/2016-
> > > March/414722.html
> > > > >
> > > > > > > > I'd like to step back and come up with some understanding of
> > > how
> > > > > > > > non-broken firmware *should* deal with this issue.  Then, if
> > > we *do*
> > > > > > > > work around this particular broken firmware in the kernel, it
> > > would be
> > > > > > > > nice to do it in a way that fits in with that understanding.
> > > > > > > >
> > > > > > > > For example, if a companion ACPI device is the preferred
> > > solution, an
> > > > > > > > ACPI quirk could fabricate a device with the required
> > > resources.  That
> > > > > > > > would address the problem closer to the source and make it
> > > more likely
> > > > > > > > that the rest of the system will work correctly: /proc/iomem
> > > could
> > > > > > > > make sense, things that look at _CRS generically would work
> > > (e.g,
> > > > > > > > /sys/, an admittedly hypothetical "lsacpi", etc.)
> > > > > > > >
> > > > > > > > Hard-coding stuff in drivers is a point solution that doesn't
> > > provide
> > > > > > > > any guidance for future platforms and makes it likely that
> > > the hack
> > > > > > > > will get copied into even more drivers.
> > > > > > > >
> > > > > > >
> > > > > > > OK, I see. But the guidance for future platforms should be 'do
> > > not
> > > > > > > rely on quirks', and what I am arguing here is that the more we
> > > polish
> > > > > > > up this code and make it clean and reusable, the more likely it
> > > is
> > > > > > > that will end up getting abused by new broken hardware that we
> > > set out
> > > > > > > to reject entirely in the first place.
> > > > > > >
> > > > > > > So of course, if the quirk involves claiming resources, let's
> > > make
> > > > > > > sure that this occurs in the cleanest and most compliant way
> > > possible.
> > > > > > > But any factoring/reuse concerns other than for the current
> > > crop of
> > > > > > > broken hardware should be avoided imo.
> > > > > >
> > > > > > If future hardware is completely ECAM-compliant and we don't need
> > > any
> > > > > > more MCFG quirks, that would be great.
> > > > >
> > > > > Yes.
> > > > >
> > > > > > But we'll still need to describe that memory-mapped config space
> > > > > > somewhere.  If that's done with PNP0C02 or similar devices (as is
> > > done
> > > > > > on my x86 laptop), we'd be all set.
> > > > >
> > > > > I am not sure I understand what you mean here. Are you referring
> > > > > to MCFG regions reported as PNP0c02 resources through its _CRS ?
> > > >
> > > > Yes.  PCI Firmware Spec r3.0, Table 4-2, note 2 says address ranges
> > > > reported via MCFG or _CBA should be reserved by _CRS of a PNP0C02
> > > > device.
> > > 
> > > Ok, that's agreed. It goes without saying that since you are quoting
> > > the PCI spec, if FW fails to report MCFG regions in a PNP0c02 device
> > > _CRS I will consider that a FW bug.
> > > 
> > > > > IIUC PNP0C02 is a reservation mechanism, but it does not help us
> > > > > associate its _CRS to a specific PCI host bridge instance, right ?
> > > >
> > > > Gab proposed a hierarchy that *would* associate a PNP0C02 device with
> > > > a PCI bridge:
> > > >
> > > >   Device (PCI1) {
> > > >     Name (_HID, "HISI0080") // PCI Express Root Bridge
> > > >     Name (_CID, "PNP0A03") // Compatible PCI Root Bridge
> > > >     Method (_CRS, 0, Serialized) { // Root complex resources
> > > (windows) }
> > > >     Device (RES0) {
> > > >       Name (_HID, "HISI0081") // HiSi PCIe RC config base address
> > > >       Name (_CID, "PNP0C02")  // Motherboard reserved resource
> > > >       Name (_CRS, ResourceTemplate () { ... }
> > > >     }
> > > >   }
> > > >
> > > > That's a possibility.  The PCI Firmware Spec suggests putting RES0 at
> > > > the root (under \_SB), but I don't know why.
> > > >
> > > > Putting it at the root means we couldn't generically associate it
> > > with
> > > > a bridge, although I could imagine something like this:
> > > >
> > > >   Device (RES1) {
> > > >     Name (_HID, "HISI0081") // HiSi PCIe RC config base address
> > > >     Name (_CID, "PNP0C02")  // Motherboard reserved resource
> > > >     Name (_CRS, ResourceTemplate () { ... }
> > > >     Method (BRDG) { "PCI1" }  // hand-wavy ASL
> > > >   }
> > > >   Device (PCI1) {
> > > >     Name (_HID, "HISI0080") // PCI Express Root Bridge
> > > >     Name (_CID, "PNP0A03") // Compatible PCI Root Bridge
> > > >     Method (_CRS, 0, Serialized) { // Root complex resources
> > > (windows) }
> > > >   }
> > > >
> > > > Where you could search PNP0C02 devices for a cookie that matched the
> > > > host bridge.o
> > > 
> > > Ok, I am fine with both and I think we are converging, but the way
> > > to solve this problem has to be uniform for all ARM partners (and
> > > not only ARM). Two points here:
> > > 
> > > 1) Adding a device/subdevice allows people to add a _CRS reporting the
> > >    non-window bridge resources. Fine. It also allows people to chuck in
> > >    there all sorts of _DSD properties to describe their PCI host bridge
> > >    as it is done with DT properties (those _DSD can contain eg clocks
> > >    etc.), this may be tempting (so that they can reuse the same DT
> > >    driver and do not have to update their firmware) but I want to be
> > >    clear here: that must not happen. So, a subdevice with a _CRS to
> > >    report resources, yes, but it will stop there.
> > > 2) It is unclear to me how to formalize the above. People should not
> > >    write FW by reading the PCI mailing list, so these guidelines have
> > > to
> > >    be written, somehow. I do not want to standardize quirks, I want
> > >    to prevent random ACPI table content, which is different.
> > >    Should I report this to the ACPI spec working group ? If we do
> > >    not do that everyone will go solve this problem as they deem fit.
> > > 
> > 
> > Do we really need to formalize this?
> > 
> > As we discussed in the Linaro call at the moment we have few vendors
> > that need quirks and we want to avoid promoting/accepting quirks for
> > the future.
> > 
> > At the time of the call I think we decided to informally accept a set
> > of quirks for the current platforms and reject any other quirk coming
> > after a certain date/kernel version (this to be decided).
> > 
> > I am not sure if there is a way to document/formalize a temporary
> > exception from the rule...
> 
> - (1) will be enforced.

I'm not sure it's necessary or possible to enforce a "no future
quirks" rule.  For one thing, there's already a pretty strong
incentive to avoid quirks: if your hardware doesn't require quirks,
it works with OSes already in the field.

MCFG quirks allow us to use the generic ACPI pci_root.c driver even if
the hardware doesn't support ECAM quite according to the spec.

PNP0C02 usage is a workaround for the failure of the Consumer/Producer
bit.  PNP0C02 quirks compensate for firmware that doesn't describe
resource usage accurately.  It's possible the ACPI spec folks could
come up with a better Consumer/Producer workaround, if that's needed.
Apparently x86 hasn't needed it yet.

If people add _DSD methods for clocks or whatnot, the hardware won't
work with the generic pci_root.c driver, so there's already an
incentive for avoiding them.  x86 has managed without such methods;
arm64 should be able to do the same.

> - We do not know whether PNP0c02 can be used in non-root devices _CRS
> - Are we sure (given that we are implementing this to make sure we are
>   able to validate resources) that it is valid to have a subdevice with
>   a _CRS whose resources are not contained in its parent _CRS address
>   space (because that's exactly the case for these quirks) ?
>
> That's what I mean by formalizing, I want to know how PNP0c02 should
> be used. We all want platforms with quirks to be enabled asap but only
> if we stick to the ACPI specifications. On top of that, with the
> bindings above, the kernel would end up creating a platform device for
> the "fake" device with a _CRS approach, which is questionable.

> > > [...]
> > > 
> > > > > For FW that is immutable I really do not see what we can do apart
> > > > > from hardcoding the non-config resources (consumed by a bridge),
> > > > > somehow.
> > > >
> > > > Right.  Well, I assume you mean we should hard-code "non-window
> > > > resources consumed directly by a bridge".  If firmware in the field
> > > is
> > > > broken, we should work around it, and that may mean hard-coding some
> > > > resources.
> > > >
> > > > My point is that the hard-coding should not be buried in a driver
> > > > where it's invisible to the rest of the kernel.  If we hard-code it
> > > in
> > > > a quirk that adds _CRS entries, then the kernel will work just like
> > > it
> > > > would if the firmware had been correct in the first place.  The
> > > > resource will appear in /sys/devices/pnp*/*/resources and
> > > /proc/iomem,
> > > > and if we ever used _SRS to assign or move ACPI devices, we would
> > > know
> > > > to avoid the bridge resource.
> > > 
> > > We are in complete agreement here.
> > > 
> > > Thanks,
> > > Lorenzo
> > 
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Bjorn Helgaas Sept. 22, 2016, 10:10 p.m. UTC | #20
On Thu, Sep 22, 2016 at 01:31:01PM -0500, Bjorn Helgaas wrote:
> On Thu, Sep 22, 2016 at 01:44:46PM +0100, Lorenzo Pieralisi wrote:
> > On Thu, Sep 22, 2016 at 11:10:13AM +0000, Gabriele Paoloni wrote:
> > > Hi Lorenzo, Bjorn
> > > 
> > > > -----Original Message-----
> > > > From: Lorenzo Pieralisi [mailto:lorenzo.pieralisi@arm.com]
> > > > Sent: 22 September 2016 10:50
> > > > To: Bjorn Helgaas
> > > > Cc: Ard Biesheuvel; Tomasz Nowicki; David Daney; Will Deacon; Catalin
> > > > Marinas; Rafael Wysocki; Arnd Bergmann; Hanjun Guo; Sinan Kaya;
> > > > Jayachandran C; Christopher Covington; Duc Dang; Robert Richter; Marcin
> > > > Wojtas; Liviu Dudau; Wangyijing; Mark Salter; linux-
> > > > pci@vger.kernel.org; linux-arm-kernel@lists.infradead.org; Linaro ACPI
> > > > Mailman List; Jon Masters; Andrea Gallo; Jeremy Linton; liudongdong
> > > > (C); Gabriele Paoloni; Jeff Hugo; linux-acpi@vger.kernel.org; linux-
> > > > kernel@vger.kernel.org; Rafael J. Wysocki
> > > > Subject: Re: [PATCH V6 3/5] PCI: thunder-pem: Allow to probe PEM-
> > > > specific register range for ACPI case
> > > > 
> > > > On Wed, Sep 21, 2016 at 01:04:57PM -0500, Bjorn Helgaas wrote:
> > > > > On Wed, Sep 21, 2016 at 03:05:49PM +0100, Lorenzo Pieralisi wrote:
> > > > > > On Tue, Sep 20, 2016 at 02:17:44PM -0500, Bjorn Helgaas wrote:
> > > > > > > On Tue, Sep 20, 2016 at 04:09:25PM +0100, Ard Biesheuvel wrote:
> > > > > >
> > > > > > [...]
> > > > > >
> > > > > > > > None of these platforms can be fixed entirely in software, and
> > > > given
> > > > > > > > that we will not be adding quirks for new broken hardware, we
> > > > should
> > > > > > > > ask ourselves whether having two versions of a quirk, i.e., one
> > > > for
> > > > > > > > broken hardware + currently shipping firmware, and one for the
> > > > same
> > > > > > > > broken hardware with fixed firmware is really an improvement
> > > > over what
> > > > > > > > has been proposed here.
> > > > > > >
> > > > > > > We're talking about two completely different types of quirks:
> > > > > > >
> > > > > > >   1) MCFG quirks to use memory-mapped config space that doesn't
> > > > quite
> > > > > > >      conform to the ECAM model in the PCIe spec, and
> > > > > > >
> > > > > > >   2) Some yet-to-be-determined method to describe address space
> > > > > > >      consumed by a bridge.
> > > > > > >
> > > > > > > The first two patches of this series are a nice implementation
> > > > for 1).
> > > > > > > The third patch (ThunderX-specific) is one possibility for 2),
> > > > but I
> > > > > > > don't like it because there's no way for generic software like
> > > > the
> > > > > > > ACPI core to discover these resources.
> > > > > >
> > > > > > Ok, so basically this means that to implement (2) we need to assign
> > > > > > some sort of _HID to these quirky PCI bridges (so that we know what
> > > > > > device they represent and we can retrieve their _CRS). I take from
> > > > > > this discussion that the goal is to make sure that all non-config
> > > > > > resources have to be declared through _CRS device objects, which is
> > > > > > fine but that requires a FW update (unless we can fabricate ACPI
> > > > > > devices and corresponding _CRS in the kernel whenever we match a
> > > > > > given MCFG table signature).
> > > > >
> > > > > All resources consumed by ACPI devices should be declared through
> > > > > _CRS.  If you want to fabricate ACPI devices or _CRS via kernel
> > > > > quirks, that's fine with me.  This could be triggered via MCFG
> > > > > signature, DMI info, host bridge _HID, etc.
> > > > 
> > > > I think the PNP quirk approach + PNP0c02 resource put forward by Gab
> > > > is enough.
> > > 
> > > Great thanks as we take a final decision I will ask Dogndgong to submit
> > > another RFC based on this approach
> > > 
> > > > 
> > > > > > We discussed this already and I think we should make a decision:
> > > > > >
> > > > > > http://lists.infradead.org/pipermail/linux-arm-kernel/2016-
> > > > March/414722.html
> > > > > >
> > > > > > > > > I'd like to step back and come up with some understanding of
> > > > how
> > > > > > > > > non-broken firmware *should* deal with this issue.  Then, if
> > > > we *do*
> > > > > > > > > work around this particular broken firmware in the kernel, it
> > > > would be
> > > > > > > > > nice to do it in a way that fits in with that understanding.
> > > > > > > > >
> > > > > > > > > For example, if a companion ACPI device is the preferred
> > > > solution, an
> > > > > > > > > ACPI quirk could fabricate a device with the required
> > > > resources.  That
> > > > > > > > > would address the problem closer to the source and make it
> > > > more likely
> > > > > > > > > that the rest of the system will work correctly: /proc/iomem
> > > > could
> > > > > > > > > make sense, things that look at _CRS generically would work
> > > > (e.g,
> > > > > > > > > /sys/, an admittedly hypothetical "lsacpi", etc.)
> > > > > > > > >
> > > > > > > > > Hard-coding stuff in drivers is a point solution that doesn't
> > > > provide
> > > > > > > > > any guidance for future platforms and makes it likely that
> > > > the hack
> > > > > > > > > will get copied into even more drivers.
> > > > > > > > >
> > > > > > > >
> > > > > > > > OK, I see. But the guidance for future platforms should be 'do
> > > > not
> > > > > > > > rely on quirks', and what I am arguing here is that the more we
> > > > polish
> > > > > > > > up this code and make it clean and reusable, the more likely it
> > > > is
> > > > > > > > that will end up getting abused by new broken hardware that we
> > > > set out
> > > > > > > > to reject entirely in the first place.
> > > > > > > >
> > > > > > > > So of course, if the quirk involves claiming resources, let's
> > > > make
> > > > > > > > sure that this occurs in the cleanest and most compliant way
> > > > possible.
> > > > > > > > But any factoring/reuse concerns other than for the current
> > > > crop of
> > > > > > > > broken hardware should be avoided imo.
> > > > > > >
> > > > > > > If future hardware is completely ECAM-compliant and we don't need
> > > > any
> > > > > > > more MCFG quirks, that would be great.
> > > > > >
> > > > > > Yes.
> > > > > >
> > > > > > > But we'll still need to describe that memory-mapped config space
> > > > > > > somewhere.  If that's done with PNP0C02 or similar devices (as is
> > > > done
> > > > > > > on my x86 laptop), we'd be all set.
> > > > > >
> > > > > > I am not sure I understand what you mean here. Are you referring
> > > > > > to MCFG regions reported as PNP0c02 resources through its _CRS ?
> > > > >
> > > > > Yes.  PCI Firmware Spec r3.0, Table 4-2, note 2 says address ranges
> > > > > reported via MCFG or _CBA should be reserved by _CRS of a PNP0C02
> > > > > device.
> > > > 
> > > > Ok, that's agreed. It goes without saying that since you are quoting
> > > > the PCI spec, if FW fails to report MCFG regions in a PNP0c02 device
> > > > _CRS I will consider that a FW bug.
> > > > 
> > > > > > IIUC PNP0C02 is a reservation mechanism, but it does not help us
> > > > > > associate its _CRS to a specific PCI host bridge instance, right ?
> > > > >
> > > > > Gab proposed a hierarchy that *would* associate a PNP0C02 device with
> > > > > a PCI bridge:
> > > > >
> > > > >   Device (PCI1) {
> > > > >     Name (_HID, "HISI0080") // PCI Express Root Bridge
> > > > >     Name (_CID, "PNP0A03") // Compatible PCI Root Bridge
> > > > >     Method (_CRS, 0, Serialized) { // Root complex resources
> > > > (windows) }
> > > > >     Device (RES0) {
> > > > >       Name (_HID, "HISI0081") // HiSi PCIe RC config base address
> > > > >       Name (_CID, "PNP0C02")  // Motherboard reserved resource
> > > > >       Name (_CRS, ResourceTemplate () { ... }
> > > > >     }
> > > > >   }
> > > > >
> > > > > That's a possibility.  The PCI Firmware Spec suggests putting RES0 at
> > > > > the root (under \_SB), but I don't know why.
> > > > >
> > > > > Putting it at the root means we couldn't generically associate it
> > > > with
> > > > > a bridge, although I could imagine something like this:
> > > > >
> > > > >   Device (RES1) {
> > > > >     Name (_HID, "HISI0081") // HiSi PCIe RC config base address
> > > > >     Name (_CID, "PNP0C02")  // Motherboard reserved resource
> > > > >     Name (_CRS, ResourceTemplate () { ... }
> > > > >     Method (BRDG) { "PCI1" }  // hand-wavy ASL
> > > > >   }
> > > > >   Device (PCI1) {
> > > > >     Name (_HID, "HISI0080") // PCI Express Root Bridge
> > > > >     Name (_CID, "PNP0A03") // Compatible PCI Root Bridge
> > > > >     Method (_CRS, 0, Serialized) { // Root complex resources
> > > > (windows) }
> > > > >   }
> > > > >
> > > > > Where you could search PNP0C02 devices for a cookie that matched the
> > > > > host bridge.o
> > > > 
> > > > Ok, I am fine with both and I think we are converging, but the way
> > > > to solve this problem has to be uniform for all ARM partners (and
> > > > not only ARM). Two points here:
> > > > 
> > > > 1) Adding a device/subdevice allows people to add a _CRS reporting the
> > > >    non-window bridge resources. Fine. It also allows people to chuck in
> > > >    there all sorts of _DSD properties to describe their PCI host bridge
> > > >    as it is done with DT properties (those _DSD can contain eg clocks
> > > >    etc.), this may be tempting (so that they can reuse the same DT
> > > >    driver and do not have to update their firmware) but I want to be
> > > >    clear here: that must not happen. So, a subdevice with a _CRS to
> > > >    report resources, yes, but it will stop there.
> > > > 2) It is unclear to me how to formalize the above. People should not
> > > >    write FW by reading the PCI mailing list, so these guidelines have
> > > > to
> > > >    be written, somehow. I do not want to standardize quirks, I want
> > > >    to prevent random ACPI table content, which is different.
> > > >    Should I report this to the ACPI spec working group ? If we do
> > > >    not do that everyone will go solve this problem as they deem fit.
> > > > 
> > > 
> > > Do we really need to formalize this?
> > > 
> > > As we discussed in the Linaro call at the moment we have few vendors
> > > that need quirks and we want to avoid promoting/accepting quirks for
> > > the future.
> > > 
> > > At the time of the call I think we decided to informally accept a set
> > > of quirks for the current platforms and reject any other quirk coming
> > > after a certain date/kernel version (this to be decided).
> > > 
> > > I am not sure if there is a way to document/formalize a temporary
> > > exception from the rule...
> > 
> > - (1) will be enforced.
> 
> I'm not sure it's necessary or possible to enforce a "no future
> quirks" rule.  For one thing, there's already a pretty strong
> incentive to avoid quirks: if your hardware doesn't require quirks,
> it works with OSes already in the field.
> 
> MCFG quirks allow us to use the generic ACPI pci_root.c driver even if
> the hardware doesn't support ECAM quite according to the spec.
> 
> PNP0C02 usage is a workaround for the failure of the Consumer/Producer
> bit.  PNP0C02 quirks compensate for firmware that doesn't describe
> resource usage accurately.  It's possible the ACPI spec folks could
> come up with a better Consumer/Producer workaround, if that's needed.
> Apparently x86 hasn't needed it yet.
> 
> If people add _DSD methods for clocks or whatnot, the hardware won't
> work with the generic pci_root.c driver, so there's already an
> incentive for avoiding them.  x86 has managed without such methods;
> arm64 should be able to do the same.

Re-reading this, I'm afraid my response sounds a little dismissive,
and I feel like I'm missing some important information.  So I
apologize if I missed your whole point, Lorenzo.

Bjorn
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Lorenzo Pieralisi Sept. 23, 2016, 10:11 a.m. UTC | #21
[+ Zhang Rui]

On Thu, Sep 22, 2016 at 05:10:42PM -0500, Bjorn Helgaas wrote:
> On Thu, Sep 22, 2016 at 01:31:01PM -0500, Bjorn Helgaas wrote:
> > On Thu, Sep 22, 2016 at 01:44:46PM +0100, Lorenzo Pieralisi wrote:
> > > On Thu, Sep 22, 2016 at 11:10:13AM +0000, Gabriele Paoloni wrote:
> > > > Hi Lorenzo, Bjorn
> > > > 
> > > > > -----Original Message-----
> > > > > From: Lorenzo Pieralisi [mailto:lorenzo.pieralisi@arm.com]
> > > > > Sent: 22 September 2016 10:50
> > > > > To: Bjorn Helgaas
> > > > > Cc: Ard Biesheuvel; Tomasz Nowicki; David Daney; Will Deacon; Catalin
> > > > > Marinas; Rafael Wysocki; Arnd Bergmann; Hanjun Guo; Sinan Kaya;
> > > > > Jayachandran C; Christopher Covington; Duc Dang; Robert Richter; Marcin
> > > > > Wojtas; Liviu Dudau; Wangyijing; Mark Salter; linux-
> > > > > pci@vger.kernel.org; linux-arm-kernel@lists.infradead.org; Linaro ACPI
> > > > > Mailman List; Jon Masters; Andrea Gallo; Jeremy Linton; liudongdong
> > > > > (C); Gabriele Paoloni; Jeff Hugo; linux-acpi@vger.kernel.org; linux-
> > > > > kernel@vger.kernel.org; Rafael J. Wysocki
> > > > > Subject: Re: [PATCH V6 3/5] PCI: thunder-pem: Allow to probe PEM-
> > > > > specific register range for ACPI case
> > > > > 
> > > > > On Wed, Sep 21, 2016 at 01:04:57PM -0500, Bjorn Helgaas wrote:
> > > > > > On Wed, Sep 21, 2016 at 03:05:49PM +0100, Lorenzo Pieralisi wrote:
> > > > > > > On Tue, Sep 20, 2016 at 02:17:44PM -0500, Bjorn Helgaas wrote:
> > > > > > > > On Tue, Sep 20, 2016 at 04:09:25PM +0100, Ard Biesheuvel wrote:
> > > > > > >
> > > > > > > [...]
> > > > > > >
> > > > > > > > > None of these platforms can be fixed entirely in software, and
> > > > > given
> > > > > > > > > that we will not be adding quirks for new broken hardware, we
> > > > > should
> > > > > > > > > ask ourselves whether having two versions of a quirk, i.e., one
> > > > > for
> > > > > > > > > broken hardware + currently shipping firmware, and one for the
> > > > > same
> > > > > > > > > broken hardware with fixed firmware is really an improvement
> > > > > over what
> > > > > > > > > has been proposed here.
> > > > > > > >
> > > > > > > > We're talking about two completely different types of quirks:
> > > > > > > >
> > > > > > > >   1) MCFG quirks to use memory-mapped config space that doesn't
> > > > > quite
> > > > > > > >      conform to the ECAM model in the PCIe spec, and
> > > > > > > >
> > > > > > > >   2) Some yet-to-be-determined method to describe address space
> > > > > > > >      consumed by a bridge.
> > > > > > > >
> > > > > > > > The first two patches of this series are a nice implementation
> > > > > for 1).
> > > > > > > > The third patch (ThunderX-specific) is one possibility for 2),
> > > > > but I
> > > > > > > > don't like it because there's no way for generic software like
> > > > > the
> > > > > > > > ACPI core to discover these resources.
> > > > > > >
> > > > > > > Ok, so basically this means that to implement (2) we need to assign
> > > > > > > some sort of _HID to these quirky PCI bridges (so that we know what
> > > > > > > device they represent and we can retrieve their _CRS). I take from
> > > > > > > this discussion that the goal is to make sure that all non-config
> > > > > > > resources have to be declared through _CRS device objects, which is
> > > > > > > fine but that requires a FW update (unless we can fabricate ACPI
> > > > > > > devices and corresponding _CRS in the kernel whenever we match a
> > > > > > > given MCFG table signature).
> > > > > >
> > > > > > All resources consumed by ACPI devices should be declared through
> > > > > > _CRS.  If you want to fabricate ACPI devices or _CRS via kernel
> > > > > > quirks, that's fine with me.  This could be triggered via MCFG
> > > > > > signature, DMI info, host bridge _HID, etc.
> > > > > 
> > > > > I think the PNP quirk approach + PNP0c02 resource put forward by Gab
> > > > > is enough.
> > > > 
> > > > Great thanks as we take a final decision I will ask Dogndgong to submit
> > > > another RFC based on this approach
> > > > 
> > > > > 
> > > > > > > We discussed this already and I think we should make a decision:
> > > > > > >
> > > > > > > http://lists.infradead.org/pipermail/linux-arm-kernel/2016-
> > > > > March/414722.html
> > > > > > >
> > > > > > > > > > I'd like to step back and come up with some understanding of
> > > > > how
> > > > > > > > > > non-broken firmware *should* deal with this issue.  Then, if
> > > > > we *do*
> > > > > > > > > > work around this particular broken firmware in the kernel, it
> > > > > would be
> > > > > > > > > > nice to do it in a way that fits in with that understanding.
> > > > > > > > > >
> > > > > > > > > > For example, if a companion ACPI device is the preferred
> > > > > solution, an
> > > > > > > > > > ACPI quirk could fabricate a device with the required
> > > > > resources.  That
> > > > > > > > > > would address the problem closer to the source and make it
> > > > > more likely
> > > > > > > > > > that the rest of the system will work correctly: /proc/iomem
> > > > > could
> > > > > > > > > > make sense, things that look at _CRS generically would work
> > > > > (e.g,
> > > > > > > > > > /sys/, an admittedly hypothetical "lsacpi", etc.)
> > > > > > > > > >
> > > > > > > > > > Hard-coding stuff in drivers is a point solution that doesn't
> > > > > provide
> > > > > > > > > > any guidance for future platforms and makes it likely that
> > > > > the hack
> > > > > > > > > > will get copied into even more drivers.
> > > > > > > > > >
> > > > > > > > >
> > > > > > > > > OK, I see. But the guidance for future platforms should be 'do
> > > > > not
> > > > > > > > > rely on quirks', and what I am arguing here is that the more we
> > > > > polish
> > > > > > > > > up this code and make it clean and reusable, the more likely it
> > > > > is
> > > > > > > > > that will end up getting abused by new broken hardware that we
> > > > > set out
> > > > > > > > > to reject entirely in the first place.
> > > > > > > > >
> > > > > > > > > So of course, if the quirk involves claiming resources, let's
> > > > > make
> > > > > > > > > sure that this occurs in the cleanest and most compliant way
> > > > > possible.
> > > > > > > > > But any factoring/reuse concerns other than for the current
> > > > > crop of
> > > > > > > > > broken hardware should be avoided imo.
> > > > > > > >
> > > > > > > > If future hardware is completely ECAM-compliant and we don't need
> > > > > any
> > > > > > > > more MCFG quirks, that would be great.
> > > > > > >
> > > > > > > Yes.
> > > > > > >
> > > > > > > > But we'll still need to describe that memory-mapped config space
> > > > > > > > somewhere.  If that's done with PNP0C02 or similar devices (as is
> > > > > done
> > > > > > > > on my x86 laptop), we'd be all set.
> > > > > > >
> > > > > > > I am not sure I understand what you mean here. Are you referring
> > > > > > > to MCFG regions reported as PNP0c02 resources through its _CRS ?
> > > > > >
> > > > > > Yes.  PCI Firmware Spec r3.0, Table 4-2, note 2 says address ranges
> > > > > > reported via MCFG or _CBA should be reserved by _CRS of a PNP0C02
> > > > > > device.
> > > > > 
> > > > > Ok, that's agreed. It goes without saying that since you are quoting
> > > > > the PCI spec, if FW fails to report MCFG regions in a PNP0c02 device
> > > > > _CRS I will consider that a FW bug.
> > > > > 
> > > > > > > IIUC PNP0C02 is a reservation mechanism, but it does not help us
> > > > > > > associate its _CRS to a specific PCI host bridge instance, right ?
> > > > > >
> > > > > > Gab proposed a hierarchy that *would* associate a PNP0C02 device with
> > > > > > a PCI bridge:
> > > > > >
> > > > > >   Device (PCI1) {
> > > > > >     Name (_HID, "HISI0080") // PCI Express Root Bridge
> > > > > >     Name (_CID, "PNP0A03") // Compatible PCI Root Bridge
> > > > > >     Method (_CRS, 0, Serialized) { // Root complex resources
> > > > > (windows) }
> > > > > >     Device (RES0) {
> > > > > >       Name (_HID, "HISI0081") // HiSi PCIe RC config base address
> > > > > >       Name (_CID, "PNP0C02")  // Motherboard reserved resource
> > > > > >       Name (_CRS, ResourceTemplate () { ... }
> > > > > >     }
> > > > > >   }
> > > > > >
> > > > > > That's a possibility.  The PCI Firmware Spec suggests putting RES0 at
> > > > > > the root (under \_SB), but I don't know why.
> > > > > >
> > > > > > Putting it at the root means we couldn't generically associate it
> > > > > with
> > > > > > a bridge, although I could imagine something like this:
> > > > > >
> > > > > >   Device (RES1) {
> > > > > >     Name (_HID, "HISI0081") // HiSi PCIe RC config base address
> > > > > >     Name (_CID, "PNP0C02")  // Motherboard reserved resource
> > > > > >     Name (_CRS, ResourceTemplate () { ... }
> > > > > >     Method (BRDG) { "PCI1" }  // hand-wavy ASL
> > > > > >   }
> > > > > >   Device (PCI1) {
> > > > > >     Name (_HID, "HISI0080") // PCI Express Root Bridge
> > > > > >     Name (_CID, "PNP0A03") // Compatible PCI Root Bridge
> > > > > >     Method (_CRS, 0, Serialized) { // Root complex resources
> > > > > (windows) }
> > > > > >   }
> > > > > >
> > > > > > Where you could search PNP0C02 devices for a cookie that matched the
> > > > > > host bridge.o
> > > > > 
> > > > > Ok, I am fine with both and I think we are converging, but the way
> > > > > to solve this problem has to be uniform for all ARM partners (and
> > > > > not only ARM). Two points here:
> > > > > 
> > > > > 1) Adding a device/subdevice allows people to add a _CRS reporting the
> > > > >    non-window bridge resources. Fine. It also allows people to chuck in
> > > > >    there all sorts of _DSD properties to describe their PCI host bridge
> > > > >    as it is done with DT properties (those _DSD can contain eg clocks
> > > > >    etc.), this may be tempting (so that they can reuse the same DT
> > > > >    driver and do not have to update their firmware) but I want to be
> > > > >    clear here: that must not happen. So, a subdevice with a _CRS to
> > > > >    report resources, yes, but it will stop there.
> > > > > 2) It is unclear to me how to formalize the above. People should not
> > > > >    write FW by reading the PCI mailing list, so these guidelines have
> > > > > to
> > > > >    be written, somehow. I do not want to standardize quirks, I want
> > > > >    to prevent random ACPI table content, which is different.
> > > > >    Should I report this to the ACPI spec working group ? If we do
> > > > >    not do that everyone will go solve this problem as they deem fit.
> > > > > 
> > > > 
> > > > Do we really need to formalize this?
> > > > 
> > > > As we discussed in the Linaro call at the moment we have few vendors
> > > > that need quirks and we want to avoid promoting/accepting quirks for
> > > > the future.
> > > > 
> > > > At the time of the call I think we decided to informally accept a set
> > > > of quirks for the current platforms and reject any other quirk coming
> > > > after a certain date/kernel version (this to be decided).
> > > > 
> > > > I am not sure if there is a way to document/formalize a temporary
> > > > exception from the rule...
> > > 
> > > - (1) will be enforced.
> > 
> > I'm not sure it's necessary or possible to enforce a "no future
> > quirks" rule.  For one thing, there's already a pretty strong
> > incentive to avoid quirks: if your hardware doesn't require quirks,
> > it works with OSes already in the field.
> > 
> > MCFG quirks allow us to use the generic ACPI pci_root.c driver even if
> > the hardware doesn't support ECAM quite according to the spec.
> > 
> > PNP0C02 usage is a workaround for the failure of the Consumer/Producer
> > bit.  PNP0C02 quirks compensate for firmware that doesn't describe
> > resource usage accurately.  It's possible the ACPI spec folks could
> > come up with a better Consumer/Producer workaround, if that's needed.
> > Apparently x86 hasn't needed it yet.
> > 
> > If people add _DSD methods for clocks or whatnot, the hardware won't
> > work with the generic pci_root.c driver, so there's already an
> > incentive for avoiding them.  x86 has managed without such methods;
> > arm64 should be able to do the same.
> 
> Re-reading this, I'm afraid my response sounds a little dismissive,
> and I feel like I'm missing some important information.  So I
> apologize if I missed your whole point, Lorenzo.

No you are spot on, I just wanted to emphasize, given that we are
adding an _HID and a subdevice, that developer should not be tempted
to use it to match against a PCI host driver to reuse the DT code,
we should not use the quirk mechanism as a backdoor to re-using DT
drivers in ACPI context.

Anyway, there is a review process to spot these possible misuses,
mine was just a heads-up, quirks will happen, I just do not want
to wreak the standard ACPI PCI firmware model to support them.

Given that there are already PNP0c02 bindings out there where the
PNP0c02 is used as in Gab's example:

https://patchwork.kernel.org/patch/4757111/

I think the only pending question I have is whether we are allowed
to define a PNP0A03 subdevice with a _CRS resource space that is
not contained in its parent _CRS, if we answer this question I
think we are done.

I will raise the PNP0c02 usage issue with the ASWG anyway.

Thanks !
Lorenzo
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Tomasz Nowicki Sept. 23, 2016, 10:53 a.m. UTC | #22
Hi Bjorn,

On 21.09.2016 21:18, Bjorn Helgaas wrote:
> On Wed, Sep 21, 2016 at 11:58:22AM -0700, Duc Dang wrote:
>> On Wed, Sep 21, 2016 at 11:04 AM, Bjorn Helgaas <helgaas@kernel.org> wrote:
>>> On Wed, Sep 21, 2016 at 03:05:49PM +0100, Lorenzo Pieralisi wrote:
>
>>> The existing x86 practice is to use PNP0C02 devices for this purpose,
>>> and I think we should just follow that practice.
>>>
>>> ...
>>>
>>> My point is that the hard-coding should not be buried in a driver
>>> where it's invisible to the rest of the kernel.  If we hard-code it in
>>> a quirk that adds _CRS entries, then the kernel will work just like it
>>> would if the firmware had been correct in the first place.  The
>>> resource will appear in /sys/devices/pnp*/*/resources and /proc/iomem,
>>> and if we ever used _SRS to assign or move ACPI devices, we would know
>>> to avoid the bridge resource.
>>
>> Are you suggesting to add code similar to functions in
>> linux/drivers/pnp/quirks.c to declare/attach the additional resource
>> that the host need to have when the resource is not in MCFG table?
>
> Yes, but what I'm suggesting is actually a little stronger.  This has
> nothing to do with whether a resource is in the MCFG table or not.
>
> I'm suggesting ACPI firmware should always describe the resource.  If the
> firmware is defective and doesn't describe it, we should add a quirk in
> pnp/quirks.c to add a resource for it.
>

Thanks for pointers Bjorn.

ThunderX is the case where we cannot change firmware, also it has no 
PNP0c02 device in tables. So in order to use pnp/quirks.c we would have 
to fabricate PNP0c02 in kernel and then add quirk entry. I am looking 
for the best place to put such emulation code but it seems not trivial.

Thanks,
Tomasz

--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Gabriele Paoloni Sept. 23, 2016, 10:58 a.m. UTC | #23
Hi Lorenzo

> -----Original Message-----
> From: linux-kernel-owner@vger.kernel.org [mailto:linux-kernel-
> owner@vger.kernel.org] On Behalf Of Lorenzo Pieralisi
> Sent: 23 September 2016 11:12
> To: Bjorn Helgaas
> Cc: Gabriele Paoloni; Ard Biesheuvel; Tomasz Nowicki; David Daney; Will
> Deacon; Catalin Marinas; Rafael Wysocki; Arnd Bergmann; Hanjun Guo;
> Sinan Kaya; Jayachandran C; Christopher Covington; Duc Dang; Robert
> Richter; Marcin Wojtas; Liviu Dudau; Wangyijing; Mark Salter; linux-
> pci@vger.kernel.org; linux-arm-kernel@lists.infradead.org; Linaro ACPI
> Mailman List; Jon Masters; Andrea Gallo; Jeremy Linton; liudongdong
> (C); Jeff Hugo; linux-acpi@vger.kernel.org; linux-
> kernel@vger.kernel.org; Rafael J. Wysocki; rui.zhang@intel.com
> Subject: Re: [PATCH V6 3/5] PCI: thunder-pem: Allow to probe PEM-
> specific register range for ACPI case
> 
> [+ Zhang Rui]
> 
> On Thu, Sep 22, 2016 at 05:10:42PM -0500, Bjorn Helgaas wrote:
> > On Thu, Sep 22, 2016 at 01:31:01PM -0500, Bjorn Helgaas wrote:
> > > On Thu, Sep 22, 2016 at 01:44:46PM +0100, Lorenzo Pieralisi wrote:
> > > > On Thu, Sep 22, 2016 at 11:10:13AM +0000, Gabriele Paoloni wrote:
> > > > > Hi Lorenzo, Bjorn
> > > > >
> > > > > > -----Original Message-----
> > > > > > From: Lorenzo Pieralisi [mailto:lorenzo.pieralisi@arm.com]
> > > > > > Sent: 22 September 2016 10:50
> > > > > > To: Bjorn Helgaas
> > > > > > Cc: Ard Biesheuvel; Tomasz Nowicki; David Daney; Will Deacon;
> Catalin
> > > > > > Marinas; Rafael Wysocki; Arnd Bergmann; Hanjun Guo; Sinan
> Kaya;
> > > > > > Jayachandran C; Christopher Covington; Duc Dang; Robert
> Richter; Marcin
> > > > > > Wojtas; Liviu Dudau; Wangyijing; Mark Salter; linux-
> > > > > > pci@vger.kernel.org; linux-arm-kernel@lists.infradead.org;
> Linaro ACPI
> > > > > > Mailman List; Jon Masters; Andrea Gallo; Jeremy Linton;
> liudongdong
> > > > > > (C); Gabriele Paoloni; Jeff Hugo; linux-acpi@vger.kernel.org;
> linux-
> > > > > > kernel@vger.kernel.org; Rafael J. Wysocki
> > > > > > Subject: Re: [PATCH V6 3/5] PCI: thunder-pem: Allow to probe
> PEM-
> > > > > > specific register range for ACPI case
> > > > > >
> > > > > > On Wed, Sep 21, 2016 at 01:04:57PM -0500, Bjorn Helgaas
> wrote:
> > > > > > > On Wed, Sep 21, 2016 at 03:05:49PM +0100, Lorenzo Pieralisi
> wrote:
> > > > > > > > On Tue, Sep 20, 2016 at 02:17:44PM -0500, Bjorn Helgaas
> wrote:
> > > > > > > > > On Tue, Sep 20, 2016 at 04:09:25PM +0100, Ard
> Biesheuvel wrote:
> > > > > > > >
> > > > > > > > [...]
> > > > > > > >
> > > > > > > > > > None of these platforms can be fixed entirely in
> software, and
> > > > > > given
> > > > > > > > > > that we will not be adding quirks for new broken
> hardware, we
> > > > > > should
> > > > > > > > > > ask ourselves whether having two versions of a quirk,
> i.e., one
> > > > > > for
> > > > > > > > > > broken hardware + currently shipping firmware, and
> one for the
> > > > > > same
> > > > > > > > > > broken hardware with fixed firmware is really an
> improvement
> > > > > > over what
> > > > > > > > > > has been proposed here.
> > > > > > > > >
> > > > > > > > > We're talking about two completely different types of
> quirks:
> > > > > > > > >
> > > > > > > > >   1) MCFG quirks to use memory-mapped config space that
> doesn't
> > > > > > quite
> > > > > > > > >      conform to the ECAM model in the PCIe spec, and
> > > > > > > > >
> > > > > > > > >   2) Some yet-to-be-determined method to describe
> address space
> > > > > > > > >      consumed by a bridge.
> > > > > > > > >
> > > > > > > > > The first two patches of this series are a nice
> implementation
> > > > > > for 1).
> > > > > > > > > The third patch (ThunderX-specific) is one possibility
> for 2),
> > > > > > but I
> > > > > > > > > don't like it because there's no way for generic
> software like
> > > > > > the
> > > > > > > > > ACPI core to discover these resources.
> > > > > > > >
> > > > > > > > Ok, so basically this means that to implement (2) we need
> to assign
> > > > > > > > some sort of _HID to these quirky PCI bridges (so that we
> know what
> > > > > > > > device they represent and we can retrieve their _CRS). I
> take from
> > > > > > > > this discussion that the goal is to make sure that all
> non-config
> > > > > > > > resources have to be declared through _CRS device
> objects, which is
> > > > > > > > fine but that requires a FW update (unless we can
> fabricate ACPI
> > > > > > > > devices and corresponding _CRS in the kernel whenever we
> match a
> > > > > > > > given MCFG table signature).
> > > > > > >
> > > > > > > All resources consumed by ACPI devices should be declared
> through
> > > > > > > _CRS.  If you want to fabricate ACPI devices or _CRS via
> kernel
> > > > > > > quirks, that's fine with me.  This could be triggered via
> MCFG
> > > > > > > signature, DMI info, host bridge _HID, etc.
> > > > > >
> > > > > > I think the PNP quirk approach + PNP0c02 resource put forward
> by Gab
> > > > > > is enough.
> > > > >
> > > > > Great thanks as we take a final decision I will ask Dogndgong
> to submit
> > > > > another RFC based on this approach
> > > > >
> > > > > >
> > > > > > > > We discussed this already and I think we should make a
> decision:
> > > > > > > >
> > > > > > > > http://lists.infradead.org/pipermail/linux-arm-
> kernel/2016-
> > > > > > March/414722.html
> > > > > > > >
> > > > > > > > > > > I'd like to step back and come up with some
> understanding of
> > > > > > how
> > > > > > > > > > > non-broken firmware *should* deal with this issue.
> Then, if
> > > > > > we *do*
> > > > > > > > > > > work around this particular broken firmware in the
> kernel, it
> > > > > > would be
> > > > > > > > > > > nice to do it in a way that fits in with that
> understanding.
> > > > > > > > > > >
> > > > > > > > > > > For example, if a companion ACPI device is the
> preferred
> > > > > > solution, an
> > > > > > > > > > > ACPI quirk could fabricate a device with the
> required
> > > > > > resources.  That
> > > > > > > > > > > would address the problem closer to the source and
> make it
> > > > > > more likely
> > > > > > > > > > > that the rest of the system will work correctly:
> /proc/iomem
> > > > > > could
> > > > > > > > > > > make sense, things that look at _CRS generically
> would work
> > > > > > (e.g,
> > > > > > > > > > > /sys/, an admittedly hypothetical "lsacpi", etc.)
> > > > > > > > > > >
> > > > > > > > > > > Hard-coding stuff in drivers is a point solution
> that doesn't
> > > > > > provide
> > > > > > > > > > > any guidance for future platforms and makes it
> likely that
> > > > > > the hack
> > > > > > > > > > > will get copied into even more drivers.
> > > > > > > > > > >
> > > > > > > > > >
> > > > > > > > > > OK, I see. But the guidance for future platforms
> should be 'do
> > > > > > not
> > > > > > > > > > rely on quirks', and what I am arguing here is that
> the more we
> > > > > > polish
> > > > > > > > > > up this code and make it clean and reusable, the more
> likely it
> > > > > > is
> > > > > > > > > > that will end up getting abused by new broken
> hardware that we
> > > > > > set out
> > > > > > > > > > to reject entirely in the first place.
> > > > > > > > > >
> > > > > > > > > > So of course, if the quirk involves claiming
> resources, let's
> > > > > > make
> > > > > > > > > > sure that this occurs in the cleanest and most
> compliant way
> > > > > > possible.
> > > > > > > > > > But any factoring/reuse concerns other than for the
> current
> > > > > > crop of
> > > > > > > > > > broken hardware should be avoided imo.
> > > > > > > > >
> > > > > > > > > If future hardware is completely ECAM-compliant and we
> don't need
> > > > > > any
> > > > > > > > > more MCFG quirks, that would be great.
> > > > > > > >
> > > > > > > > Yes.
> > > > > > > >
> > > > > > > > > But we'll still need to describe that memory-mapped
> config space
> > > > > > > > > somewhere.  If that's done with PNP0C02 or similar
> devices (as is
> > > > > > done
> > > > > > > > > on my x86 laptop), we'd be all set.
> > > > > > > >
> > > > > > > > I am not sure I understand what you mean here. Are you
> referring
> > > > > > > > to MCFG regions reported as PNP0c02 resources through its
> _CRS ?
> > > > > > >
> > > > > > > Yes.  PCI Firmware Spec r3.0, Table 4-2, note 2 says
> address ranges
> > > > > > > reported via MCFG or _CBA should be reserved by _CRS of a
> PNP0C02
> > > > > > > device.
> > > > > >
> > > > > > Ok, that's agreed. It goes without saying that since you are
> quoting
> > > > > > the PCI spec, if FW fails to report MCFG regions in a PNP0c02
> device
> > > > > > _CRS I will consider that a FW bug.
> > > > > >
> > > > > > > > IIUC PNP0C02 is a reservation mechanism, but it does not
> help us
> > > > > > > > associate its _CRS to a specific PCI host bridge
> instance, right ?
> > > > > > >
> > > > > > > Gab proposed a hierarchy that *would* associate a PNP0C02
> device with
> > > > > > > a PCI bridge:
> > > > > > >
> > > > > > >   Device (PCI1) {
> > > > > > >     Name (_HID, "HISI0080") // PCI Express Root Bridge
> > > > > > >     Name (_CID, "PNP0A03") // Compatible PCI Root Bridge
> > > > > > >     Method (_CRS, 0, Serialized) { // Root complex
> resources
> > > > > > (windows) }
> > > > > > >     Device (RES0) {
> > > > > > >       Name (_HID, "HISI0081") // HiSi PCIe RC config base
> address
> > > > > > >       Name (_CID, "PNP0C02")  // Motherboard reserved
> resource
> > > > > > >       Name (_CRS, ResourceTemplate () { ... }
> > > > > > >     }
> > > > > > >   }
> > > > > > >
> > > > > > > That's a possibility.  The PCI Firmware Spec suggests
> putting RES0 at
> > > > > > > the root (under \_SB), but I don't know why.
> > > > > > >
> > > > > > > Putting it at the root means we couldn't generically
> associate it
> > > > > > with
> > > > > > > a bridge, although I could imagine something like this:
> > > > > > >
> > > > > > >   Device (RES1) {
> > > > > > >     Name (_HID, "HISI0081") // HiSi PCIe RC config base
> address
> > > > > > >     Name (_CID, "PNP0C02")  // Motherboard reserved
> resource
> > > > > > >     Name (_CRS, ResourceTemplate () { ... }
> > > > > > >     Method (BRDG) { "PCI1" }  // hand-wavy ASL
> > > > > > >   }
> > > > > > >   Device (PCI1) {
> > > > > > >     Name (_HID, "HISI0080") // PCI Express Root Bridge
> > > > > > >     Name (_CID, "PNP0A03") // Compatible PCI Root Bridge
> > > > > > >     Method (_CRS, 0, Serialized) { // Root complex
> resources
> > > > > > (windows) }
> > > > > > >   }
> > > > > > >
> > > > > > > Where you could search PNP0C02 devices for a cookie that
> matched the
> > > > > > > host bridge.o
> > > > > >
> > > > > > Ok, I am fine with both and I think we are converging, but
> the way
> > > > > > to solve this problem has to be uniform for all ARM partners
> (and
> > > > > > not only ARM). Two points here:
> > > > > >
> > > > > > 1) Adding a device/subdevice allows people to add a _CRS
> reporting the
> > > > > >    non-window bridge resources. Fine. It also allows people
> to chuck in
> > > > > >    there all sorts of _DSD properties to describe their PCI
> host bridge
> > > > > >    as it is done with DT properties (those _DSD can contain
> eg clocks
> > > > > >    etc.), this may be tempting (so that they can reuse the
> same DT
> > > > > >    driver and do not have to update their firmware) but I
> want to be
> > > > > >    clear here: that must not happen. So, a subdevice with a
> _CRS to
> > > > > >    report resources, yes, but it will stop there.
> > > > > > 2) It is unclear to me how to formalize the above. People
> should not
> > > > > >    write FW by reading the PCI mailing list, so these
> guidelines have
> > > > > > to
> > > > > >    be written, somehow. I do not want to standardize quirks,
> I want
> > > > > >    to prevent random ACPI table content, which is different.
> > > > > >    Should I report this to the ACPI spec working group ? If
> we do
> > > > > >    not do that everyone will go solve this problem as they
> deem fit.
> > > > > >
> > > > >
> > > > > Do we really need to formalize this?
> > > > >
> > > > > As we discussed in the Linaro call at the moment we have few
> vendors
> > > > > that need quirks and we want to avoid promoting/accepting
> quirks for
> > > > > the future.
> > > > >
> > > > > At the time of the call I think we decided to informally accept
> a set
> > > > > of quirks for the current platforms and reject any other quirk
> coming
> > > > > after a certain date/kernel version (this to be decided).
> > > > >
> > > > > I am not sure if there is a way to document/formalize a
> temporary
> > > > > exception from the rule...
> > > >
> > > > - (1) will be enforced.
> > >
> > > I'm not sure it's necessary or possible to enforce a "no future
> > > quirks" rule.  For one thing, there's already a pretty strong
> > > incentive to avoid quirks: if your hardware doesn't require quirks,
> > > it works with OSes already in the field.
> > >
> > > MCFG quirks allow us to use the generic ACPI pci_root.c driver even
> if
> > > the hardware doesn't support ECAM quite according to the spec.
> > >
> > > PNP0C02 usage is a workaround for the failure of the
> Consumer/Producer
> > > bit.  PNP0C02 quirks compensate for firmware that doesn't describe
> > > resource usage accurately.  It's possible the ACPI spec folks could
> > > come up with a better Consumer/Producer workaround, if that's
> needed.
> > > Apparently x86 hasn't needed it yet.
> > >
> > > If people add _DSD methods for clocks or whatnot, the hardware
> won't
> > > work with the generic pci_root.c driver, so there's already an
> > > incentive for avoiding them.  x86 has managed without such methods;
> > > arm64 should be able to do the same.
> >
> > Re-reading this, I'm afraid my response sounds a little dismissive,
> > and I feel like I'm missing some important information.  So I
> > apologize if I missed your whole point, Lorenzo.
> 
> No you are spot on, I just wanted to emphasize, given that we are
> adding an _HID and a subdevice, that developer should not be tempted
> to use it to match against a PCI host driver to reuse the DT code,
> we should not use the quirk mechanism as a backdoor to re-using DT
> drivers in ACPI context.
> 
> Anyway, there is a review process to spot these possible misuses,
> mine was just a heads-up, quirks will happen, I just do not want
> to wreak the standard ACPI PCI firmware model to support them.
> 
> Given that there are already PNP0c02 bindings out there where the
> PNP0c02 is used as in Gab's example:
> 
> https://patchwork.kernel.org/patch/4757111/
> 
> I think the only pending question I have is whether we are allowed
> to define a PNP0A03 subdevice with a _CRS resource space that is
> not contained in its parent _CRS, if we answer this question I
> think we are done.

FMU part of your question is answered in the PCI Firmware specs
https://members.pcisig.com/wg/PCI-SIG/document/download/8232

Where from note 2 of 4.1.2 I quote: 
"For most systems, the motherboard resource would appear at the root
of the ACPI namespace (under \_SB) in a node with a _HID of EISAID 
(PNP0C02), and the resources in this case should not be claimed in the
root PCI bus's _CRS"

My interpretation is that the resource claimed in the PNP0C02 node
must never be in the PNP0A03 _CRS.

Now about having the PNP0C02 node under \_SB or as a sub-device we see
that the note above points out that most of system have it under \_SB
but I read it as a quite relaxed condition....

BTW this is just my interpretation...

Thanks

Gab 


> 
> I will raise the PNP0c02 usage issue with the ASWG anyway.
> 
> Thanks !
> Lorenzo
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ard Biesheuvel Sept. 14, 2017, 2:06 p.m. UTC | #24
(remove most cc's)

Hi all,

Apologies for digging up this old thread.

I am currently looking into whether it is feasible to refactor some of
the ACPI PCI quirk code we have so it can apply to more instances of
the Synopsys Designware PCIe (i.e., for Marvell and Socionext SOCs),
which all suffer from the same issue that type 0 config TLPs are not
filtered before being sent out on the link, resulting in devices
appearing multiple times.

The pci-hisi.c quirk already appears to do exactly what would solve
the problem on other SoCs as well. So I will try to refactor this code
into something that I could propose for upstream, while probably
getting the ARM SBSA authors to offer some guidance that this IP must
not be used for new designs.

In any case, the chances of any such changes being acceptable upstream
are probably higher if we can reuse the exact same quirk as we have
already implemented for HiSilicon, so I wonder if anyone from Huawei
could comment on some questions I have:
- Why does the config space accessor override use 32-bit accesses only
for bus #0? Is that really necessary?
- Why does the Hisi quirk use different config base addresses for bus
#0 and the other buses? Is this simply because the firmware does not
use contiguous iATU windows for bus #0 and buses #1 and up? So is
there anything mapped in the 1 MB slice at the base of the respective
256 MB mappings that generate type1 config cycles?

Having just a shared set of config space accessor overrides would
already be an improvement, given that they can be shared with a DT
based driver for this IP in ECAM mode as well.

Thanks,
Ard.
Gabriele Paoloni Sept. 26, 2017, 8:23 a.m. UTC | #25
Hi Ard

Sorry to reply late on this

> -----Original Message-----

> From: Ard Biesheuvel [mailto:ard.biesheuvel@linaro.org]

> Sent: 14 September 2017 15:07

> To: Gabriele Paoloni

> Cc: Lorenzo Pieralisi; Hanjun Guo; Marcin Wojtas; Wangyijing; linux-

> pci@vger.kernel.org; linux-arm-kernel@lists.infradead.org; liudongdong

> (C); linux-acpi@vger.kernel.org; linux-kernel@vger.kernel.org

> Subject: Re: [PATCH V6 3/5] PCI: thunder-pem: Allow to probe PEM-

> specific register range for ACPI case

> 

> (remove most cc's)

> 

> Hi all,

> 

> Apologies for digging up this old thread.

> 

> I am currently looking into whether it is feasible to refactor some of

> the ACPI PCI quirk code we have so it can apply to more instances of

> the Synopsys Designware PCIe (i.e., for Marvell and Socionext SOCs),

> which all suffer from the same issue that type 0 config TLPs are not

> filtered before being sent out on the link, resulting in devices

> appearing multiple times.


What do you mean by "filtered"? Looking at
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=5f00f1a0178cf52928366a5e1f376a65f1f3f389
you can see that the issue is that accesses to the root buses config space
are done using a specific HW location rather than the ECAM space area
(used for the rest of the hierarchy).
Also only 32b accessors can operate on the root bus config space.

> 

> The pci-hisi.c quirk already appears to do exactly what would solve

> the problem on other SoCs as well. So I will try to refactor this code

> into something that I could propose for upstream, while probably

> getting the ARM SBSA authors to offer some guidance that this IP must

> not be used for new designs.


Mmm I am not sure how Kernel maintainers would open to accept more quirks...
as I remember we discussed to accept quirks only for existing platform
but not for any future one (i.e. future HW should not use the non-ECAM
DW IP). However this is my understanding, I may be wrong here...

> 

> In any case, the chances of any such changes being acceptable upstream

> are probably higher if we can reuse the exact same quirk as we have

> already implemented for HiSilicon, so I wonder if anyone from Huawei

> could comment on some questions I have:

> - Why does the config space accessor override use 32-bit accesses only

> for bus #0? Is that really necessary?


I have talked to our SoC HW designers and they said that 32b accesses is
a constraint due to Synopsys DW IP limitation and yes it is necessary

> - Why does the Hisi quirk use different config base addresses for bus

> #0 and the other buses? Is this simply because the firmware does not

> use contiguous iATU windows for bus #0 and buses #1 and up? So is


As I remember there is a dedicated continuous device memory slice that is
dedicated for all the root buses (therefore you cannot spilt such area
to have root bus slices contiguous with the different ECAM areas of each
RP hierarchy)

> there anything mapped in the 1 MB slice at the base of the respective

> 256 MB mappings that generate type1 config cycles?


If I remember correctly we map device memory to the 1MB slice but
we never access it (instead we in the accessors we use cfg->busr.start
to understand is the current cfg rd/wr if being done on a root bus and
eventually rout the access to the dedicate root bus cfg space) 

> 

> Having just a shared set of config space accessor overrides would

> already be an improvement, given that they can be shared with a DT

> based driver for this IP in ECAM mode as well.


Probably we could have shared accessors, but again we need to see
if the community is open to accept more quirks

Thanks
Gab

> 

> Thanks,

> Ard.
diff mbox

Patch

diff --git a/drivers/pci/host/pci-thunder-pem.c b/drivers/pci/host/pci-thunder-pem.c
index 6abaf80..b048761 100644
--- a/drivers/pci/host/pci-thunder-pem.c
+++ b/drivers/pci/host/pci-thunder-pem.c
@@ -18,6 +18,7 @@ 
 #include <linux/init.h>
 #include <linux/of_address.h>
 #include <linux/of_pci.h>
+#include <linux/pci-acpi.h>
 #include <linux/pci-ecam.h>
 #include <linux/platform_device.h>
 
@@ -284,6 +285,40 @@  static int thunder_pem_config_write(struct pci_bus *bus, unsigned int devfn,
 	return pci_generic_config_write(bus, devfn, where, size, val);
 }
 
+#ifdef CONFIG_ACPI
+static struct resource thunder_pem_reg_res[] = {
+	[4] = DEFINE_RES_MEM(0x87e0c0000000UL, SZ_16M),
+	[5] = DEFINE_RES_MEM(0x87e0c1000000UL, SZ_16M),
+	[6] = DEFINE_RES_MEM(0x87e0c2000000UL, SZ_16M),
+	[7] = DEFINE_RES_MEM(0x87e0c3000000UL, SZ_16M),
+	[8] = DEFINE_RES_MEM(0x87e0c4000000UL, SZ_16M),
+	[9] = DEFINE_RES_MEM(0x87e0c5000000UL, SZ_16M),
+	[14] = DEFINE_RES_MEM(0x97e0c0000000UL, SZ_16M),
+	[15] = DEFINE_RES_MEM(0x97e0c1000000UL, SZ_16M),
+	[16] = DEFINE_RES_MEM(0x97e0c2000000UL, SZ_16M),
+	[17] = DEFINE_RES_MEM(0x97e0c3000000UL, SZ_16M),
+	[18] = DEFINE_RES_MEM(0x97e0c4000000UL, SZ_16M),
+	[19] = DEFINE_RES_MEM(0x97e0c5000000UL, SZ_16M),
+};
+
+static struct resource *thunder_pem_acpi_res(struct pci_config_window *cfg)
+{
+	struct acpi_device *adev = to_acpi_device(cfg->parent);
+	struct acpi_pci_root *root = acpi_driver_data(adev);
+
+	if ((root->segment >= 4 && root->segment <= 9) ||
+	    (root->segment >= 14 && root->segment <= 19))
+		return &thunder_pem_reg_res[root->segment];
+
+	return NULL;
+}
+#else
+static struct resource *thunder_pem_acpi_res(struct pci_config_window *cfg)
+{
+	return NULL;
+}
+#endif
+
 static int thunder_pem_init(struct pci_config_window *cfg)
 {
 	struct device *dev = cfg->parent;
@@ -292,24 +327,24 @@  static int thunder_pem_init(struct pci_config_window *cfg)
 	struct thunder_pem_pci *pem_pci;
 	struct platform_device *pdev;
 
-	/* Only OF support for now */
-	if (!dev->of_node)
-		return -EINVAL;
-
 	pem_pci = devm_kzalloc(dev, sizeof(*pem_pci), GFP_KERNEL);
 	if (!pem_pci)
 		return -ENOMEM;
 
-	pdev = to_platform_device(dev);
-
-	/*
-	 * The second register range is the PEM bridge to the PCIe
-	 * bus.  It has a different config access method than those
-	 * devices behind the bridge.
-	 */
-	res_pem = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (acpi_disabled) {
+		pdev = to_platform_device(dev);
+
+		/*
+		 * The second register range is the PEM bridge to the PCIe
+		 * bus.  It has a different config access method than those
+		 * devices behind the bridge.
+		 */
+		res_pem = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	} else {
+		res_pem = thunder_pem_acpi_res(cfg);
+	}
 	if (!res_pem) {
-		dev_err(dev, "missing \"reg[1]\"property\n");
+		dev_err(dev, "missing configuration region\n");
 		return -EINVAL;
 	}