Message ID | 20241112064813.751736-1-jpatel2@marvell.com |
---|---|
State | New |
Headers | show |
Series | [1/1] PCI: armada8k: Add link-down handle | expand |
Overall, your recent patches look like they're part of a series - they are dependent on each other, but you haven't sent them as a series. They need to be sent as a series so people know what order these patches should be applied in, and that they're all related. On Mon, Nov 11, 2024 at 10:48:13PM -0800, Jenishkumar Maheshbhai Patel wrote: > In PCIE ISR routine caused by RST_LINK_DOWN > we schedule work to handle the link-down procedure. > Link-down procedure will: > 1. Remove PCIe bus > 2. Reset the MAC > 3. Reconfigure link back up > 4. Rescan PCIe bus > > Signed-off-by: Jenishkumar Maheshbhai Patel <jpatel2@marvell.com> > --- > drivers/pci/controller/dwc/pcie-armada8k.c | 84 ++++++++++++++++++++++ > 1 file changed, 84 insertions(+) > > diff --git a/drivers/pci/controller/dwc/pcie-armada8k.c b/drivers/pci/controller/dwc/pcie-armada8k.c > index 07775539b321..b1b48c2016f7 100644 > --- a/drivers/pci/controller/dwc/pcie-armada8k.c > +++ b/drivers/pci/controller/dwc/pcie-armada8k.c > @@ -21,6 +21,8 @@ > #include <linux/platform_device.h> > #include <linux/resource.h> > #include <linux/of_pci.h> > +#include <linux/mfd/syscon.h> > +#include <linux/regmap.h> > > #include "pcie-designware.h" > > @@ -32,6 +34,9 @@ struct armada8k_pcie { > struct clk *clk_reg; > struct phy *phy[ARMADA8K_PCIE_MAX_LANES]; > unsigned int phy_count; > + struct regmap *sysctrl_base; > + u32 mac_rest_bitmask; > + struct work_struct recover_link_work; > }; > > #define PCIE_VENDOR_REGS_OFFSET 0x8000 > @@ -72,6 +77,8 @@ struct armada8k_pcie { > #define AX_USER_DOMAIN_MASK 0x3 > #define AX_USER_DOMAIN_SHIFT 4 > > +#define UNIT_SOFT_RESET_CONFIG_REG 0x268 > + > #define to_armada8k_pcie(x) dev_get_drvdata((x)->dev) > > static void armada8k_pcie_disable_phys(struct armada8k_pcie *pcie) > @@ -216,6 +223,65 @@ static int armada8k_pcie_host_init(struct dw_pcie_rp *pp) > return 0; > } > > +static void armada8k_pcie_recover_link(struct work_struct *ws) > +{ > + struct armada8k_pcie *pcie = container_of(ws, struct armada8k_pcie, recover_link_work); > + struct dw_pcie_rp *pp = &pcie->pci->pp; > + struct pci_bus *bus = pp->bridge->bus; > + struct pci_dev *root_port; > + int ret; > + > + root_port = pci_get_slot(bus, 0); > + if (!root_port) { > + dev_err(pcie->pci->dev, "failed to get root port\n"); > + return; > + } > + pci_lock_rescan_remove(); > + pci_stop_and_remove_bus_device(root_port); > + /* > + * Sleep needed to make sure all pcie transactions and access > + * are flushed before resetting the mac > + */ > + msleep(100); > + > + /* Reset mac */ > + regmap_update_bits_base(pcie->sysctrl_base, UNIT_SOFT_RESET_CONFIG_REG, > + pcie->mac_rest_bitmask, 0, NULL, false, true); > + udelay(1); > + regmap_update_bits_base(pcie->sysctrl_base, UNIT_SOFT_RESET_CONFIG_REG, > + pcie->mac_rest_bitmask, pcie->mac_rest_bitmask, > + NULL, false, true); > + udelay(1); > + > + ret = dw_pcie_setup_rc(pp); > + if (ret) > + goto fail; > + > + ret = armada8k_pcie_host_init(pp); > + if (ret) { > + dev_err(pcie->pci->dev, "failed to initialize host: %d\n", ret); > + goto fail; > + } > + > + if (!dw_pcie_link_up(pcie->pci)) { > + ret = dw_pcie_start_link(pcie->pci); > + if (ret) > + goto fail; > + } > + > + /* Wait until the link becomes active again */ > + if (dw_pcie_wait_for_link(pcie->pci)) > + dev_err(pcie->pci->dev, "Link not up after reconfiguration\n"); > + > + bus = NULL; > + while ((bus = pci_find_next_bus(bus)) != NULL) > + pci_rescan_bus(bus); > + > +fail: > + pci_unlock_rescan_remove(); > + pci_dev_put(root_port); > +} > + > static irqreturn_t armada8k_pcie_irq_handler(int irq, void *arg) > { > struct armada8k_pcie *pcie = arg; > @@ -253,6 +319,9 @@ static irqreturn_t armada8k_pcie_irq_handler(int irq, void *arg) > * initiate a link retrain. If link retrains were > * possible, that is. > */ > + if (pcie->sysctrl_base && pcie->mac_rest_bitmask) > + schedule_work(&pcie->recover_link_work); > + > dev_dbg(pci->dev, "%s: link went down\n", __func__); > } > > @@ -322,6 +391,8 @@ static int armada8k_pcie_probe(struct platform_device *pdev) > > pcie->pci = pci; > > + INIT_WORK(&pcie->recover_link_work, armada8k_pcie_recover_link); > + > pcie->clk = devm_clk_get(dev, NULL); > if (IS_ERR(pcie->clk)) > return PTR_ERR(pcie->clk); > @@ -349,6 +420,19 @@ static int armada8k_pcie_probe(struct platform_device *pdev) > goto fail_clkreg; > } > > + pcie->sysctrl_base = syscon_regmap_lookup_by_phandle(pdev->dev.of_node, > + "marvell,system-controller"); > + if (IS_ERR(pcie->sysctrl_base)) { > + dev_warn(dev, "failed to find marvell,system-controller\n"); > + pcie->sysctrl_base = 0x0; > + } > + > + ret = of_property_read_u32(pdev->dev.of_node, "marvell,mac-reset-bit-mask", > + &pcie->mac_rest_bitmask); > + if (ret < 0) { > + dev_warn(dev, "couldn't find mac reset bit mask: %d\n", ret); > + pcie->mac_rest_bitmask = 0x0; > + } > ret = armada8k_pcie_setup_phys(pcie); > if (ret) > goto fail_clkreg; > -- > 2.25.1 > > >
In subject: PCI: armada8k: Add link-down handling On Mon, Nov 11, 2024 at 10:48:13PM -0800, Jenishkumar Maheshbhai Patel wrote: > In PCIE ISR routine caused by RST_LINK_DOWN > we schedule work to handle the link-down procedure. > Link-down procedure will: > 1. Remove PCIe bus > 2. Reset the MAC > 3. Reconfigure link back up > 4. Rescan PCIe bus s/PCIE/PCIe/ Rewrap to fill 75 columns. I assume this basically removes a Root Port (and the hierarchy below it) if the link goes down, and then resets the MAC and tries to bring up the link and enumerate the hierarchy again. No other drivers do this, so why does armada8k need it? Is this to work around some unreliable link? I would think this would be reported via AER and possibly handled there already, but apparently not? > Signed-off-by: Jenishkumar Maheshbhai Patel <jpatel2@marvell.com> > --- > drivers/pci/controller/dwc/pcie-armada8k.c | 84 ++++++++++++++++++++++ > 1 file changed, 84 insertions(+) > > diff --git a/drivers/pci/controller/dwc/pcie-armada8k.c b/drivers/pci/controller/dwc/pcie-armada8k.c > index 07775539b321..b1b48c2016f7 100644 > --- a/drivers/pci/controller/dwc/pcie-armada8k.c > +++ b/drivers/pci/controller/dwc/pcie-armada8k.c > @@ -21,6 +21,8 @@ > #include <linux/platform_device.h> > #include <linux/resource.h> > #include <linux/of_pci.h> > +#include <linux/mfd/syscon.h> > +#include <linux/regmap.h> > > #include "pcie-designware.h" > > @@ -32,6 +34,9 @@ struct armada8k_pcie { > struct clk *clk_reg; > struct phy *phy[ARMADA8K_PCIE_MAX_LANES]; > unsigned int phy_count; > + struct regmap *sysctrl_base; > + u32 mac_rest_bitmask; > + struct work_struct recover_link_work; > }; > > #define PCIE_VENDOR_REGS_OFFSET 0x8000 > @@ -72,6 +77,8 @@ struct armada8k_pcie { > #define AX_USER_DOMAIN_MASK 0x3 > #define AX_USER_DOMAIN_SHIFT 4 > > +#define UNIT_SOFT_RESET_CONFIG_REG 0x268 > + > #define to_armada8k_pcie(x) dev_get_drvdata((x)->dev) > > static void armada8k_pcie_disable_phys(struct armada8k_pcie *pcie) > @@ -216,6 +223,65 @@ static int armada8k_pcie_host_init(struct dw_pcie_rp *pp) > return 0; > } > > +static void armada8k_pcie_recover_link(struct work_struct *ws) > +{ > + struct armada8k_pcie *pcie = container_of(ws, struct armada8k_pcie, recover_link_work); > + struct dw_pcie_rp *pp = &pcie->pci->pp; > + struct pci_bus *bus = pp->bridge->bus; > + struct pci_dev *root_port; > + int ret; > + > + root_port = pci_get_slot(bus, 0); > + if (!root_port) { > + dev_err(pcie->pci->dev, "failed to get root port\n"); > + return; > + } > + pci_lock_rescan_remove(); > + pci_stop_and_remove_bus_device(root_port); Add blank line. > + /* > + * Sleep needed to make sure all pcie transactions and access > + * are flushed before resetting the mac > + */ > + msleep(100); s/pcie/PCIe/ s/mac/MAC/ (also below) What PCIe spec parameter is the 100ms? If we don't already have a #define for it, add one in drivers/pci/pci.h with spec citation. > + /* Reset mac */ > + regmap_update_bits_base(pcie->sysctrl_base, UNIT_SOFT_RESET_CONFIG_REG, > + pcie->mac_rest_bitmask, 0, NULL, false, true); > + udelay(1); > + regmap_update_bits_base(pcie->sysctrl_base, UNIT_SOFT_RESET_CONFIG_REG, > + pcie->mac_rest_bitmask, pcie->mac_rest_bitmask, > + NULL, false, true); > + udelay(1); > + > + ret = dw_pcie_setup_rc(pp); > + if (ret) > + goto fail; > + > + ret = armada8k_pcie_host_init(pp); > + if (ret) { > + dev_err(pcie->pci->dev, "failed to initialize host: %d\n", ret); > + goto fail; > + } > + > + if (!dw_pcie_link_up(pcie->pci)) { > + ret = dw_pcie_start_link(pcie->pci); > + if (ret) > + goto fail; > + } > + > + /* Wait until the link becomes active again */ > + if (dw_pcie_wait_for_link(pcie->pci)) > + dev_err(pcie->pci->dev, "Link not up after reconfiguration\n"); > + > + bus = NULL; > + while ((bus = pci_find_next_bus(bus)) != NULL) > + pci_rescan_bus(bus); > + > +fail: > + pci_unlock_rescan_remove(); > + pci_dev_put(root_port); > +} > + > static irqreturn_t armada8k_pcie_irq_handler(int irq, void *arg) > { > struct armada8k_pcie *pcie = arg; > @@ -253,6 +319,9 @@ static irqreturn_t armada8k_pcie_irq_handler(int irq, void *arg) > * initiate a link retrain. If link retrains were > * possible, that is. > */ > + if (pcie->sysctrl_base && pcie->mac_rest_bitmask) > + schedule_work(&pcie->recover_link_work); > + > dev_dbg(pci->dev, "%s: link went down\n", __func__); > } > > @@ -322,6 +391,8 @@ static int armada8k_pcie_probe(struct platform_device *pdev) > > pcie->pci = pci; > > + INIT_WORK(&pcie->recover_link_work, armada8k_pcie_recover_link); > + > pcie->clk = devm_clk_get(dev, NULL); > if (IS_ERR(pcie->clk)) > return PTR_ERR(pcie->clk); > @@ -349,6 +420,19 @@ static int armada8k_pcie_probe(struct platform_device *pdev) > goto fail_clkreg; > } > > + pcie->sysctrl_base = syscon_regmap_lookup_by_phandle(pdev->dev.of_node, > + "marvell,system-controller"); > + if (IS_ERR(pcie->sysctrl_base)) { > + dev_warn(dev, "failed to find marvell,system-controller\n"); > + pcie->sysctrl_base = 0x0; > + } > + > + ret = of_property_read_u32(pdev->dev.of_node, "marvell,mac-reset-bit-mask", > + &pcie->mac_rest_bitmask); > + if (ret < 0) { > + dev_warn(dev, "couldn't find mac reset bit mask: %d\n", ret); > + pcie->mac_rest_bitmask = 0x0; > + } > ret = armada8k_pcie_setup_phys(pcie); > if (ret) > goto fail_clkreg; > -- > 2.25.1 >
On November 13, 2024 3:02:55 AM GMT+05:30, Bjorn Helgaas <helgaas@kernel.org> wrote: >In subject: > > PCI: armada8k: Add link-down handling > >On Mon, Nov 11, 2024 at 10:48:13PM -0800, Jenishkumar Maheshbhai Patel wrote: >> In PCIE ISR routine caused by RST_LINK_DOWN >> we schedule work to handle the link-down procedure. >> Link-down procedure will: >> 1. Remove PCIe bus >> 2. Reset the MAC >> 3. Reconfigure link back up >> 4. Rescan PCIe bus > >s/PCIE/PCIe/ > >Rewrap to fill 75 columns. > >I assume this basically removes a Root Port (and the hierarchy below >it) if the link goes down, and then resets the MAC and tries to bring >up the link and enumerate the hierarchy again. > >No other drivers do this, so why does armada8k need it? Is this to >work around some unreliable link? Certainly Qcom IPs have this same feature and I was also looking to implement it. But the link down should not be handled by this in the controller driver. Instead, it should be tied to bus reset in the core and the reset should be done through a callback implemented in the controller drivers. This way, the reset cannot happen in the back of PCI core and client drivers. That said, the Link down IRQ received by this driver should also be propagated back to the PCI core and the core should then call the callback to reset the bus that I mentioned above. > >I would think this would be reported via AER and possibly handled >there already, but apparently not? No, these are not reported via AER (atleast on Qcom platforms). We have a global_irq that fires when Link down happens. - Mani > >> Signed-off-by: Jenishkumar Maheshbhai Patel <jpatel2@marvell.com> >> --- >> drivers/pci/controller/dwc/pcie-armada8k.c | 84 ++++++++++++++++++++++ >> 1 file changed, 84 insertions(+) >> >> diff --git a/drivers/pci/controller/dwc/pcie-armada8k.c b/drivers/pci/controller/dwc/pcie-armada8k.c >> index 07775539b321..b1b48c2016f7 100644 >> --- a/drivers/pci/controller/dwc/pcie-armada8k.c >> +++ b/drivers/pci/controller/dwc/pcie-armada8k.c >> @@ -21,6 +21,8 @@ >> #include <linux/platform_device.h> >> #include <linux/resource.h> >> #include <linux/of_pci.h> >> +#include <linux/mfd/syscon.h> >> +#include <linux/regmap.h> >> >> #include "pcie-designware.h" >> >> @@ -32,6 +34,9 @@ struct armada8k_pcie { >> struct clk *clk_reg; >> struct phy *phy[ARMADA8K_PCIE_MAX_LANES]; >> unsigned int phy_count; >> + struct regmap *sysctrl_base; >> + u32 mac_rest_bitmask; >> + struct work_struct recover_link_work; >> }; >> >> #define PCIE_VENDOR_REGS_OFFSET 0x8000 >> @@ -72,6 +77,8 @@ struct armada8k_pcie { >> #define AX_USER_DOMAIN_MASK 0x3 >> #define AX_USER_DOMAIN_SHIFT 4 >> >> +#define UNIT_SOFT_RESET_CONFIG_REG 0x268 >> + >> #define to_armada8k_pcie(x) dev_get_drvdata((x)->dev) >> >> static void armada8k_pcie_disable_phys(struct armada8k_pcie *pcie) >> @@ -216,6 +223,65 @@ static int armada8k_pcie_host_init(struct dw_pcie_rp *pp) >> return 0; >> } >> >> +static void armada8k_pcie_recover_link(struct work_struct *ws) >> +{ >> + struct armada8k_pcie *pcie = container_of(ws, struct armada8k_pcie, recover_link_work); >> + struct dw_pcie_rp *pp = &pcie->pci->pp; >> + struct pci_bus *bus = pp->bridge->bus; >> + struct pci_dev *root_port; >> + int ret; >> + >> + root_port = pci_get_slot(bus, 0); >> + if (!root_port) { >> + dev_err(pcie->pci->dev, "failed to get root port\n"); >> + return; >> + } >> + pci_lock_rescan_remove(); >> + pci_stop_and_remove_bus_device(root_port); > >Add blank line. > >> + /* >> + * Sleep needed to make sure all pcie transactions and access >> + * are flushed before resetting the mac >> + */ >> + msleep(100); > >s/pcie/PCIe/ >s/mac/MAC/ (also below) > >What PCIe spec parameter is the 100ms? If we don't already have a >#define for it, add one in drivers/pci/pci.h with spec citation. > >> + /* Reset mac */ >> + regmap_update_bits_base(pcie->sysctrl_base, UNIT_SOFT_RESET_CONFIG_REG, >> + pcie->mac_rest_bitmask, 0, NULL, false, true); >> + udelay(1); >> + regmap_update_bits_base(pcie->sysctrl_base, UNIT_SOFT_RESET_CONFIG_REG, >> + pcie->mac_rest_bitmask, pcie->mac_rest_bitmask, >> + NULL, false, true); >> + udelay(1); >> + >> + ret = dw_pcie_setup_rc(pp); >> + if (ret) >> + goto fail; >> + >> + ret = armada8k_pcie_host_init(pp); >> + if (ret) { >> + dev_err(pcie->pci->dev, "failed to initialize host: %d\n", ret); >> + goto fail; >> + } >> + >> + if (!dw_pcie_link_up(pcie->pci)) { >> + ret = dw_pcie_start_link(pcie->pci); >> + if (ret) >> + goto fail; >> + } >> + >> + /* Wait until the link becomes active again */ >> + if (dw_pcie_wait_for_link(pcie->pci)) >> + dev_err(pcie->pci->dev, "Link not up after reconfiguration\n"); >> + >> + bus = NULL; >> + while ((bus = pci_find_next_bus(bus)) != NULL) >> + pci_rescan_bus(bus); >> + >> +fail: >> + pci_unlock_rescan_remove(); >> + pci_dev_put(root_port); >> +} >> + >> static irqreturn_t armada8k_pcie_irq_handler(int irq, void *arg) >> { >> struct armada8k_pcie *pcie = arg; >> @@ -253,6 +319,9 @@ static irqreturn_t armada8k_pcie_irq_handler(int irq, void *arg) >> * initiate a link retrain. If link retrains were >> * possible, that is. >> */ >> + if (pcie->sysctrl_base && pcie->mac_rest_bitmask) >> + schedule_work(&pcie->recover_link_work); >> + >> dev_dbg(pci->dev, "%s: link went down\n", __func__); >> } >> >> @@ -322,6 +391,8 @@ static int armada8k_pcie_probe(struct platform_device *pdev) >> >> pcie->pci = pci; >> >> + INIT_WORK(&pcie->recover_link_work, armada8k_pcie_recover_link); >> + >> pcie->clk = devm_clk_get(dev, NULL); >> if (IS_ERR(pcie->clk)) >> return PTR_ERR(pcie->clk); >> @@ -349,6 +420,19 @@ static int armada8k_pcie_probe(struct platform_device *pdev) >> goto fail_clkreg; >> } >> >> + pcie->sysctrl_base = syscon_regmap_lookup_by_phandle(pdev->dev.of_node, >> + "marvell,system-controller"); >> + if (IS_ERR(pcie->sysctrl_base)) { >> + dev_warn(dev, "failed to find marvell,system-controller\n"); >> + pcie->sysctrl_base = 0x0; >> + } >> + >> + ret = of_property_read_u32(pdev->dev.of_node, "marvell,mac-reset-bit-mask", >> + &pcie->mac_rest_bitmask); >> + if (ret < 0) { >> + dev_warn(dev, "couldn't find mac reset bit mask: %d\n", ret); >> + pcie->mac_rest_bitmask = 0x0; >> + } >> ret = armada8k_pcie_setup_phys(pcie); >> if (ret) >> goto fail_clkreg; >> -- >> 2.25.1 >> மணிவண்ணன் சதாசிவம்
> On November 13, 2024 3:02:55 AM GMT+05:30, Bjorn Helgaas > <mailto:helgaas@kernel.org> wrote: > >In subject: > > > > PCI: armada8k: Add link-down handling > > > >On Mon, Nov 11, 2024 at 10:48:13PM -0800, Jenishkumar Maheshbhai > Patel wrote: > >> In PCIE ISR routine caused by RST_LINK_DOWN we schedule work to > >> handle the link-down procedure. > >> Link-down procedure will: > >> 1. Remove PCIe bus > >> 2. Reset the MAC > >> 3. Reconfigure link back up > >> 4. Rescan PCIe bus > > > >s/PCIE/PCIe/ > > > >Rewrap to fill 75 columns. > > > >I assume this basically removes a Root Port (and the hierarchy below > >it) if the link goes down, and then resets the MAC and tries to bring > >up the link and enumerate the hierarchy again. > > > >No other drivers do this, so why does armada8k need it? Is this to > >work around some unreliable link? > > Certainly Qcom IPs have this same feature and I was also looking to implement > it. But the link down should not be handled by this in the controller driver. > > Instead, it should be tied to bus reset in the core and the reset should be done > through a callback implemented in the controller drivers. This way, the reset > cannot happen in the back of PCI core and client drivers. > > That said, the Link down IRQ received by this driver should also be propagated > back to the PCI core and the core should then call the callback to reset the bus > that I mentioned above. > It's more than a work-around for the unreliable link. A few customers may have such application - independent power supply to the device with dedicated reset GPIO to #PRST. In this way, the power cycle and warm reset of RC and EP won't have impact on each other. However, it may lead into the PCI driver not aware of the link down when an unexpected power down or reset occurs on the device. We cannot assume the link will be recovered soon. The worse thing is the driver may continue access to the device, which may hang the bus. Since the device is no longer present on the bus, it's better to remove it. Besides, in order to bring up the link, the only way is to reset the MAC, which starts over the state machine of LTSSM. Well, we also noticed that there is no other driver that did this. I agree it is not necessary if the power cycle or warm reset of the device is done gracefully. The user can remove the device prior to the power cycle/reset. And do the rescan after the link is recovered. However, the unexpected power down is still possible. Please enlighten me if there is any better approach to handle such unexpected link down. In the meanwhile, I am quite interested the callback implementation suggested by Mani. But I am sure if we have such infrastructure right there. Mani, could you please elaborate a bit more, or is there any examples in the existing code and patches. > > > >I would think this would be reported via AER and possibly handled there > >already, but apparently not? > > No, these are not reported via AER (atleast on Qcom platforms). We have a > global_irq that fires when Link down happens. > > - Mani > No. Although AER is reported, it is consider as a recoverable error somehow. Like QCOM platform, the link down is a global irq. The difference is that Armada8k don't have an irq for link up. > > > >> Signed-off-by: Jenishkumar Maheshbhai Patel > >> <mailto:jpatel2@marvell.com> > >> --- > >> drivers/pci/controller/dwc/pcie-armada8k.c | 84 > >> ++++++++++++++++++++++ > >> 1 file changed, 84 insertions(+) > >> > >> diff --git a/drivers/pci/controller/dwc/pcie-armada8k.c > >> b/drivers/pci/controller/dwc/pcie-armada8k.c > >> index 07775539b321..b1b48c2016f7 100644 > >> --- a/drivers/pci/controller/dwc/pcie-armada8k.c > >> +++ b/drivers/pci/controller/dwc/pcie-armada8k.c > >> @@ -21,6 +21,8 @@ > >> #include <linux/platform_device.h> > >> #include <linux/resource.h> > >> #include <linux/of_pci.h> > >> +#include <linux/mfd/syscon.h> > >> +#include <linux/regmap.h> > >> > >> #include "pcie-designware.h" > >> > >> @@ -32,6 +34,9 @@ struct armada8k_pcie { > >> struct clk *clk_reg; > >> struct phy *phy[ARMADA8K_PCIE_MAX_LANES]; > >> unsigned int phy_count; > >> + struct regmap *sysctrl_base; > >> + u32 mac_rest_bitmask; > >> + struct work_struct recover_link_work; > >> }; > >> > >> #define PCIE_VENDOR_REGS_OFFSET 0x8000 > >> @@ -72,6 +77,8 @@ struct armada8k_pcie { > >> #define AX_USER_DOMAIN_MASK 0x3 > >> #define AX_USER_DOMAIN_SHIFT 4 > >> > >> +#define UNIT_SOFT_RESET_CONFIG_REG 0x268 > >> + > >> #define to_armada8k_pcie(x) dev_get_drvdata((x)->dev) > >> > >> static void armada8k_pcie_disable_phys(struct armada8k_pcie *pcie) > >> @@ -216,6 +223,65 @@ static int armada8k_pcie_host_init(struct > dw_pcie_rp *pp) > >> return 0; > >> } > >> > >> +static void armada8k_pcie_recover_link(struct work_struct *ws) { > >> + struct armada8k_pcie *pcie = container_of(ws, struct armada8k_pcie, > recover_link_work); > >> + struct dw_pcie_rp *pp = &pcie->pci->pp; > >> + struct pci_bus *bus = pp->bridge->bus; > >> + struct pci_dev *root_port; > >> + int ret; > >> + > >> + root_port = pci_get_slot(bus, 0); > >> + if (!root_port) { > >> + dev_err(pcie->pci->dev, "failed to get root port\n"); > >> + return; > >> + } > >> + pci_lock_rescan_remove(); > >> + pci_stop_and_remove_bus_device(root_port); > > > >Add blank line. > > > >> + /* > >> + * Sleep needed to make sure all pcie transactions and access > >> + * are flushed before resetting the mac > >> + */ > >> + msleep(100); > > > >s/pcie/PCIe/ > >s/mac/MAC/ (also below) > > > >What PCIe spec parameter is the 100ms? If we don't already have a > >#define for it, add one in drivers/pci/pci.h with spec citation. > > > >> + /* Reset mac */ > >> + regmap_update_bits_base(pcie->sysctrl_base, > UNIT_SOFT_RESET_CONFIG_REG, > >> + pcie->mac_rest_bitmask, 0, NULL, false, true); > >> + udelay(1); > >> + regmap_update_bits_base(pcie->sysctrl_base, > UNIT_SOFT_RESET_CONFIG_REG, > >> + pcie->mac_rest_bitmask, pcie- > >mac_rest_bitmask, > >> + NULL, false, true); > >> + udelay(1); > >> + > >> + ret = dw_pcie_setup_rc(pp); > >> + if (ret) > >> + goto fail; > >> + > >> + ret = armada8k_pcie_host_init(pp); > >> + if (ret) { > >> + dev_err(pcie->pci->dev, "failed to initialize host: %d\n", ret); > >> + goto fail; > >> + } > >> + > >> + if (!dw_pcie_link_up(pcie->pci)) { > >> + ret = dw_pcie_start_link(pcie->pci); > >> + if (ret) > >> + goto fail; > >> + } > >> + > >> + /* Wait until the link becomes active again */ > >> + if (dw_pcie_wait_for_link(pcie->pci)) > >> + dev_err(pcie->pci->dev, "Link not up after reconfiguration\n"); > >> + > >> + bus = NULL; > >> + while ((bus = pci_find_next_bus(bus)) != NULL) > >> + pci_rescan_bus(bus); > >> + > >> +fail: > >> + pci_unlock_rescan_remove(); > >> + pci_dev_put(root_port); > >> +} > >> + > >> static irqreturn_t armada8k_pcie_irq_handler(int irq, void *arg) { > >> struct armada8k_pcie *pcie = arg; > >> @@ -253,6 +319,9 @@ static irqreturn_t armada8k_pcie_irq_handler(int > irq, void *arg) > >> * initiate a link retrain. If link retrains were > >> * possible, that is. > >> */ > >> + if (pcie->sysctrl_base && pcie->mac_rest_bitmask) > >> + schedule_work(&pcie->recover_link_work); > >> + > >> dev_dbg(pci->dev, "%s: link went down\n", __func__); > >> } > >> > >> @@ -322,6 +391,8 @@ static int armada8k_pcie_probe(struct > >> platform_device *pdev) > >> > >> pcie->pci = pci; > >> > >> + INIT_WORK(&pcie->recover_link_work, armada8k_pcie_recover_link); > >> + > >> pcie->clk = devm_clk_get(dev, NULL); > >> if (IS_ERR(pcie->clk)) > >> return PTR_ERR(pcie->clk); > >> @@ -349,6 +420,19 @@ static int armada8k_pcie_probe(struct > platform_device *pdev) > >> goto fail_clkreg; > >> } > >> > >> + pcie->sysctrl_base = syscon_regmap_lookup_by_phandle(pdev- > >dev.of_node, > >> + "marvell,system- > controller"); > >> + if (IS_ERR(pcie->sysctrl_base)) { > >> + dev_warn(dev, "failed to find marvell,system-controller\n"); > >> + pcie->sysctrl_base = 0x0; > >> + } > >> + > >> + ret = of_property_read_u32(pdev->dev.of_node, "marvell,mac-reset- > bit-mask", > >> + &pcie->mac_rest_bitmask); > >> + if (ret < 0) { > >> + dev_warn(dev, "couldn't find mac reset bit mask: %d\n", ret); > >> + pcie->mac_rest_bitmask = 0x0; > >> + } > >> ret = armada8k_pcie_setup_phys(pcie); > >> if (ret) > >> goto fail_clkreg; > >> -- > >> 2.25.1 > >> > > மணிவண்ணன் சதாசிவம்
> Overall, your recent patches look like they're part of a series - they are > dependent on each other, but you haven't sent them as a series. They need to > be sent as a series so people know what order these patches should be applied > in, and that they're all related. > Thanks for your time and comments. I will resend these patches as a series with the fixes. > On Mon, Nov 11, 2024 at 10:48:13PM -0800, Jenishkumar Maheshbhai Patel > wrote: > > In PCIE ISR routine caused by RST_LINK_DOWN we schedule work to handle > > the link-down procedure. > > Link-down procedure will: > > 1. Remove PCIe bus > > 2. Reset the MAC > > 3. Reconfigure link back up > > 4. Rescan PCIe bus > > > > Signed-off-by: Jenishkumar Maheshbhai Patel > > <mailto:jpatel2@marvell.com> > > --- > > drivers/pci/controller/dwc/pcie-armada8k.c | 84 > > ++++++++++++++++++++++ > > 1 file changed, 84 insertions(+) > > > > diff --git a/drivers/pci/controller/dwc/pcie-armada8k.c > > b/drivers/pci/controller/dwc/pcie-armada8k.c > > index 07775539b321..b1b48c2016f7 100644 > > --- a/drivers/pci/controller/dwc/pcie-armada8k.c > > +++ b/drivers/pci/controller/dwc/pcie-armada8k.c > > @@ -21,6 +21,8 @@ > > #include <linux/platform_device.h> > > #include <linux/resource.h> > > #include <linux/of_pci.h> > > +#include <linux/mfd/syscon.h> > > +#include <linux/regmap.h> > > > > #include "pcie-designware.h" > > > > @@ -32,6 +34,9 @@ struct armada8k_pcie { > > struct clk *clk_reg; > > struct phy *phy[ARMADA8K_PCIE_MAX_LANES]; > > unsigned int phy_count; > > + struct regmap *sysctrl_base; > > + u32 mac_rest_bitmask; > > + struct work_struct recover_link_work; > > }; > > > > #define PCIE_VENDOR_REGS_OFFSET 0x8000 > > @@ -72,6 +77,8 @@ struct armada8k_pcie { > > #define AX_USER_DOMAIN_MASK 0x3 > > #define AX_USER_DOMAIN_SHIFT 4 > > > > +#define UNIT_SOFT_RESET_CONFIG_REG 0x268 > > + > > #define to_armada8k_pcie(x) dev_get_drvdata((x)->dev) > > > > static void armada8k_pcie_disable_phys(struct armada8k_pcie *pcie) @@ > > -216,6 +223,65 @@ static int armada8k_pcie_host_init(struct dw_pcie_rp > *pp) > > return 0; > > } > > > > +static void armada8k_pcie_recover_link(struct work_struct *ws) { > > + struct armada8k_pcie *pcie = container_of(ws, struct armada8k_pcie, > recover_link_work); > > + struct dw_pcie_rp *pp = &pcie->pci->pp; > > + struct pci_bus *bus = pp->bridge->bus; > > + struct pci_dev *root_port; > > + int ret; > > + > > + root_port = pci_get_slot(bus, 0); > > + if (!root_port) { > > + dev_err(pcie->pci->dev, "failed to get root port\n"); > > + return; > > + } > > + pci_lock_rescan_remove(); > > + pci_stop_and_remove_bus_device(root_port); > > + /* > > + * Sleep needed to make sure all pcie transactions and access > > + * are flushed before resetting the mac > > + */ > > + msleep(100); > > + > > + /* Reset mac */ > > + regmap_update_bits_base(pcie->sysctrl_base, > UNIT_SOFT_RESET_CONFIG_REG, > > + pcie->mac_rest_bitmask, 0, NULL, false, true); > > + udelay(1); > > + regmap_update_bits_base(pcie->sysctrl_base, > UNIT_SOFT_RESET_CONFIG_REG, > > + pcie->mac_rest_bitmask, pcie- > >mac_rest_bitmask, > > + NULL, false, true); > > + udelay(1); > > + > > + ret = dw_pcie_setup_rc(pp); > > + if (ret) > > + goto fail; > > + > > + ret = armada8k_pcie_host_init(pp); > > + if (ret) { > > + dev_err(pcie->pci->dev, "failed to initialize host: %d\n", ret); > > + goto fail; > > + } > > + > > + if (!dw_pcie_link_up(pcie->pci)) { > > + ret = dw_pcie_start_link(pcie->pci); > > + if (ret) > > + goto fail; > > + } > > + > > + /* Wait until the link becomes active again */ > > + if (dw_pcie_wait_for_link(pcie->pci)) > > + dev_err(pcie->pci->dev, "Link not up after reconfiguration\n"); > > + > > + bus = NULL; > > + while ((bus = pci_find_next_bus(bus)) != NULL) > > + pci_rescan_bus(bus); > > + > > +fail: > > + pci_unlock_rescan_remove(); > > + pci_dev_put(root_port); > > +} > > + > > static irqreturn_t armada8k_pcie_irq_handler(int irq, void *arg) { > > struct armada8k_pcie *pcie = arg; > > @@ -253,6 +319,9 @@ static irqreturn_t armada8k_pcie_irq_handler(int > irq, void *arg) > > * initiate a link retrain. If link retrains were > > * possible, that is. > > */ > > + if (pcie->sysctrl_base && pcie->mac_rest_bitmask) > > + schedule_work(&pcie->recover_link_work); > > + > > dev_dbg(pci->dev, "%s: link went down\n", __func__); > > } > > > > @@ -322,6 +391,8 @@ static int armada8k_pcie_probe(struct > > platform_device *pdev) > > > > pcie->pci = pci; > > > > + INIT_WORK(&pcie->recover_link_work, armada8k_pcie_recover_link); > > + > > pcie->clk = devm_clk_get(dev, NULL); > > if (IS_ERR(pcie->clk)) > > return PTR_ERR(pcie->clk); > > @@ -349,6 +420,19 @@ static int armada8k_pcie_probe(struct > platform_device *pdev) > > goto fail_clkreg; > > } > > > > + pcie->sysctrl_base = syscon_regmap_lookup_by_phandle(pdev- > >dev.of_node, > > + "marvell,system- > controller"); > > + if (IS_ERR(pcie->sysctrl_base)) { > > + dev_warn(dev, "failed to find marvell,system-controller\n"); > > + pcie->sysctrl_base = 0x0; > > + } > > + > > + ret = of_property_read_u32(pdev->dev.of_node, "marvell,mac-reset- > bit-mask", > > + &pcie->mac_rest_bitmask); > > + if (ret < 0) { > > + dev_warn(dev, "couldn't find mac reset bit mask: %d\n", ret); > > + pcie->mac_rest_bitmask = 0x0; > > + } > > ret = armada8k_pcie_setup_phys(pcie); > > if (ret) > > goto fail_clkreg; > > -- > > 2.25.1 > > > > > > > > -- > RMK's Patch system: https://urldefense.proofpoint.com/v2/url?u=https- > 3A__www.armlinux.org.uk_developer_patches_&d=DwIBAg&c=nKjWec2b6R > 0mOyPaz7xtfQ&r=sXDQZu4GyqNVDlFUXakSGJl0Dh81ZIPlU26YS4KHGIA&m= > 9VAHsMcDLVnsayEXDyN5CmlJpQPhncBHZIUVgNw- > LAYA4ZffMKdKDAHrMSbXfbDf&s=qHNsIk- > qZGjDWyWZH5VcWD3yg3uhMXB6DW6AGIqRQTg&e= > FTTP is here! 80Mbps down 10Mbps up. Decent connectivity at last!
+ Niklas (who was interested in link down handling) On Sat, Feb 01, 2025 at 11:05:56PM +0000, Wilson Ding wrote: > > On November 13, 2024 3:02:55 AM GMT+05:30, Bjorn Helgaas > > <mailto:helgaas@kernel.org> wrote: > > >In subject: > > > > > > PCI: armada8k: Add link-down handling > > > > > >On Mon, Nov 11, 2024 at 10:48:13PM -0800, Jenishkumar Maheshbhai > > Patel wrote: > > >> In PCIE ISR routine caused by RST_LINK_DOWN we schedule work to > > >> handle the link-down procedure. > > >> Link-down procedure will: > > >> 1. Remove PCIe bus > > >> 2. Reset the MAC > > >> 3. Reconfigure link back up > > >> 4. Rescan PCIe bus > > > > > >s/PCIE/PCIe/ > > > > > >Rewrap to fill 75 columns. > > > > > >I assume this basically removes a Root Port (and the hierarchy below > > >it) if the link goes down, and then resets the MAC and tries to bring > > >up the link and enumerate the hierarchy again. > > > > > >No other drivers do this, so why does armada8k need it? Is this to > > >work around some unreliable link? > > > > Certainly Qcom IPs have this same feature and I was also looking to implement > > it. But the link down should not be handled by this in the controller driver. > > > > Instead, it should be tied to bus reset in the core and the reset should be done > > through a callback implemented in the controller drivers. This way, the reset > > cannot happen in the back of PCI core and client drivers. > > > > That said, the Link down IRQ received by this driver should also be propagated > > back to the PCI core and the core should then call the callback to reset the bus > > that I mentioned above. > > > > It's more than a work-around for the unreliable link. A few customers may have > such application - independent power supply to the device with dedicated reset > GPIO to #PRST. In this way, the power cycle and warm reset of RC and EP won't > have impact on each other. However, it may lead into the PCI driver not aware > of the link down when an unexpected power down or reset occurs on the device. > We cannot assume the link will be recovered soon. The worse thing is the driver > may continue access to the device, which may hang the bus. Since the device > is no longer present on the bus, it's better to remove it. Besides, in order to bring > up the link, the only way is to reset the MAC, which starts over the state machine > of LTSSM. > > Well, we also noticed that there is no other driver that did this. I agree it is not > necessary if the power cycle or warm reset of the device is done gracefully. The > user can remove the device prior to the power cycle/reset. And do the rescan > after the link is recovered. However, the unexpected power down is still possible. > Please enlighten me if there is any better approach to handle such unexpected > link down. > There is no issue in retraining the link. My concern is that, the retrain should not happen autonomously in the controller driver. PCI core should be made informed of it. More below. > In the meanwhile, I am quite interested the callback implementation suggested > by Mani. But I am sure if we have such infrastructure right there. Mani, could > you please elaborate a bit more, or is there any examples in the existing code > and patches. > There is no such implementation available right now. This idea is on my mind for quite some time, but never had time to do it. Maybe this gives me motivation to do so. Niklas: Did you get a chance to look into this? Else, let me know. I'll take a stab at it. - Mani
> -----Original Message----- > From: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org> > Sent: Friday, February 7, 2025 9:58 AM > To: Wilson Ding <dingwei@marvell.com>; cassel@kernel.org > Cc: Bjorn Helgaas <helgaas@kernel.org>; lpieralisi@kernel.org; > thomas.petazzoni@bootlin.com; kw@linux.com; robh@kernel.org; > bhelgaas@google.com; linux-pci@vger.kernel.org; linux-arm- > kernel@lists.infradead.org; linux-kernel@vger.kernel.org; Sanghoon Lee > <salee@marvell.com> > Subject: [EXTERNAL] Re: [PATCH 1/1] PCI: armada8k: Add link-down handle > > + Niklas (who was interested in link down handling) On Sat, Feb 01, 2025 > + at 11: 05: 56PM +0000, Wilson Ding wrote: > > On November 13, 2024 3: > + 02: 55 AM GMT+05: 30, Bjorn Helgaas > > <mailto: helgaas@ kernel. org> > + wrote: > > > > + Niklas (who was interested in link down handling) > > On Sat, Feb 01, 2025 at 11:05:56PM +0000, Wilson Ding wrote: > > > On November 13, 2024 3:02:55 AM GMT+05:30, Bjorn Helgaas > > > <mailto:helgaas@kernel.org> wrote: > > > >In subject: > > > > > > > > PCI: armada8k: Add link-down handling > > > > > > > >On Mon, Nov 11, 2024 at 10:48:13PM -0800, Jenishkumar Maheshbhai > > > Patel wrote: > > > >> In PCIE ISR routine caused by RST_LINK_DOWN we schedule work to > > > >> handle the link-down procedure. > > > >> Link-down procedure will: > > > >> 1. Remove PCIe bus > > > >> 2. Reset the MAC > > > >> 3. Reconfigure link back up > > > >> 4. Rescan PCIe bus > > > > > > > >s/PCIE/PCIe/ > > > > > > > >Rewrap to fill 75 columns. > > > > > > > >I assume this basically removes a Root Port (and the hierarchy > > > >below > > > >it) if the link goes down, and then resets the MAC and tries to > > > >bring up the link and enumerate the hierarchy again. > > > > > > > >No other drivers do this, so why does armada8k need it? Is this to > > > >work around some unreliable link? > > > > > > Certainly Qcom IPs have this same feature and I was also looking to > > > implement it. But the link down should not be handled by this in the > controller driver. > > > > > > Instead, it should be tied to bus reset in the core and the reset > > > should be done through a callback implemented in the controller > > > drivers. This way, the reset cannot happen in the back of PCI core and client > drivers. > > > > > > That said, the Link down IRQ received by this driver should also be > > > propagated back to the PCI core and the core should then call the > > > callback to reset the bus that I mentioned above. > > > > > > > It's more than a work-around for the unreliable link. A few customers > > may have such application - independent power supply to the device > > with dedicated reset GPIO to #PRST. In this way, the power cycle and > > warm reset of RC and EP won't have impact on each other. However, it > > may lead into the PCI driver not aware of the link down when an unexpected > power down or reset occurs on the device. > > We cannot assume the link will be recovered soon. The worse thing is > > the driver may continue access to the device, which may hang the bus. > > Since the device is no longer present on the bus, it's better to > > remove it. Besides, in order to bring up the link, the only way is to > > reset the MAC, which starts over the state machine of LTSSM. > > > > Well, we also noticed that there is no other driver that did this. I > > agree it is not necessary if the power cycle or warm reset of the > > device is done gracefully. The user can remove the device prior to the > > power cycle/reset. And do the rescan after the link is recovered. However, > the unexpected power down is still possible. > > Please enlighten me if there is any better approach to handle such > > unexpected link down. > > > > There is no issue in retraining the link. My concern is that, the retrain should > not happen autonomously in the controller driver. PCI core should be made > informed of it. More below. > Do you mean - pass the link down/up events to PCI core - remove the device or hierarchy by PCI core upon link down - initiate the link retraining in PCI core by calling the platform retrain callbacks - rescan the bus once link is recovered > > In the meanwhile, I am quite interested the callback implementation > > suggested by Mani. But I am sure if we have such infrastructure right > > there. Mani, could you please elaborate a bit more, or is there any > > examples in the existing code and patches. > > > > There is no such implementation available right now. This idea is on my mind > for quite some time, but never had time to do it. Maybe this gives me > motivation to do so. > > Niklas: Did you get a chance to look into this? Else, let me know. I'll take a stab > at it. > > - Mani > > -- > மணிவண்ணன் சதாசிவம்
On Fri, Feb 07, 2025 at 11:27:59PM +0530, Manivannan Sadhasivam wrote: > + Niklas (who was interested in link down handling) I am still interested :) > On Sat, Feb 01, 2025 at 11:05:56PM +0000, Wilson Ding wrote: > > > In the meanwhile, I am quite interested the callback implementation suggested > > by Mani. But I am sure if we have such infrastructure right there. Mani, could > > you please elaborate a bit more, or is there any examples in the existing code > > and patches. > > > > There is no such implementation available right now. This idea is on my mind for > quite some time, but never had time to do it. Maybe this gives me motivation to > do so. > > Niklas: Did you get a chance to look into this? Else, let me know. I'll take a > stab at it. I just remember talking to you how we would want such a callback to work, but unfortunately I haven't had any time to look into this. (I guess too many other things with higher prio.) Feel free to CC me on patches, and I will do my best to test :) Kind regards, Niklas
On Fri, Feb 07, 2025 at 06:46:22PM +0000, Wilson Ding wrote: > > > > -----Original Message----- > > From: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org> > > Sent: Friday, February 7, 2025 9:58 AM > > To: Wilson Ding <dingwei@marvell.com>; cassel@kernel.org > > Cc: Bjorn Helgaas <helgaas@kernel.org>; lpieralisi@kernel.org; > > thomas.petazzoni@bootlin.com; kw@linux.com; robh@kernel.org; > > bhelgaas@google.com; linux-pci@vger.kernel.org; linux-arm- > > kernel@lists.infradead.org; linux-kernel@vger.kernel.org; Sanghoon Lee > > <salee@marvell.com> > > Subject: [EXTERNAL] Re: [PATCH 1/1] PCI: armada8k: Add link-down handle > > > > + Niklas (who was interested in link down handling) On Sat, Feb 01, 2025 > > + at 11: 05: 56PM +0000, Wilson Ding wrote: > > On November 13, 2024 3: > > + 02: 55 AM GMT+05: 30, Bjorn Helgaas > > <mailto: helgaas@ kernel. org> > > + wrote: > > > > > > + Niklas (who was interested in link down handling) > > > > On Sat, Feb 01, 2025 at 11:05:56PM +0000, Wilson Ding wrote: > > > > On November 13, 2024 3:02:55 AM GMT+05:30, Bjorn Helgaas > > > > <mailto:helgaas@kernel.org> wrote: > > > > >In subject: > > > > > > > > > > PCI: armada8k: Add link-down handling > > > > > > > > > >On Mon, Nov 11, 2024 at 10:48:13PM -0800, Jenishkumar Maheshbhai > > > > Patel wrote: > > > > >> In PCIE ISR routine caused by RST_LINK_DOWN we schedule work to > > > > >> handle the link-down procedure. > > > > >> Link-down procedure will: > > > > >> 1. Remove PCIe bus > > > > >> 2. Reset the MAC > > > > >> 3. Reconfigure link back up > > > > >> 4. Rescan PCIe bus > > > > > > > > > >s/PCIE/PCIe/ > > > > > > > > > >Rewrap to fill 75 columns. > > > > > > > > > >I assume this basically removes a Root Port (and the hierarchy > > > > >below > > > > >it) if the link goes down, and then resets the MAC and tries to > > > > >bring up the link and enumerate the hierarchy again. > > > > > > > > > >No other drivers do this, so why does armada8k need it? Is this to > > > > >work around some unreliable link? > > > > > > > > Certainly Qcom IPs have this same feature and I was also looking to > > > > implement it. But the link down should not be handled by this in the > > controller driver. > > > > > > > > Instead, it should be tied to bus reset in the core and the reset > > > > should be done through a callback implemented in the controller > > > > drivers. This way, the reset cannot happen in the back of PCI core and client > > drivers. > > > > > > > > That said, the Link down IRQ received by this driver should also be > > > > propagated back to the PCI core and the core should then call the > > > > callback to reset the bus that I mentioned above. > > > > > > > > > > It's more than a work-around for the unreliable link. A few customers > > > may have such application - independent power supply to the device > > > with dedicated reset GPIO to #PRST. In this way, the power cycle and > > > warm reset of RC and EP won't have impact on each other. However, it > > > may lead into the PCI driver not aware of the link down when an unexpected > > power down or reset occurs on the device. > > > We cannot assume the link will be recovered soon. The worse thing is > > > the driver may continue access to the device, which may hang the bus. > > > Since the device is no longer present on the bus, it's better to > > > remove it. Besides, in order to bring up the link, the only way is to > > > reset the MAC, which starts over the state machine of LTSSM. > > > > > > Well, we also noticed that there is no other driver that did this. I > > > agree it is not necessary if the power cycle or warm reset of the > > > device is done gracefully. The user can remove the device prior to the > > > power cycle/reset. And do the rescan after the link is recovered. However, > > the unexpected power down is still possible. > > > Please enlighten me if there is any better approach to handle such > > > unexpected link down. > > > > > > > There is no issue in retraining the link. My concern is that, the retrain should > > not happen autonomously in the controller driver. PCI core should be made > > informed of it. More below. > > > > Do you mean > - pass the link down/up events to PCI core > - remove the device or hierarchy by PCI core upon link down > - initiate the link retraining in PCI core by calling the platform retrain callbacks > - rescan the bus once link is recovered > Yeah. This is what I came up with quickly: ``` diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index b6536ed599c3..561eeb464220 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -706,6 +706,33 @@ void pci_free_host_bridge(struct pci_host_bridge *bridge) } EXPORT_SYMBOL(pci_free_host_bridge); +void pci_host_bridge_handle_link_down(struct pci_host_bridge *bridge) +{ + struct pci_bus *bus = bridge->bus; + struct pci_dev *child, *tmp; + int ret; + + pci_lock_rescan_remove(); + + /* Knock the devices off bus since we cannot access them */ + list_for_each_entry_safe(child, tmp, &bus->devices, bus_list) + pci_stop_and_remove_bus_device(child); + + /* Now retrain the link in a controller specific way to bring it back */ + if (bus->ops->retrain_link) { + ret = bus->ops->retrain_link(bus); + if (ret) { + dev_err(&bridge->dev, "Failed to retrain the link!\n"); + pci_unlock_rescan_remove(); + return; + } + } + + pci_rescan_bus(bus); + pci_unlock_rescan_remove(); +} +EXPORT_SYMBOL(pci_host_bridge_handle_link_down); + /* Indexed by PCI_X_SSTATUS_FREQ (secondary bus mode and frequency) */ static const unsigned char pcix_bus_speed[] = { PCI_SPEED_UNKNOWN, /* 0 */ diff --git a/include/linux/pci.h b/include/linux/pci.h index 47b31ad724fa..1c6f18a51bdd 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -637,6 +637,7 @@ struct pci_host_bridge *pci_alloc_host_bridge(size_t priv); struct pci_host_bridge *devm_pci_alloc_host_bridge(struct device *dev, size_t priv); void pci_free_host_bridge(struct pci_host_bridge *bridge); +void pci_host_bridge_handle_link_down(struct pci_host_bridge *bridge); struct pci_host_bridge *pci_find_host_bridge(struct pci_bus *bus); void pci_set_host_bridge_release(struct pci_host_bridge *bridge, @@ -804,6 +805,7 @@ struct pci_ops { void __iomem *(*map_bus)(struct pci_bus *bus, unsigned int devfn, int where); int (*read)(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 *val); int (*write)(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 val); + int (*retrain_link)(struct pci_bus *bus); }; /* ``` Your controller driver has to call pci_host_bridge_handle_link_down() during the link down event (make it threaded if not done already). Then you should also populate the pci_ops::retrain_link() callback with the function that retrains the broken link. Finally, the bus will be rescanned to enumerate the devices. I do have plans to plug this retrain callback to one of the bus_reset() functions in the future so that we can bring the link back while doing bus level reset (uncorrectable AERs and such). But this will do the job for now. I will send a series on Monday with Qcom driver as a reference. - Mani
> -----Original Message----- > From: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org> > Sent: Saturday, February 8, 2025 2:36 AM > To: Wilson Ding <dingwei@marvell.com> > Cc: cassel@kernel.org; Bjorn Helgaas <helgaas@kernel.org>; > lpieralisi@kernel.org; thomas.petazzoni@bootlin.com; kw@linux.com; > robh@kernel.org; bhelgaas@google.com; linux-pci@vger.kernel.org; linux- > arm-kernel@lists.infradead.org; linux-kernel@vger.kernel.org; Sanghoon Lee > <salee@marvell.com> > Subject: Re: [EXTERNAL] Re: [PATCH 1/1] PCI: armada8k: Add link-down > handle > > On Fri, Feb 07, 2025 at 06: 46: 22PM +0000, Wilson Ding wrote: > > > > ----- > Original Message----- > > From: Manivannan Sadhasivam > <manivannan. sadhasivam@ linaro. org> > > Sent: Friday, February 7, 2025 > 9: 58 AM > > On Fri, Feb 07, 2025 at 06:46:22PM +0000, Wilson Ding wrote: > > > > > > > -----Original Message----- > > > From: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org> > > > Sent: Friday, February 7, 2025 9:58 AM > > > To: Wilson Ding <dingwei@marvell.com>; cassel@kernel.org > > > Cc: Bjorn Helgaas <helgaas@kernel.org>; lpieralisi@kernel.org; > > > thomas.petazzoni@bootlin.com; kw@linux.com; robh@kernel.org; > > > bhelgaas@google.com; linux-pci@vger.kernel.org; linux-arm- > > > kernel@lists.infradead.org; linux-kernel@vger.kernel.org; Sanghoon > > > Lee <salee@marvell.com> > > > Subject: [EXTERNAL] Re: [PATCH 1/1] PCI: armada8k: Add link-down > > > handle > > > > > > + Niklas (who was interested in link down handling) On Sat, Feb 01, > > > + 2025 at 11: 05: 56PM +0000, Wilson Ding wrote: > > On November 13, > 2024 3: > > > + 02: 55 AM GMT+05: 30, Bjorn Helgaas > > <mailto: helgaas@ kernel. > > > + org> > > > + wrote: > > > > > > > > + Niklas (who was interested in link down handling) > > > > > > On Sat, Feb 01, 2025 at 11:05:56PM +0000, Wilson Ding wrote: > > > > > On November 13, 2024 3:02:55 AM GMT+05:30, Bjorn Helgaas > > > > > <mailto:helgaas@kernel.org> wrote: > > > > > >In subject: > > > > > > > > > > > > PCI: armada8k: Add link-down handling > > > > > > > > > > > >On Mon, Nov 11, 2024 at 10:48:13PM -0800, Jenishkumar > > > > > >Maheshbhai > > > > > Patel wrote: > > > > > >> In PCIE ISR routine caused by RST_LINK_DOWN we schedule work > > > > > >> to handle the link-down procedure. > > > > > >> Link-down procedure will: > > > > > >> 1. Remove PCIe bus > > > > > >> 2. Reset the MAC > > > > > >> 3. Reconfigure link back up > > > > > >> 4. Rescan PCIe bus > > > > > > > > > > > >s/PCIE/PCIe/ > > > > > > > > > > > >Rewrap to fill 75 columns. > > > > > > > > > > > >I assume this basically removes a Root Port (and the hierarchy > > > > > >below > > > > > >it) if the link goes down, and then resets the MAC and tries to > > > > > >bring up the link and enumerate the hierarchy again. > > > > > > > > > > > >No other drivers do this, so why does armada8k need it? Is > > > > > >this to work around some unreliable link? > > > > > > > > > > Certainly Qcom IPs have this same feature and I was also looking > > > > > to implement it. But the link down should not be handled by this > > > > > in the > > > controller driver. > > > > > > > > > > Instead, it should be tied to bus reset in the core and the > > > > > reset should be done through a callback implemented in the > > > > > controller drivers. This way, the reset cannot happen in the > > > > > back of PCI core and client > > > drivers. > > > > > > > > > > That said, the Link down IRQ received by this driver should also > > > > > be propagated back to the PCI core and the core should then call > > > > > the callback to reset the bus that I mentioned above. > > > > > > > > > > > > > It's more than a work-around for the unreliable link. A few > > > > customers may have such application - independent power supply to > > > > the device with dedicated reset GPIO to #PRST. In this way, the > > > > power cycle and warm reset of RC and EP won't have impact on each > > > > other. However, it may lead into the PCI driver not aware of the > > > > link down when an unexpected > > > power down or reset occurs on the device. > > > > We cannot assume the link will be recovered soon. The worse thing > > > > is the driver may continue access to the device, which may hang the bus. > > > > Since the device is no longer present on the bus, it's better to > > > > remove it. Besides, in order to bring up the link, the only way is > > > > to reset the MAC, which starts over the state machine of LTSSM. > > > > > > > > Well, we also noticed that there is no other driver that did this. > > > > I agree it is not necessary if the power cycle or warm reset of > > > > the device is done gracefully. The user can remove the device > > > > prior to the power cycle/reset. And do the rescan after the link > > > > is recovered. However, > > > the unexpected power down is still possible. > > > > Please enlighten me if there is any better approach to handle such > > > > unexpected link down. > > > > > > > > > > There is no issue in retraining the link. My concern is that, the > > > retrain should not happen autonomously in the controller driver. PCI > > > core should be made informed of it. More below. > > > > > > > Do you mean > > - pass the link down/up events to PCI core > > - remove the device or hierarchy by PCI core upon link down > > - initiate the link retraining in PCI core by calling the platform > > retrain callbacks > > - rescan the bus once link is recovered > > > > Yeah. This is what I came up with quickly: > > ``` > diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index > b6536ed599c3..561eeb464220 100644 > --- a/drivers/pci/probe.c > +++ b/drivers/pci/probe.c > @@ -706,6 +706,33 @@ void pci_free_host_bridge(struct pci_host_bridge > *bridge) } EXPORT_SYMBOL(pci_free_host_bridge); > > +void pci_host_bridge_handle_link_down(struct pci_host_bridge *bridge) { > + struct pci_bus *bus = bridge->bus; > + struct pci_dev *child, *tmp; > + int ret; > + > + pci_lock_rescan_remove(); > + > + /* Knock the devices off bus since we cannot access them */ > + list_for_each_entry_safe(child, tmp, &bus->devices, bus_list) > + pci_stop_and_remove_bus_device(child); > + > + /* Now retrain the link in a controller specific way to bring it back */ > + if (bus->ops->retrain_link) { > + ret = bus->ops->retrain_link(bus); > + if (ret) { > + dev_err(&bridge->dev, "Failed to retrain the link!\n"); > + pci_unlock_rescan_remove(); > + return; > + } > + } > + > + pci_rescan_bus(bus); > + pci_unlock_rescan_remove(); > +} > +EXPORT_SYMBOL(pci_host_bridge_handle_link_down); > + > /* Indexed by PCI_X_SSTATUS_FREQ (secondary bus mode and frequency) */ > static const unsigned char pcix_bus_speed[] = { > PCI_SPEED_UNKNOWN, /* 0 */ > diff --git a/include/linux/pci.h b/include/linux/pci.h index > 47b31ad724fa..1c6f18a51bdd 100644 > --- a/include/linux/pci.h > +++ b/include/linux/pci.h > @@ -637,6 +637,7 @@ struct pci_host_bridge *pci_alloc_host_bridge(size_t > priv); struct pci_host_bridge *devm_pci_alloc_host_bridge(struct device *dev, > size_t priv); void pci_free_host_bridge(struct > pci_host_bridge *bridge); > +void pci_host_bridge_handle_link_down(struct pci_host_bridge *bridge); > struct pci_host_bridge *pci_find_host_bridge(struct pci_bus *bus); > > void pci_set_host_bridge_release(struct pci_host_bridge *bridge, @@ -804,6 > +805,7 @@ struct pci_ops { > void __iomem *(*map_bus)(struct pci_bus *bus, unsigned int devfn, int > where); > int (*read)(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 > *val); > int (*write)(struct pci_bus *bus, unsigned int devfn, int where, int size, > u32 val); > + int (*retrain_link)(struct pci_bus *bus); > }; > > /* > ``` > > Your controller driver has to call pci_host_bridge_handle_link_down() during > the link down event (make it threaded if not done already). Then you should > also populate the pci_ops::retrain_link() callback with the function that > retrains the broken link. Finally, the bus will be rescanned to enumerate the > devices. > > I do have plans to plug this retrain callback to one of the bus_reset() functions > in the future so that we can bring the link back while doing bus level reset > (uncorrectable AERs and such). But this will do the job for now. > > I will send a series on Monday with Qcom driver as a reference. > > - Mani > Sounds great. Looking forward to your patches. Thanks a lot! > -- > மணிவண்ணன் சதாசிவம்
diff --git a/drivers/pci/controller/dwc/pcie-armada8k.c b/drivers/pci/controller/dwc/pcie-armada8k.c index 07775539b321..b1b48c2016f7 100644 --- a/drivers/pci/controller/dwc/pcie-armada8k.c +++ b/drivers/pci/controller/dwc/pcie-armada8k.c @@ -21,6 +21,8 @@ #include <linux/platform_device.h> #include <linux/resource.h> #include <linux/of_pci.h> +#include <linux/mfd/syscon.h> +#include <linux/regmap.h> #include "pcie-designware.h" @@ -32,6 +34,9 @@ struct armada8k_pcie { struct clk *clk_reg; struct phy *phy[ARMADA8K_PCIE_MAX_LANES]; unsigned int phy_count; + struct regmap *sysctrl_base; + u32 mac_rest_bitmask; + struct work_struct recover_link_work; }; #define PCIE_VENDOR_REGS_OFFSET 0x8000 @@ -72,6 +77,8 @@ struct armada8k_pcie { #define AX_USER_DOMAIN_MASK 0x3 #define AX_USER_DOMAIN_SHIFT 4 +#define UNIT_SOFT_RESET_CONFIG_REG 0x268 + #define to_armada8k_pcie(x) dev_get_drvdata((x)->dev) static void armada8k_pcie_disable_phys(struct armada8k_pcie *pcie) @@ -216,6 +223,65 @@ static int armada8k_pcie_host_init(struct dw_pcie_rp *pp) return 0; } +static void armada8k_pcie_recover_link(struct work_struct *ws) +{ + struct armada8k_pcie *pcie = container_of(ws, struct armada8k_pcie, recover_link_work); + struct dw_pcie_rp *pp = &pcie->pci->pp; + struct pci_bus *bus = pp->bridge->bus; + struct pci_dev *root_port; + int ret; + + root_port = pci_get_slot(bus, 0); + if (!root_port) { + dev_err(pcie->pci->dev, "failed to get root port\n"); + return; + } + pci_lock_rescan_remove(); + pci_stop_and_remove_bus_device(root_port); + /* + * Sleep needed to make sure all pcie transactions and access + * are flushed before resetting the mac + */ + msleep(100); + + /* Reset mac */ + regmap_update_bits_base(pcie->sysctrl_base, UNIT_SOFT_RESET_CONFIG_REG, + pcie->mac_rest_bitmask, 0, NULL, false, true); + udelay(1); + regmap_update_bits_base(pcie->sysctrl_base, UNIT_SOFT_RESET_CONFIG_REG, + pcie->mac_rest_bitmask, pcie->mac_rest_bitmask, + NULL, false, true); + udelay(1); + + ret = dw_pcie_setup_rc(pp); + if (ret) + goto fail; + + ret = armada8k_pcie_host_init(pp); + if (ret) { + dev_err(pcie->pci->dev, "failed to initialize host: %d\n", ret); + goto fail; + } + + if (!dw_pcie_link_up(pcie->pci)) { + ret = dw_pcie_start_link(pcie->pci); + if (ret) + goto fail; + } + + /* Wait until the link becomes active again */ + if (dw_pcie_wait_for_link(pcie->pci)) + dev_err(pcie->pci->dev, "Link not up after reconfiguration\n"); + + bus = NULL; + while ((bus = pci_find_next_bus(bus)) != NULL) + pci_rescan_bus(bus); + +fail: + pci_unlock_rescan_remove(); + pci_dev_put(root_port); +} + static irqreturn_t armada8k_pcie_irq_handler(int irq, void *arg) { struct armada8k_pcie *pcie = arg; @@ -253,6 +319,9 @@ static irqreturn_t armada8k_pcie_irq_handler(int irq, void *arg) * initiate a link retrain. If link retrains were * possible, that is. */ + if (pcie->sysctrl_base && pcie->mac_rest_bitmask) + schedule_work(&pcie->recover_link_work); + dev_dbg(pci->dev, "%s: link went down\n", __func__); } @@ -322,6 +391,8 @@ static int armada8k_pcie_probe(struct platform_device *pdev) pcie->pci = pci; + INIT_WORK(&pcie->recover_link_work, armada8k_pcie_recover_link); + pcie->clk = devm_clk_get(dev, NULL); if (IS_ERR(pcie->clk)) return PTR_ERR(pcie->clk); @@ -349,6 +420,19 @@ static int armada8k_pcie_probe(struct platform_device *pdev) goto fail_clkreg; } + pcie->sysctrl_base = syscon_regmap_lookup_by_phandle(pdev->dev.of_node, + "marvell,system-controller"); + if (IS_ERR(pcie->sysctrl_base)) { + dev_warn(dev, "failed to find marvell,system-controller\n"); + pcie->sysctrl_base = 0x0; + } + + ret = of_property_read_u32(pdev->dev.of_node, "marvell,mac-reset-bit-mask", + &pcie->mac_rest_bitmask); + if (ret < 0) { + dev_warn(dev, "couldn't find mac reset bit mask: %d\n", ret); + pcie->mac_rest_bitmask = 0x0; + } ret = armada8k_pcie_setup_phys(pcie); if (ret) goto fail_clkreg;
In PCIE ISR routine caused by RST_LINK_DOWN we schedule work to handle the link-down procedure. Link-down procedure will: 1. Remove PCIe bus 2. Reset the MAC 3. Reconfigure link back up 4. Rescan PCIe bus Signed-off-by: Jenishkumar Maheshbhai Patel <jpatel2@marvell.com> --- drivers/pci/controller/dwc/pcie-armada8k.c | 84 ++++++++++++++++++++++ 1 file changed, 84 insertions(+)