@@ -17669,6 +17669,12 @@ F: include/linux/of_pci.h
F: include/linux/pci*
F: include/uapi/linux/pci*
+PCIE BANDWIDTH CONTROLLER
+M: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
+L: linux-pci@vger.kernel.org
+S: Supported
+F: drivers/pci/pcie/bwctrl.c
+
PCIE DRIVER FOR AMAZON ANNAPURNA LABS
M: Jonathan Chocron <jonnyc@amazon.com>
L: linux-pci@vger.kernel.org
@@ -19,6 +19,8 @@
#include <linux/types.h>
#include <linux/pm_runtime.h>
#include <linux/pci.h>
+
+#include "../pci.h"
#include "pciehp.h"
/* The following routines constitute the bulk of the
@@ -127,6 +129,9 @@ static void remove_board(struct controller *ctrl, bool safe_removal)
pciehp_set_indicators(ctrl, PCI_EXP_SLTCTL_PWR_IND_OFF,
INDICATOR_NOOP);
+
+ /* Don't carry LBMS indications across */
+ pcie_reset_lbms_count(ctrl->pcie->port);
}
static int pciehp_enable_slot(struct controller *ctrl);
@@ -4724,7 +4724,7 @@ int pcie_retrain_link(struct pci_dev *pdev, bool use_lt)
* to track link speed or width changes made by hardware itself
* in attempt to correct unreliable link operation.
*/
- pcie_capability_write_word(pdev, PCI_EXP_LNKSTA, PCI_EXP_LNKSTA_LBMS);
+ pcie_reset_lbms_count(pdev);
return rc;
}
@@ -671,6 +671,21 @@ static inline void pcie_set_ecrc_checking(struct pci_dev *dev) { }
static inline void pcie_ecrc_get_policy(char *str) { }
#endif
+#ifdef CONFIG_PCIE_BWCTRL
+void pcie_reset_lbms_count(struct pci_dev *port);
+int pcie_lbms_count(struct pci_dev *port, unsigned long *val);
+#elif defined(CONFIG_PCI)
+static inline void pcie_reset_lbms_count(struct pci_dev *port)
+{
+ pcie_capability_write_word(port, PCI_EXP_LNKSTA, PCI_EXP_LNKSTA_LBMS);
+}
+
+static inline int pcie_lbms_count(struct pci_dev *port, unsigned long *val)
+{
+ return -EOPNOTSUPP;
+}
+#endif
+
struct pci_dev_reset_methods {
u16 vendor;
u16 device;
@@ -146,6 +146,18 @@ config PCIE_PTM
This is only useful if you have devices that support PTM, but it
is safe to enable even if you don't.
+config PCIE_BWCTRL
+ bool "PCI Express Bandwidth Controller"
+ depends on PCIEPORTBUS
+ help
+ This enables PCI Express Bandwidth Controller. The Bandwidth
+ Controller allows controlling PCIe Link Speed and listens for Link
+ Bandwidth Change Notifications. The current Link Speed is available
+ through /sys/bus/pci/devices/.../current_link_speed.
+
+ If you know Link Width or Speed changes occur (e.g., to correct
+ unreliable links), you may answer Y.
+
config PCIE_EDR
bool "PCI Express Error Disconnect Recover support"
depends on PCIE_DPC && ACPI
@@ -12,4 +12,5 @@ obj-$(CONFIG_PCIEAER_INJECT) += aer_inject.o
obj-$(CONFIG_PCIE_PME) += pme.o
obj-$(CONFIG_PCIE_DPC) += dpc.o
obj-$(CONFIG_PCIE_PTM) += ptm.o
+obj-$(CONFIG_PCIE_BWCTRL) += bwctrl.o
obj-$(CONFIG_PCIE_EDR) += edr.o
new file mode 100644
@@ -0,0 +1,190 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * PCIe bandwidth controller
+ *
+ * Author: Alexandru Gagniuc <mr.nuke.me@gmail.com>
+ *
+ * Copyright (C) 2019 Dell Inc
+ * Copyright (C) 2023-2024 Intel Corporation
+ *
+ * This service port driver hooks into the Bandwidth Notification interrupt
+ * watching for changes or links becoming degraded in operation. It updates
+ * the cached Current Link Speed that is exposed to user space through sysfs.
+ */
+
+#define dev_fmt(fmt) "bwctrl: " fmt
+
+#include <linux/atomic.h>
+#include <linux/cleanup.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <linux/rwsem.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+
+#include "../pci.h"
+#include "portdrv.h"
+
+/**
+ * struct pcie_bwctrl_data - PCIe bandwidth controller
+ * @lbms_count: Count for LBMS (since last reset)
+ */
+struct pcie_bwctrl_data {
+ atomic_t lbms_count;
+};
+
+/* Prevents port removal during link speed changes and LBMS count accessors */
+static DECLARE_RWSEM(pcie_bwctrl_remove_rwsem);
+
+static void pcie_bwnotif_enable(struct pcie_device *srv)
+{
+ struct pcie_bwctrl_data *data = get_service_data(srv);
+ struct pci_dev *port = srv->port;
+ u16 link_status;
+ int ret;
+
+ /* Count LBMS seen so far as one */
+ ret = pcie_capability_read_word(port, PCI_EXP_LNKSTA, &link_status);
+ if (ret == PCIBIOS_SUCCESSFUL && link_status & PCI_EXP_LNKSTA_LBMS)
+ atomic_inc(&data->lbms_count);
+
+ pcie_capability_set_word(port, PCI_EXP_LNKCTL,
+ PCI_EXP_LNKCTL_LBMIE | PCI_EXP_LNKCTL_LABIE);
+ pcie_capability_write_word(port, PCI_EXP_LNKSTA,
+ PCI_EXP_LNKSTA_LBMS | PCI_EXP_LNKSTA_LABS);
+
+ /*
+ * Update after enabling notifications & clearing status bits ensures
+ * link speed is up to date.
+ */
+ pcie_update_link_speed(port->subordinate);
+}
+
+static void pcie_bwnotif_disable(struct pci_dev *port)
+{
+ pcie_capability_clear_word(port, PCI_EXP_LNKCTL,
+ PCI_EXP_LNKCTL_LBMIE | PCI_EXP_LNKCTL_LABIE);
+}
+
+static irqreturn_t pcie_bwnotif_irq_thread(int irq, void *context)
+{
+ struct pcie_device *srv = context;
+ struct pcie_bwctrl_data *data = get_service_data(srv);
+ struct pci_dev *port = srv->port;
+ u16 link_status, events;
+ int ret;
+
+ ret = pcie_capability_read_word(port, PCI_EXP_LNKSTA, &link_status);
+ if (ret != PCIBIOS_SUCCESSFUL)
+ return IRQ_NONE;
+
+ events = link_status & (PCI_EXP_LNKSTA_LBMS | PCI_EXP_LNKSTA_LABS);
+ if (!events)
+ return IRQ_NONE;
+
+ if (events & PCI_EXP_LNKSTA_LBMS)
+ atomic_inc(&data->lbms_count);
+
+ pcie_capability_write_word(port, PCI_EXP_LNKSTA, events);
+
+ /*
+ * Interrupts will not be triggered from any further Link Speed
+ * change until LBMS is cleared by the write. Therefore, re-read the
+ * speed (inside pcie_update_link_speed()) after LBMS has been
+ * cleared to avoid missing link speed changes.
+ */
+ pcie_update_link_speed(port->subordinate);
+
+ return IRQ_HANDLED;
+}
+
+void pcie_reset_lbms_count(struct pci_dev *port)
+{
+ struct pcie_bwctrl_data *data;
+
+ guard(rwsem_read)(&pcie_bwctrl_remove_rwsem);
+ data = port->link_bwctrl;
+ if (data)
+ atomic_set(&data->lbms_count, 0);
+ else
+ pcie_capability_write_word(port, PCI_EXP_LNKSTA, PCI_EXP_LNKSTA_LBMS);
+}
+
+int pcie_lbms_count(struct pci_dev *port, unsigned long *val)
+{
+ struct pcie_bwctrl_data *data;
+
+ guard(rwsem_read)(&pcie_bwctrl_remove_rwsem);
+ data = port->link_bwctrl;
+ if (!data)
+ return -ENOTTY;
+
+ *val = atomic_read(&data->lbms_count);
+
+ return 0;
+}
+
+static int pcie_bwnotif_probe(struct pcie_device *srv)
+{
+ struct pci_dev *port = srv->port;
+ int ret;
+
+ struct pcie_bwctrl_data *data __free(kfree) =
+ kzalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ set_service_data(srv, data);
+
+ ret = request_threaded_irq(srv->irq, NULL, pcie_bwnotif_irq_thread,
+ IRQF_SHARED | IRQF_ONESHOT, "PCIe bwctrl", srv);
+ if (ret)
+ return ret;
+
+ port->link_bwctrl = no_free_ptr(data);
+ pcie_bwnotif_enable(srv);
+
+ pci_dbg(port, "enabled with IRQ %d\n", srv->irq);
+
+ return 0;
+}
+
+static void pcie_bwnotif_remove(struct pcie_device *srv)
+{
+ struct pcie_bwctrl_data *data = get_service_data(srv);
+
+ pcie_bwnotif_disable(srv->port);
+ scoped_guard(rwsem_write, &pcie_bwctrl_remove_rwsem)
+ srv->port->link_bwctrl = NULL;
+
+ free_irq(srv->irq, srv);
+ kfree(data);
+}
+
+static int pcie_bwnotif_suspend(struct pcie_device *srv)
+{
+ pcie_bwnotif_disable(srv->port);
+ return 0;
+}
+
+static int pcie_bwnotif_resume(struct pcie_device *srv)
+{
+ pcie_bwnotif_enable(srv);
+ return 0;
+}
+
+static struct pcie_port_service_driver pcie_bwctrl_driver = {
+ .name = "pcie_bwctrl",
+ .port_type = PCIE_ANY_PORT,
+ .service = PCIE_PORT_SERVICE_BWCTRL,
+ .probe = pcie_bwnotif_probe,
+ .suspend = pcie_bwnotif_suspend,
+ .resume = pcie_bwnotif_resume,
+ .remove = pcie_bwnotif_remove,
+};
+
+int __init pcie_bwctrl_init(void)
+{
+ return pcie_port_service_register(&pcie_bwctrl_driver);
+}
@@ -68,7 +68,7 @@ static int pcie_message_numbers(struct pci_dev *dev, int mask,
*/
if (mask & (PCIE_PORT_SERVICE_PME | PCIE_PORT_SERVICE_HP |
- PCIE_PORT_SERVICE_BWNOTIF)) {
+ PCIE_PORT_SERVICE_BWCTRL)) {
pcie_capability_read_word(dev, PCI_EXP_FLAGS, ®16);
*pme = FIELD_GET(PCI_EXP_FLAGS_IRQ, reg16);
nvec = *pme + 1;
@@ -150,11 +150,11 @@ static int pcie_port_enable_irq_vec(struct pci_dev *dev, int *irqs, int mask)
/* PME, hotplug and bandwidth notification share an MSI/MSI-X vector */
if (mask & (PCIE_PORT_SERVICE_PME | PCIE_PORT_SERVICE_HP |
- PCIE_PORT_SERVICE_BWNOTIF)) {
+ PCIE_PORT_SERVICE_BWCTRL)) {
pcie_irq = pci_irq_vector(dev, pme);
irqs[PCIE_PORT_SERVICE_PME_SHIFT] = pcie_irq;
irqs[PCIE_PORT_SERVICE_HP_SHIFT] = pcie_irq;
- irqs[PCIE_PORT_SERVICE_BWNOTIF_SHIFT] = pcie_irq;
+ irqs[PCIE_PORT_SERVICE_BWCTRL_SHIFT] = pcie_irq;
}
if (mask & PCIE_PORT_SERVICE_AER)
@@ -271,7 +271,7 @@ static int get_port_device_capability(struct pci_dev *dev)
pcie_capability_read_dword(dev, PCI_EXP_LNKCAP, &linkcap);
if (linkcap & PCI_EXP_LNKCAP_LBNC)
- services |= PCIE_PORT_SERVICE_BWNOTIF;
+ services |= PCIE_PORT_SERVICE_BWCTRL;
}
return services;
@@ -828,6 +828,7 @@ static void __init pcie_init_services(void)
pcie_aer_init();
pcie_pme_init();
pcie_dpc_init();
+ pcie_bwctrl_init();
pcie_hp_init();
}
@@ -20,8 +20,8 @@
#define PCIE_PORT_SERVICE_HP (1 << PCIE_PORT_SERVICE_HP_SHIFT)
#define PCIE_PORT_SERVICE_DPC_SHIFT 3 /* Downstream Port Containment */
#define PCIE_PORT_SERVICE_DPC (1 << PCIE_PORT_SERVICE_DPC_SHIFT)
-#define PCIE_PORT_SERVICE_BWNOTIF_SHIFT 4 /* Bandwidth notification */
-#define PCIE_PORT_SERVICE_BWNOTIF (1 << PCIE_PORT_SERVICE_BWNOTIF_SHIFT)
+#define PCIE_PORT_SERVICE_BWCTRL_SHIFT 4 /* Bandwidth Controller (notifications) */
+#define PCIE_PORT_SERVICE_BWCTRL (1 << PCIE_PORT_SERVICE_BWCTRL_SHIFT)
#define PCIE_PORT_DEVICE_MAXSERVICES 5
@@ -51,6 +51,12 @@ int pcie_dpc_init(void);
static inline int pcie_dpc_init(void) { return 0; }
#endif
+#ifdef CONFIG_PCIE_BWCTRL
+int pcie_bwctrl_init(void);
+#else
+static inline int pcie_bwctrl_init(void) { return 0; }
+#endif
+
/* Port Type */
#define PCIE_ANY_PORT (~0)
@@ -35,7 +35,14 @@
static bool pcie_lbms_seen(struct pci_dev *dev, u16 lnksta)
{
- return lnksta & PCI_EXP_LNKSTA_LBMS;
+ unsigned long count;
+ int ret;
+
+ ret = pcie_lbms_count(dev, &count);
+ if (ret < 0)
+ return lnksta & PCI_EXP_LNKSTA_LBMS;
+
+ return count > 0;
}
/*
@@ -314,6 +314,7 @@ struct pci_vpd {
};
struct irq_affinity;
+struct pcie_bwctrl_data;
struct pcie_link_state;
struct pci_sriov;
struct pci_p2pdma;
@@ -502,6 +503,9 @@ struct pci_dev {
unsigned int dpc_rp_extensions:1;
u8 dpc_rp_log_size;
#endif
+#ifdef CONFIG_PCIE_BWCTRL
+ struct pcie_bwctrl_data *link_bwctrl;
+#endif
#ifdef CONFIG_PCI_ATS
union {
struct pci_sriov *sriov; /* PF: SR-IOV info */