@@ -8,6 +8,8 @@
#define ASM_POWERPC_EEH_EVENT_H
#ifdef __KERNEL__
+#include <linux/workqueue.h>
+
/*
* structure holding pci controller data that describes a
* change in the isolation status of a PCI slot. A pointer
@@ -15,15 +17,20 @@
* callback.
*/
struct eeh_event {
+ struct work_struct work;
struct list_head list; /* to form event queue */
struct eeh_pe *pe; /* EEH PE */
};
+extern spinlock_t eeh_eventlist_lock;
+
int eeh_event_init(void);
+int eeh_phb_event(struct eeh_pe *pe);
int eeh_send_failure_event(struct eeh_pe *pe);
int __eeh_send_failure_event(struct eeh_pe *pe);
void eeh_remove_event(struct eeh_pe *pe, bool force);
void eeh_handle_normal_event(struct eeh_pe *pe);
+void eeh_handle_normal_event_work(struct work_struct *work);
void eeh_handle_special_event(void);
#endif /* __KERNEL__ */
@@ -138,6 +138,10 @@ struct pci_controller {
/* iommu_ops support */
struct iommu_device iommu;
+
+ bool eeh_in_progress;
+ struct list_head eeh_eventlist;
+ spinlock_t eeh_eventlist_lock;
};
/* These are used for config access before all the PCI probing
@@ -1116,6 +1116,30 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
eeh_pe_state_clear(pe, EEH_PE_RECOVERING, true);
}
+void eeh_handle_normal_event_work(struct work_struct *work)
+{
+ unsigned long flags;
+ struct eeh_event *event = container_of(work, struct eeh_event, work);
+ struct pci_controller *phb = event->pe->phb;
+
+ eeh_handle_normal_event(event->pe);
+
+ kfree(event);
+ spin_lock_irqsave(&phb->eeh_eventlist_lock, flags);
+ WARN_ON_ONCE(!phb->eeh_in_progress);
+ if (list_empty(&phb->eeh_eventlist)) {
+ phb->eeh_in_progress = false;
+ pr_debug("EEH: No more work to do\n");
+ } else {
+ pr_warn("EEH: More work to do\n");
+ event = list_entry(phb->eeh_eventlist.next,
+ struct eeh_event, list);
+ list_del(&event->list);
+ queue_work(system_unbound_wq, &event->work);
+ }
+ spin_unlock_irqrestore(&phb->eeh_eventlist_lock, flags);
+}
+
/**
* eeh_handle_special_event - Handle EEH events without a specific failing PE
*
@@ -1185,8 +1209,7 @@ void eeh_handle_special_event(void)
*/
if (rc == EEH_NEXT_ERR_FROZEN_PE ||
rc == EEH_NEXT_ERR_FENCED_PHB) {
- eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
- eeh_handle_normal_event(pe);
+ eeh_phb_event(pe);
} else {
eeh_for_each_pe(pe, tmp_pe)
eeh_pe_for_each_dev(tmp_pe, edev, tmp_edev)
@@ -22,7 +22,7 @@
* work-queue, where a worker thread can drive recovery.
*/
-static DEFINE_SPINLOCK(eeh_eventlist_lock);
+DEFINE_SPINLOCK(eeh_eventlist_lock);
static DECLARE_COMPLETION(eeh_eventlist_event);
static LIST_HEAD(eeh_eventlist);
@@ -91,6 +91,42 @@ int eeh_event_init(void)
return 0;
}
+int eeh_phb_event(struct eeh_pe *pe)
+{
+ struct eeh_event *event;
+ unsigned long flags;
+ struct pci_controller *phb;
+
+ event = kzalloc(sizeof(*event), GFP_ATOMIC);
+ if (!event)
+ return -ENOMEM;
+
+ if (pe) {
+ phb = pe->phb;
+ event->pe = pe;
+ INIT_WORK(&event->work, eeh_handle_normal_event_work);
+ eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
+ pr_err("EEH: EVENT=ERROR_DETECTED PHB=%#x PE=%#x\n",
+ phb->global_number, pe->addr);
+ spin_lock_irqsave(&phb->eeh_eventlist_lock, flags);
+ if (phb->eeh_in_progress) {
+ pr_info("EEH: EEH already in progress on this PHB, queueing.\n");
+ list_add(&event->list, &phb->eeh_eventlist);
+ } else {
+ pr_info("EEH: Beginning recovery on this PHB.\n");
+ WARN_ON_ONCE(!list_empty(&phb->eeh_eventlist));
+ phb->eeh_in_progress = true;
+ queue_work(system_unbound_wq, &event->work);
+ }
+ spin_unlock_irqrestore(&phb->eeh_eventlist_lock, flags);
+ } else {
+ spin_lock_irqsave(&eeh_eventlist_lock, flags);
+ list_add(&event->list, &eeh_eventlist);
+ complete(&eeh_eventlist_event);
+ spin_unlock_irqrestore(&eeh_eventlist_lock, flags);
+ }
+ return 0;
+}
/**
* eeh_send_failure_event - Generate a PCI error event
* @pe: EEH PE
@@ -101,16 +137,6 @@ int eeh_event_init(void)
*/
int __eeh_send_failure_event(struct eeh_pe *pe)
{
- unsigned long flags;
- struct eeh_event *event;
-
- event = kzalloc(sizeof(*event), GFP_ATOMIC);
- if (!event) {
- pr_err("EEH: out of memory, event not handled\n");
- return -ENOMEM;
- }
- event->pe = pe;
-
/*
* Mark the PE as recovering before inserting it in the queue.
* This prevents the PE from being free()ed by a hotplug driver
@@ -128,16 +154,7 @@ int __eeh_send_failure_event(struct eeh_pe *pe)
eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
}
-
- /* We may or may not be called in an interrupt context */
- spin_lock_irqsave(&eeh_eventlist_lock, flags);
- list_add(&event->list, &eeh_eventlist);
- spin_unlock_irqrestore(&eeh_eventlist_lock, flags);
-
- /* For EEH deamon to knick in */
- complete(&eeh_eventlist_event);
-
- return 0;
+ return eeh_phb_event(pe);
}
int eeh_send_failure_event(struct eeh_pe *pe)
@@ -81,6 +81,10 @@ int eeh_phb_pe_create(struct pci_controller *phb)
{
struct eeh_pe *pe;
+ phb->eeh_in_progress = false;
+ INIT_LIST_HEAD(&phb->eeh_eventlist);
+ spin_lock_init(&phb->eeh_eventlist_lock);
+
/* Allocate PHB PE */
pe = eeh_pe_alloc(phb, EEH_PE_PHB);
if (!pe) { Currently, with a single event queue EEH recovery is entirely serialized and takes place within a single kernel thread. This can cause recovery to take a long time when there are many devices. Make the recovery event queue per-PHB and allow the recovery to happen in parallel for all the PHBs. Signed-off-by: Ganesh Goudar <ganeshgr@linux.ibm.com> --- v2: Include missing hunk, which modifies __eeh_send_failure_event. --- arch/powerpc/include/asm/eeh_event.h | 7 ++++ arch/powerpc/include/asm/pci-bridge.h | 4 ++ arch/powerpc/kernel/eeh_driver.c | 27 +++++++++++- arch/powerpc/kernel/eeh_event.c | 59 +++++++++++++++++---------- arch/powerpc/kernel/eeh_pe.c | 4 ++ 5 files changed, 78 insertions(+), 23 deletions(-)
Currnetly, with a single event queue EEH recovery is entirely serialized and takes place within a single kernel thread. This can cause recovery to take a long time when there are many devices. Have the recovery event queue per PHB and allow the recovery to happen in parallel for all the PHBs. Signed-off-by: Ganesh Goudar <ganeshgr@linux.ibm.com> --- v2: Include missing hunk, which modifies __eeh_send_failure_event. --- arch/powerpc/include/asm/eeh_event.h | 7 ++++ arch/powerpc/include/asm/pci-bridge.h | 4 ++ arch/powerpc/kernel/eeh_driver.c | 27 +++++++++++- arch/powerpc/kernel/eeh_event.c | 59 +++++++++++++++++---------- arch/powerpc/kernel/eeh_pe.c | 4 ++ 5 files changed, 78 insertions(+), 23 deletions(-)