Message ID | 1818424.8fNkf5pBy3@vostro.rjw.lan |
---|---|
State | Not Applicable |
Headers | show |
On Thu, 2013-07-18 at 01:32 +0200, Rafael J. Wysocki wrote: > From: Rafael J. Wysocki <rafael.j.wysocki@intel.com> > > The current implementation of acpiphp_check_bridge() is pretty dumb: > - It enables a slot if it's not enabled and the slot status is > ACPI_STA_ALL. > - It disables a slot if it's enabled and the slot status is not > ACPI_STA_ALL. > > This behavior is not sufficient to handle the Thunderbolt daisy > chaining case properly, however, because in that case the bus > behind the already enabled slot needs to be rescanned for new > devices. > > For this reason, modify acpiphp_check_bridge() so that slots are > disabled and stopped if they are not in the ACPI_STA_ALL state. > > For slots in the ACPI_STA_ALL state, devices behind them that don't > respond are trimmed using a new function, trim_stale_devices(), > introduced specifically for this purpose. That function walks > the given bus and checks each device on it. If the device doesn't > respond, it is assumed to be gone and is removed. > > Once all of the stale devices directly behind the slot have been > removed, acpiphp_check_bridge() will start looking for new devices > that might have appeared on the given bus. It will do that even if > the slot is already enabled (SLOT_ENABLED is set for it). > > In addition to that, make the bus check notification ignore > SLOT_ENABLED and go for enable_device() directly if bridge is NULL, > so that devices behind the slot are re-enumerated in that case too. > > This change is based on earlier patches from Kirill A Shutemov > and Mika Westerberg. > > Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> > Tested-by: Mika Westerberg <mika.westerberg@linux.intel.com> > --- FYI, git bisect landed on this patch as the cause of my serial console dying on current upstream. Further debugging to come... 
Thanks, Alex > drivers/pci/hotplug/acpiphp_glue.c | 87 +++++++++++++++++++++++++------------ > 1 file changed, 60 insertions(+), 27 deletions(-) > > Index: linux-pm/drivers/pci/hotplug/acpiphp_glue.c > =================================================================== > --- linux-pm.orig/drivers/pci/hotplug/acpiphp_glue.c > +++ linux-pm/drivers/pci/hotplug/acpiphp_glue.c > @@ -46,6 +46,7 @@ > #include <linux/pci.h> > #include <linux/pci_hotplug.h> > #include <linux/pci-acpi.h> > +#include <linux/pm_runtime.h> > #include <linux/mutex.h> > #include <linux/slab.h> > #include <linux/acpi.h> > @@ -687,47 +688,75 @@ static unsigned int get_slot_status(stru > } > > /** > + * trim_stale_devices - remove PCI devices that are not responding. > + * @dev: PCI device to start walking the hierarchy from. > + */ > +static void trim_stale_devices(struct pci_dev *dev) > +{ > + acpi_handle handle = ACPI_HANDLE(&dev->dev); > + struct pci_bus *bus = dev->subordinate; > + bool alive = false; > + > + if (handle) { > + acpi_status status; > + unsigned long long sta; > + > + status = acpi_evaluate_integer(handle, "_STA", NULL, &sta); > + alive = ACPI_SUCCESS(status) && sta == ACPI_STA_ALL; > + } > + if (!alive) { > + u32 v; > + > + /* Check if the device responds. */ > + alive = pci_bus_read_dev_vendor_id(dev->bus, dev->devfn, &v, 0); > + } > + if (!alive) { > + pci_stop_and_remove_bus_device(dev); > + if (handle) > + acpiphp_bus_trim(handle); > + } else if (bus) { > + struct pci_dev *child, *tmp; > + > + /* The device is a bridge. so check the bus below it. */ > + pm_runtime_get_sync(&dev->dev); > + list_for_each_entry_safe(child, tmp, &bus->devices, bus_list) > + trim_stale_devices(child); > + > + pm_runtime_put(&dev->dev); > + } > +} > + > +/** > * acpiphp_check_bridge - re-enumerate devices > * @bridge: where to begin re-enumeration > * > * Iterate over all slots under this bridge and make sure that if a > * card is present they are enabled, and if not they are disabled. 
> */ > -static int acpiphp_check_bridge(struct acpiphp_bridge *bridge) > +static void acpiphp_check_bridge(struct acpiphp_bridge *bridge) > { > struct acpiphp_slot *slot; > - int retval = 0; > - int enabled, disabled; > - > - enabled = disabled = 0; > > list_for_each_entry(slot, &bridge->slots, node) { > - unsigned int status = get_slot_status(slot); > - if (slot->flags & SLOT_ENABLED) { > - if (status == ACPI_STA_ALL) > - continue; > + struct pci_bus *bus = slot->bus; > + struct pci_dev *dev, *tmp; > > - retval = acpiphp_disable_and_eject_slot(slot); > - if (retval) > - goto err_exit; > + mutex_lock(&slot->crit_sect); > + /* wake up all functions */ > + if (get_slot_status(slot) == ACPI_STA_ALL) { > + /* remove stale devices if any */ > + list_for_each_entry_safe(dev, tmp, &bus->devices, > + bus_list) > + if (PCI_SLOT(dev->devfn) == slot->device) > + trim_stale_devices(dev); > > - disabled++; > + /* configure all functions */ > + enable_device(slot); > } else { > - if (status != ACPI_STA_ALL) > - continue; > - retval = acpiphp_enable_slot(slot); > - if (retval) { > - err("Error occurred in enabling\n"); > - goto err_exit; > - } > - enabled++; > + disable_device(slot); > } > + mutex_unlock(&slot->crit_sect); > } > - > - dbg("%s: %d enabled, %d disabled\n", __func__, enabled, disabled); > - > - err_exit: > - return retval; > } > > static void acpiphp_set_hpp_values(struct pci_bus *bus) > @@ -828,7 +857,11 @@ static void hotplug_event(acpi_handle ha > ACPI_UINT32_MAX, check_sub_bridges, > NULL, NULL, NULL); > } else { > - acpiphp_enable_slot(func->slot); > + struct acpiphp_slot *slot = func->slot; > + > + mutex_lock(&slot->crit_sect); > + enable_device(slot); > + mutex_unlock(&slot->crit_sect); > } > break; > > > -- > To unsubscribe from this list: send the line "unsubscribe linux-pci" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe 
linux-pci" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wednesday, September 04, 2013 02:36:34 PM Alex Williamson wrote: > On Thu, 2013-07-18 at 01:32 +0200, Rafael J. Wysocki wrote: > > From: Rafael J. Wysocki <rafael.j.wysocki@intel.com> > > > > The current implementation of acpiphp_check_bridge() is pretty dumb: > > - It enables a slot if it's not enabled and the slot status is > > ACPI_STA_ALL. > > - It disables a slot if it's enabled and the slot status is not > > ACPI_STA_ALL. > > > > This behavior is not sufficient to handle the Thunderbolt daisy > > chaining case properly, however, because in that case the bus > > behind the already enabled slot needs to be rescanned for new > > devices. > > > > For this reason, modify acpiphp_check_bridge() so that slots are > > disabled and stopped if they are not in the ACPI_STA_ALL state. > > > > For slots in the ACPI_STA_ALL state, devices behind them that don't > > respond are trimmed using a new function, trim_stale_devices(), > > introduced specifically for this purpose. That function walks > > the given bus and checks each device on it. If the device doesn't > > respond, it is assumed to be gone and is removed. > > > > Once all of the stale devices directly behind the slot have been > > removed, acpiphp_check_bridge() will start looking for new devices > > that might have appeared on the given bus. It will do that even if > > the slot is already enabled (SLOT_ENABLED is set for it). > > > > In addition to that, make the bus check notification ignore > > SLOT_ENABLED and go for enable_device() directly if bridge is NULL, > > so that devices behind the slot are re-enumerated in that case too. > > > > This change is based on earlier patches from Kirill A Shutemov > > and Mika Westerberg. > > > > Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> > > Tested-by: Mika Westerberg <mika.westerberg@linux.intel.com> > > --- > > FYI, git bisect landed on this patch as the cause of my serial console > dying on current upstream. Further debugging to come... 
> Thanks,
Well, sorry about that. What exactly do you mean by "dying"? Rafael -- To unsubscribe from this list: send the line "unsubscribe linux-pci" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, 2013-09-05 at 00:54 +0200, Rafael J. Wysocki wrote: > On Wednesday, September 04, 2013 02:36:34 PM Alex Williamson wrote: > > On Thu, 2013-07-18 at 01:32 +0200, Rafael J. Wysocki wrote: > > > From: Rafael J. Wysocki <rafael.j.wysocki@intel.com> > > > > > > The current implementation of acpiphp_check_bridge() is pretty dumb: > > > - It enables a slot if it's not enabled and the slot status is > > > ACPI_STA_ALL. > > > - It disables a slot if it's enabled and the slot status is not > > > ACPI_STA_ALL. > > > > > > This behavior is not sufficient to handle the Thunderbolt daisy > > > chaining case properly, however, because in that case the bus > > > behind the already enabled slot needs to be rescanned for new > > > devices. > > > > > > For this reason, modify acpiphp_check_bridge() so that slots are > > > disabled and stopped if they are not in the ACPI_STA_ALL state. > > > > > > For slots in the ACPI_STA_ALL state, devices behind them that don't > > > respond are trimmed using a new function, trim_stale_devices(), > > > introduced specifically for this purpose. That function walks > > > the given bus and checks each device on it. If the device doesn't > > > respond, it is assumed to be gone and is removed. > > > > > > Once all of the stale devices directly behind the slot have been > > > removed, acpiphp_check_bridge() will start looking for new devices > > > that might have appeared on the given bus. It will do that even if > > > the slot is already enabled (SLOT_ENABLED is set for it). > > > > > > In addition to that, make the bus check notification ignore > > > SLOT_ENABLED and go for enable_device() directly if bridge is NULL, > > > so that devices behind the slot are re-enumerated in that case too. > > > > > > This change is based on earlier patches from Kirill A Shutemov > > > and Mika Westerberg. > > > > > > Signed-off-by: Rafael J. 
Wysocki <rafael.j.wysocki@intel.com> > > > Tested-by: Mika Westerberg <mika.westerberg@linux.intel.com> > > > --- > > > > FYI, git bisect landed on this patch as the cause of my serial console > > dying on current upstream. Further debugging to come... Thanks, > > Well, sorry about that. > > What exactly do you mean by "dying"? Sorry, I was hoping to have more details quickly, but it's been a pain to debug. By dying I mean serial console output suddenly stops during kernel boot and nothing more comes out of it until after the system is rebooted. The problem happens when acpiphp_check_bridge() calls enable_slot(). The serial console dies somewhere down in acpiphp_bus_trim(). I think this is happening on the 00:1f ISA bridge, so there's a good chance the serial ports are described as somewhere under there. Thanks, Alex -- To unsubscribe from this list: send the line "unsubscribe linux-pci" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wednesday, September 04, 2013 05:12:14 PM Alex Williamson wrote: > On Thu, 2013-09-05 at 00:54 +0200, Rafael J. Wysocki wrote: > > On Wednesday, September 04, 2013 02:36:34 PM Alex Williamson wrote: > > > On Thu, 2013-07-18 at 01:32 +0200, Rafael J. Wysocki wrote: > > > > From: Rafael J. Wysocki <rafael.j.wysocki@intel.com> > > > > > > > > The current implementation of acpiphp_check_bridge() is pretty dumb: > > > > - It enables a slot if it's not enabled and the slot status is > > > > ACPI_STA_ALL. > > > > - It disables a slot if it's enabled and the slot status is not > > > > ACPI_STA_ALL. > > > > > > > > This behavior is not sufficient to handle the Thunderbolt daisy > > > > chaining case properly, however, because in that case the bus > > > > behind the already enabled slot needs to be rescanned for new > > > > devices. > > > > > > > > For this reason, modify acpiphp_check_bridge() so that slots are > > > > disabled and stopped if they are not in the ACPI_STA_ALL state. > > > > > > > > For slots in the ACPI_STA_ALL state, devices behind them that don't > > > > respond are trimmed using a new function, trim_stale_devices(), > > > > introduced specifically for this purpose. That function walks > > > > the given bus and checks each device on it. If the device doesn't > > > > respond, it is assumed to be gone and is removed. > > > > > > > > Once all of the stale devices directly behind the slot have been > > > > removed, acpiphp_check_bridge() will start looking for new devices > > > > that might have appeared on the given bus. It will do that even if > > > > the slot is already enabled (SLOT_ENABLED is set for it). > > > > > > > > In addition to that, make the bus check notification ignore > > > > SLOT_ENABLED and go for enable_device() directly if bridge is NULL, > > > > so that devices behind the slot are re-enumerated in that case too. > > > > > > > > This change is based on earlier patches from Kirill A Shutemov > > > > and Mika Westerberg. 
> > > > > > > > Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> > > > > Tested-by: Mika Westerberg <mika.westerberg@linux.intel.com> > > > > --- > > > > > > FYI, git bisect landed on this patch as the cause of my serial console > > > dying on current upstream. Further debugging to come... Thanks, > > > > Well, sorry about that. > > > > What exactly do you mean by "dying"? > > Sorry, I was hoping to have more details quickly, but it's been a pain > to debug. By dying I mean serial console output suddenly stops during > kernel boot and nothing more comes out of it until after the system is > rebooted. The problem happens when acpiphp_check_bridge() calls > enable_slot(). The serial console dies somewhere down in > acpiphp_bus_trim(). I think this is happening on the 00:1f ISA bridge, > so there's a good chance the serial ports are described as somewhere > under there. Can you please check if that is the acpiphp_bus_trim() called by acpiphp_bus_add() or the other one called from trim_stale_devices()? Just add a dump_stack() or WARN_ON(1) to trim_stale_devices() next to the acpiphp_bus_trim() call and see if that triggers. I *think* it's the one in acpiphp_bus_add(), but it won't hurt to verify that. Thanks, Rafael -- To unsubscribe from this list: send the line "unsubscribe linux-pci" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, 2013-09-05 at 01:35 +0200, Rafael J. Wysocki wrote: > On Wednesday, September 04, 2013 05:12:14 PM Alex Williamson wrote: > > On Thu, 2013-09-05 at 00:54 +0200, Rafael J. Wysocki wrote: > > > On Wednesday, September 04, 2013 02:36:34 PM Alex Williamson wrote: > > > > On Thu, 2013-07-18 at 01:32 +0200, Rafael J. Wysocki wrote: > > > > > From: Rafael J. Wysocki <rafael.j.wysocki@intel.com> > > > > > > > > > > The current implementation of acpiphp_check_bridge() is pretty dumb: > > > > > - It enables a slot if it's not enabled and the slot status is > > > > > ACPI_STA_ALL. > > > > > - It disables a slot if it's enabled and the slot status is not > > > > > ACPI_STA_ALL. > > > > > > > > > > This behavior is not sufficient to handle the Thunderbolt daisy > > > > > chaining case properly, however, because in that case the bus > > > > > behind the already enabled slot needs to be rescanned for new > > > > > devices. > > > > > > > > > > For this reason, modify acpiphp_check_bridge() so that slots are > > > > > disabled and stopped if they are not in the ACPI_STA_ALL state. > > > > > > > > > > For slots in the ACPI_STA_ALL state, devices behind them that don't > > > > > respond are trimmed using a new function, trim_stale_devices(), > > > > > introduced specifically for this purpose. That function walks > > > > > the given bus and checks each device on it. If the device doesn't > > > > > respond, it is assumed to be gone and is removed. > > > > > > > > > > Once all of the stale devices directly behind the slot have been > > > > > removed, acpiphp_check_bridge() will start looking for new devices > > > > > that might have appeared on the given bus. It will do that even if > > > > > the slot is already enabled (SLOT_ENABLED is set for it). 
> > > > > > > > > > In addition to that, make the bus check notification ignore > > > > > SLOT_ENABLED and go for enable_device() directly if bridge is NULL, > > > > > so that devices behind the slot are re-enumerated in that case too. > > > > > > > > > > This change is based on earlier patches from Kirill A Shutemov > > > > > and Mika Westerberg. > > > > > > > > > > Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> > > > > > Tested-by: Mika Westerberg <mika.westerberg@linux.intel.com> > > > > > --- > > > > > > > > FYI, git bisect landed on this patch as the cause of my serial console > > > > dying on current upstream. Further debugging to come... Thanks, > > > > > > Well, sorry about that. > > > > > > What exactly do you mean by "dying"? > > > > Sorry, I was hoping to have more details quickly, but it's been a pain > > to debug. By dying I mean serial console output suddenly stops during > > kernel boot and nothing more comes out of it until after the system is > > rebooted. The problem happens when acpiphp_check_bridge() calls > > enable_slot(). The serial console dies somewhere down in > > acpiphp_bus_trim(). I think this is happening on the 00:1f ISA bridge, > > so there's a good chance the serial ports are described as somewhere > > under there. > > Can you please check if that is the acpiphp_bus_trim() called by > acpiphp_bus_add() or the other one called from trim_stale_devices()? > > Just add a dump_stack() or WARN_ON(1) to trim_stale_devices() next to > the acpiphp_bus_trim() call and see if that triggers. I *think* it's the one > in acpiphp_bus_add(), but it won't hurt to verify that. Here's the call path: [ 16.120824] [<ffffffff81627e6c>] dump_stack+0x55/0x76 [ 16.125979] [<ffffffff8162132e>] enable_slot+0x4ee/0x5e0 [ 16.131396] [<ffffffff813418fb>] ? trim_stale_devices+0x5b/0xf0 [ 16.137420] [<ffffffff81341b35>] acpiphp_check_bridge+0xd5/0x110 [ 16.143531] [<ffffffff81342acb>] hotplug_event+0x16b/0x260 [ 16.149115] [<ffffffff81072cd9>] ? 
process_one_work+0x189/0x540 [ 16.155136] [<ffffffff81342bf0>] hotplug_event_work+0x30/0x70 [ 16.160978] [<ffffffff81072d3b>] process_one_work+0x1eb/0x540 [ 16.166819] [<ffffffff81072cd9>] ? process_one_work+0x189/0x540 [ 16.172836] [<ffffffff8107353c>] worker_thread+0x11c/0x370 [ 16.178426] [<ffffffff81073420>] ? rescuer_thread+0x350/0x350 [ 16.184276] [<ffffffff8107b0ea>] kthread+0xea/0xf0 [ 16.189165] [<ffffffff8107b000>] ? kthread_create_on_node+0x160/0x160 [ 16.195700] [<ffffffff816395dc>] ret_from_fork+0x7c/0xb0 [ 16.201109] [<ffffffff8107b000>] ? kthread_create_on_node+0x160/0x160 The actual death of the serial console occurs in acpi_device_set_power() called from: enable_slot() acpiphp_bus_add() acpiphp_bus_trim() acpi_bus_trim() acpi_walk_namespace() acpi_bus_remove() acpi_device_unregister() acpi_device_set_power() I can't seem to get a path from the acpi devices in question there, so I have no idea what's getting trimmed here. It worries me quite a bit that this patch introduces trimming that apparently wasn't happening before, though. Thanks, Alex -- To unsubscribe from this list: send the line "unsubscribe linux-pci" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, 2013-09-04 at 21:37 -0600, Alex Williamson wrote: > On Thu, 2013-09-05 at 01:35 +0200, Rafael J. Wysocki wrote: > > On Wednesday, September 04, 2013 05:12:14 PM Alex Williamson wrote: > > > On Thu, 2013-09-05 at 00:54 +0200, Rafael J. Wysocki wrote: > > > > On Wednesday, September 04, 2013 02:36:34 PM Alex Williamson wrote: > > > > > On Thu, 2013-07-18 at 01:32 +0200, Rafael J. Wysocki wrote: > > > > > > From: Rafael J. Wysocki <rafael.j.wysocki@intel.com> > > > > > > > > > > > > The current implementation of acpiphp_check_bridge() is pretty dumb: > > > > > > - It enables a slot if it's not enabled and the slot status is > > > > > > ACPI_STA_ALL. > > > > > > - It disables a slot if it's enabled and the slot status is not > > > > > > ACPI_STA_ALL. > > > > > > > > > > > > This behavior is not sufficient to handle the Thunderbolt daisy > > > > > > chaining case properly, however, because in that case the bus > > > > > > behind the already enabled slot needs to be rescanned for new > > > > > > devices. > > > > > > > > > > > > For this reason, modify acpiphp_check_bridge() so that slots are > > > > > > disabled and stopped if they are not in the ACPI_STA_ALL state. > > > > > > > > > > > > For slots in the ACPI_STA_ALL state, devices behind them that don't > > > > > > respond are trimmed using a new function, trim_stale_devices(), > > > > > > introduced specifically for this purpose. That function walks > > > > > > the given bus and checks each device on it. If the device doesn't > > > > > > respond, it is assumed to be gone and is removed. > > > > > > > > > > > > Once all of the stale devices directly behind the slot have been > > > > > > removed, acpiphp_check_bridge() will start looking for new devices > > > > > > that might have appeared on the given bus. It will do that even if > > > > > > the slot is already enabled (SLOT_ENABLED is set for it). 
> > > > > > > > > > > > In addition to that, make the bus check notification ignore > > > > > > SLOT_ENABLED and go for enable_device() directly if bridge is NULL, > > > > > > so that devices behind the slot are re-enumerated in that case too. > > > > > > > > > > > > This change is based on earlier patches from Kirill A Shutemov > > > > > > and Mika Westerberg. > > > > > > > > > > > > Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> > > > > > > Tested-by: Mika Westerberg <mika.westerberg@linux.intel.com> > > > > > > --- > > > > > > > > > > FYI, git bisect landed on this patch as the cause of my serial console > > > > > dying on current upstream. Further debugging to come... Thanks, > > > > > > > > Well, sorry about that. > > > > > > > > What exactly do you mean by "dying"? > > > > > > Sorry, I was hoping to have more details quickly, but it's been a pain > > > to debug. By dying I mean serial console output suddenly stops during > > > kernel boot and nothing more comes out of it until after the system is > > > rebooted. The problem happens when acpiphp_check_bridge() calls > > > enable_slot(). The serial console dies somewhere down in > > > acpiphp_bus_trim(). I think this is happening on the 00:1f ISA bridge, > > > so there's a good chance the serial ports are described as somewhere > > > under there. > > > > Can you please check if that is the acpiphp_bus_trim() called by > > acpiphp_bus_add() or the other one called from trim_stale_devices()? > > > > Just add a dump_stack() or WARN_ON(1) to trim_stale_devices() next to > > the acpiphp_bus_trim() call and see if that triggers. I *think* it's the one > > in acpiphp_bus_add(), but it won't hurt to verify that. > > Here's the call path: > > [ 16.120824] [<ffffffff81627e6c>] dump_stack+0x55/0x76 > [ 16.125979] [<ffffffff8162132e>] enable_slot+0x4ee/0x5e0 > [ 16.131396] [<ffffffff813418fb>] ? 
trim_stale_devices+0x5b/0xf0 > [ 16.137420] [<ffffffff81341b35>] acpiphp_check_bridge+0xd5/0x110 > [ 16.143531] [<ffffffff81342acb>] hotplug_event+0x16b/0x260 > [ 16.149115] [<ffffffff81072cd9>] ? process_one_work+0x189/0x540 > [ 16.155136] [<ffffffff81342bf0>] hotplug_event_work+0x30/0x70 > [ 16.160978] [<ffffffff81072d3b>] process_one_work+0x1eb/0x540 > [ 16.166819] [<ffffffff81072cd9>] ? process_one_work+0x189/0x540 > [ 16.172836] [<ffffffff8107353c>] worker_thread+0x11c/0x370 > [ 16.178426] [<ffffffff81073420>] ? rescuer_thread+0x350/0x350 > [ 16.184276] [<ffffffff8107b0ea>] kthread+0xea/0xf0 > [ 16.189165] [<ffffffff8107b000>] ? kthread_create_on_node+0x160/0x160 > [ 16.195700] [<ffffffff816395dc>] ret_from_fork+0x7c/0xb0 > [ 16.201109] [<ffffffff8107b000>] ? kthread_create_on_node+0x160/0x160 > > The actual death of the serial console occurs in acpi_device_set_power() > called from: > > enable_slot() > acpiphp_bus_add() > acpiphp_bus_trim() > acpi_bus_trim() > acpi_walk_namespace() > acpi_bus_remove() > acpi_device_unregister() > acpi_device_set_power() > > I can't seem to get a path from the acpi devices in question there, so I > have no idea what's getting trimmed here. It worries me quite a bit by > introducing this trimming that apparently wasn't happening before > though. Thanks, As suspected, the pnp.bus_id/id of the last device before the serial console dies is COM1/PNP0501. I also see all of these being trimmed out: MBRD/PNP0C02 DMAC/PNP0200 MATH/PNP0C04 PIC/PNP0000 HPET/PNP0103 RTC/PNP0B00 SPKR/PNP0800 TIME/PNP0100 LNK{A-H}/PNP0C0F This seems like a bad idea. I forgot to mention, the original hotplug_event is called with a device check on \_SB_.PCI0.PEX2. The box where I'm seeing this is a pretty generic X58 based Nehalem workstation (Lenovo S20). Thanks, Alex -- To unsubscribe from this list: send the line "unsubscribe linux-pci" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Index: linux-pm/drivers/pci/hotplug/acpiphp_glue.c =================================================================== --- linux-pm.orig/drivers/pci/hotplug/acpiphp_glue.c +++ linux-pm/drivers/pci/hotplug/acpiphp_glue.c @@ -46,6 +46,7 @@ #include <linux/pci.h> #include <linux/pci_hotplug.h> #include <linux/pci-acpi.h> +#include <linux/pm_runtime.h> #include <linux/mutex.h> #include <linux/slab.h> #include <linux/acpi.h> @@ -687,47 +688,75 @@ static unsigned int get_slot_status(stru } /** + * trim_stale_devices - remove PCI devices that are not responding. + * @dev: PCI device to start walking the hierarchy from. + */ +static void trim_stale_devices(struct pci_dev *dev) +{ + acpi_handle handle = ACPI_HANDLE(&dev->dev); + struct pci_bus *bus = dev->subordinate; + bool alive = false; + + if (handle) { + acpi_status status; + unsigned long long sta; + + status = acpi_evaluate_integer(handle, "_STA", NULL, &sta); + alive = ACPI_SUCCESS(status) && sta == ACPI_STA_ALL; + } + if (!alive) { + u32 v; + + /* Check if the device responds. */ + alive = pci_bus_read_dev_vendor_id(dev->bus, dev->devfn, &v, 0); + } + if (!alive) { + pci_stop_and_remove_bus_device(dev); + if (handle) + acpiphp_bus_trim(handle); + } else if (bus) { + struct pci_dev *child, *tmp; + + /* The device is a bridge. so check the bus below it. */ + pm_runtime_get_sync(&dev->dev); + list_for_each_entry_safe(child, tmp, &bus->devices, bus_list) + trim_stale_devices(child); + + pm_runtime_put(&dev->dev); + } +} + +/** * acpiphp_check_bridge - re-enumerate devices * @bridge: where to begin re-enumeration * * Iterate over all slots under this bridge and make sure that if a * card is present they are enabled, and if not they are disabled. 
*/ -static int acpiphp_check_bridge(struct acpiphp_bridge *bridge) +static void acpiphp_check_bridge(struct acpiphp_bridge *bridge) { struct acpiphp_slot *slot; - int retval = 0; - int enabled, disabled; - - enabled = disabled = 0; list_for_each_entry(slot, &bridge->slots, node) { - unsigned int status = get_slot_status(slot); - if (slot->flags & SLOT_ENABLED) { - if (status == ACPI_STA_ALL) - continue; + struct pci_bus *bus = slot->bus; + struct pci_dev *dev, *tmp; - retval = acpiphp_disable_and_eject_slot(slot); - if (retval) - goto err_exit; + mutex_lock(&slot->crit_sect); + /* wake up all functions */ + if (get_slot_status(slot) == ACPI_STA_ALL) { + /* remove stale devices if any */ + list_for_each_entry_safe(dev, tmp, &bus->devices, + bus_list) + if (PCI_SLOT(dev->devfn) == slot->device) + trim_stale_devices(dev); - disabled++; + /* configure all functions */ + enable_device(slot); } else { - if (status != ACPI_STA_ALL) - continue; - retval = acpiphp_enable_slot(slot); - if (retval) { - err("Error occurred in enabling\n"); - goto err_exit; - } - enabled++; + disable_device(slot); } + mutex_unlock(&slot->crit_sect); } - - dbg("%s: %d enabled, %d disabled\n", __func__, enabled, disabled); - - err_exit: - return retval; } static void acpiphp_set_hpp_values(struct pci_bus *bus) @@ -828,7 +857,11 @@ static void hotplug_event(acpi_handle ha ACPI_UINT32_MAX, check_sub_bridges, NULL, NULL, NULL); } else { - acpiphp_enable_slot(func->slot); + struct acpiphp_slot *slot = func->slot; + + mutex_lock(&slot->crit_sect); + enable_device(slot); + mutex_unlock(&slot->crit_sect); } break;