diff mbox

[libsas,v13] scsi, sd: limit the scope of the async probe domain

Message ID 20120323000201.17750.18754.stgit@dwillia2-linux.jf.intel.com
State Not Applicable
Delegated to: David Miller
Headers show

Commit Message

Dan Williams March 23, 2012, 12:05 a.m. UTC
sd injects and synchronizes probe work on the global kernel-wide domain.
This runs into conflict with PM that wants to perform resume actions in
async context:

[  494.237079] INFO: task kworker/u:3:554 blocked for more than 120 seconds.
[  494.294396] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[  494.360809] kworker/u:3     D 0000000000000000     0   554      2 0x00000000
[  494.420739]  ffff88012e4d3af0 0000000000000046 ffff88013200c160 ffff88012e4d3fd8
[  494.484392]  ffff88012e4d3fd8 0000000000012500 ffff8801394ea0b0 ffff88013200c160
[  494.548038]  ffff88012e4d3ae0 00000000000001e3 ffffffff81a249e0 ffff8801321c5398
[  494.611685] Call Trace:
[  494.632649]  [<ffffffff8149dd25>] schedule+0x5a/0x5c
[  494.674687]  [<ffffffff8104b968>] async_synchronize_cookie_domain+0xb6/0x112
[  494.734177]  [<ffffffff810461ff>] ? __init_waitqueue_head+0x50/0x50
[  494.787134]  [<ffffffff8131a224>] ? scsi_remove_target+0x48/0x48
[  494.837900]  [<ffffffff8104b9d9>] async_synchronize_cookie+0x15/0x17
[  494.891567]  [<ffffffff8104ba49>] async_synchronize_full+0x54/0x70  <-- here we wait for async contexts to complete
[  494.943783]  [<ffffffff8104b9f5>] ? async_synchronize_full_domain+0x1a/0x1a
[  495.002547]  [<ffffffffa00114b1>] sd_remove+0x2c/0xa2 [sd_mod]
[  495.051861]  [<ffffffff812fe94f>] __device_release_driver+0x86/0xcf
[  495.104807]  [<ffffffff812fe9bd>] device_release_driver+0x25/0x32  <-- here we take device_lock()

[  853.511341] INFO: task kworker/u:4:549 blocked for more than 120 seconds.
[  853.568693] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[  853.635119] kworker/u:4     D ffff88013097b5d0     0   549      2 0x00000000
[  853.695129]  ffff880132773c40 0000000000000046 ffff880130790000 ffff880132773fd8
[  853.758990]  ffff880132773fd8 0000000000012500 ffff88013288a0b0 ffff880130790000
[  853.822796]  0000000000000246 0000000000000040 ffff88013097b5c8 ffff880130790000
[  853.886633] Call Trace:
[  853.907631]  [<ffffffff8149dd25>] schedule+0x5a/0x5c
[  853.949670]  [<ffffffff8149cc44>] __mutex_lock_common+0x220/0x351
[  854.001225]  [<ffffffff81304bd7>] ? device_resume+0x58/0x1c4
[  854.049082]  [<ffffffff81304bd7>] ? device_resume+0x58/0x1c4
[  854.097011]  [<ffffffff8149ce48>] mutex_lock_nested+0x2f/0x36   <-- here we wait for device_lock()
[  854.145591]  [<ffffffff81304bd7>] device_resume+0x58/0x1c4
[  854.192066]  [<ffffffff81304d61>] async_resume+0x1e/0x45
[  854.237019]  [<ffffffff8104bc93>] async_run_entry_fn+0xc6/0x173  <-- ...while running in async context

Provide a 'scsi_sd_probe_domain' so that async probe actions actions can
be flushed without regard for the state of PM, and allow for the resume
path to handle devices that have transitioned from SDEV_QUIESCE to
SDEV_DEL prior to resume.

Acked-by: Alan Stern <stern@rowland.harvard.edu>
[alan: uplevel scsi_sd_probe_domain, clarify scsi_device_resume]
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---

Changes since v12: http://marc.info/?l=linux-scsi&m=133239705303419&w=2

Just update this one patch with Alan's feedback to move the declaration
scsi_sd_probe_domain into scsi_priv.h.

  git://git.kernel.org/pub/scm/linux/kernel/git/djbw/isci.git libsas-eh-reworks-v13

 drivers/scsi/scsi.c      |    6 ++++++
 drivers/scsi/scsi_lib.c  |   10 +++++++---
 drivers/scsi/scsi_pm.c   |    2 +-
 drivers/scsi/scsi_priv.h |    4 ++++
 drivers/scsi/sd.c        |    5 +++--
 5 files changed, 21 insertions(+), 6 deletions(-)


--
To unsubscribe from this list: send the line "unsubscribe linux-ide" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

James Bottomley May 17, 2012, 8:13 a.m. UTC | #1
On Thu, 2012-03-22 at 17:05 -0700, Dan Williams wrote:
> --- a/drivers/scsi/scsi_priv.h
> +++ b/drivers/scsi/scsi_priv.h
> @@ -163,6 +163,10 @@ static inline int scsi_autopm_get_host(struct Scsi_Host *h) { return 0; }
>  static inline void scsi_autopm_put_host(struct Scsi_Host *h) {}
>  #endif /* CONFIG_PM_RUNTIME */
>  
> +#if IS_ENABLED(CONFIG_PM) || IS_ENABLED(CONFIG_BLK_DEV_SD)
> +extern struct list_head scsi_sd_probe_domain;
> +#endif

This #if is unnecessary.  There's no need to conditionally compile
external variable or function declarations.  The only reason we do it
for functions is if we need an else branch with an empty body.

James


--
To unsubscribe from this list: send the line "unsubscribe linux-ide" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Dan Williams May 17, 2012, 1:38 p.m. UTC | #2
On Thu, May 17, 2012 at 1:13 AM, James Bottomley
<James.Bottomley@hansenpartnership.com> wrote:
> On Thu, 2012-03-22 at 17:05 -0700, Dan Williams wrote:
>> --- a/drivers/scsi/scsi_priv.h
>> +++ b/drivers/scsi/scsi_priv.h
>> @@ -163,6 +163,10 @@ static inline int scsi_autopm_get_host(struct Scsi_Host *h) { return 0; }
>>  static inline void scsi_autopm_put_host(struct Scsi_Host *h) {}
>>  #endif /* CONFIG_PM_RUNTIME */
>>
>> +#if IS_ENABLED(CONFIG_PM) || IS_ENABLED(CONFIG_BLK_DEV_SD)
>> +extern struct list_head scsi_sd_probe_domain;
>> +#endif
>
> This #if is unnecessary.  There's no need to conditionally compile
> external variable or function declarations.  The only reason we do it
> for functions is if we need an else branch with an empty body.

Yeah, that got removed in a later version:

http://www.spinics.net/lists/linux-ide/msg43429.html

Seeing as you are pulling individual patches from the Wayback machine
let me know which ones need resending.  My current backlog (minus this
one) is:

9f8a41c libsas: cleanup spurious calls to scsi_schedule_eh
06895b3 libata, libsas: introduce sched_eh and end_eh port ops
f41b618 scsi: fix eh wakeup (scsi_schedule_eh vs scsi_restart_operations)
3bd4c2a scsi_transport_sas: fix delete vs scan race
af80d48 libsas: enforce eh strategy handlers only in eh context
6e1d0a0 libsas: add sas_eh_abort_handler
0422f1f libsas: use ->lldd_I_T_nexus_reset for ->eh_bus_reset_handler
b579e49 isci: use sas eh strategy handlers
3cbe97b libsas: trim sas_task of slow path infrastructure
b377ed2 libsas: sas_rediscover_dev did not look at the SMP exec status.
1821ae0 mvsas: remove unused variable in mvs_task_exec()
3a9bfca libata: reset once
ef475b4 libsas: continue revalidation
dd4a159 libata: export ata_port suspend/resume infrastructure for sas
df05813 libsas: drop sata port multiplier infrastructure
178afd2 libsas: suspend / resume support
56dcb7c scsi: cleanup setting task state in scsi_error_handler()
a4df371 isci: improve 'invalid state' warnings
2b28b3a isci: kill ->is_direct_attached
9890839 isci: fix 'link-up' events occur after 'start-complete'
d34647d isci: kill sci_phy_protocol and sci_request_protocol
efb3820 isci: fix interrupt disable
523303d isci: Don't filter BROADCAST CHANGE primitives
47439eb isci: kill isci_host.shost
f6abc58 isci: kill ->status, and ->state_lock in isci_host
71f4553 isci: implement suspend/resume support
13906c3 isci: kill isci_port.domain_dev_list
a80e24c isci: Changes in COMSAS timings enabling ISCI to detect buggy
disc drives.
ed12d25 isci: refactor initialization for S3/S4
68b9d79 isci: enable BCN in sci_port_add_phy()
bc1ddb0 isci: fix controller stop
c2f6304 isci: fix oem parameter validation on single controller skus
32400e3 isci: Manage the link layer hang detect timer for RNC suspensions.
d95c58c isci: Fixed bug in resumption from RNC Tx/Rx suspend state.
657830a isci: Handle all suspending TC completions
c31f05e isci: Terminate outstanding TCs on TX/RX RNC suspensions.
1a69708 isci: Manage device suspensions during TC terminations.
dc473c8 isci: Remote device must be suspended for NCQ cleanup.
4f30fa2 isci: Remote device stop also suspends the RNC and terminates I/O.
2656b8f isci: Escalate to I_T_Nexus_Reset when the device is gone.
741bab2 isci: Redesign device suspension, abort, cleanup.
db7328c isci: Add suspension cases for RNC INVALIDATING, POSTING states.
671db01 isci: Device access in the error path does not depend on IDEV_GONE.
0790396 isci: All pending TCs are terminated when the RNC is invalidated.
0da9c73 isci: Only set IDEV_GONE in the device stop path.
3384bac isci: Remove isci_device reqs_in_process and dev_node from isci_device.
2eec736 isci: Distinguish between remote device suspension cases
51c541d isci: Fix the terminated I/O to not call sas_task_abort().
751a0f6 isci: Save the suspension hint for upcoming suspensions.
398224e isci: Manage the LLHANG timer enable/disable per-device.
eedd2a2 isci: Make sure all TCs are terminated and cleaned in LUN reset.
00854bb isci: Implement waiting for suspend in the abort path.
7eda4e0 isci: When in the abort path, defeat other resume calls until done.
1efce89 isci: Callbacks to libsas occur under scic_lock and are synchronized.
6314cc6 isci: Manage tag releases differently when aborting tasks.
3a194f8 isci: Fix RNC suspend call for SCI_RESUMING state.
723f5f5 isci: Wait for RNC resumption before leaving the abort path.
64c1001 isci: Directly control IREQ_ABORT_PATH_ACTIVE when completing TMFs.
4539df1 isci: Add protocol indicator for TMF requests.
9db7785 isci: Added timeouts to RNC suspensions in the abort path.
f68e45a isci: Change the phy control and link reset interface for HW reasons.
bbdebf0 isci: Don't wait for an RNC suspend if it's being destroyed.
1479980 isci: Restore the ATAPI device RNC management code.
244d5955 isci: Check IDEV_GONE before performing abort path operations.
9728c87 isci: Remove obviated host callback list.
de98479 isci: Manage the IREQ_NO_AUTO_FREE_TAG under scic_lock.
74dbb19 isci: Fix RNC AWAIT_SUSPENSION->INVALIDATING transition.
1d43a58 isci: Fixed RNC bug that lost the suspension or resumption
during destroy
4fe7411 isci: End the RNC resumption wait when the RNC is destroyed.

These are contained in the following git branches.

git://git.kernel.org/pub/scm/linux/kernel/git/djbw/isci.git libsas/next
git://git.kernel.org/pub/scm/linux/kernel/git/djbw/isci.git isci/next
git://git.kernel.org/pub/scm/linux/kernel/git/djbw/isci.git isci/rnc-rework

--
Dan
--
To unsubscribe from this list: send the line "unsubscribe linux-ide" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
James Bottomley May 17, 2012, 2:14 p.m. UTC | #3
On Thu, 2012-05-17 at 06:38 -0700, Dan Williams wrote:
> On Thu, May 17, 2012 at 1:13 AM, James Bottomley
> <James.Bottomley@hansenpartnership.com> wrote:
> > On Thu, 2012-03-22 at 17:05 -0700, Dan Williams wrote:
> >> --- a/drivers/scsi/scsi_priv.h
> >> +++ b/drivers/scsi/scsi_priv.h
> >> @@ -163,6 +163,10 @@ static inline int scsi_autopm_get_host(struct Scsi_Host *h) { return 0; }
> >>  static inline void scsi_autopm_put_host(struct Scsi_Host *h) {}
> >>  #endif /* CONFIG_PM_RUNTIME */
> >>
> >> +#if IS_ENABLED(CONFIG_PM) || IS_ENABLED(CONFIG_BLK_DEV_SD)
> >> +extern struct list_head scsi_sd_probe_domain;
> >> +#endif
> >
> > This #if is unnecessary.  There's no need to conditionally compile
> > external variable or function declarations.  The only reason we do it
> > for functions is if we need an else branch with an empty body.
> 
> Yeah, that got removed in a later version:
> 
> http://www.spinics.net/lists/linux-ide/msg43429.html
> 
> Seeing as you are pulling individual patches from the Wayback machine
> let me know which ones need resending.  My current backlog (minus this
> one) is:
> 
> 9f8a41c libsas: cleanup spurious calls to scsi_schedule_eh
> 06895b3 libata, libsas: introduce sched_eh and end_eh port ops
> f41b618 scsi: fix eh wakeup (scsi_schedule_eh vs scsi_restart_operations)
> 3bd4c2a scsi_transport_sas: fix delete vs scan race
> af80d48 libsas: enforce eh strategy handlers only in eh context
> 6e1d0a0 libsas: add sas_eh_abort_handler
> 0422f1f libsas: use ->lldd_I_T_nexus_reset for ->eh_bus_reset_handler
> b579e49 isci: use sas eh strategy handlers
> 3cbe97b libsas: trim sas_task of slow path infrastructure
> b377ed2 libsas: sas_rediscover_dev did not look at the SMP exec status.
> 1821ae0 mvsas: remove unused variable in mvs_task_exec()
> 3a9bfca libata: reset once
> ef475b4 libsas: continue revalidation
> dd4a159 libata: export ata_port suspend/resume infrastructure for sas
> df05813 libsas: drop sata port multiplier infrastructure
> 178afd2 libsas: suspend / resume support
> 56dcb7c scsi: cleanup setting task state in scsi_error_handler()
> a4df371 isci: improve 'invalid state' warnings
> 2b28b3a isci: kill ->is_direct_attached
> 9890839 isci: fix 'link-up' events occur after 'start-complete'
> d34647d isci: kill sci_phy_protocol and sci_request_protocol
> efb3820 isci: fix interrupt disable
> 523303d isci: Don't filter BROADCAST CHANGE primitives
> 47439eb isci: kill isci_host.shost
> f6abc58 isci: kill ->status, and ->state_lock in isci_host
> 71f4553 isci: implement suspend/resume support
> 13906c3 isci: kill isci_port.domain_dev_list
> a80e24c isci: Changes in COMSAS timings enabling ISCI to detect buggy
> disc drives.
> ed12d25 isci: refactor initialization for S3/S4
> 68b9d79 isci: enable BCN in sci_port_add_phy()
> bc1ddb0 isci: fix controller stop
> c2f6304 isci: fix oem parameter validation on single controller skus
> 32400e3 isci: Manage the link layer hang detect timer for RNC suspensions.
> d95c58c isci: Fixed bug in resumption from RNC Tx/Rx suspend state.
> 657830a isci: Handle all suspending TC completions
> c31f05e isci: Terminate outstanding TCs on TX/RX RNC suspensions.
> 1a69708 isci: Manage device suspensions during TC terminations.
> dc473c8 isci: Remote device must be suspended for NCQ cleanup.
> 4f30fa2 isci: Remote device stop also suspends the RNC and terminates I/O.
> 2656b8f isci: Escalate to I_T_Nexus_Reset when the device is gone.
> 741bab2 isci: Redesign device suspension, abort, cleanup.
> db7328c isci: Add suspension cases for RNC INVALIDATING, POSTING states.
> 671db01 isci: Device access in the error path does not depend on IDEV_GONE.
> 0790396 isci: All pending TCs are terminated when the RNC is invalidated.
> 0da9c73 isci: Only set IDEV_GONE in the device stop path.
> 3384bac isci: Remove isci_device reqs_in_process and dev_node from isci_device.
> 2eec736 isci: Distinguish between remote device suspension cases
> 51c541d isci: Fix the terminated I/O to not call sas_task_abort().
> 751a0f6 isci: Save the suspension hint for upcoming suspensions.
> 398224e isci: Manage the LLHANG timer enable/disable per-device.
> eedd2a2 isci: Make sure all TCs are terminated and cleaned in LUN reset.
> 00854bb isci: Implement waiting for suspend in the abort path.
> 7eda4e0 isci: When in the abort path, defeat other resume calls until done.
> 1efce89 isci: Callbacks to libsas occur under scic_lock and are synchronized.
> 6314cc6 isci: Manage tag releases differently when aborting tasks.
> 3a194f8 isci: Fix RNC suspend call for SCI_RESUMING state.
> 723f5f5 isci: Wait for RNC resumption before leaving the abort path.
> 64c1001 isci: Directly control IREQ_ABORT_PATH_ACTIVE when completing TMFs.
> 4539df1 isci: Add protocol indicator for TMF requests.
> 9db7785 isci: Added timeouts to RNC suspensions in the abort path.
> f68e45a isci: Change the phy control and link reset interface for HW reasons.
> bbdebf0 isci: Don't wait for an RNC suspend if it's being destroyed.
> 1479980 isci: Restore the ATAPI device RNC management code.
> 244d5955 isci: Check IDEV_GONE before performing abort path operations.
> 9728c87 isci: Remove obviated host callback list.
> de98479 isci: Manage the IREQ_NO_AUTO_FREE_TAG under scic_lock.
> 74dbb19 isci: Fix RNC AWAIT_SUSPENSION->INVALIDATING transition.
> 1d43a58 isci: Fixed RNC bug that lost the suspension or resumption
> during destroy
> 4fe7411 isci: End the RNC resumption wait when the RNC is destroyed.

> These are contained in the following git branches.
> 
> git://git.kernel.org/pub/scm/linux/kernel/git/djbw/isci.git libsas/next
> git://git.kernel.org/pub/scm/linux/kernel/git/djbw/isci.git isci/next
> git://git.kernel.org/pub/scm/linux/kernel/git/djbw/isci.git isci/rnc-rework

Lets try a pull request on the isc branch at least ... can you send me
one with a signed tag?

Thanks,

James



--
To unsubscribe from this list: send the line "unsubscribe linux-ide" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Dan Williams May 17, 2012, 10:15 p.m. UTC | #4
On Thu, May 17, 2012 at 7:14 AM, James Bottomley
<James.Bottomley@hansenpartnership.com> wrote:
> Lets try a pull request on the isc branch at least ... can you send me
> one with a signed tag?

Can do.

To make the merge cleaner I will rebase the isci tree to have zero
dependencies on the pending libsas update (which adds suspend/resume
support).  Then after you have a chance to merge the libsas update [1]
I'll send a final patch to enable suspend/resume for isci.

--
Dan

[1]: http://marc.info/?l=linux-scsi&m=133632746229553&w=2
--
To unsubscribe from this list: send the line "unsubscribe linux-ide" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
index 07322ec..61c82a3 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -90,6 +90,12 @@  unsigned int scsi_logging_level;
 EXPORT_SYMBOL(scsi_logging_level);
 #endif
 
+#if IS_ENABLED(CONFIG_PM) || IS_ENABLED(CONFIG_BLK_DEV_SD)
+/* sd and scsi_pm need to coordinate flushing async actions */
+LIST_HEAD(scsi_sd_probe_domain);
+EXPORT_SYMBOL(scsi_sd_probe_domain);
+#endif
+
 /* NB: These are exposed through /proc/scsi/scsi and form part of the ABI.
  * You may not alter any existing entry (although adding new ones is
  * encouraged once assigned by ANSI/INCITS T10
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index b4833de..992ff63 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -2348,10 +2348,14 @@  EXPORT_SYMBOL(scsi_device_quiesce);
  *
  *	Must be called with user context, may sleep.
  */
-void
-scsi_device_resume(struct scsi_device *sdev)
+void scsi_device_resume(struct scsi_device *sdev)
 {
-	if(scsi_device_set_state(sdev, SDEV_RUNNING))
+	/* check if the device state was mutated prior to resume, and if
+	 * so assume the state is being managed elsewhere (for example
+	 * device deleted during suspend)
+	 */
+	if (sdev->sdev_state != SDEV_QUIESCE ||
+	    scsi_device_set_state(sdev, SDEV_RUNNING))
 		return;
 	scsi_run_queue(sdev->request_queue);
 }
diff --git a/drivers/scsi/scsi_pm.c b/drivers/scsi/scsi_pm.c
index c467064..f661a41 100644
--- a/drivers/scsi/scsi_pm.c
+++ b/drivers/scsi/scsi_pm.c
@@ -97,7 +97,7 @@  static int scsi_bus_prepare(struct device *dev)
 {
 	if (scsi_is_sdev_device(dev)) {
 		/* sd probing uses async_schedule.  Wait until it finishes. */
-		async_synchronize_full();
+		async_synchronize_full_domain(&scsi_sd_probe_domain);
 
 	} else if (scsi_is_host_device(dev)) {
 		/* Wait until async scanning is finished */
diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h
index be4fa6d..bec061d 100644
--- a/drivers/scsi/scsi_priv.h
+++ b/drivers/scsi/scsi_priv.h
@@ -163,6 +163,10 @@  static inline int scsi_autopm_get_host(struct Scsi_Host *h) { return 0; }
 static inline void scsi_autopm_put_host(struct Scsi_Host *h) {}
 #endif /* CONFIG_PM_RUNTIME */
 
+#if IS_ENABLED(CONFIG_PM) || IS_ENABLED(CONFIG_BLK_DEV_SD)
+extern struct list_head scsi_sd_probe_domain;
+#endif
+
 /* 
  * internal scsi timeout functions: for use by mid-layer and transport
  * classes.
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index bd17cf8..19e27d8 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -65,6 +65,7 @@ 
 #include <scsi/scsicam.h>
 
 #include "sd.h"
+#include "scsi_priv.h"
 #include "scsi_logging.h"
 
 MODULE_AUTHOR("Eric Youngdale");
@@ -2717,7 +2718,7 @@  static int sd_probe(struct device *dev)
 	dev_set_drvdata(dev, sdkp);
 
 	get_device(&sdkp->dev);	/* prevent release before async_schedule */
-	async_schedule(sd_probe_async, sdkp);
+	async_schedule_domain(sd_probe_async, sdkp, &scsi_sd_probe_domain);
 
 	return 0;
 
@@ -2751,7 +2752,7 @@  static int sd_remove(struct device *dev)
 	sdkp = dev_get_drvdata(dev);
 	scsi_autopm_get_device(sdkp->device);
 
-	async_synchronize_full();
+	async_synchronize_full_domain(&scsi_sd_probe_domain);
 	blk_queue_prep_rq(sdkp->device->request_queue, scsi_prep_fn);
 	blk_queue_unprep_rq(sdkp->device->request_queue, NULL);
 	device_del(&sdkp->dev);