Message ID | 20240409143748.980206-5-hch@lst.de |
---|---|
State | New |
Headers | show |
Series | [01/23] block: add a helper to cancel atomic queue limit updates | expand |
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Hi, On Tue, Apr 09, 2024 at 04:37:29PM +0200, Christoph Hellwig wrote: > Turn __scsi_init_queue into scsi_init_limits which initializes > queue_limits structure that can be passed to blk_mq_alloc_queue. > With this patch in linux mainline, I see ata2: found unknown device (class 0) ata2.00: ATAPI: QEMU DVD-ROM, 2.5+, max UDMA/100 ------------[ cut here ]------------ WARNING: CPU: 0 PID: 27 at block/blk-settings.c:202 blk_validate_limits+0x28c/0x304 Modules linked in: CPU: 0 PID: 27 Comm: kworker/u4:2 Not tainted 6.9.0-05151-g1b294a1f3561 #1 Hardware name: PowerMac3,1 PPC970FX 0x3c0301 PowerMac Workqueue: async async_run_entry_fn NIP: c0000000007ccec8 LR: c0000000007c805c CTR: 0000000000000000 REGS: c000000006def690 TRAP: 0700 Not tainted (6.9.0-05151-g1b294a1f3561) MSR: 8000000000028032 <SF,EE,IR,DR,RI> CR: 84004228 XER: 20000000 IRQMASK: 0 GPR00: c0000000007c8040 c000000006def930 c00000000159f900 c000000006defac8 GPR04: c00000000146e788 0000000000000000 0000000000000000 0000000000000100 GPR08: 0000000000000200 000000000000ff00 0000000000000000 0000000000004000 GPR12: 000000000fa82000 c000000003330000 c000000000116508 c0000000060c5c40 GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000088 GPR20: 0000000000000000 c0000000026f2f40 c0000000025eeff0 0000000000000000 GPR24: c000000006defc80 c0000000031cb3a0 c000000002571c80 c000000006defac8 GPR28: c0000000033052e0 ffffffffffffffff 0000000000000010 c000000008f13df0 NIP [c0000000007ccec8] blk_validate_limits+0x28c/0x304 LR [c0000000007c805c] blk_alloc_queue+0xbc/0x344 Call Trace: [c000000006def930] [c0000000007c8040] blk_alloc_queue+0xa0/0x344 (unreliable) [c000000006def990] [c0000000007e2658] blk_mq_alloc_queue+0x60/0xf4 [c000000006defa60] [c000000000a7a260] scsi_alloc_sdev+0x280/0x464 [c000000006defb90] [c000000000a7a6b4] scsi_probe_and_add_lun+0x270/0x388 [c000000006defc60] [c000000000a7b070] __scsi_add_device+0x168/0x1b4 [c000000006defcc0] [c000000000b08fe0] ata_scsi_scan_host+0x294/0x39c [c000000006defd80] [c000000000af7704] async_port_probe+0x6c/0x98 [c000000006defdb0] [c000000000120028] async_run_entry_fn+0x50/0x13c [c000000006defe00] [c00000000010821c] process_one_work+0x2c0/0x828 [c000000006deff00] [c000000000109090] worker_thread+0x224/0x474 [c000000006deff90] [c000000000116658] kthread+0x158/0x17c followed by scsi_alloc_sdev: Allocation failure during SCSI scanning, some SCSI devices might not be configured usb 1-1: new full-speed USB device number 2 using ohci-pci scsi_alloc_sdev: Allocation failure during SCSI scanning, some SCSI devices might not be configured scsi_alloc_sdev: Allocation failure during SCSI scanning, some SCSI devices might not be configured scsi_alloc_sdev: Allocation failure during SCSI scanning, some SCSI devices might not be configured input: QEMU QEMU USB Keyboard as /devices/pci0000:f0/0000:f0:0d.0/usb1/1-1/1-1:1.0/0003:0627:0001.0001/input/input0 scsi_alloc_sdev: Allocation failure during SCSI scanning, some SCSI devices might not be configured ata2: WARNING: synchronous SCSI scan failed without making any progress, switching to async and ultimately a boot hang. This is with the mac99 emulation in qemu. The warning is always seen, the boot hang is seen when trying to boot from ide/ata drive. Bisect log is attached. Guenter --- # bad: [1b294a1f35616977caddaddf3e9d28e576a1adbc] Merge tag 'net-next-6.10' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next # good: [a5131c3fdf2608f1c15f3809e201cf540eb28489] Merge tag 'x86-shstk-2024-05-13' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip git bisect start 'HEAD' 'a5131c3fdf26' # good: [f8beae078c82abde57fed4a5be0bbc3579b59ad0] Merge tag 'gtp-24-05-07' of git://git.kernel.org/pub/scm/linux/kernel/git/pablo/gtp Pablo neira Ayuso says: git bisect good f8beae078c82abde57fed4a5be0bbc3579b59ad0 # good: [ce952d8f0e9b58dc6a2bde7e47ca7fa7925583cc] Merge tag 'gpio-updates-for-v6.10-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/brgl/linux git bisect good ce952d8f0e9b58dc6a2bde7e47ca7fa7925583cc # bad: [113d1dd9c8ea2186d56a641a787e2588673c9c32] Merge tag 'scsi-misc' of git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi git bisect bad 113d1dd9c8ea2186d56a641a787e2588673c9c32 # good: [a3d1f54d7aa4c3be2c6a10768d4ffa1dcb620da9] Merge tag 'for-6.10-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux git bisect good a3d1f54d7aa4c3be2c6a10768d4ffa1dcb620da9 # bad: [f92141e18c8b466027e226f3388de15b059b6f65] Merge patch series "convert SCSI to atomic queue limits, part 1 (v3)" git bisect bad f92141e18c8b466027e226f3388de15b059b6f65 # good: [0e0a4da35284c85225e3b128912582ebc73256c8] Merge patch series "scsi: ufs: Remove overzealous memory barriers" git bisect good 0e0a4da35284c85225e3b128912582ebc73256c8 # bad: [a25a9c85d17fd2f19bd5a2bb25b8361d72336bc7] scsi: libata: Switch to using ->device_configure git bisect bad a25a9c85d17fd2f19bd5a2bb25b8361d72336bc7 # bad: [6248d7f7714f018f2c02f356582784e74596f8e8] scsi: core: Add a no_highmem flag to struct Scsi_Host git bisect bad 6248d7f7714f018f2c02f356582784e74596f8e8 # good: [33507b3964f136ea1592718cb81885c8f9354f65] scsi: ufs: qcom: Add sanity checks for gear/lane values during ICC scaling git bisect good 33507b3964f136ea1592718cb81885c8f9354f65 # good: [4373d2ecca7fa7ad04aa9c371c80049bafec2610] scsi: bsg: Pass queue_limits to bsg_setup_queue() git bisect good 4373d2ecca7fa7ad04aa9c371c80049bafec2610 # bad: [afd53a3d852808bfeb5bc3ae3cd1caa9389bcc94] scsi: core: Initialize scsi midlayer limits before allocating the queue git bisect bad afd53a3d852808bfeb5bc3ae3cd1caa9389bcc94 # good: [9042fb6d2c085eccdf11069b04754dac807c36ea] scsi: mpi3mr: Pass queue_limits to bsg_setup_queue() git bisect good 9042fb6d2c085eccdf11069b04754dac807c36ea # first bad commit: [afd53a3d852808bfeb5bc3ae3cd1caa9389bcc94] scsi: core: Initialize scsi midlayer limits before allocating the queue
On 15/05/2024 15:50, Guenter Roeck wrote: > Hi, > > On Tue, Apr 09, 2024 at 04:37:29PM +0200, Christoph Hellwig wrote: >> Turn __scsi_init_queue into scsi_init_limits which initializes >> queue_limits structure that can be passed to blk_mq_alloc_queue. The previous behavior would sanitize max_segment_size < PAGE_SIZE, so I suppose you could try: --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -199,6 +199,8 @@ static int blk_validate_limits(struct queue_limits *lim) */ if (!lim->max_segment_size) lim->max_segment_size = BLK_MAX_SEGMENT_SIZE; + else if (lim->max_segment_size < PAGE_SIZE) + lim->max_segment_size = PAGE_SIZE; if (WARN_ON_ONCE(lim->max_segment_size < PAGE_SIZE)) return -EINVAL; } I guess that this following change could also be made since we fix-up a zero value for lim->max_segment_size, above: --- a/drivers/scsi/hosts.c +++ b/drivers/scsi/hosts.c @@ -474,10 +474,7 @@ struct Scsi_Host *scsi_host_alloc(const struct scsi_host_template *sht, int priv else shost->max_sectors = SCSI_DEFAULT_MAX_SECTORS; - if (sht->max_segment_size) - shost->max_segment_size = sht->max_segment_size; - else - shost->max_segment_size = BLK_MAX_SEGMENT_SIZE; + shost->max_segment_size = sht->max_segment_size; >> > > With this patch in linux mainline, I see > > ata2: found unknown device (class 0) > ata2.00: ATAPI: QEMU DVD-ROM, 2.5+, max UDMA/100 > ------------[ cut here ]------------ > WARNING: CPU: 0 PID: 27 at block/blk-settings.c:202 blk_validate_limits+0x28c/0x304 > Modules linked in: > CPU: 0 PID: 27 Comm: kworker/u4:2 Not tainted 6.9.0-05151-g1b294a1f3561 #1 > Hardware name: PowerMac3,1 PPC970FX 0x3c0301 PowerMac > Workqueue: async async_run_entry_fn > NIP: c0000000007ccec8 LR: c0000000007c805c CTR: 0000000000000000 > REGS: c000000006def690 TRAP: 0700 Not tainted (6.9.0-05151-g1b294a1f3561) > MSR: 8000000000028032 <SF,EE,IR,DR,RI> CR: 84004228 XER: 20000000 > IRQMASK: 0 > GPR00: c0000000007c8040 c000000006def930 c00000000159f900 c000000006defac8 > GPR04: c00000000146e788 0000000000000000 0000000000000000 0000000000000100 > GPR08: 0000000000000200 000000000000ff00 0000000000000000 0000000000004000 > GPR12: 000000000fa82000 c000000003330000 c000000000116508 c0000000060c5c40 > GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000088 > GPR20: 0000000000000000 c0000000026f2f40 c0000000025eeff0 0000000000000000 > GPR24: c000000006defc80 c0000000031cb3a0 c000000002571c80 c000000006defac8 > GPR28: c0000000033052e0 ffffffffffffffff 0000000000000010 c000000008f13df0 > NIP [c0000000007ccec8] blk_validate_limits+0x28c/0x304 > LR [c0000000007c805c] blk_alloc_queue+0xbc/0x344 > Call Trace: > [c000000006def930] [c0000000007c8040] blk_alloc_queue+0xa0/0x344 (unreliable) > [c000000006def990] [c0000000007e2658] blk_mq_alloc_queue+0x60/0xf4 > [c000000006defa60] [c000000000a7a260] scsi_alloc_sdev+0x280/0x464 > [c000000006defb90] [c000000000a7a6b4] scsi_probe_and_add_lun+0x270/0x388 > [c000000006defc60] [c000000000a7b070] __scsi_add_device+0x168/0x1b4 > [c000000006defcc0] [c000000000b08fe0] ata_scsi_scan_host+0x294/0x39c > [c000000006defd80] [c000000000af7704] async_port_probe+0x6c/0x98 > [c000000006defdb0] [c000000000120028] async_run_entry_fn+0x50/0x13c > [c000000006defe00] [c00000000010821c] process_one_work+0x2c0/0x828 > [c000000006deff00] [c000000000109090] worker_thread+0x224/0x474 > [c000000006deff90] [c000000000116658] kthread+0x158/0x17c > > followed by > > scsi_alloc_sdev: Allocation failure during SCSI scanning, some SCSI devices might not be configured > usb 1-1: new full-speed USB device number 2 using ohci-pci > scsi_alloc_sdev: Allocation failure during SCSI scanning, some SCSI devices might not be configured > scsi_alloc_sdev: Allocation failure during SCSI scanning, some SCSI devices might not be configured > scsi_alloc_sdev: Allocation failure during SCSI scanning, some SCSI devices might not be configured > input: QEMU QEMU USB Keyboard as /devices/pci0000:f0/0000:f0:0d.0/usb1/1-1/1-1:1.0/0003:0627:0001.0001/input/input0 > scsi_alloc_sdev: Allocation failure during SCSI scanning, some SCSI devices might not be configured > ata2: WARNING: synchronous SCSI scan failed without making any progress, switching to async > > and ultimately a boot hang. This is with the mac99 emulation in qemu. > The warning is always seen, the boot hang is seen when trying to boot > from ide/ata drive. Bisect log is attached. > > Guenter > > --- > # bad: [1b294a1f35616977caddaddf3e9d28e576a1adbc] Merge tag 'net-next-6.10' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next > # good: [a5131c3fdf2608f1c15f3809e201cf540eb28489] Merge tag 'x86-shstk-2024-05-13' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip > git bisect start 'HEAD' 'a5131c3fdf26' > # good: [f8beae078c82abde57fed4a5be0bbc3579b59ad0] Merge tag 'gtp-24-05-07' of git://git.kernel.org/pub/scm/linux/kernel/git/pablo/gtp Pablo neira Ayuso says: > git bisect good f8beae078c82abde57fed4a5be0bbc3579b59ad0 > # good: [ce952d8f0e9b58dc6a2bde7e47ca7fa7925583cc] Merge tag 'gpio-updates-for-v6.10-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/brgl/linux > git bisect good ce952d8f0e9b58dc6a2bde7e47ca7fa7925583cc > # bad: [113d1dd9c8ea2186d56a641a787e2588673c9c32] Merge tag 'scsi-misc' of git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi > git bisect bad 113d1dd9c8ea2186d56a641a787e2588673c9c32 > # good: [a3d1f54d7aa4c3be2c6a10768d4ffa1dcb620da9] Merge tag 'for-6.10-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux > git bisect good a3d1f54d7aa4c3be2c6a10768d4ffa1dcb620da9 > # bad: [f92141e18c8b466027e226f3388de15b059b6f65] Merge patch series "convert SCSI to atomic queue limits, part 1 (v3)" > git bisect bad f92141e18c8b466027e226f3388de15b059b6f65 > # good: [0e0a4da35284c85225e3b128912582ebc73256c8] Merge patch series "scsi: ufs: Remove overzealous memory barriers" > git bisect good 0e0a4da35284c85225e3b128912582ebc73256c8 > # bad: [a25a9c85d17fd2f19bd5a2bb25b8361d72336bc7] scsi: libata: Switch to using ->device_configure > git bisect bad a25a9c85d17fd2f19bd5a2bb25b8361d72336bc7 > # bad: [6248d7f7714f018f2c02f356582784e74596f8e8] scsi: core: Add a no_highmem flag to struct Scsi_Host > git bisect bad 6248d7f7714f018f2c02f356582784e74596f8e8 > # good: [33507b3964f136ea1592718cb81885c8f9354f65] scsi: ufs: qcom: Add sanity checks for gear/lane values during ICC scaling > git bisect good 33507b3964f136ea1592718cb81885c8f9354f65 > # good: [4373d2ecca7fa7ad04aa9c371c80049bafec2610] scsi: bsg: Pass queue_limits to bsg_setup_queue() > git bisect good 4373d2ecca7fa7ad04aa9c371c80049bafec2610 > # bad: [afd53a3d852808bfeb5bc3ae3cd1caa9389bcc94] scsi: core: Initialize scsi midlayer limits before allocating the queue > git bisect bad afd53a3d852808bfeb5bc3ae3cd1caa9389bcc94 > # good: [9042fb6d2c085eccdf11069b04754dac807c36ea] scsi: mpi3mr: Pass queue_limits to bsg_setup_queue() > git bisect good 9042fb6d2c085eccdf11069b04754dac807c36ea > # first bad commit: [afd53a3d852808bfeb5bc3ae3cd1caa9389bcc94] scsi: core: Initialize scsi midlayer limits before allocating the queue
On 5/15/24 15:16, John Garry wrote: > On 15/05/2024 15:50, Guenter Roeck wrote: >> Hi, >> >> On Tue, Apr 09, 2024 at 04:37:29PM +0200, Christoph Hellwig wrote: >>> Turn __scsi_init_queue into scsi_init_limits which initializes >>> queue_limits structure that can be passed to blk_mq_alloc_queue. > > The previous behavior would sanitize max_segment_size < PAGE_SIZE, so I suppose you could try: > > --- a/block/blk-settings.c > +++ b/block/blk-settings.c > @@ -199,6 +199,8 @@ static int blk_validate_limits(struct queue_limits *lim) > */ > if (!lim->max_segment_size) > lim->max_segment_size = BLK_MAX_SEGMENT_SIZE; > + else if (lim->max_segment_size < PAGE_SIZE) > + lim->max_segment_size = PAGE_SIZE; > if (WARN_ON_ONCE(lim->max_segment_size < PAGE_SIZE)) > return -EINVAL; > } > With some debugging: pata_macio_common_init() Calling ata_host_activate() with limit 65280 ... max_segment_size is 65280; PAGE_SIZE is 65536; BLK_MAX_SEGMENT_SIZE is 65536 WARNING: CPU: 0 PID: 12 at block/blk-settings.c:202 blk_validate_limits+0x2d4/0x364 ... This is with PPC_BOOK3S_64 which selects a default page size of 64k. Looking at the old code, I think it did what you suggested above, void blk_queue_max_segment_size(struct request_queue *q, unsigned int max_size) { if (max_size < PAGE_SIZE) { max_size = PAGE_SIZE; printk(KERN_INFO "%s: set to minimum %d\n", __func__, max_size); } ... but assuming that the driver requested a lower limit on purpose that may not be the best solution. Never mind, though - I updated my test configuration to explicitly configure the page size to 4k to work around the problem. With that, please consider this report a note in case someone hits the problem on a real system (and sorry for the noise). Thanks, Guenter
On 15/05/2024 17:52, Guenter Roeck wrote: > max_segment_size is 65280; PAGE_SIZE is 65536; BLK_MAX_SEGMENT_SIZE is > 65536 > WARNING: CPU: 0 PID: 12 at block/blk-settings.c:202 > blk_validate_limits+0x2d4/0x364 > ... > > This is with PPC_BOOK3S_64 which selects a default page size of 64k. > Looking at the old code, I think it did what you suggested above, > > void blk_queue_max_segment_size(struct request_queue *q, unsigned int > max_size) > { > if (max_size < PAGE_SIZE) { > max_size = PAGE_SIZE; > printk(KERN_INFO "%s: set to minimum %d\n", > __func__, max_size); > } > ... > > but assuming that the driver requested a lower limit on purpose that > may not be the best solution. Right, it is relied on that PAGE_SIZE can fit into a segment. > > Never mind, though - I updated my test configuration to explicitly > configure the page size to 4k to work around the problem. With that, > please consider this report a note in case someone hits the problem > on a real system (and sorry for the noise). Other controllers do have a 4K segment limit and are broken on systems with 16/64K PAGE_SIZE, like: https://lore.kernel.org/linux-block/20230612203314.17820-1-bvanassche@acm.org/ Thanks, John
On 5/16/24 06:08, John Garry wrote: > On 15/05/2024 17:52, Guenter Roeck wrote: >> max_segment_size is 65280; PAGE_SIZE is 65536; BLK_MAX_SEGMENT_SIZE is 65536 >> WARNING: CPU: 0 PID: 12 at block/blk-settings.c:202 blk_validate_limits+0x2d4/0x364 >> ... >> >> This is with PPC_BOOK3S_64 which selects a default page size of 64k. >> Looking at the old code, I think it did what you suggested above, >> >> void blk_queue_max_segment_size(struct request_queue *q, unsigned int max_size) >> { >> if (max_size < PAGE_SIZE) { >> max_size = PAGE_SIZE; >> printk(KERN_INFO "%s: set to minimum %d\n", >> __func__, max_size); >> } >> ... >> >> but assuming that the driver requested a lower limit on purpose that >> may not be the best solution. > > Right, it is relied on that PAGE_SIZE can fit into a segment. > >> >> Never mind, though - I updated my test configuration to explicitly >> configure the page size to 4k to work around the problem. With that, >> please consider this report a note in case someone hits the problem >> on a real system (and sorry for the noise). > > Other controllers do have a 4K segment limit and are broken on systems with 16/64K PAGE_SIZE, like: > > https://lore.kernel.org/linux-block/20230612203314.17820-1-bvanassche@acm.org/ > Understood, only it isn't just 4k segment limit with 16/64k page size, but more generally any system with segment limit < PAGE_SIZE. It is a bit sad that support for affected configurations is [now ?] broken because above patch series was rejected (and, no, that has nothing to do with me working for the same company as the submitter of that patch series - that is me testing the upstream kernel with qemu). Given that various controllers are affected by that problem, would it be acceptable to submit patches such as the following to avoid runtime failures ? diff --git a/drivers/ata/pata_macio.c b/drivers/ata/pata_macio.c index 817838e2f70e..6adf9105b5fb 100644 --- a/drivers/ata/pata_macio.c +++ b/drivers/ata/pata_macio.c @@ -1380,6 +1380,8 @@ static int __init pata_macio_init(void) { int rc; + BUILD_BUG_ON(MAX_DBDMA_SEG < PAGE_SIZE); + if (!machine_is(powermac)) return -ENODEV; or, alternatively, diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig index b595494ab9b4..d7bd64702109 100644 --- a/drivers/ata/Kconfig +++ b/drivers/ata/Kconfig @@ -789,6 +789,7 @@ config PATA_JMICRON config PATA_MACIO tristate "Apple PowerMac/PowerBook internal 'MacIO' IDE" depends on PPC_PMAC + depends on PAGE_SIZE_LESS_THAN_64KB help Most IDE capable PowerMacs have IDE busses driven by a variant of this controller which is part of the Apple chipset used on Thanks, Guenter
Adding ben and the linuxppc list. Context: pata_macio initialization now fails as we enforce that the segment size is set properly. On Wed, May 15, 2024 at 04:52:29PM -0700, Guenter Roeck wrote: > pata_macio_common_init() Calling ata_host_activate() with limit 65280 > ... > max_segment_size is 65280; PAGE_SIZE is 65536; BLK_MAX_SEGMENT_SIZE is 65536 > WARNING: CPU: 0 PID: 12 at block/blk-settings.c:202 blk_validate_limits+0x2d4/0x364 > ... > > This is with PPC_BOOK3S_64 which selects a default page size of 64k. Yeah. Did you actually manage to use pata macio previously? Or is it just used because it's part of the pmac default config? > Looking at the old code, I think it did what you suggested above, > but assuming that the driver requested a lower limit on purpose that > may not be the best solution. > Never mind, though - I updated my test configuration to explicitly > configure the page size to 4k to work around the problem. With that, > please consider this report a note in case someone hits the problem > on a real system (and sorry for the noise). Yes, the idea behind this change was to catch such errors. So far most errors have been drivers setting lower limits than what the hardware can actually handle, but I'd love to track this down. If the hardware can't actually handle the lower limit we should probably just fail the probe gracefully with a well comment if statement instead.
[CCing the regression list, as it should be in the loop for regressions: https://docs.kernel.org/admin-guide/reporting-regressions.html] On 20.05.24 17:15, Christoph Hellwig wrote: > Adding ben and the linuxppc list. Hmm, no reply and no other progress to get this resolved afaics. So lets bring Michael into the mix, he might be able to help out. BTW TWIMC: a PowerMac G5 user user reported similar symptoms here recently: https://bugzilla.kernel.org/show_bug.cgi?id=218858 Ciao, Thorsten > Context: pata_macio initialization now fails as we enforce that the > segment size is set properly. > > On Wed, May 15, 2024 at 04:52:29PM -0700, Guenter Roeck wrote: >> pata_macio_common_init() Calling ata_host_activate() with limit 65280 >> ... >> max_segment_size is 65280; PAGE_SIZE is 65536; BLK_MAX_SEGMENT_SIZE is 65536 >> WARNING: CPU: 0 PID: 12 at block/blk-settings.c:202 blk_validate_limits+0x2d4/0x364 >> ... >> >> This is with PPC_BOOK3S_64 which selects a default page size of 64k. > > Yeah. Did you actually manage to use pata macio previously? Or is > it just used because it's part of the pmac default config? > >> Looking at the old code, I think it did what you suggested above, > >> but assuming that the driver requested a lower limit on purpose that >> may not be the best solution. > >> Never mind, though - I updated my test configuration to explicitly >> configure the page size to 4k to work around the problem. With that, >> please consider this report a note in case someone hits the problem >> on a real system (and sorry for the noise). > > Yes, the idea behind this change was to catch such errors. So far > most errors have been drivers setting lower limits than what the > hardware can actually handle, but I'd love to track this down. > > If the hardware can't actually handle the lower limit we should > probably just fail the probe gracefully with a well comment if > statement instead.
On 29.05.24 16:36, Linux regression tracking (Thorsten Leemhuis) wrote: > [CCing the regression list, as it should be in the loop for regressions: > https://docs.kernel.org/admin-guide/reporting-regressions.html] > > On 20.05.24 17:15, Christoph Hellwig wrote: >> Adding ben and the linuxppc list. > > Hmm, no reply and no other progress to get this resolved afaics. So lets > bring Michael into the mix, he might be able to help out. > > BTW TWIMC: a PowerMac G5 user user reported similar symptoms here > recently: https://bugzilla.kernel.org/show_bug.cgi?id=218858 And yet another report with similar symptoms, this time with a "PowerMac7,2 PPC970 0x390202 PowerMac": https://bugzilla.kernel.org/show_bug.cgi?id=218905 Ciao, Thorsten >> Context: pata_macio initialization now fails as we enforce that the >> segment size is set properly. >> >> On Wed, May 15, 2024 at 04:52:29PM -0700, Guenter Roeck wrote: >>> pata_macio_common_init() Calling ata_host_activate() with limit 65280 >>> ... >>> max_segment_size is 65280; PAGE_SIZE is 65536; BLK_MAX_SEGMENT_SIZE is 65536 >>> WARNING: CPU: 0 PID: 12 at block/blk-settings.c:202 blk_validate_limits+0x2d4/0x364 >>> ... >>> >>> This is with PPC_BOOK3S_64 which selects a default page size of 64k. >> >> Yeah. Did you actually manage to use pata macio previously? Or is >> it just used because it's part of the pmac default config? >> >>> Looking at the old code, I think it did what you suggested above, >> >>> but assuming that the driver requested a lower limit on purpose that >>> may not be the best solution. >> >>> Never mind, though - I updated my test configuration to explicitly >>> configure the page size to 4k to work around the problem. With that, >>> please consider this report a note in case someone hits the problem >>> on a real system (and sorry for the noise). >> >> Yes, the idea behind this change was to catch such errors. So far >> most errors have been drivers setting lower limits than what the >> hardware can actually handle, but I'd love to track this down. >> >> If the hardware can't actually handle the lower limit we should >> probably just fail the probe gracefully with a well comment if >> statement instead.
"Linux regression tracking (Thorsten Leemhuis)" <regressions@leemhuis.info> writes: > [CCing the regression list, as it should be in the loop for regressions: > https://docs.kernel.org/admin-guide/reporting-regressions.html] > > On 20.05.24 17:15, Christoph Hellwig wrote: >> Adding ben and the linuxppc list. > > Hmm, no reply and no other progress to get this resolved afaics. So lets > bring Michael into the mix, he might be able to help out. Sorry I didn't see the original forward for some reason. I haven't seen this on my G5, but it's hard drive is on SATA. I think the CDROM is pata_macio, but there isn't a disk in the drive to test with. > BTW TWIMC: a PowerMac G5 user user reported similar symptoms here > recently: https://bugzilla.kernel.org/show_bug.cgi?id=218858 AFAICS that report is from a 4K page size kernel (Page orders: ... virtual = 12), so there must be something else going on? I've asked the reporter to confirm the page size. cheers >> Context: pata_macio initialization now fails as we enforce that the >> segment size is set properly. >> >> On Wed, May 15, 2024 at 04:52:29PM -0700, Guenter Roeck wrote: >>> pata_macio_common_init() Calling ata_host_activate() with limit 65280 >>> ... >>> max_segment_size is 65280; PAGE_SIZE is 65536; BLK_MAX_SEGMENT_SIZE is 65536 >>> WARNING: CPU: 0 PID: 12 at block/blk-settings.c:202 blk_validate_limits+0x2d4/0x364 >>> ... >>> >>> This is with PPC_BOOK3S_64 which selects a default page size of 64k. >> >> Yeah. Did you actually manage to use pata macio previously? Or is >> it just used because it's part of the pmac default config? >> >>> Looking at the old code, I think it did what you suggested above, >> >>> but assuming that the driver requested a lower limit on purpose that >>> may not be the best solution. >> >>> Never mind, though - I updated my test configuration to explicitly >>> configure the page size to 4k to work around the problem. With that, >>> please consider this report a note in case someone hits the problem >>> on a real system (and sorry for the noise). >> >> Yes, the idea behind this change was to catch such errors. So far >> most errors have been drivers setting lower limits than what the >> hardware can actually handle, but I'd love to track this down. >> >> If the hardware can't actually handle the lower limit we should >> probably just fail the probe gracefully with a well comment if >> statement instead.
Michael Ellerman <mpe@ellerman.id.au> writes: > "Linux regression tracking (Thorsten Leemhuis)" <regressions@leemhuis.info> writes: >> [CCing the regression list, as it should be in the loop for regressions: >> https://docs.kernel.org/admin-guide/reporting-regressions.html] >> >> On 20.05.24 17:15, Christoph Hellwig wrote: >>> Adding ben and the linuxppc list. >> >> Hmm, no reply and no other progress to get this resolved afaics. So lets >> bring Michael into the mix, he might be able to help out. > > Sorry I didn't see the original forward for some reason. > > I haven't seen this on my G5, but it's hard drive is on SATA. I think > the CDROM is pata_macio, but there isn't a disk in the drive to test > with. > >> BTW TWIMC: a PowerMac G5 user user reported similar symptoms here >> recently: https://bugzilla.kernel.org/show_bug.cgi?id=218858 > > AFAICS that report is from a 4K page size kernel (Page orders: ... > virtual = 12), so there must be something else going on? No that's wrong. The actual hardware page size is 4K, but CONFIG_PAGE_SIZE and PAGE_SHIFT etc. is 64K. So at least for this user the driver used to work with 64K pages, and now doesn't. cheers
On Fri, May 31, 2024 at 12:28:21AM +1000, Michael Ellerman wrote: > No that's wrong. The actual hardware page size is 4K, but > CONFIG_PAGE_SIZE and PAGE_SHIFT etc. is 64K. > > So at least for this user the driver used to work with 64K pages, and > now doesn't. Which suggested that the communicated max_hw_sectors is wrong, and previously we were saved by the block layer increasing it to PAGE_SIZE after a warning. Should we just increment it to 64k?
Christoph Hellwig <hch@lst.de> writes: > On Fri, May 31, 2024 at 12:28:21AM +1000, Michael Ellerman wrote: >> No that's wrong. The actual hardware page size is 4K, but >> CONFIG_PAGE_SIZE and PAGE_SHIFT etc. is 64K. >> >> So at least for this user the driver used to work with 64K pages, and >> now doesn't. > > Which suggested that the communicated max_hw_sectors is wrong, and > previously we were saved by the block layer increasing it to > PAGE_SIZE after a warning. Should we just increment it to 64k? It looks like that user actually only has the CDROM hanging off pata_macio, so it's possible it has been broken previously and they didn't notice. I'll see if they can confirm the CDROM has been working up until now. I can test the CDROM on my G5 next week. cheers
Michael Ellerman <mpe@ellerman.id.au> writes: > Christoph Hellwig <hch@lst.de> writes: >> On Fri, May 31, 2024 at 12:28:21AM +1000, Michael Ellerman wrote: >>> No that's wrong. The actual hardware page size is 4K, but >>> CONFIG_PAGE_SIZE and PAGE_SHIFT etc. is 64K. >>> >>> So at least for this user the driver used to work with 64K pages, and >>> now doesn't. >> >> Which suggested that the communicated max_hw_sectors is wrong, and >> previously we were saved by the block layer increasing it to >> PAGE_SIZE after a warning. Should we just increment it to 64k? > > It looks like that user actually only has the CDROM hanging off > pata_macio, so it's possible it has been broken previously and they > didn't notice. I'll see if they can confirm the CDROM has been working > up until now. > > I can test the CDROM on my G5 next week. I can confirm that the driver does work with 64K pages prior to the recent changes. I'm able to boot and read CDs with no errors. However AFAICS that's because the driver splits large requests in pata_macio_qc_prep(): static enum ata_completion_errors pata_macio_qc_prep(struct ata_queued_cmd *qc) { ... for_each_sg(qc->sg, sg, qc->n_elem, si) { u32 addr, sg_len, len; ... addr = (u32) sg_dma_address(sg); sg_len = sg_dma_len(sg); while (sg_len) { ... len = (sg_len < MAX_DBDMA_SEG) ? sg_len : MAX_DBDMA_SEG; table->command = cpu_to_le16(write ? OUTPUT_MORE: INPUT_MORE); table->req_count = cpu_to_le16(len); ... addr += len; sg_len -= len; ++table; } } If I increase MAX_DBMA_SEG from 0xff00 to 64K I see IO errors at boot: [ 24.989755] sr 4:0:0:0: [sr0] tag#0 FAILED Result: hostbyte=DID_OK driverbyte=DRIVER_OK cmd_age=6s [ 25.007310] sr 4:0:0:0: [sr0] tag#0 Sense Key : Medium Error [current] [ 25.020502] sr 4:0:0:0: [sr0] tag#0 ASC=0x10 <<vendor>>ASCQ=0x90 [ 25.032655] sr 4:0:0:0: [sr0] tag#0 CDB: Read(10) 28 00 00 00 00 00 00 00 20 00 [ 25.047232] I/O error, dev sr0, sector 0 op 0x0:(READ) flags 0x80700 phys_seg 1 prio class 0 On the other hand increasing max_segment_size to 64K while leaving MAX_DBDMA_SEG at 0xff00 seems to work fine. And that's effectively what's been happening on existing kernels until now. The only question is whether that violates some assumption elsewhere in the SCSI layer? Anyway patch below that works for me on v6.10-rc2. cheers diff --git a/drivers/ata/pata_macio.c b/drivers/ata/pata_macio.c index 817838e2f70e..3cb455a32d92 100644 --- a/drivers/ata/pata_macio.c +++ b/drivers/ata/pata_macio.c @@ -915,10 +915,13 @@ static const struct scsi_host_template pata_macio_sht = { .sg_tablesize = MAX_DCMDS, /* We may not need that strict one */ .dma_boundary = ATA_DMA_BOUNDARY, - /* Not sure what the real max is but we know it's less than 64K, let's - * use 64K minus 256 + /* + * The SCSI core requires the segment size to cover at least a page, so + * for 64K page size kernels this must be at least 64K. However the + * hardware can't handle 64K, so pata_macio_qc_prep() will split large + * requests. */ - .max_segment_size = MAX_DBDMA_SEG, + .max_segment_size = SZ_64K, .device_configure = pata_macio_device_configure, .sdev_groups = ata_common_sdev_groups, .can_queue = ATA_DEF_QUEUE,
On Wed, Jun 05, 2024 at 10:37:53PM +1000, Michael Ellerman wrote: > On the other hand increasing max_segment_size to 64K while leaving MAX_DBDMA_SEG > at 0xff00 seems to work fine. And that's effectively what's been happening on > existing kernels until now. Exactly. > > The only question is whether that violates some assumption elsewhere in the > SCSI layer? It shouldn't. > Anyway patch below that works for me on v6.10-rc2. This looks good to me: Reviewed-by: Christoph Hellwig <hch@lst.de>
> > diff --git a/drivers/ata/pata_macio.c b/drivers/ata/pata_macio.c > index 817838e2f70e..3cb455a32d92 100644 > --- a/drivers/ata/pata_macio.c > +++ b/drivers/ata/pata_macio.c > @@ -915,10 +915,13 @@ static const struct scsi_host_template pata_macio_sht = { > .sg_tablesize = MAX_DCMDS, > /* We may not need that strict one */ > .dma_boundary = ATA_DMA_BOUNDARY, > - /* Not sure what the real max is but we know it's less than 64K, let's > - * use 64K minus 256 > + /* > + * The SCSI core requires the segment size to cover at least a page, so > + * for 64K page size kernels this must be at least 64K. However the > + * hardware can't handle 64K, so pata_macio_qc_prep() will split large > + * requests. > */ > - .max_segment_size = MAX_DBDMA_SEG, > + .max_segment_size = SZ_64K, > .device_configure = pata_macio_device_configure, > .sdev_groups = ata_common_sdev_groups, > .can_queue = ATA_DEF_QUEUE, Feel free to add: Reviewed-by: John Garry <john.g.garry@oracle.com>
John Garry <john.g.garry@oracle.com> writes: >> diff --git a/drivers/ata/pata_macio.c b/drivers/ata/pata_macio.c >> index 817838e2f70e..3cb455a32d92 100644 >> --- a/drivers/ata/pata_macio.c >> +++ b/drivers/ata/pata_macio.c >> @@ -915,10 +915,13 @@ static const struct scsi_host_template pata_macio_sht = { >> .sg_tablesize = MAX_DCMDS, >> /* We may not need that strict one */ >> .dma_boundary = ATA_DMA_BOUNDARY, >> - /* Not sure what the real max is but we know it's less than 64K, let's >> - * use 64K minus 256 >> + /* >> + * The SCSI core requires the segment size to cover at least a page, so >> + * for 64K page size kernels this must be at least 64K. However the >> + * hardware can't handle 64K, so pata_macio_qc_prep() will split large >> + * requests. >> */ >> - .max_segment_size = MAX_DBDMA_SEG, >> + .max_segment_size = SZ_64K, >> .device_configure = pata_macio_device_configure, >> .sdev_groups = ata_common_sdev_groups, >> .can_queue = ATA_DEF_QUEUE, > > Feel free to add: > Reviewed-by: John Garry <john.g.garry@oracle.com> Thanks. Sorry I missed adding this when sending the proper patch, maybe whoever applies it can add it then. cheers
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 2e28e2360c8574..1deca84914e87a 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -32,7 +32,7 @@ #include <scsi/scsi_driver.h> #include <scsi/scsi_eh.h> #include <scsi/scsi_host.h> -#include <scsi/scsi_transport.h> /* __scsi_init_queue() */ +#include <scsi/scsi_transport.h> /* scsi_init_limits() */ #include <scsi/scsi_dh.h> #include <trace/events/scsi.h> @@ -1965,31 +1965,26 @@ static void scsi_map_queues(struct blk_mq_tag_set *set) blk_mq_map_queues(&set->map[HCTX_TYPE_DEFAULT]); } -void __scsi_init_queue(struct Scsi_Host *shost, struct request_queue *q) +void scsi_init_limits(struct Scsi_Host *shost, struct queue_limits *lim) { struct device *dev = shost->dma_dev; - /* - * this limit is imposed by hardware restrictions - */ - blk_queue_max_segments(q, min_t(unsigned short, shost->sg_tablesize, - SG_MAX_SEGMENTS)); + memset(lim, 0, sizeof(*lim)); + lim->max_segments = + min_t(unsigned short, shost->sg_tablesize, SG_MAX_SEGMENTS); if (scsi_host_prot_dma(shost)) { shost->sg_prot_tablesize = min_not_zero(shost->sg_prot_tablesize, (unsigned short)SCSI_MAX_PROT_SG_SEGMENTS); BUG_ON(shost->sg_prot_tablesize < shost->sg_tablesize); - blk_queue_max_integrity_segments(q, shost->sg_prot_tablesize); + lim->max_integrity_segments = shost->sg_prot_tablesize; } - blk_queue_max_hw_sectors(q, shost->max_sectors); - blk_queue_segment_boundary(q, shost->dma_boundary); - dma_set_seg_boundary(dev, shost->dma_boundary); - - blk_queue_max_segment_size(q, shost->max_segment_size); - blk_queue_virt_boundary(q, shost->virt_boundary_mask); - dma_set_max_seg_size(dev, queue_max_segment_size(q)); + lim->max_hw_sectors = shost->max_sectors; + lim->seg_boundary_mask = shost->dma_boundary; + lim->max_segment_size = shost->max_segment_size; + lim->virt_boundary_mask = shost->virt_boundary_mask; /* * Set a reasonable default alignment: The larger of 32-byte (dword), @@ -1998,9 +1993,12 @@ void __scsi_init_queue(struct Scsi_Host *shost, struct request_queue *q) * * Devices that require a bigger alignment can increase it later. */ - blk_queue_dma_alignment(q, max(4, dma_get_cache_alignment()) - 1); + lim->dma_alignment = max(4, dma_get_cache_alignment()) - 1; + + dma_set_seg_boundary(dev, shost->dma_boundary); + dma_set_max_seg_size(dev, shost->max_segment_size); } -EXPORT_SYMBOL_GPL(__scsi_init_queue); +EXPORT_SYMBOL_GPL(scsi_init_limits); static const struct blk_mq_ops scsi_mq_ops_no_commit = { .get_budget = scsi_mq_get_budget, diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index 8d06475de17a33..205ab3b3ea89be 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -283,6 +283,7 @@ static struct scsi_device *scsi_alloc_sdev(struct scsi_target *starget, struct request_queue *q; int display_failure_msg = 1, ret; struct Scsi_Host *shost = dev_to_shost(starget->dev.parent); + struct queue_limits lim; sdev = kzalloc(sizeof(*sdev) + shost->transportt->device_size, GFP_KERNEL); @@ -332,7 +333,8 @@ static struct scsi_device *scsi_alloc_sdev(struct scsi_target *starget, sdev->sg_reserved_size = INT_MAX; - q = blk_mq_alloc_queue(&sdev->host->tag_set, NULL, NULL); + scsi_init_limits(shost, &lim); + q = blk_mq_alloc_queue(&sdev->host->tag_set, &lim, NULL); if (IS_ERR(q)) { /* release fn is set up in scsi_sysfs_device_initialise, so * have to free and put manually here */ @@ -343,7 +345,6 @@ static struct scsi_device *scsi_alloc_sdev(struct scsi_target *starget, kref_get(&sdev->host->tagset_refcnt); sdev->request_queue = q; q->queuedata = sdev; - __scsi_init_queue(sdev->host, q); depth = sdev->host->cmd_per_lun ?: 1; diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c index 87b2235b8ece45..0799700b0fca77 100644 --- a/drivers/scsi/scsi_transport_fc.c +++ b/drivers/scsi/scsi_transport_fc.c @@ -4276,6 +4276,7 @@ fc_bsg_hostadd(struct Scsi_Host *shost, struct fc_host_attrs *fc_host) { struct device *dev = &shost->shost_gendev; struct fc_internal *i = to_fc_internal(shost->transportt); + struct queue_limits lim; struct request_queue *q; char bsg_name[20]; @@ -4286,8 +4287,8 @@ fc_bsg_hostadd(struct Scsi_Host *shost, struct fc_host_attrs *fc_host) snprintf(bsg_name, sizeof(bsg_name), "fc_host%d", shost->host_no); - - q = bsg_setup_queue(dev, bsg_name, NULL, fc_bsg_dispatch, + scsi_init_limits(shost, &lim); + q = bsg_setup_queue(dev, bsg_name, &lim, fc_bsg_dispatch, fc_bsg_job_timeout, i->f->dd_bsg_size); if (IS_ERR(q)) { dev_err(dev, @@ -4295,7 +4296,6 @@ fc_bsg_hostadd(struct Scsi_Host *shost, struct fc_host_attrs *fc_host) shost->host_no); return PTR_ERR(q); } - __scsi_init_queue(shost, q); blk_queue_rq_timeout(q, FC_DEFAULT_BSG_TIMEOUT); fc_host->rqst_q = q; return 0; @@ -4311,6 +4311,7 @@ fc_bsg_rportadd(struct Scsi_Host *shost, struct fc_rport *rport) { struct device *dev = &rport->dev; struct fc_internal *i = to_fc_internal(shost->transportt); + struct queue_limits lim; struct request_queue *q; rport->rqst_q = NULL; @@ -4318,13 +4319,13 @@ fc_bsg_rportadd(struct Scsi_Host *shost, struct fc_rport *rport) if (!i->f->bsg_request) return -ENOTSUPP; - q = bsg_setup_queue(dev, dev_name(dev), NULL, fc_bsg_dispatch_prep, + scsi_init_limits(shost, &lim); + q = bsg_setup_queue(dev, dev_name(dev), &lim, fc_bsg_dispatch_prep, fc_bsg_job_timeout, i->f->dd_bsg_size); if (IS_ERR(q)) { dev_err(dev, "failed to setup bsg queue\n"); return PTR_ERR(q); } - __scsi_init_queue(shost, q); blk_queue_rq_timeout(q, BLK_DEFAULT_SG_TIMEOUT); rport->rqst_q = q; return 0; diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index c131746bf20777..5e1bb488da15c0 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -1535,6 +1535,7 @@ iscsi_bsg_host_add(struct Scsi_Host *shost, struct iscsi_cls_host *ihost) { struct device *dev = &shost->shost_gendev; struct iscsi_internal *i = to_iscsi_internal(shost->transportt); + struct queue_limits lim; struct request_queue *q; char bsg_name[20]; @@ -1542,14 +1543,14 @@ iscsi_bsg_host_add(struct Scsi_Host *shost, struct iscsi_cls_host *ihost) return -ENOTSUPP; snprintf(bsg_name, sizeof(bsg_name), "iscsi_host%d", shost->host_no); - q = bsg_setup_queue(dev, bsg_name, NULL, iscsi_bsg_host_dispatch, NULL, + scsi_init_limits(shost, &lim); + q = bsg_setup_queue(dev, bsg_name, &lim, iscsi_bsg_host_dispatch, NULL, 0); if (IS_ERR(q)) { shost_printk(KERN_ERR, shost, "bsg interface failed to " "initialize - no request queue\n"); return PTR_ERR(q); } - __scsi_init_queue(shost, q); ihost->bsg_q = q; return 0; diff --git a/include/scsi/scsi_transport.h b/include/scsi/scsi_transport.h index a0458bda314872..1394cf313bb37c 100644 --- a/include/scsi/scsi_transport.h +++ b/include/scsi/scsi_transport.h @@ -83,6 +83,6 @@ scsi_transport_device_data(struct scsi_device *sdev) + shost->transportt->device_private_offset; } -void __scsi_init_queue(struct Scsi_Host *shost, struct request_queue *q); +void scsi_init_limits(struct Scsi_Host *shost, struct queue_limits *lim); #endif /* SCSI_TRANSPORT_H */