Message ID | 20190730120321.285095769@linutronix.de |
---|---|
State | Superseded |
Headers | show |
Series | fs: Substitute bit-spinlocks for PREEMPT_RT and debugging | expand |
On Tue 30-07-19 13:24:54, Thomas Gleixner wrote: > Bit spinlocks are problematic if PREEMPT_RT is enabled, because they > disable preemption, which is undesired for latency reasons and breaks when > regular spinlocks are taken within the bit_spinlock locked region because > regular spinlocks are converted to 'sleeping spinlocks' on RT. So RT > replaces the bit spinlocks with regular spinlocks to avoid this problem. > > To avoid ifdeffery at the source level, wrap all BH_Uptodate_Lock bitlock > operations with inline functions, so the spinlock substitution can be done > at one place. > > Using regular spinlocks can also be enabled for lock debugging purposes so > the lock operations become visible to lockdep. > > Signed-off-by: Thomas Gleixner <tglx@linutronix.de> > Cc: "Theodore Ts'o" <tytso@mit.edu> > Cc: Matthew Wilcox <willy@infradead.org> > Cc: Alexander Viro <viro@zeniv.linux.org.uk> > Cc: linux-fsdevel@vger.kernel.org Looks good to me. You can add: Reviewed-by: Jan Kara <jack@suse.cz> BTW, it should be possible to get rid of BH_Uptodate_Lock altogether using bio chaining (which was non-existent when this bh code was written) to make sure IO completion function gets called only once all bios used to fill in / write out the page are done. It would be also more efficient. But I guess that's an interesting cleanup project for some other time... Honza > --- > fs/buffer.c | 20 ++++++-------------- > fs/ext4/page-io.c | 6 ++---- > fs/ntfs/aops.c | 10 +++------- > include/linux/buffer_head.h | 16 ++++++++++++++++ > 4 files changed, 27 insertions(+), 25 deletions(-) > > --- a/fs/buffer.c > +++ b/fs/buffer.c > @@ -275,8 +275,7 @@ static void end_buffer_async_read(struct > * decide that the page is now completely done. > */ > first = page_buffers(page); > - local_irq_save(flags); > - bit_spin_lock(BH_Uptodate_Lock, &first->b_state); > + flags = bh_uptodate_lock_irqsave(first); > clear_buffer_async_read(bh); > unlock_buffer(bh); > tmp = bh; > @@ -289,8 +288,7 @@ static void end_buffer_async_read(struct > } > tmp = tmp->b_this_page; > } while (tmp != bh); > - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); > - local_irq_restore(flags); > + bh_uptodate_unlock_irqrestore(first, flags); > > /* > * If none of the buffers had errors and they are all > @@ -302,9 +300,7 @@ static void end_buffer_async_read(struct > return; > > still_busy: > - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); > - local_irq_restore(flags); > - return; > + bh_uptodate_unlock_irqrestore(first, flags); > } > > /* > @@ -331,8 +327,7 @@ void end_buffer_async_write(struct buffe > } > > first = page_buffers(page); > - local_irq_save(flags); > - bit_spin_lock(BH_Uptodate_Lock, &first->b_state); > + flags = bh_uptodate_lock_irqsave(first); > > clear_buffer_async_write(bh); > unlock_buffer(bh); > @@ -344,15 +339,12 @@ void end_buffer_async_write(struct buffe > } > tmp = tmp->b_this_page; > } > - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); > - local_irq_restore(flags); > + bh_uptodate_unlock_irqrestore(first, flags); > end_page_writeback(page); > return; > > still_busy: > - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); > - local_irq_restore(flags); > - return; > + bh_uptodate_unlock_irqrestore(first, flags); > } > EXPORT_SYMBOL(end_buffer_async_write); > > --- a/fs/ext4/page-io.c > +++ b/fs/ext4/page-io.c > @@ -90,8 +90,7 @@ static void ext4_finish_bio(struct bio * > * We check all buffers in the page under BH_Uptodate_Lock > * to avoid races with other end io clearing async_write flags > */ > - local_irq_save(flags); > - bit_spin_lock(BH_Uptodate_Lock, &head->b_state); > + flags = bh_uptodate_lock_irqsave(head); > do { > if (bh_offset(bh) < bio_start || > bh_offset(bh) + bh->b_size > bio_end) { > @@ -103,8 +102,7 @@ static void ext4_finish_bio(struct bio * > if (bio->bi_status) > buffer_io_error(bh); > } while ((bh = bh->b_this_page) != head); > - bit_spin_unlock(BH_Uptodate_Lock, &head->b_state); > - local_irq_restore(flags); > + bh_uptodate_unlock_irqrestore(head, flags); > if (!under_io) { > fscrypt_free_bounce_page(bounce_page); > end_page_writeback(page); > --- a/fs/ntfs/aops.c > +++ b/fs/ntfs/aops.c > @@ -92,8 +92,7 @@ static void ntfs_end_buffer_async_read(s > "0x%llx.", (unsigned long long)bh->b_blocknr); > } > first = page_buffers(page); > - local_irq_save(flags); > - bit_spin_lock(BH_Uptodate_Lock, &first->b_state); > + flags = bh_uptodate_lock_irqsave(first); > clear_buffer_async_read(bh); > unlock_buffer(bh); > tmp = bh; > @@ -108,8 +107,7 @@ static void ntfs_end_buffer_async_read(s > } > tmp = tmp->b_this_page; > } while (tmp != bh); > - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); > - local_irq_restore(flags); > + bh_uptodate_unlock_irqrestore(first, flags); > /* > * If none of the buffers had errors then we can set the page uptodate, > * but we first have to perform the post read mst fixups, if the > @@ -142,9 +140,7 @@ static void ntfs_end_buffer_async_read(s > unlock_page(page); > return; > still_busy: > - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); > - local_irq_restore(flags); > - return; > + bh_uptodate_unlock_irqrestore(first, flags); > } > > /** > --- a/include/linux/buffer_head.h > +++ b/include/linux/buffer_head.h > @@ -78,6 +78,22 @@ struct buffer_head { > atomic_t b_count; /* users using this buffer_head */ > }; > > +static inline unsigned long bh_uptodate_lock_irqsave(struct buffer_head *bh) > +{ > + unsigned long flags; > + > + local_irq_save(flags); > + bit_spin_lock(BH_Uptodate_Lock, &bh->b_state); > + return flags; > +} > + > +static inline void > +bh_uptodate_unlock_irqrestore(struct buffer_head *bh, unsigned long flags) > +{ > + bit_spin_unlock(BH_Uptodate_Lock, &bh->b_state); > + local_irq_restore(flags); > +} > + > /* > * macro tricks to expand the set_buffer_foo(), clear_buffer_foo() > * and buffer_foo() functions. > > >
On Wed, 31 Jul 2019, Jan Kara wrote: > On Tue 30-07-19 13:24:54, Thomas Gleixner wrote: > > Bit spinlocks are problematic if PREEMPT_RT is enabled, because they > > disable preemption, which is undesired for latency reasons and breaks when > > regular spinlocks are taken within the bit_spinlock locked region because > > regular spinlocks are converted to 'sleeping spinlocks' on RT. So RT > > replaces the bit spinlocks with regular spinlocks to avoid this problem. > > > > To avoid ifdeffery at the source level, wrap all BH_Uptodate_Lock bitlock > > operations with inline functions, so the spinlock substitution can be done > > at one place. > > > > Using regular spinlocks can also be enabled for lock debugging purposes so > > the lock operations become visible to lockdep. > > > > Signed-off-by: Thomas Gleixner <tglx@linutronix.de> > > Cc: "Theodore Ts'o" <tytso@mit.edu> > > Cc: Matthew Wilcox <willy@infradead.org> > > Cc: Alexander Viro <viro@zeniv.linux.org.uk> > > Cc: linux-fsdevel@vger.kernel.org > > Looks good to me. You can add: > > Reviewed-by: Jan Kara <jack@suse.cz> > > BTW, it should be possible to get rid of BH_Uptodate_Lock altogether using > bio chaining (which was non-existent when this bh code was written) to make > sure IO completion function gets called only once all bios used to fill in > / write out the page are done. It would be also more efficient. But I guess > that's an interesting cleanup project for some other time... While 'possible cleanup' is something which triggers a certain nerve, that particular project certainly goes beyond my basic understanding of that whole fs/block conglomerate. I rather leave that to people who actually have a clue. :) Thanks, tglx
--- a/fs/buffer.c +++ b/fs/buffer.c @@ -275,8 +275,7 @@ static void end_buffer_async_read(struct * decide that the page is now completely done. */ first = page_buffers(page); - local_irq_save(flags); - bit_spin_lock(BH_Uptodate_Lock, &first->b_state); + flags = bh_uptodate_lock_irqsave(first); clear_buffer_async_read(bh); unlock_buffer(bh); tmp = bh; @@ -289,8 +288,7 @@ static void end_buffer_async_read(struct } tmp = tmp->b_this_page; } while (tmp != bh); - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); - local_irq_restore(flags); + bh_uptodate_unlock_irqrestore(first, flags); /* * If none of the buffers had errors and they are all @@ -302,9 +300,7 @@ static void end_buffer_async_read(struct return; still_busy: - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); - local_irq_restore(flags); - return; + bh_uptodate_unlock_irqrestore(first, flags); } /* @@ -331,8 +327,7 @@ void end_buffer_async_write(struct buffe } first = page_buffers(page); - local_irq_save(flags); - bit_spin_lock(BH_Uptodate_Lock, &first->b_state); + flags = bh_uptodate_lock_irqsave(first); clear_buffer_async_write(bh); unlock_buffer(bh); @@ -344,15 +339,12 @@ void end_buffer_async_write(struct buffe } tmp = tmp->b_this_page; } - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); - local_irq_restore(flags); + bh_uptodate_unlock_irqrestore(first, flags); end_page_writeback(page); return; still_busy: - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); - local_irq_restore(flags); - return; + bh_uptodate_unlock_irqrestore(first, flags); } EXPORT_SYMBOL(end_buffer_async_write); --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -90,8 +90,7 @@ static void ext4_finish_bio(struct bio * * We check all buffers in the page under BH_Uptodate_Lock * to avoid races with other end io clearing async_write flags */ - local_irq_save(flags); - bit_spin_lock(BH_Uptodate_Lock, &head->b_state); + flags = bh_uptodate_lock_irqsave(head); do { if (bh_offset(bh) < bio_start || bh_offset(bh) + bh->b_size > bio_end) { @@ -103,8 +102,7 @@ static void ext4_finish_bio(struct bio * if (bio->bi_status) buffer_io_error(bh); } while ((bh = bh->b_this_page) != head); - bit_spin_unlock(BH_Uptodate_Lock, &head->b_state); - local_irq_restore(flags); + bh_uptodate_unlock_irqrestore(head, flags); if (!under_io) { fscrypt_free_bounce_page(bounce_page); end_page_writeback(page); --- a/fs/ntfs/aops.c +++ b/fs/ntfs/aops.c @@ -92,8 +92,7 @@ static void ntfs_end_buffer_async_read(s "0x%llx.", (unsigned long long)bh->b_blocknr); } first = page_buffers(page); - local_irq_save(flags); - bit_spin_lock(BH_Uptodate_Lock, &first->b_state); + flags = bh_uptodate_lock_irqsave(first); clear_buffer_async_read(bh); unlock_buffer(bh); tmp = bh; @@ -108,8 +107,7 @@ static void ntfs_end_buffer_async_read(s } tmp = tmp->b_this_page; } while (tmp != bh); - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); - local_irq_restore(flags); + bh_uptodate_unlock_irqrestore(first, flags); /* * If none of the buffers had errors then we can set the page uptodate, * but we first have to perform the post read mst fixups, if the @@ -142,9 +140,7 @@ static void ntfs_end_buffer_async_read(s unlock_page(page); return; still_busy: - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); - local_irq_restore(flags); - return; + bh_uptodate_unlock_irqrestore(first, flags); } /** --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -78,6 +78,22 @@ struct buffer_head { atomic_t b_count; /* users using this buffer_head */ }; +static inline unsigned long bh_uptodate_lock_irqsave(struct buffer_head *bh) +{ + unsigned long flags; + + local_irq_save(flags); + bit_spin_lock(BH_Uptodate_Lock, &bh->b_state); + return flags; +} + +static inline void +bh_uptodate_unlock_irqrestore(struct buffer_head *bh, unsigned long flags) +{ + bit_spin_unlock(BH_Uptodate_Lock, &bh->b_state); + local_irq_restore(flags); +} + /* * macro tricks to expand the set_buffer_foo(), clear_buffer_foo() * and buffer_foo() functions.
Bit spinlocks are problematic if PREEMPT_RT is enabled, because they disable preemption, which is undesired for latency reasons and breaks when regular spinlocks are taken within the bit_spinlock locked region because regular spinlocks are converted to 'sleeping spinlocks' on RT. So RT replaces the bit spinlocks with regular spinlocks to avoid this problem. To avoid ifdeffery at the source level, wrap all BH_Uptodate_Lock bitlock operations with inline functions, so the spinlock substitution can be done at one place. Using regular spinlocks can also be enabled for lock debugging purposes so the lock operations become visible to lockdep. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Cc: "Theodore Ts'o" <tytso@mit.edu> Cc: Matthew Wilcox <willy@infradead.org> Cc: Alexander Viro <viro@zeniv.linux.org.uk> Cc: linux-fsdevel@vger.kernel.org --- fs/buffer.c | 20 ++++++-------------- fs/ext4/page-io.c | 6 ++---- fs/ntfs/aops.c | 10 +++------- include/linux/buffer_head.h | 16 ++++++++++++++++ 4 files changed, 27 insertions(+), 25 deletions(-)