Message ID | 20201009143104.22673-17-willy@infradead.org |
---|---|
State | New |
Headers | show |
Series | Allow readpage to return a locked page | expand |
> +static void iomap_read_page_end_io(struct bio_vec *bvec, > + struct completion *done, bool error) I really don't like the parameters here. Part of the problem is that ctx is only assigned to bi_private conditionally, which can easily be fixed. The other part is the strange bool error when we can just pass on bi_status. See the patch at the end of what I'd do instead. > @@ -318,15 +325,17 @@ iomap_readpage(struct page *page, const struct iomap_ops *ops) > > trace_iomap_readpage(page->mapping->host, 1); > > + ctx.status = BLK_STS_OK; This should move into the initializer for ctx. Or we could just drop it given that BLK_STS_OK is and must always be 0. > } else { > WARN_ON_ONCE(ctx.cur_page_in_bio); > - unlock_page(page); > + complete(&ctx.done); > } > > + wait_for_completion(&ctx.done); I don't think we need the complete / wait_for_completion dance in this case. > + if (ret >= 0) > + ret = blk_status_to_errno(ctx.status); > + if (ret == 0) > + return AOP_UPDATED_PAGE; > + unlock_page(page); > + return ret; Nitpick, but I'd rather have a goto out_unlock for both error cases and have the AOP_UPDATED_PAGE for the normal path straight in line. 
Here is an untested patch with my suggestions: diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 887bf871ca9bba..81d34725565d7e 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -162,33 +162,34 @@ static void iomap_set_range_uptodate(struct page *page, unsigned off, spin_unlock_irqrestore(&iop->uptodate_lock, flags); } -static void iomap_read_page_end_io(struct bio_vec *bvec, - struct completion *done, bool error) +struct iomap_readpage_ctx { + struct page *cur_page; + bool cur_page_in_bio; + blk_status_t status; + struct bio *bio; + struct readahead_control *rac; + struct completion done; +}; + +static void +iomap_read_page_end_io(struct iomap_readpage_ctx *ctx, struct bio_vec *bvec, + blk_status_t status) { struct page *page = bvec->bv_page; struct iomap_page *iop = to_iomap_page(page); - if (!error) + if (status == BLK_STS_OK) iomap_set_range_uptodate(page, bvec->bv_offset, bvec->bv_len); if (!iop || atomic_sub_and_test(bvec->bv_len, &iop->read_bytes_pending)) { - if (done) - complete(done); - else + if (ctx->rac) unlock_page(page); + else + complete(&ctx->done); } } -struct iomap_readpage_ctx { - struct page *cur_page; - bool cur_page_in_bio; - blk_status_t status; - struct bio *bio; - struct readahead_control *rac; - struct completion done; -}; - static void iomap_read_end_io(struct bio *bio) { @@ -197,12 +198,11 @@ iomap_read_end_io(struct bio *bio) struct bvec_iter_all iter_all; /* Capture the first error */ - if (ctx && ctx->status == BLK_STS_OK) + if (ctx->status == BLK_STS_OK) ctx->status = bio->bi_status; bio_for_each_segment_all(bvec, bio, iter_all) - iomap_read_page_end_io(bvec, ctx ? 
&ctx->done : NULL, - bio->bi_status != BLK_STS_OK); + iomap_read_page_end_io(ctx, bvec, bio->bi_status); bio_put(bio); } @@ -297,8 +297,7 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data, ctx->bio->bi_opf = REQ_OP_READ; if (ctx->rac) ctx->bio->bi_opf |= REQ_RAHEAD; - else - ctx->bio->bi_private = ctx; + ctx->bio->bi_private = ctx; ctx->bio->bi_iter.bi_sector = sector; bio_set_dev(ctx->bio, iomap->bdev); ctx->bio->bi_end_io = iomap_read_end_io; @@ -318,14 +317,16 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data, int iomap_readpage(struct page *page, const struct iomap_ops *ops) { - struct iomap_readpage_ctx ctx = { .cur_page = page }; + struct iomap_readpage_ctx ctx = { + .cur_page = page, + .status = BLK_STS_OK, + }; struct inode *inode = page->mapping->host; unsigned poff; loff_t ret; trace_iomap_readpage(page->mapping->host, 1); - ctx.status = BLK_STS_OK; init_completion(&ctx.done); for (poff = 0; poff < PAGE_SIZE; poff += ret) { @@ -340,17 +341,16 @@ iomap_readpage(struct page *page, const struct iomap_ops *ops) if (ctx.bio) { submit_bio(ctx.bio); - WARN_ON_ONCE(!ctx.cur_page_in_bio); - } else { - WARN_ON_ONCE(ctx.cur_page_in_bio); - complete(&ctx.done); + wait_for_completion(&ctx.done); } - wait_for_completion(&ctx.done); - if (ret >= 0) - ret = blk_status_to_errno(ctx.status); - if (ret == 0) - return AOP_UPDATED_PAGE; + if (ret < 0) + goto out_unlock; + ret = blk_status_to_errno(ctx.status); + if (ret < 0) + goto out_unlock; + return AOP_UPDATED_PAGE; +out_unlock: unlock_page(page); return ret; }
On Thu, Oct 15, 2020 at 10:42:03AM +0100, Christoph Hellwig wrote: > > +static void iomap_read_page_end_io(struct bio_vec *bvec, > > + struct completion *done, bool error) > > I really don't like the parameters here. Part of the problem is > that ctx is only assigned to bi_private conditionally, which can > easily be fixed. The other part is the strange bool error when > we can just pass on bi_stats. See the patch at the end of what > I'd do intead. I prefer assigning ctx conditionally to propagating the knowledge that !rac means synchronous. I've gone with this: static void iomap_read_page_end_io(struct bio_vec *bvec, - struct completion *done, bool error) + struct iomap_readpage_ctx *ctx, blk_status_t status) { struct page *page = bvec->bv_page; struct iomap_page *iop = to_iomap_page(page); - if (!error) + if (status == BLK_STS_OK) { iomap_set_range_uptodate(page, bvec->bv_offset, bvec->bv_len); + } else if (ctx && ctx->status == BLK_STS_OK) { + ctx->status = status; + } if (!iop || atomic_sub_and_test(bvec->bv_len, &iop->read_bytes_pending)) { - if (done) - complete(done); + if (ctx) + complete(&ctx->done); else unlock_page(page); } > > } else { > > WARN_ON_ONCE(ctx.cur_page_in_bio); > > - unlock_page(page); > > + complete(&ctx.done); > > } > > > > + wait_for_completion(&ctx.done); > > I don't think we need the complete / wait_for_completion dance in > this case. > > > + if (ret >= 0) > > + ret = blk_status_to_errno(ctx.status); > > + if (ret == 0) > > + return AOP_UPDATED_PAGE; > > + unlock_page(page); > > + return ret; > > Nipick, but I'd rather have a goto out_unlock for both error case > and have the AOP_UPDATED_PAGE for the normal path straight in line. 
> > Here is an untested patch with my suggestions: I think we can go a little further here: @@ -340,16 +335,12 @@ iomap_readpage(struct page *page, const struct iomap_ops * ops) if (ctx.bio) { submit_bio(ctx.bio); - WARN_ON_ONCE(!ctx.cur_page_in_bio); - } else { - WARN_ON_ONCE(ctx.cur_page_in_bio); - complete(&ctx.done); + wait_for_completion(&ctx.done); + if (ret > 0) + ret = blk_status_to_errno(ctx.status); } - wait_for_completion(&ctx.done); if (ret >= 0) - ret = blk_status_to_errno(ctx.status); - if (ret == 0) return AOP_UPDATED_PAGE; unlock_page(page); return ret; ... there's no need to call blk_status_to_errno if we never submitted a bio.
On Thu, Oct 15, 2020 at 05:43:33PM +0100, Matthew Wilcox wrote: > On Thu, Oct 15, 2020 at 10:42:03AM +0100, Christoph Hellwig wrote: > > > +static void iomap_read_page_end_io(struct bio_vec *bvec, > > > + struct completion *done, bool error) > > > > I really don't like the parameters here. Part of the problem is > > that ctx is only assigned to bi_private conditionally, which can > > easily be fixed. The other part is the strange bool error when > > we can just pass on bi_stats. See the patch at the end of what > > I'd do intead. > > I prefer assigning ctx conditionally to propagating the knowledge > that !rac means synchronous. I've gone with this: And I really hate these kinds of conditional assignments. If the ->rac check is too raw please just add an explicit bool synchronous : 1; flag. > @@ -340,16 +335,12 @@ iomap_readpage(struct page *page, const struct iomap_ops * > ops) > > if (ctx.bio) { > submit_bio(ctx.bio); > + if (ret > 0) > + ret = blk_status_to_errno(ctx.status); > } > > - wait_for_completion(&ctx.done); > if (ret >= 0) > - ret = blk_status_to_errno(ctx.status); > - if (ret == 0) > return AOP_UPDATED_PAGE; > unlock_page(page); > return ret; > > > ... there's no need to call blk_status_to_errno if we never submitted a bio. True. I'd still prefer the AOP_UPDATED_PAGE as the fallthrough case and an explicit goto out_unlock, though.
On Thu, Oct 15, 2020 at 06:58:48PM +0100, Christoph Hellwig wrote: > On Thu, Oct 15, 2020 at 05:43:33PM +0100, Matthew Wilcox wrote: > > I prefer assigning ctx conditionally to propagating the knowledge > > that !rac means synchronous. I've gone with this: > > And I really hate these kinds of conditional assignments. If the > ->rac check is too raw please just add an explicit > > bool synchronous : 1; > > flag. I honestly don't see the problem. We have to assign the status conditionally anyway so we don't overwrite an error with a subsequent success. > True. I'd still prefer the AOP_UPDATED_PAGE as the fallthrough case > and an explicit goto out_unlock, though. So this? if (ctx.bio) { submit_bio(ctx.bio); wait_for_completion(&ctx.done); if (ret < 0) goto err; ret = blk_status_to_errno(ctx.status); } if (ret < 0) goto err; return AOP_UPDATED_PAGE; err: unlock_page(page); return ret;
On Thu, Oct 15, 2020 at 08:03:12PM +0100, Matthew Wilcox wrote: > I honestly don't see the problem. We have to assign the status > conditionally anyway so we don't overwrite an error with a subsequent > success. Yes, but having a potential NULL pointer to a common structure is just waiting for trouble. > > > True. I'd still prefer the AOP_UPDATED_PAGE as the fallthrough case > > and an explicit goto out_unlock, though. > > So this? > > if (ctx.bio) { > submit_bio(ctx.bio); > wait_for_completion(&ctx.done); > if (ret < 0) > goto err; > ret = blk_status_to_errno(ctx.status); > } > > if (ret < 0) > goto err; > return AOP_UPDATED_PAGE; > err: > unlock_page(page); > return ret; > Looks good.
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index e60f572e1590..887bf871ca9b 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -150,9 +150,6 @@ static void iomap_set_range_uptodate(struct page *page, unsigned off, unsigned last = (off + len - 1) >> inode->i_blkbits; unsigned long flags; - if (PageError(page)) - return; - if (!iop) { SetPageUptodate(page); return; @@ -165,42 +162,50 @@ static void iomap_set_range_uptodate(struct page *page, unsigned off, spin_unlock_irqrestore(&iop->uptodate_lock, flags); } -static void -iomap_read_page_end_io(struct bio_vec *bvec, int error) +static void iomap_read_page_end_io(struct bio_vec *bvec, + struct completion *done, bool error) { struct page *page = bvec->bv_page; struct iomap_page *iop = to_iomap_page(page); - if (unlikely(error)) { - ClearPageUptodate(page); - SetPageError(page); - } else { + if (!error) iomap_set_range_uptodate(page, bvec->bv_offset, bvec->bv_len); - } - if (!iop || atomic_sub_and_test(bvec->bv_len, &iop->read_bytes_pending)) - unlock_page(page); + if (!iop || + atomic_sub_and_test(bvec->bv_len, &iop->read_bytes_pending)) { + if (done) + complete(done); + else + unlock_page(page); + } } +struct iomap_readpage_ctx { + struct page *cur_page; + bool cur_page_in_bio; + blk_status_t status; + struct bio *bio; + struct readahead_control *rac; + struct completion done; +}; + static void iomap_read_end_io(struct bio *bio) { - int error = blk_status_to_errno(bio->bi_status); + struct iomap_readpage_ctx *ctx = bio->bi_private; struct bio_vec *bvec; struct bvec_iter_all iter_all; + /* Capture the first error */ + if (ctx && ctx->status == BLK_STS_OK) + ctx->status = bio->bi_status; + bio_for_each_segment_all(bvec, bio, iter_all) - iomap_read_page_end_io(bvec, error); + iomap_read_page_end_io(bvec, ctx ? 
&ctx->done : NULL, + bio->bi_status != BLK_STS_OK); bio_put(bio); } -struct iomap_readpage_ctx { - struct page *cur_page; - bool cur_page_in_bio; - struct bio *bio; - struct readahead_control *rac; -}; - static void iomap_read_inline_data(struct inode *inode, struct page *page, struct iomap *iomap) @@ -292,6 +297,8 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data, ctx->bio->bi_opf = REQ_OP_READ; if (ctx->rac) ctx->bio->bi_opf |= REQ_RAHEAD; + else + ctx->bio->bi_private = ctx; ctx->bio->bi_iter.bi_sector = sector; bio_set_dev(ctx->bio, iomap->bdev); ctx->bio->bi_end_io = iomap_read_end_io; @@ -318,15 +325,17 @@ iomap_readpage(struct page *page, const struct iomap_ops *ops) trace_iomap_readpage(page->mapping->host, 1); + ctx.status = BLK_STS_OK; + init_completion(&ctx.done); + for (poff = 0; poff < PAGE_SIZE; poff += ret) { ret = iomap_apply(inode, page_offset(page) + poff, PAGE_SIZE - poff, 0, ops, &ctx, iomap_readpage_actor); - if (ret <= 0) { - WARN_ON_ONCE(ret == 0); - SetPageError(page); + if (WARN_ON_ONCE(ret == 0)) + ret = -EIO; + if (ret < 0) break; - } } if (ctx.bio) { @@ -334,15 +343,16 @@ iomap_readpage(struct page *page, const struct iomap_ops *ops) WARN_ON_ONCE(!ctx.cur_page_in_bio); } else { WARN_ON_ONCE(ctx.cur_page_in_bio); - unlock_page(page); + complete(&ctx.done); } - /* - * Just like mpage_readahead and block_read_full_page we always - * return 0 and just mark the page as PageError on errors. This - * should be cleaned up all through the stack eventually. - */ - return 0; + wait_for_completion(&ctx.done); + if (ret >= 0) + ret = blk_status_to_errno(ctx.status); + if (ret == 0) + return AOP_UPDATED_PAGE; + unlock_page(page); + return ret; } EXPORT_SYMBOL_GPL(iomap_readpage);
A synchronous readpage lets us report the actual errno instead of ineffectively setting PageError. Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org> --- fs/iomap/buffered-io.c | 74 ++++++++++++++++++++++++------------------ 1 file changed, 42 insertions(+), 32 deletions(-)