Message ID | 20240120230824.2619716-9-willy@infradead.org
---|---
State | Superseded
Series | ubifs folio conversion
On 2024/1/21 7:08, Matthew Wilcox (Oracle) wrote:
> Save eight calls to compound_head() by using the new folio API.
> Remove a few assumptions that would break with large folios.
> 
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> ---
>  fs/ubifs/file.c | 36 +++++++++++++++++-------------------
>  1 file changed, 17 insertions(+), 19 deletions(-)
> 
> diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
> index 6302ce65e0b3..ef262499f228 100644
> --- a/fs/ubifs/file.c
> +++ b/fs/ubifs/file.c
> @@ -428,7 +428,7 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
>  	pgoff_t index = pos >> PAGE_SHIFT;
>  	int err, appending = !!(pos + len > inode->i_size);
>  	int skipped_read = 0;
> -	struct page *page;
> +	struct folio *folio;
>  
>  	ubifs_assert(c, ubifs_inode(inode)->ui_size == inode->i_size);
>  	ubifs_assert(c, !c->ro_media && !c->ro_mount);
> @@ -437,13 +437,14 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
>  		return -EROFS;
>  
>  	/* Try out the fast-path part first */
> -	page = grab_cache_page_write_begin(mapping, index);
> -	if (unlikely(!page))
> -		return -ENOMEM;
> +	folio = __filemap_get_folio(mapping, index, FGP_WRITEBEGIN,
> +			mapping_gfp_mask(mapping));
> +	if (IS_ERR(folio))
> +		return PTR_ERR(folio);
>  
> -	if (!PageUptodate(page)) {
> +	if (!folio_test_uptodate(folio)) {
>  		/* The page is not loaded from the flash */
> -		if (!(pos & ~PAGE_MASK) && len == PAGE_SIZE) {
> +		if (pos == folio_pos(folio) && len >= folio_size(folio)) {

Why not len == folio_size(folio)? Although 'len >= folio_size(folio)' is not
wrong.

>  			/*
>  			 * We change whole page so no need to load it. But we
>  			 * do not know whether this page exists on the media or
> @@ -453,29 +454,27 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
>  			 * media. Thus, we are setting the @PG_checked flag
>  			 * here.
>  			 */
> -			SetPageChecked(page);
> +			folio_set_checked(folio);
>  			skipped_read = 1;
>  		} else {
> -			err = do_readpage(page);
> +			err = do_readpage(&folio->page);
>  			if (err) {
> -				unlock_page(page);
> -				put_page(page);
> +				folio_unlock(folio);
> +				folio_put(folio);
>  				return err;
>  			}
>  		}
>  	}
>  
> -	err = allocate_budget(c, page, ui, appending);
> +	err = allocate_budget(c, &folio->page, ui, appending);
>  	if (unlikely(err)) {
>  		ubifs_assert(c, err == -ENOSPC);
>  		/*
>  		 * If we skipped reading the page because we were going to
>  		 * write all of it, then it is not up to date.
>  		 */
> -		if (skipped_read) {
> -			ClearPageChecked(page);
> -			ClearPageUptodate(page);
> -		}
> +		if (skipped_read)
> +			folio_clear_checked(folio);
>  		/*
>  		 * Budgeting failed which means it would have to force
>  		 * write-back but didn't, because we set the @fast flag in the
> @@ -487,8 +486,8 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
>  		ubifs_assert(c, mutex_is_locked(&ui->ui_mutex));
>  		mutex_unlock(&ui->ui_mutex);
>  	}
> -	unlock_page(page);
> -	put_page(page);
> +	folio_unlock(folio);
> +	folio_put(folio);
>  
>  	return write_begin_slow(mapping, pos, len, pagep);
>  }
> @@ -499,9 +498,8 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
>  	 * with @ui->ui_mutex locked if we are appending pages, and unlocked
>  	 * otherwise. This is an optimization (slightly hacky though).
>  	 */
> -	*pagep = page;
> +	*pagep = &folio->page;
>  	return 0;
> -
>  }
>  
>  /**
On Mon, Jan 22, 2024 at 07:51:03PM +0800, Zhihao Cheng wrote:
> > @@ -437,13 +437,14 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
> >  		return -EROFS;
> >  	/* Try out the fast-path part first */
> > -	page = grab_cache_page_write_begin(mapping, index);
> > -	if (unlikely(!page))
> > -		return -ENOMEM;
> > +	folio = __filemap_get_folio(mapping, index, FGP_WRITEBEGIN,
> > +			mapping_gfp_mask(mapping));
> > +	if (IS_ERR(folio))
> > +		return PTR_ERR(folio);
> > -	if (!PageUptodate(page)) {
> > +	if (!folio_test_uptodate(folio)) {
> >  		/* The page is not loaded from the flash */
> > -		if (!(pos & ~PAGE_MASK) && len == PAGE_SIZE) {
> > +		if (pos == folio_pos(folio) && len >= folio_size(folio)) {
> 
> Why not len == folio_size(folio)? Although 'len >= folio_size(folio)' is not
> wrong.

This is based on my experience with iomap.  Today, the caller passes in
min(length-of-write, PAGE_SIZE).  In order to be able to create large
folios in the write path, we want to change the caller of write_begin
to pass in the length of the write, so we can see a len which is larger
than the size of the folio we found.
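To make that reasoning concrete, here is a minimal stand-alone C sketch (an editorial illustration only, not part of the patch or the thread; covers_whole_folio() and its plain integer arguments are hypothetical stand-ins for the kernel's pos, len, folio_pos() and folio_size()):

```c
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/*
 * Model of the fast-path test in ubifs_write_begin(): the read-before-write
 * may be skipped only if the write starts at the folio and covers all of it.
 */
static bool covers_whole_folio(long long pos, size_t len,
			       long long folio_pos, size_t folio_size)
{
	return pos == folio_pos && len >= folio_size;
}

int main(void)
{
	/* Today: the caller passes min(write length, PAGE_SIZE), e.g. 4096. */
	printf("%d\n", covers_whole_folio(0, 4096, 0, 4096));     /* 1: skip read */

	/*
	 * Future: the caller passes the full write length (say 1 MiB) while
	 * the page cache hands back a 64 KiB folio.  len exceeds folio_size,
	 * yet the folio is still completely overwritten, so '>=' keeps the
	 * fast path where '==' would force a needless read from flash.
	 */
	printf("%d\n", covers_whole_folio(0, 1 << 20, 0, 65536)); /* 1: skip read */
	return 0;
}
```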
On 2024/1/22 22:43, Matthew Wilcox wrote:
> On Mon, Jan 22, 2024 at 07:51:03PM +0800, Zhihao Cheng wrote:
>>> @@ -437,13 +437,14 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
>>>  		return -EROFS;
>>>  	/* Try out the fast-path part first */
>>> -	page = grab_cache_page_write_begin(mapping, index);
>>> -	if (unlikely(!page))
>>> -		return -ENOMEM;
>>> +	folio = __filemap_get_folio(mapping, index, FGP_WRITEBEGIN,
>>> +			mapping_gfp_mask(mapping));
>>> +	if (IS_ERR(folio))
>>> +		return PTR_ERR(folio);
>>> -	if (!PageUptodate(page)) {
>>> +	if (!folio_test_uptodate(folio)) {
>>>  		/* The page is not loaded from the flash */
>>> -		if (!(pos & ~PAGE_MASK) && len == PAGE_SIZE) {
>>> +		if (pos == folio_pos(folio) && len >= folio_size(folio)) {
>>
>> Why not len == folio_size(folio)? Although 'len >= folio_size(folio)' is not
>> wrong.
> 
> This is based on my experience with iomap.  Today, the caller passes in
> min(length-of-write, PAGE_SIZE).  In order to be able to create large
> folios in the write path, we want to change the caller of write_begin
> to pass in the length of the write, so we can see a len which is larger
> than the size of the folio we found.
> 
> .
> 

I guess you are preparing to add large folio support to the
generic_perform_write path; may I have a look at the demo code? I'm a
little confused by the words "we want to change the caller of write_begin
to pass in the length of the write". According to the implementation of
iomap_write_iter(), I find the write length in each iteration is
min(write_bytes, folio_size), so I guess the write length passed to
ubifs_write_begin won't exceed folio_size.
On Tue, Jan 23, 2024 at 10:15:50AM +0800, Zhihao Cheng wrote:
> On 2024/1/22 22:43, Matthew Wilcox wrote:
> > On Mon, Jan 22, 2024 at 07:51:03PM +0800, Zhihao Cheng wrote:
> > > Why not len == folio_size(folio)? Although 'len >= folio_size(folio)' is not
> > > wrong.
> > 
> > This is based on my experience with iomap.  Today, the caller passes in
> > min(length-of-write, PAGE_SIZE).  In order to be able to create large
> > folios in the write path, we want to change the caller of write_begin
> > to pass in the length of the write, so we can see a len which is larger
> > than the size of the folio we found.
> 
> I guess you are preparing to add large folio support to the
> generic_perform_write path; may I have a look at the demo code? I'm a

https://lore.kernel.org/linux-fsdevel/20240111192513.3505769-1-willy@infradead.org/
(now withdrawn because it breaks ext4)

> little confused by the words "we want to change the caller of write_begin
> to pass in the length of the write". According to the implementation of
> iomap_write_iter(), I find the write length in each iteration is
> min(write_bytes, folio_size), so I guess the write length passed to
> ubifs_write_begin won't exceed folio_size.

But that happens after write_begin, because we don't know what the folio
size will be until we've called write_begin.

ie we call write_begin, passing in the size remaining from the write.
If write_begin finds an existing folio in the page cache, we use it.
If not, we allocate a folio based on the size of the write.  After
write_begin returns a folio, we limit the number of bytes copied to the
size of the folio.
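The ordering described here can be sketched as a self-contained userspace model. Everything below (the write_begin() stub, the fixed 16 KiB folio, min_sz()) is an illustrative assumption, not the real kernel or iomap API; the point is only that the clamp to folio size happens after write_begin() returns:

```c
#include <stddef.h>
#include <stdio.h>

struct folio { size_t size; };

static size_t min_sz(size_t a, size_t b)
{
	return a < b ? a : b;
}

/* Stub: pretend the page cache returns a 16 KiB folio whatever 'len' is. */
static struct folio *write_begin(long long pos, size_t len)
{
	static struct folio f = { .size = 16384 };
	(void)pos;
	(void)len;
	return &f;
}

int main(void)
{
	long long pos = 0;		/* toy assumes folio-aligned writes */
	size_t remaining = 1 << 20;	/* a 1 MiB write */

	while (remaining) {
		/* write_begin() sees the full remaining length of the write... */
		struct folio *folio = write_begin(pos, remaining);
		/* ...and only afterwards is the copy clamped to the folio size. */
		size_t bytes = min_sz(remaining, folio->size);

		printf("copy %zu bytes at offset %lld\n", bytes, pos);
		pos += bytes;
		remaining -= bytes;
	}
	return 0;
}
```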
On 2024/1/23 11:07, Matthew Wilcox wrote:
> On Tue, Jan 23, 2024 at 10:15:50AM +0800, Zhihao Cheng wrote:
>> On 2024/1/22 22:43, Matthew Wilcox wrote:
>>> On Mon, Jan 22, 2024 at 07:51:03PM +0800, Zhihao Cheng wrote:
>>>> Why not len == folio_size(folio)? Although 'len >= folio_size(folio)' is not
>>>> wrong.
>>>
>>> This is based on my experience with iomap.  Today, the caller passes in
>>> min(length-of-write, PAGE_SIZE).  In order to be able to create large
>>> folios in the write path, we want to change the caller of write_begin
>>> to pass in the length of the write, so we can see a len which is larger
>>> than the size of the folio we found.
>>
>> I guess you are preparing to add large folio support to the
>> generic_perform_write path; may I have a look at the demo code? I'm a
> 
> https://lore.kernel.org/linux-fsdevel/20240111192513.3505769-1-willy@infradead.org/
> (now withdrawn because it breaks ext4)
> 
>> little confused by the words "we want to change the caller of write_begin
>> to pass in the length of the write". According to the implementation of
>> iomap_write_iter(), I find the write length in each iteration is
>> min(write_bytes, folio_size), so I guess the write length passed to
>> ubifs_write_begin won't exceed folio_size.
> 
> But that happens after write_begin, because we don't know what the folio
> size will be until we've called write_begin.
> 
> ie we call write_begin, passing in the size remaining from the write.
> If write_begin finds an existing folio in the page cache, we use it.
> If not, we allocate a folio based on the size of the write.  After
> write_begin returns a folio, we limit the number of bytes copied to the
> size of the folio.
> .
> 

Makes sense. Thanks for the explanation.
On 2024/1/21 7:08, Matthew Wilcox (Oracle) wrote:
> Save eight calls to compound_head() by using the new folio API.
> Remove a few assumptions that would break with large folios.
> 
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> ---
>  fs/ubifs/file.c | 36 +++++++++++++++++-------------------
>  1 file changed, 17 insertions(+), 19 deletions(-)
> 

Reviewed-by: Zhihao Cheng <chengzhihao1@huawei.com>

> diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
> index 6302ce65e0b3..ef262499f228 100644
> --- a/fs/ubifs/file.c
> +++ b/fs/ubifs/file.c
> @@ -428,7 +428,7 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
>  	pgoff_t index = pos >> PAGE_SHIFT;
>  	int err, appending = !!(pos + len > inode->i_size);
>  	int skipped_read = 0;
> -	struct page *page;
> +	struct folio *folio;
>  
>  	ubifs_assert(c, ubifs_inode(inode)->ui_size == inode->i_size);
>  	ubifs_assert(c, !c->ro_media && !c->ro_mount);
> @@ -437,13 +437,14 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
>  		return -EROFS;
>  
>  	/* Try out the fast-path part first */
> -	page = grab_cache_page_write_begin(mapping, index);
> -	if (unlikely(!page))
> -		return -ENOMEM;
> +	folio = __filemap_get_folio(mapping, index, FGP_WRITEBEGIN,
> +			mapping_gfp_mask(mapping));
> +	if (IS_ERR(folio))
> +		return PTR_ERR(folio);
>  
> -	if (!PageUptodate(page)) {
> +	if (!folio_test_uptodate(folio)) {
>  		/* The page is not loaded from the flash */
> -		if (!(pos & ~PAGE_MASK) && len == PAGE_SIZE) {
> +		if (pos == folio_pos(folio) && len >= folio_size(folio)) {
>  			/*
>  			 * We change whole page so no need to load it. But we
>  			 * do not know whether this page exists on the media or
> @@ -453,29 +454,27 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
>  			 * media. Thus, we are setting the @PG_checked flag
>  			 * here.
>  			 */
> -			SetPageChecked(page);
> +			folio_set_checked(folio);
>  			skipped_read = 1;
>  		} else {
> -			err = do_readpage(page);
> +			err = do_readpage(&folio->page);
>  			if (err) {
> -				unlock_page(page);
> -				put_page(page);
> +				folio_unlock(folio);
> +				folio_put(folio);
>  				return err;
>  			}
>  		}
>  	}
>  
> -	err = allocate_budget(c, page, ui, appending);
> +	err = allocate_budget(c, &folio->page, ui, appending);
>  	if (unlikely(err)) {
>  		ubifs_assert(c, err == -ENOSPC);
>  		/*
>  		 * If we skipped reading the page because we were going to
>  		 * write all of it, then it is not up to date.
>  		 */
> -		if (skipped_read) {
> -			ClearPageChecked(page);
> -			ClearPageUptodate(page);
> -		}
> +		if (skipped_read)
> +			folio_clear_checked(folio);
>  		/*
>  		 * Budgeting failed which means it would have to force
>  		 * write-back but didn't, because we set the @fast flag in the
> @@ -487,8 +486,8 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
>  		ubifs_assert(c, mutex_is_locked(&ui->ui_mutex));
>  		mutex_unlock(&ui->ui_mutex);
>  	}
> -	unlock_page(page);
> -	put_page(page);
> +	folio_unlock(folio);
> +	folio_put(folio);
>  
>  	return write_begin_slow(mapping, pos, len, pagep);
>  }
> @@ -499,9 +498,8 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
>  	 * with @ui->ui_mutex locked if we are appending pages, and unlocked
>  	 * otherwise. This is an optimization (slightly hacky though).
>  	 */
> -	*pagep = page;
> +	*pagep = &folio->page;
>  	return 0;
> -
>  }
>  
>  /**
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 6302ce65e0b3..ef262499f228 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -428,7 +428,7 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
 	pgoff_t index = pos >> PAGE_SHIFT;
 	int err, appending = !!(pos + len > inode->i_size);
 	int skipped_read = 0;
-	struct page *page;
+	struct folio *folio;
 
 	ubifs_assert(c, ubifs_inode(inode)->ui_size == inode->i_size);
 	ubifs_assert(c, !c->ro_media && !c->ro_mount);
@@ -437,13 +437,14 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
 		return -EROFS;
 
 	/* Try out the fast-path part first */
-	page = grab_cache_page_write_begin(mapping, index);
-	if (unlikely(!page))
-		return -ENOMEM;
+	folio = __filemap_get_folio(mapping, index, FGP_WRITEBEGIN,
+			mapping_gfp_mask(mapping));
+	if (IS_ERR(folio))
+		return PTR_ERR(folio);
 
-	if (!PageUptodate(page)) {
+	if (!folio_test_uptodate(folio)) {
 		/* The page is not loaded from the flash */
-		if (!(pos & ~PAGE_MASK) && len == PAGE_SIZE) {
+		if (pos == folio_pos(folio) && len >= folio_size(folio)) {
 			/*
 			 * We change whole page so no need to load it. But we
 			 * do not know whether this page exists on the media or
@@ -453,29 +454,27 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
 			 * media. Thus, we are setting the @PG_checked flag
 			 * here.
 			 */
-			SetPageChecked(page);
+			folio_set_checked(folio);
 			skipped_read = 1;
 		} else {
-			err = do_readpage(page);
+			err = do_readpage(&folio->page);
 			if (err) {
-				unlock_page(page);
-				put_page(page);
+				folio_unlock(folio);
+				folio_put(folio);
 				return err;
 			}
 		}
 	}
 
-	err = allocate_budget(c, page, ui, appending);
+	err = allocate_budget(c, &folio->page, ui, appending);
 	if (unlikely(err)) {
 		ubifs_assert(c, err == -ENOSPC);
 		/*
 		 * If we skipped reading the page because we were going to
 		 * write all of it, then it is not up to date.
 		 */
-		if (skipped_read) {
-			ClearPageChecked(page);
-			ClearPageUptodate(page);
-		}
+		if (skipped_read)
+			folio_clear_checked(folio);
 		/*
 		 * Budgeting failed which means it would have to force
 		 * write-back but didn't, because we set the @fast flag in the
@@ -487,8 +486,8 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
 		ubifs_assert(c, mutex_is_locked(&ui->ui_mutex));
 		mutex_unlock(&ui->ui_mutex);
 	}
-	unlock_page(page);
-	put_page(page);
+	folio_unlock(folio);
+	folio_put(folio);
 
 	return write_begin_slow(mapping, pos, len, pagep);
 }
@@ -499,9 +498,8 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
 	 * with @ui->ui_mutex locked if we are appending pages, and unlocked
 	 * otherwise. This is an optimization (slightly hacky though).
 	 */
-	*pagep = page;
+	*pagep = &folio->page;
 	return 0;
-
 }
 
 /**
Save eight calls to compound_head() by using the new folio API.
Remove a few assumptions that would break with large folios.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 fs/ubifs/file.c | 36 +++++++++++++++++-------------------
 1 file changed, 17 insertions(+), 19 deletions(-)
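As an aside on the compound_head() saving the commit message mentions, the toy userspace model below illustrates the idea; the structure definitions and helpers here are deliberate simplifications for illustration, not the kernel's actual implementations:

```c
#include <stdbool.h>
#include <stdio.h>

/* Toy model: a tail page must be resolved to its head before flag tests. */
struct page { struct page *head; bool uptodate; };	/* head == NULL => head page */
struct folio { bool uptodate; };			/* a folio is never a tail page */

static struct page *compound_head(struct page *p)
{
	return p->head ? p->head : p;
}

static bool PageUptodate(struct page *p)
{
	return compound_head(p)->uptodate;	/* extra lookup on every call */
}

static bool folio_test_uptodate(const struct folio *f)
{
	return f->uptodate;			/* no lookup needed */
}

int main(void)
{
	struct page head = { .head = NULL, .uptodate = true };
	struct page tail = { .head = &head, .uptodate = false };
	struct folio folio = { .uptodate = true };

	printf("%d %d\n", PageUptodate(&tail), folio_test_uptodate(&folio));
	return 0;
}
```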