Message ID | 1280393201-27874-2-git-send-email-apw@canonical.com |
---|---|
State | Accepted |
Delegated to: | Leann Ogasawara |
Headers | show |
On 07/29/2010 10:46 AM, Andy Whitcroft wrote: > Track pages which undergo readahead and for each record which were > actually consumed, via either read or faulted into a map. This allows > userspace readahead applications (such as ureadahead) to track which > pages in core at the end of a boot are actually required and generate an > optimal readahead pack. It also allows pack adjustment and optimisation > in parallel with readahead, allowing the pack to evolve to be accurate > as userspace paths change. The status of the pages are reported back via > the mincore() call using a newly allocated bit. > > Signed-off-by: Andy Whitcroft <apw@canonical.com> > --- > include/linux/page-flags.h | 3 +++ > mm/filemap.c | 3 +++ > mm/memory.c | 7 ++++++- > mm/mincore.c | 2 ++ > mm/readahead.c | 1 + > 5 files changed, 15 insertions(+), 1 deletions(-) > > diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h > index 5b59f35..89dc94f 100644 > --- a/include/linux/page-flags.h > +++ b/include/linux/page-flags.h > @@ -108,6 +108,7 @@ enum pageflags { > #ifdef CONFIG_MEMORY_FAILURE > PG_hwpoison, /* hardware poisoned page. Don't touch */ > #endif > + PG_readaheadunused, /* user oriented readahead as yet unused*/ > __NR_PAGEFLAGS, > > /* Filesystems */ > @@ -239,6 +240,8 @@ PAGEFLAG(MappedToDisk, mappedtodisk) > PAGEFLAG(Reclaim, reclaim) TESTCLEARFLAG(Reclaim, reclaim) > PAGEFLAG(Readahead, reclaim) /* Reminder to do async read-ahead */ > > +PAGEFLAG(ReadaheadUnused, readaheadunused) > + > #ifdef CONFIG_HIGHMEM > /* > * Must use a macro here due to header dependency issues. 
page_zone() is not > diff --git a/mm/filemap.c b/mm/filemap.c > index 20e5642..26e5e15 100644 > --- a/mm/filemap.c > +++ b/mm/filemap.c > @@ -1192,6 +1192,9 @@ int file_read_actor(read_descriptor_t *desc, struct page *page, > if (size > count) > size = count; > > + if (PageReadaheadUnused(page)) > + ClearPageReadaheadUnused(page); > + > /* > * Faults on the destination of a read are common, so do it before > * taking the kmap. > diff --git a/mm/memory.c b/mm/memory.c > index 119b7cc..97ca21b 100644 > --- a/mm/memory.c > +++ b/mm/memory.c > @@ -2865,10 +2865,15 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma, > else > VM_BUG_ON(!PageLocked(vmf.page)); > > + page = vmf.page; > + > + /* Mark the page as used on fault. */ > + if (PageReadaheadUnused(page)) > + ClearPageReadaheadUnused(page); > + > /* > * Should we do an early C-O-W break? > */ > - page = vmf.page; > if (flags & FAULT_FLAG_WRITE) { > if (!(vma->vm_flags & VM_SHARED)) { > anon = 1; > diff --git a/mm/mincore.c b/mm/mincore.c > index 9ac42dc..a4e573a 100644 > --- a/mm/mincore.c > +++ b/mm/mincore.c > @@ -77,6 +77,8 @@ static unsigned char mincore_page(struct address_space *mapping, pgoff_t pgoff) > page = find_get_page(mapping, pgoff); > if (page) { > present = PageUptodate(page); > + if (present) > + present |= (PageReadaheadUnused(page) << 7); > page_cache_release(page); > } > > diff --git a/mm/readahead.c b/mm/readahead.c > index 77506a2..6948b92 100644 > --- a/mm/readahead.c > +++ b/mm/readahead.c > @@ -181,6 +181,7 @@ __do_page_cache_readahead(struct address_space *mapping, struct file *filp, > list_add(&page->lru, &page_pool); > if (page_idx == nr_to_read - lookahead_size) > SetPageReadahead(page); > + SetPageReadaheadUnused(page); > ret++; > } > I think it looks good. Just out of interest, the last hunk sounds a bit like it only sets PageReadahead on one page while PageREadaheadUnused is set on all of them. Which seems a bit odd. -Stefan
On Thu, Jul 29, 2010 at 11:20:40AM +0200, Stefan Bader wrote: > On 07/29/2010 10:46 AM, Andy Whitcroft wrote: > > Track pages which undergo readahead and for each record which were > > actually consumed, via either read or faulted into a map. This allows > > userspace readahead applications (such as ureadahead) to track which > > pages in core at the end of a boot are actually required and generate an > > optimal readahead pack. It also allows pack adjustment and optimisation > > in parallel with readahead, allowing the pack to evolve to be accurate > > as userspace paths change. The status of the pages are reported back via > > the mincore() call using a newly allocated bit. > > > > Signed-off-by: Andy Whitcroft <apw@canonical.com> > > --- > > include/linux/page-flags.h | 3 +++ > > mm/filemap.c | 3 +++ > > mm/memory.c | 7 ++++++- > > mm/mincore.c | 2 ++ > > mm/readahead.c | 1 + > > 5 files changed, 15 insertions(+), 1 deletions(-) > > > > diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h > > index 5b59f35..89dc94f 100644 > > --- a/include/linux/page-flags.h > > +++ b/include/linux/page-flags.h > > @@ -108,6 +108,7 @@ enum pageflags { > > #ifdef CONFIG_MEMORY_FAILURE > > PG_hwpoison, /* hardware poisoned page. Don't touch */ > > #endif > > + PG_readaheadunused, /* user oriented readahead as yet unused*/ > > __NR_PAGEFLAGS, > > > > /* Filesystems */ > > @@ -239,6 +240,8 @@ PAGEFLAG(MappedToDisk, mappedtodisk) > > PAGEFLAG(Reclaim, reclaim) TESTCLEARFLAG(Reclaim, reclaim) > > PAGEFLAG(Readahead, reclaim) /* Reminder to do async read-ahead */ > > > > +PAGEFLAG(ReadaheadUnused, readaheadunused) > > + > > #ifdef CONFIG_HIGHMEM > > /* > > * Must use a macro here due to header dependency issues. 
page_zone() is not > > diff --git a/mm/filemap.c b/mm/filemap.c > > index 20e5642..26e5e15 100644 > > --- a/mm/filemap.c > > +++ b/mm/filemap.c > > @@ -1192,6 +1192,9 @@ int file_read_actor(read_descriptor_t *desc, struct page *page, > > if (size > count) > > size = count; > > > > + if (PageReadaheadUnused(page)) > > + ClearPageReadaheadUnused(page); > > + > > /* > > * Faults on the destination of a read are common, so do it before > > * taking the kmap. > > diff --git a/mm/memory.c b/mm/memory.c > > index 119b7cc..97ca21b 100644 > > --- a/mm/memory.c > > +++ b/mm/memory.c > > @@ -2865,10 +2865,15 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma, > > else > > VM_BUG_ON(!PageLocked(vmf.page)); > > > > + page = vmf.page; > > + > > + /* Mark the page as used on fault. */ > > + if (PageReadaheadUnused(page)) > > + ClearPageReadaheadUnused(page); > > + > > /* > > * Should we do an early C-O-W break? > > */ > > - page = vmf.page; > > if (flags & FAULT_FLAG_WRITE) { > > if (!(vma->vm_flags & VM_SHARED)) { > > anon = 1; > > diff --git a/mm/mincore.c b/mm/mincore.c > > index 9ac42dc..a4e573a 100644 > > --- a/mm/mincore.c > > +++ b/mm/mincore.c > > @@ -77,6 +77,8 @@ static unsigned char mincore_page(struct address_space *mapping, pgoff_t pgoff) > > page = find_get_page(mapping, pgoff); > > if (page) { > > present = PageUptodate(page); > > + if (present) > > + present |= (PageReadaheadUnused(page) << 7); > > page_cache_release(page); > > } > > > > diff --git a/mm/readahead.c b/mm/readahead.c > > index 77506a2..6948b92 100644 > > --- a/mm/readahead.c > > +++ b/mm/readahead.c > > @@ -181,6 +181,7 @@ __do_page_cache_readahead(struct address_space *mapping, struct file *filp, > > list_add(&page->lru, &page_pool); > > if (page_idx == nr_to_read - lookahead_size) > > SetPageReadahead(page); > > + SetPageReadaheadUnused(page); > > ret++; > > } > > > > I think it looks good. 
Just out of interest, the last hunk sounds a bit like it > only sets PageReadahead on one page while PageREadaheadUnused is set on all of > them. Which seems a bit odd. That's because the PageReadahead flag is a marker, a pointer into the memory space: when we read the block marked with that for real, we know it's time to schedule more readahead as we are close to consuming all of the previous readahead. -apw
On 07/29/2010 11:41 AM, Andy Whitcroft wrote: > On Thu, Jul 29, 2010 at 11:20:40AM +0200, Stefan Bader wrote: >> On 07/29/2010 10:46 AM, Andy Whitcroft wrote: >>> Track pages which undergo readahead and for each record which were >>> actually consumed, via either read or faulted into a map. This allows >>> userspace readahead applications (such as ureadahead) to track which >>> pages in core at the end of a boot are actually required and generate an >>> optimal readahead pack. It also allows pack adjustment and optimisation >>> in parallel with readahead, allowing the pack to evolve to be accurate >>> as userspace paths change. The status of the pages are reported back via >>> the mincore() call using a newly allocated bit. >>> >>> Signed-off-by: Andy Whitcroft <apw@canonical.com> >>> --- >>> include/linux/page-flags.h | 3 +++ >>> mm/filemap.c | 3 +++ >>> mm/memory.c | 7 ++++++- >>> mm/mincore.c | 2 ++ >>> mm/readahead.c | 1 + >>> 5 files changed, 15 insertions(+), 1 deletions(-) >>> >>> diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h >>> index 5b59f35..89dc94f 100644 >>> --- a/include/linux/page-flags.h >>> +++ b/include/linux/page-flags.h >>> @@ -108,6 +108,7 @@ enum pageflags { >>> #ifdef CONFIG_MEMORY_FAILURE >>> PG_hwpoison, /* hardware poisoned page. Don't touch */ >>> #endif >>> + PG_readaheadunused, /* user oriented readahead as yet unused*/ >>> __NR_PAGEFLAGS, >>> >>> /* Filesystems */ >>> @@ -239,6 +240,8 @@ PAGEFLAG(MappedToDisk, mappedtodisk) >>> PAGEFLAG(Reclaim, reclaim) TESTCLEARFLAG(Reclaim, reclaim) >>> PAGEFLAG(Readahead, reclaim) /* Reminder to do async read-ahead */ >>> >>> +PAGEFLAG(ReadaheadUnused, readaheadunused) >>> + >>> #ifdef CONFIG_HIGHMEM >>> /* >>> * Must use a macro here due to header dependency issues. 
page_zone() is not >>> diff --git a/mm/filemap.c b/mm/filemap.c >>> index 20e5642..26e5e15 100644 >>> --- a/mm/filemap.c >>> +++ b/mm/filemap.c >>> @@ -1192,6 +1192,9 @@ int file_read_actor(read_descriptor_t *desc, struct page *page, >>> if (size > count) >>> size = count; >>> >>> + if (PageReadaheadUnused(page)) >>> + ClearPageReadaheadUnused(page); >>> + >>> /* >>> * Faults on the destination of a read are common, so do it before >>> * taking the kmap. >>> diff --git a/mm/memory.c b/mm/memory.c >>> index 119b7cc..97ca21b 100644 >>> --- a/mm/memory.c >>> +++ b/mm/memory.c >>> @@ -2865,10 +2865,15 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma, >>> else >>> VM_BUG_ON(!PageLocked(vmf.page)); >>> >>> + page = vmf.page; >>> + >>> + /* Mark the page as used on fault. */ >>> + if (PageReadaheadUnused(page)) >>> + ClearPageReadaheadUnused(page); >>> + >>> /* >>> * Should we do an early C-O-W break? >>> */ >>> - page = vmf.page; >>> if (flags & FAULT_FLAG_WRITE) { >>> if (!(vma->vm_flags & VM_SHARED)) { >>> anon = 1; >>> diff --git a/mm/mincore.c b/mm/mincore.c >>> index 9ac42dc..a4e573a 100644 >>> --- a/mm/mincore.c >>> +++ b/mm/mincore.c >>> @@ -77,6 +77,8 @@ static unsigned char mincore_page(struct address_space *mapping, pgoff_t pgoff) >>> page = find_get_page(mapping, pgoff); >>> if (page) { >>> present = PageUptodate(page); >>> + if (present) >>> + present |= (PageReadaheadUnused(page) << 7); >>> page_cache_release(page); >>> } >>> >>> diff --git a/mm/readahead.c b/mm/readahead.c >>> index 77506a2..6948b92 100644 >>> --- a/mm/readahead.c >>> +++ b/mm/readahead.c >>> @@ -181,6 +181,7 @@ __do_page_cache_readahead(struct address_space *mapping, struct file *filp, >>> list_add(&page->lru, &page_pool); >>> if (page_idx == nr_to_read - lookahead_size) >>> SetPageReadahead(page); >>> + SetPageReadaheadUnused(page); >>> ret++; >>> } >>> >> >> I think it looks good. 
Just out of interest, the last hunk sounds a bit like it >> only sets PageReadahead on one page while PageREadaheadUnused is set on all of >> them. Which seems a bit odd. > > Thats because the PageReadahead flag is a marker, a pointer into the > memory space, we read the block marked with that for real we know its > time to schedule more readahead as we are close to consume all of the > previous readhead. > > -apw Ah, thanks for the explanation. It sounded rather like a marker of which pages came from readahead. But it makes sense, and it is probably hard to find a good name for it. Acked-by: Stefan Bader <stefan.bader@canonical.com>
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 5b59f35..89dc94f 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -108,6 +108,7 @@ enum pageflags { #ifdef CONFIG_MEMORY_FAILURE PG_hwpoison, /* hardware poisoned page. Don't touch */ #endif + PG_readaheadunused, /* user oriented readahead as yet unused*/ __NR_PAGEFLAGS, /* Filesystems */ @@ -239,6 +240,8 @@ PAGEFLAG(MappedToDisk, mappedtodisk) PAGEFLAG(Reclaim, reclaim) TESTCLEARFLAG(Reclaim, reclaim) PAGEFLAG(Readahead, reclaim) /* Reminder to do async read-ahead */ +PAGEFLAG(ReadaheadUnused, readaheadunused) + #ifdef CONFIG_HIGHMEM /* * Must use a macro here due to header dependency issues. page_zone() is not diff --git a/mm/filemap.c b/mm/filemap.c index 20e5642..26e5e15 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1192,6 +1192,9 @@ int file_read_actor(read_descriptor_t *desc, struct page *page, if (size > count) size = count; + if (PageReadaheadUnused(page)) + ClearPageReadaheadUnused(page); + /* * Faults on the destination of a read are common, so do it before * taking the kmap. diff --git a/mm/memory.c b/mm/memory.c index 119b7cc..97ca21b 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2865,10 +2865,15 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma, else VM_BUG_ON(!PageLocked(vmf.page)); + page = vmf.page; + + /* Mark the page as used on fault. */ + if (PageReadaheadUnused(page)) + ClearPageReadaheadUnused(page); + /* * Should we do an early C-O-W break? 
*/ - page = vmf.page; if (flags & FAULT_FLAG_WRITE) { if (!(vma->vm_flags & VM_SHARED)) { anon = 1; diff --git a/mm/mincore.c b/mm/mincore.c index 9ac42dc..a4e573a 100644 --- a/mm/mincore.c +++ b/mm/mincore.c @@ -77,6 +77,8 @@ static unsigned char mincore_page(struct address_space *mapping, pgoff_t pgoff) page = find_get_page(mapping, pgoff); if (page) { present = PageUptodate(page); + if (present) + present |= (PageReadaheadUnused(page) << 7); page_cache_release(page); } diff --git a/mm/readahead.c b/mm/readahead.c index 77506a2..6948b92 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -181,6 +181,7 @@ __do_page_cache_readahead(struct address_space *mapping, struct file *filp, list_add(&page->lru, &page_pool); if (page_idx == nr_to_read - lookahead_size) SetPageReadahead(page); + SetPageReadaheadUnused(page); ret++; }
Track pages which undergo readahead and, for each, record whether it was actually consumed, either via a read or by being faulted into a map. This allows userspace readahead applications (such as ureadahead) to track which pages in core at the end of a boot are actually required and generate an optimal readahead pack. It also allows pack adjustment and optimisation in parallel with readahead, allowing the pack to evolve to be accurate as userspace paths change. The status of the pages is reported back via the mincore() call using a newly allocated bit. Signed-off-by: Andy Whitcroft <apw@canonical.com> --- include/linux/page-flags.h | 3 +++ mm/filemap.c | 3 +++ mm/memory.c | 7 ++++++- mm/mincore.c | 2 ++ mm/readahead.c | 1 + 5 files changed, 15 insertions(+), 1 deletions(-)