
[v9,15/24] mm: Introduce __vm_normal_page()

Message ID 1520963994-28477-16-git-send-email-ldufour@linux.vnet.ibm.com (mailing list archive)
State Not Applicable
Series Speculative page faults

Commit Message

Laurent Dufour March 13, 2018, 5:59 p.m. UTC
When dealing with the speculative fault path, we should use the VMA field
values cached in the vm_fault structure.

Currently vm_normal_page() dereferences the VMA pointer to fetch the
vm_flags value. This patch provides a new __vm_normal_page() which receives
the vm_flags value as a parameter instead.

Note: The speculative path is only turned on for architectures providing
support for the special PTE flag, so only the first block of
vm_normal_page() is used during the speculative path.

Signed-off-by: Laurent Dufour <ldufour@linux.vnet.ibm.com>
---
 include/linux/mm.h |  7 +++++--
 mm/memory.c        | 18 ++++++++++--------
 2 files changed, 15 insertions(+), 10 deletions(-)

Comments

David Rientjes April 2, 2018, 11:18 p.m. UTC | #1
On Tue, 13 Mar 2018, Laurent Dufour wrote:

> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index a84ddc218bbd..73b8b99f482b 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -1263,8 +1263,11 @@ struct zap_details {
>  	pgoff_t last_index;			/* Highest page->index to unmap */
>  };
>  
> -struct page *_vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
> -			     pte_t pte, bool with_public_device);
> +struct page *__vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
> +			      pte_t pte, bool with_public_device,
> +			      unsigned long vma_flags);
> +#define _vm_normal_page(vma, addr, pte, with_public_device) \
> +	__vm_normal_page(vma, addr, pte, with_public_device, (vma)->vm_flags)
>  #define vm_normal_page(vma, addr, pte) _vm_normal_page(vma, addr, pte, false)
>  
>  struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr,

If _vm_normal_page() is a static inline function, does it break somehow?
It's nice to avoid the #defines.
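
For illustration, an equivalent static inline wrapper could look like this
(a sketch against the declaration quoted above, not part of the posted patch):

static inline struct page *_vm_normal_page(struct vm_area_struct *vma,
					   unsigned long addr, pte_t pte,
					   bool with_public_device)
{
	/* Same behaviour as the #define: pass the VMA's own flags along. */
	return __vm_normal_page(vma, addr, pte, with_public_device,
				vma->vm_flags);
}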

> diff --git a/mm/memory.c b/mm/memory.c
> index af0338fbc34d..184a0d663a76 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -826,8 +826,9 @@ static void print_bad_pte(struct vm_area_struct *vma, unsigned long addr,
>  #else
>  # define HAVE_PTE_SPECIAL 0
>  #endif
> -struct page *_vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
> -			     pte_t pte, bool with_public_device)
> +struct page *__vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
> +			      pte_t pte, bool with_public_device,
> +			      unsigned long vma_flags)
>  {
>  	unsigned long pfn = pte_pfn(pte);
>  

Would it be possible to update the comment since the function itself is no 
longer named vm_normal_page?
Jerome Glisse April 3, 2018, 7:39 p.m. UTC | #2
On Tue, Mar 13, 2018 at 06:59:45PM +0100, Laurent Dufour wrote:
> When dealing with the speculative fault path we should use the VMA's field
> cached value stored in the vm_fault structure.
> 
> Currently vm_normal_page() is using the pointer to the VMA to fetch the
> vm_flags value. This patch provides a new __vm_normal_page() which is
> receiving the vm_flags flags value as parameter.
> 
> Note: The speculative path is turned on for architecture providing support
> for special PTE flag. So only the first block of vm_normal_page is used
> during the speculative path.

Might be a good idea to explicitly make the SPECULATIVE Kconfig option depend
on ARCH_PTE_SPECIAL, and to add a comment for the !HAVE_PTE_SPECIAL case in the
function explaining that the speculative page fault path should never reach
that point.

Cheers,
Jérôme
David Rientjes April 3, 2018, 8:45 p.m. UTC | #3
On Tue, 3 Apr 2018, Jerome Glisse wrote:

> > When dealing with the speculative fault path we should use the VMA's field
> > cached value stored in the vm_fault structure.
> > 
> > Currently vm_normal_page() is using the pointer to the VMA to fetch the
> > vm_flags value. This patch provides a new __vm_normal_page() which is
> > receiving the vm_flags flags value as parameter.
> > 
> > Note: The speculative path is turned on for architecture providing support
> > for special PTE flag. So only the first block of vm_normal_page is used
> > during the speculative path.
> 
> Might be a good idea to explicitly have SPECULATIVE Kconfig option depends
> on ARCH_PTE_SPECIAL and a comment for !HAVE_PTE_SPECIAL in the function
> explaining that speculative page fault should never reach that point.

Yeah, I think that's appropriate but in a follow-up patch since this is 
only propagating vma_flags.  It will require that __HAVE_ARCH_PTE_SPECIAL 
become an actual Kconfig entry, however.
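
For reference, a rough sketch of the mm/memory.c side of such a conversion
(the Kconfig symbol name here is an assumption):

/* Today HAVE_PTE_SPECIAL is derived from the per-architecture macro: */
#ifdef __HAVE_ARCH_PTE_SPECIAL
# define HAVE_PTE_SPECIAL 1
#else
# define HAVE_PTE_SPECIAL 0
#endif

/* With an actual Kconfig entry (hypothetical name) this could become: */
#ifdef CONFIG_ARCH_HAS_PTE_SPECIAL
# define HAVE_PTE_SPECIAL 1
#else
# define HAVE_PTE_SPECIAL 0
#endif

The speculative page fault option could then simply depend on that symbol in
Kconfig.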
Laurent Dufour April 4, 2018, 4:04 p.m. UTC | #4
On 03/04/2018 01:18, David Rientjes wrote:
> On Tue, 13 Mar 2018, Laurent Dufour wrote:
> 
>> diff --git a/include/linux/mm.h b/include/linux/mm.h
>> index a84ddc218bbd..73b8b99f482b 100644
>> --- a/include/linux/mm.h
>> +++ b/include/linux/mm.h
>> @@ -1263,8 +1263,11 @@ struct zap_details {
>>  	pgoff_t last_index;			/* Highest page->index to unmap */
>>  };
>>  
>> -struct page *_vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
>> -			     pte_t pte, bool with_public_device);
>> +struct page *__vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
>> +			      pte_t pte, bool with_public_device,
>> +			      unsigned long vma_flags);
>> +#define _vm_normal_page(vma, addr, pte, with_public_device) \
>> +	__vm_normal_page(vma, addr, pte, with_public_device, (vma)->vm_flags)
>>  #define vm_normal_page(vma, addr, pte) _vm_normal_page(vma, addr, pte, false)
>>  
>>  struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr,
> 
> If _vm_normal_page() is a static inline function does it break somehow?  
> It's nice to avoid the #define's.

No problem, I'll create it as a static inline function.

> 
>> diff --git a/mm/memory.c b/mm/memory.c
>> index af0338fbc34d..184a0d663a76 100644
>> --- a/mm/memory.c
>> +++ b/mm/memory.c
>> @@ -826,8 +826,9 @@ static void print_bad_pte(struct vm_area_struct *vma, unsigned long addr,
>>  #else
>>  # define HAVE_PTE_SPECIAL 0
>>  #endif
>> -struct page *_vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
>> -			     pte_t pte, bool with_public_device)
>> +struct page *__vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
>> +			      pte_t pte, bool with_public_device,
>> +			      unsigned long vma_flags)
>>  {
>>  	unsigned long pfn = pte_pfn(pte);
>>  
> 
> Would it be possible to update the comment since the function itself is no 
> longer named vm_normal_page?

Sure.
Laurent Dufour April 4, 2018, 4:26 p.m. UTC | #5
On 03/04/2018 21:39, Jerome Glisse wrote:
> On Tue, Mar 13, 2018 at 06:59:45PM +0100, Laurent Dufour wrote:
>> When dealing with the speculative fault path we should use the VMA's field
>> cached value stored in the vm_fault structure.
>>
>> Currently vm_normal_page() is using the pointer to the VMA to fetch the
>> vm_flags value. This patch provides a new __vm_normal_page() which is
>> receiving the vm_flags flags value as parameter.
>>
>> Note: The speculative path is turned on for architecture providing support
>> for special PTE flag. So only the first block of vm_normal_page is used
>> during the speculative path.
> 
> Might be a good idea to explicitly have SPECULATIVE Kconfig option depends
> on ARCH_PTE_SPECIAL and a comment for !HAVE_PTE_SPECIAL in the function
> explaining that speculative page fault should never reach that point.

Unfortunately there is no ARCH_PTE_SPECIAL option in Kconfig; it is defined in
the per-architecture header files, so I can't do anything in the Kconfig file.

However, I can check that at build time, and __vm_normal_page() sounds like a
good place for such a check, like this:

@@ -869,6 +870,14 @@ struct page *__vm_normal_page(struct vm_area_struct *vma, unsigned long addr,

        /* !HAVE_PTE_SPECIAL case follows: */

+#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
+       /* This part should never get called when the speculative page fault
+        * handler is turned on. This is mainly because we can't rely on
+        * vm_start.
+        */
+#error CONFIG_SPECULATIVE_PAGE_FAULT requires HAVE_PTE_SPECIAL
+#endif
+
        if (unlikely(vma_flags & (VM_PFNMAP|VM_MIXEDMAP))) {
                if (vma_flags & VM_MIXEDMAP) {
                        if (!pfn_valid(pfn))

Thanks,
Laurent.
Jerome Glisse April 4, 2018, 9:59 p.m. UTC | #6
On Wed, Apr 04, 2018 at 06:26:44PM +0200, Laurent Dufour wrote:
> 
> 
> On 03/04/2018 21:39, Jerome Glisse wrote:
> > On Tue, Mar 13, 2018 at 06:59:45PM +0100, Laurent Dufour wrote:
> >> When dealing with the speculative fault path we should use the VMA's field
> >> cached value stored in the vm_fault structure.
> >>
> >> Currently vm_normal_page() is using the pointer to the VMA to fetch the
> >> vm_flags value. This patch provides a new __vm_normal_page() which is
> >> receiving the vm_flags flags value as parameter.
> >>
> >> Note: The speculative path is turned on for architecture providing support
> >> for special PTE flag. So only the first block of vm_normal_page is used
> >> during the speculative path.
> > 
> > Might be a good idea to explicitly have SPECULATIVE Kconfig option depends
> > on ARCH_PTE_SPECIAL and a comment for !HAVE_PTE_SPECIAL in the function
> > explaining that speculative page fault should never reach that point.
> 
> Unfortunately there is no ARCH_PTE_SPECIAL in the config file, it is defined in
> the per architecture header files.
> So I can't do anything in the Kconfig file

Maybe add a new Kconfig symbol for ARCH_PTE_SPECIAL, very much like the other
ARCH_HAS_* symbols.

> 
> However, I can check that at build time, and doing such a check in
> __vm_normal_page sounds to be a good place, like that:
> 
> @@ -869,6 +870,14 @@ struct page *__vm_normal_page(struct vm_area_struct *vma,
> unsigned long addr,
> 
>         /* !HAVE_PTE_SPECIAL case follows: */
> 
> +#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
> +       /* This part should never get called when the speculative page fault
> +        * handler is turned on. This is mainly because we can't rely on
> +        * vm_start.
> +        */
> +#error CONFIG_SPECULATIVE_PAGE_FAULT requires HAVE_PTE_SPECIAL
> +#endif
> +
>         if (unlikely(vma_flags & (VM_PFNMAP|VM_MIXEDMAP))) {
>                 if (vma_flags & VM_MIXEDMAP) {
>                         if (!pfn_valid(pfn))
> 

I am not a fan of #if/#else/#endif in code. But that's a taste thing.
I honestly think that adding a Kconfig for special pte is the cleanest
solution.

Cheers,
Jérôme
Laurent Dufour April 5, 2018, 12:53 p.m. UTC | #7
On 04/04/2018 23:59, Jerome Glisse wrote:
> On Wed, Apr 04, 2018 at 06:26:44PM +0200, Laurent Dufour wrote:
>>
>>
>> On 03/04/2018 21:39, Jerome Glisse wrote:
>>> On Tue, Mar 13, 2018 at 06:59:45PM +0100, Laurent Dufour wrote:
>>>> When dealing with the speculative fault path we should use the VMA's field
>>>> cached value stored in the vm_fault structure.
>>>>
>>>> Currently vm_normal_page() is using the pointer to the VMA to fetch the
>>>> vm_flags value. This patch provides a new __vm_normal_page() which is
>>>> receiving the vm_flags flags value as parameter.
>>>>
>>>> Note: The speculative path is turned on for architecture providing support
>>>> for special PTE flag. So only the first block of vm_normal_page is used
>>>> during the speculative path.
>>>
>>> Might be a good idea to explicitly have SPECULATIVE Kconfig option depends
>>> on ARCH_PTE_SPECIAL and a comment for !HAVE_PTE_SPECIAL in the function
>>> explaining that speculative page fault should never reach that point.
>>
>> Unfortunately there is no ARCH_PTE_SPECIAL in the config file, it is defined in
>> the per architecture header files.
>> So I can't do anything in the Kconfig file
> 
> Maybe adding a new Kconfig symbol for ARCH_PTE_SPECIAL very much like
> others ARCH_HAS_
> 
>>
>> However, I can check that at build time, and doing such a check in
>> __vm_normal_page sounds to be a good place, like that:
>>
>> @@ -869,6 +870,14 @@ struct page *__vm_normal_page(struct vm_area_struct *vma,
>> unsigned long addr,
>>
>>         /* !HAVE_PTE_SPECIAL case follows: */
>>
>> +#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
>> +       /* This part should never get called when the speculative page fault
>> +        * handler is turned on. This is mainly because we can't rely on
>> +        * vm_start.
>> +        */
>> +#error CONFIG_SPECULATIVE_PAGE_FAULT requires HAVE_PTE_SPECIAL
>> +#endif
>> +
>>         if (unlikely(vma_flags & (VM_PFNMAP|VM_MIXEDMAP))) {
>>                 if (vma_flags & VM_MIXEDMAP) {
>>                         if (!pfn_valid(pfn))
>>
> 
> I am not a fan of #if/#else/#endif in code. But that's a taste thing.
> I honnestly think that adding a Kconfig for special pte is the cleanest
> solution.

I do agree, but this should be done in a separate series.

I'll see how this could be done, but there are some architectures (like powerpc)
where this is a bit obfuscated, for unknown reasons.

For the time being, I'll remove the check and just leave the comment in place.
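
Something along these lines, as a plain comment instead of the build-time
check (a sketch, not the final hunk):

	/*
	 * !HAVE_PTE_SPECIAL case follows.
	 *
	 * This block should never be reached on the speculative page fault
	 * path, which is only enabled on architectures providing the special
	 * PTE flag; in particular we cannot rely on vma->vm_start here.
	 */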

Patch

diff --git a/include/linux/mm.h b/include/linux/mm.h
index a84ddc218bbd..73b8b99f482b 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1263,8 +1263,11 @@  struct zap_details {
 	pgoff_t last_index;			/* Highest page->index to unmap */
 };
 
-struct page *_vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
-			     pte_t pte, bool with_public_device);
+struct page *__vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
+			      pte_t pte, bool with_public_device,
+			      unsigned long vma_flags);
+#define _vm_normal_page(vma, addr, pte, with_public_device) \
+	__vm_normal_page(vma, addr, pte, with_public_device, (vma)->vm_flags)
 #define vm_normal_page(vma, addr, pte) _vm_normal_page(vma, addr, pte, false)
 
 struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr,
diff --git a/mm/memory.c b/mm/memory.c
index af0338fbc34d..184a0d663a76 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -826,8 +826,9 @@  static void print_bad_pte(struct vm_area_struct *vma, unsigned long addr,
 #else
 # define HAVE_PTE_SPECIAL 0
 #endif
-struct page *_vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
-			     pte_t pte, bool with_public_device)
+struct page *__vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
+			      pte_t pte, bool with_public_device,
+			      unsigned long vma_flags)
 {
 	unsigned long pfn = pte_pfn(pte);
 
@@ -836,7 +837,7 @@  struct page *_vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
 			goto check_pfn;
 		if (vma->vm_ops && vma->vm_ops->find_special_page)
 			return vma->vm_ops->find_special_page(vma, addr);
-		if (vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP))
+		if (vma_flags & (VM_PFNMAP | VM_MIXEDMAP))
 			return NULL;
 		if (is_zero_pfn(pfn))
 			return NULL;
@@ -868,8 +869,8 @@  struct page *_vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
 
 	/* !HAVE_PTE_SPECIAL case follows: */
 
-	if (unlikely(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))) {
-		if (vma->vm_flags & VM_MIXEDMAP) {
+	if (unlikely(vma_flags & (VM_PFNMAP|VM_MIXEDMAP))) {
+		if (vma_flags & VM_MIXEDMAP) {
 			if (!pfn_valid(pfn))
 				return NULL;
 			goto out;
@@ -878,7 +879,7 @@  struct page *_vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
 			off = (addr - vma->vm_start) >> PAGE_SHIFT;
 			if (pfn == vma->vm_pgoff + off)
 				return NULL;
-			if (!is_cow_mapping(vma->vm_flags))
+			if (!is_cow_mapping(vma_flags))
 				return NULL;
 		}
 	}
@@ -2742,7 +2743,8 @@  static int do_wp_page(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
 
-	vmf->page = vm_normal_page(vma, vmf->address, vmf->orig_pte);
+	vmf->page = __vm_normal_page(vma, vmf->address, vmf->orig_pte, false,
+				     vmf->vma_flags);
 	if (!vmf->page) {
 		/*
 		 * VM_MIXEDMAP !pfn_valid() case, or VM_SOFTDIRTY clear on a
@@ -3839,7 +3841,7 @@  static int do_numa_page(struct vm_fault *vmf)
 	ptep_modify_prot_commit(vma->vm_mm, vmf->address, vmf->pte, pte);
 	update_mmu_cache(vma, vmf->address, vmf->pte);
 
-	page = vm_normal_page(vma, vmf->address, pte);
+	page = __vm_normal_page(vma, vmf->address, pte, false, vmf->vma_flags);
 	if (!page) {
 		pte_unmap_unlock(vmf->pte, vmf->ptl);
 		return 0;