diff mbox series

[7/7] x86: mm: accelerate pagefault when badaccess

Message ID 20240402075142.196265-8-wangkefeng.wang@huawei.com (mailing list archive)
State Handled Elsewhere, archived
Headers show
Series arch/mm/fault: accelerate pagefault when badaccess | expand

Checks

Context Check Description
snowpatch_ozlabs/github-powerpc_selftests success Successfully ran 8 jobs.
snowpatch_ozlabs/github-powerpc_ppctests success Successfully ran 8 jobs.
snowpatch_ozlabs/github-powerpc_clang success Successfully ran 6 jobs.
snowpatch_ozlabs/github-powerpc_kernel_qemu success Successfully ran 23 jobs.
snowpatch_ozlabs/github-powerpc_sparse success Successfully ran 4 jobs.

Commit Message

Kefeng Wang April 2, 2024, 7:51 a.m. UTC
The vm_flags of vma already checked under per-VMA lock, if it is a
bad access, directly handle error and return, there is no need to
lock_mm_and_find_vma() and check vm_flags again.

Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
---
 arch/x86/mm/fault.c | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

Comments

Suren Baghdasaryan April 3, 2024, 5:59 a.m. UTC | #1
On Tue, Apr 2, 2024 at 12:53 AM Kefeng Wang <wangkefeng.wang@huawei.com> wrote:
>
> The vm_flags of vma already checked under per-VMA lock, if it is a
> bad access, directly handle error and return, there is no need to
> lock_mm_and_find_vma() and check vm_flags again.
>
> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>

Looks safe to me.
Using (mm != NULL) to indicate that we are holding mmap_lock is not
ideal but I guess that works.

Reviewed-by: Suren Baghdasaryan <surenb@google.com>

> ---
>  arch/x86/mm/fault.c | 23 ++++++++++++++---------
>  1 file changed, 14 insertions(+), 9 deletions(-)
>
> diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
> index a4cc20d0036d..67b18adc75dd 100644
> --- a/arch/x86/mm/fault.c
> +++ b/arch/x86/mm/fault.c
> @@ -866,14 +866,17 @@ bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
>
>  static void
>  __bad_area(struct pt_regs *regs, unsigned long error_code,
> -          unsigned long address, u32 pkey, int si_code)
> +          unsigned long address, struct mm_struct *mm,
> +          struct vm_area_struct *vma, u32 pkey, int si_code)
>  {
> -       struct mm_struct *mm = current->mm;
>         /*
>          * Something tried to access memory that isn't in our memory map..
>          * Fix it, but check if it's kernel or user first..
>          */
> -       mmap_read_unlock(mm);
> +       if (mm)
> +               mmap_read_unlock(mm);
> +       else
> +               vma_end_read(vma);
>
>         __bad_area_nosemaphore(regs, error_code, address, pkey, si_code);
>  }
> @@ -897,7 +900,8 @@ static inline bool bad_area_access_from_pkeys(unsigned long error_code,
>
>  static noinline void
>  bad_area_access_error(struct pt_regs *regs, unsigned long error_code,
> -                     unsigned long address, struct vm_area_struct *vma)
> +                     unsigned long address, struct mm_struct *mm,
> +                     struct vm_area_struct *vma)
>  {
>         /*
>          * This OSPKE check is not strictly necessary at runtime.
> @@ -927,9 +931,9 @@ bad_area_access_error(struct pt_regs *regs, unsigned long error_code,
>                  */
>                 u32 pkey = vma_pkey(vma);
>
> -               __bad_area(regs, error_code, address, pkey, SEGV_PKUERR);
> +               __bad_area(regs, error_code, address, mm, vma, pkey, SEGV_PKUERR);
>         } else {
> -               __bad_area(regs, error_code, address, 0, SEGV_ACCERR);
> +               __bad_area(regs, error_code, address, mm, vma, 0, SEGV_ACCERR);
>         }
>  }
>
> @@ -1357,8 +1361,9 @@ void do_user_addr_fault(struct pt_regs *regs,
>                 goto lock_mmap;
>
>         if (unlikely(access_error(error_code, vma))) {
> -               vma_end_read(vma);
> -               goto lock_mmap;
> +               bad_area_access_error(regs, error_code, address, NULL, vma);
> +               count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
> +               return;
>         }
>         fault = handle_mm_fault(vma, address, flags | FAULT_FLAG_VMA_LOCK, regs);
>         if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED)))
> @@ -1394,7 +1399,7 @@ void do_user_addr_fault(struct pt_regs *regs,
>          * we can handle it..
>          */
>         if (unlikely(access_error(error_code, vma))) {
> -               bad_area_access_error(regs, error_code, address, vma);
> +               bad_area_access_error(regs, error_code, address, mm, vma);
>                 return;
>         }
>
> --
> 2.27.0
>
Kefeng Wang April 3, 2024, 7:58 a.m. UTC | #2
On 2024/4/3 13:59, Suren Baghdasaryan wrote:
> On Tue, Apr 2, 2024 at 12:53 AM Kefeng Wang <wangkefeng.wang@huawei.com> wrote:
>>
>> The vm_flags of vma already checked under per-VMA lock, if it is a
>> bad access, directly handle error and return, there is no need to
>> lock_mm_and_find_vma() and check vm_flags again.
>>
>> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
> 
> Looks safe to me.
> Using (mm != NULL) to indicate that we are holding mmap_lock is not
> ideal but I guess that works.
> 

Yes, I will add this part it into change too,

The access_error() of vma already checked under per-VMA lock, if it
is a bad access, directly handle error, no need to retry with mmap_lock
again. In order to release the correct lock, pass the mm_struct into
bad_area_access_error(), if mm is NULL, release vma lock, or release
mmap_lock. Since the page faut is handled under per-VMA lock, count it
as a vma lock event with VMA_LOCK_SUCCESS.

Thanks.


> Reviewed-by: Suren Baghdasaryan <surenb@google.com>
> 
>> ---
>>   arch/x86/mm/fault.c | 23 ++++++++++++++---------
>>   1 file changed, 14 insertions(+), 9 deletions(-)
>>
>> diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
>> index a4cc20d0036d..67b18adc75dd 100644
>> --- a/arch/x86/mm/fault.c
>> +++ b/arch/x86/mm/fault.c
>> @@ -866,14 +866,17 @@ bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
>>
>>   static void
>>   __bad_area(struct pt_regs *regs, unsigned long error_code,
>> -          unsigned long address, u32 pkey, int si_code)
>> +          unsigned long address, struct mm_struct *mm,
>> +          struct vm_area_struct *vma, u32 pkey, int si_code)
>>   {
>> -       struct mm_struct *mm = current->mm;
>>          /*
>>           * Something tried to access memory that isn't in our memory map..
>>           * Fix it, but check if it's kernel or user first..
>>           */
>> -       mmap_read_unlock(mm);
>> +       if (mm)
>> +               mmap_read_unlock(mm);
>> +       else
>> +               vma_end_read(vma);
>>
>>          __bad_area_nosemaphore(regs, error_code, address, pkey, si_code);
>>   }
>> @@ -897,7 +900,8 @@ static inline bool bad_area_access_from_pkeys(unsigned long error_code,
>>
>>   static noinline void
>>   bad_area_access_error(struct pt_regs *regs, unsigned long error_code,
>> -                     unsigned long address, struct vm_area_struct *vma)
>> +                     unsigned long address, struct mm_struct *mm,
>> +                     struct vm_area_struct *vma)
>>   {
>>          /*
>>           * This OSPKE check is not strictly necessary at runtime.
>> @@ -927,9 +931,9 @@ bad_area_access_error(struct pt_regs *regs, unsigned long error_code,
>>                   */
>>                  u32 pkey = vma_pkey(vma);
>>
>> -               __bad_area(regs, error_code, address, pkey, SEGV_PKUERR);
>> +               __bad_area(regs, error_code, address, mm, vma, pkey, SEGV_PKUERR);
>>          } else {
>> -               __bad_area(regs, error_code, address, 0, SEGV_ACCERR);
>> +               __bad_area(regs, error_code, address, mm, vma, 0, SEGV_ACCERR);
>>          }
>>   }
>>
>> @@ -1357,8 +1361,9 @@ void do_user_addr_fault(struct pt_regs *regs,
>>                  goto lock_mmap;
>>
>>          if (unlikely(access_error(error_code, vma))) {
>> -               vma_end_read(vma);
>> -               goto lock_mmap;
>> +               bad_area_access_error(regs, error_code, address, NULL, vma);
>> +               count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
>> +               return;
>>          }
>>          fault = handle_mm_fault(vma, address, flags | FAULT_FLAG_VMA_LOCK, regs);
>>          if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED)))
>> @@ -1394,7 +1399,7 @@ void do_user_addr_fault(struct pt_regs *regs,
>>           * we can handle it..
>>           */
>>          if (unlikely(access_error(error_code, vma))) {
>> -               bad_area_access_error(regs, error_code, address, vma);
>> +               bad_area_access_error(regs, error_code, address, mm, vma);
>>                  return;
>>          }
>>
>> --
>> 2.27.0
>>
Suren Baghdasaryan April 3, 2024, 2:23 p.m. UTC | #3
On Wed, Apr 3, 2024 at 12:58 AM Kefeng Wang <wangkefeng.wang@huawei.com> wrote:
>
>
>
> On 2024/4/3 13:59, Suren Baghdasaryan wrote:
> > On Tue, Apr 2, 2024 at 12:53 AM Kefeng Wang <wangkefeng.wang@huawei.com> wrote:
> >>
> >> The vm_flags of vma already checked under per-VMA lock, if it is a
> >> bad access, directly handle error and return, there is no need to
> >> lock_mm_and_find_vma() and check vm_flags again.
> >>
> >> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
> >
> > Looks safe to me.
> > Using (mm != NULL) to indicate that we are holding mmap_lock is not
> > ideal but I guess that works.
> >
>
> Yes, I will add this part it into change too,
>
> The access_error() of vma already checked under per-VMA lock, if it
> is a bad access, directly handle error, no need to retry with mmap_lock
> again. In order to release the correct lock, pass the mm_struct into
> bad_area_access_error(), if mm is NULL, release vma lock, or release
> mmap_lock. Since the page faut is handled under per-VMA lock, count it
> as a vma lock event with VMA_LOCK_SUCCESS.

The part about passing mm_struct is unnecessary IMHO. It explains "how
you do things" but changelog should describe only "what you do" and
"why you do that". The rest we can see from the code.

>
> Thanks.
>
>
> > Reviewed-by: Suren Baghdasaryan <surenb@google.com>
> >
> >> ---
> >>   arch/x86/mm/fault.c | 23 ++++++++++++++---------
> >>   1 file changed, 14 insertions(+), 9 deletions(-)
> >>
> >> diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
> >> index a4cc20d0036d..67b18adc75dd 100644
> >> --- a/arch/x86/mm/fault.c
> >> +++ b/arch/x86/mm/fault.c
> >> @@ -866,14 +866,17 @@ bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
> >>
> >>   static void
> >>   __bad_area(struct pt_regs *regs, unsigned long error_code,
> >> -          unsigned long address, u32 pkey, int si_code)
> >> +          unsigned long address, struct mm_struct *mm,
> >> +          struct vm_area_struct *vma, u32 pkey, int si_code)
> >>   {
> >> -       struct mm_struct *mm = current->mm;
> >>          /*
> >>           * Something tried to access memory that isn't in our memory map..
> >>           * Fix it, but check if it's kernel or user first..
> >>           */
> >> -       mmap_read_unlock(mm);
> >> +       if (mm)
> >> +               mmap_read_unlock(mm);
> >> +       else
> >> +               vma_end_read(vma);
> >>
> >>          __bad_area_nosemaphore(regs, error_code, address, pkey, si_code);
> >>   }
> >> @@ -897,7 +900,8 @@ static inline bool bad_area_access_from_pkeys(unsigned long error_code,
> >>
> >>   static noinline void
> >>   bad_area_access_error(struct pt_regs *regs, unsigned long error_code,
> >> -                     unsigned long address, struct vm_area_struct *vma)
> >> +                     unsigned long address, struct mm_struct *mm,
> >> +                     struct vm_area_struct *vma)
> >>   {
> >>          /*
> >>           * This OSPKE check is not strictly necessary at runtime.
> >> @@ -927,9 +931,9 @@ bad_area_access_error(struct pt_regs *regs, unsigned long error_code,
> >>                   */
> >>                  u32 pkey = vma_pkey(vma);
> >>
> >> -               __bad_area(regs, error_code, address, pkey, SEGV_PKUERR);
> >> +               __bad_area(regs, error_code, address, mm, vma, pkey, SEGV_PKUERR);
> >>          } else {
> >> -               __bad_area(regs, error_code, address, 0, SEGV_ACCERR);
> >> +               __bad_area(regs, error_code, address, mm, vma, 0, SEGV_ACCERR);
> >>          }
> >>   }
> >>
> >> @@ -1357,8 +1361,9 @@ void do_user_addr_fault(struct pt_regs *regs,
> >>                  goto lock_mmap;
> >>
> >>          if (unlikely(access_error(error_code, vma))) {
> >> -               vma_end_read(vma);
> >> -               goto lock_mmap;
> >> +               bad_area_access_error(regs, error_code, address, NULL, vma);
> >> +               count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
> >> +               return;
> >>          }
> >>          fault = handle_mm_fault(vma, address, flags | FAULT_FLAG_VMA_LOCK, regs);
> >>          if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED)))
> >> @@ -1394,7 +1399,7 @@ void do_user_addr_fault(struct pt_regs *regs,
> >>           * we can handle it..
> >>           */
> >>          if (unlikely(access_error(error_code, vma))) {
> >> -               bad_area_access_error(regs, error_code, address, vma);
> >> +               bad_area_access_error(regs, error_code, address, mm, vma);
> >>                  return;
> >>          }
> >>
> >> --
> >> 2.27.0
> >>
diff mbox series

Patch

diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index a4cc20d0036d..67b18adc75dd 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -866,14 +866,17 @@  bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
 
 static void
 __bad_area(struct pt_regs *regs, unsigned long error_code,
-	   unsigned long address, u32 pkey, int si_code)
+	   unsigned long address, struct mm_struct *mm,
+	   struct vm_area_struct *vma, u32 pkey, int si_code)
 {
-	struct mm_struct *mm = current->mm;
 	/*
 	 * Something tried to access memory that isn't in our memory map..
 	 * Fix it, but check if it's kernel or user first..
 	 */
-	mmap_read_unlock(mm);
+	if (mm)
+		mmap_read_unlock(mm);
+	else
+		vma_end_read(vma);
 
 	__bad_area_nosemaphore(regs, error_code, address, pkey, si_code);
 }
@@ -897,7 +900,8 @@  static inline bool bad_area_access_from_pkeys(unsigned long error_code,
 
 static noinline void
 bad_area_access_error(struct pt_regs *regs, unsigned long error_code,
-		      unsigned long address, struct vm_area_struct *vma)
+		      unsigned long address, struct mm_struct *mm,
+		      struct vm_area_struct *vma)
 {
 	/*
 	 * This OSPKE check is not strictly necessary at runtime.
@@ -927,9 +931,9 @@  bad_area_access_error(struct pt_regs *regs, unsigned long error_code,
 		 */
 		u32 pkey = vma_pkey(vma);
 
-		__bad_area(regs, error_code, address, pkey, SEGV_PKUERR);
+		__bad_area(regs, error_code, address, mm, vma, pkey, SEGV_PKUERR);
 	} else {
-		__bad_area(regs, error_code, address, 0, SEGV_ACCERR);
+		__bad_area(regs, error_code, address, mm, vma, 0, SEGV_ACCERR);
 	}
 }
 
@@ -1357,8 +1361,9 @@  void do_user_addr_fault(struct pt_regs *regs,
 		goto lock_mmap;
 
 	if (unlikely(access_error(error_code, vma))) {
-		vma_end_read(vma);
-		goto lock_mmap;
+		bad_area_access_error(regs, error_code, address, NULL, vma);
+		count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
+		return;
 	}
 	fault = handle_mm_fault(vma, address, flags | FAULT_FLAG_VMA_LOCK, regs);
 	if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED)))
@@ -1394,7 +1399,7 @@  void do_user_addr_fault(struct pt_regs *regs,
 	 * we can handle it..
 	 */
 	if (unlikely(access_error(error_code, vma))) {
-		bad_area_access_error(regs, error_code, address, vma);
+		bad_area_access_error(regs, error_code, address, mm, vma);
 		return;
 	}