[v3,10/14] mm: page_vma_mapped_walk: map_pte() use pte_offset_map_rw_nolock()

Message ID: 20240904084022.32728-11-zhengqi.arch@bytedance.com (mailing list archive)
State: Handled Elsewhere
Series: introduce pte_offset_map_{ro|rw}_nolock()

Commit Message

Qi Zheng Sept. 4, 2024, 8:40 a.m. UTC
In the caller of map_pte(), we may modify pvmw->pte after acquiring
pvmw->ptl, so convert it to use pte_offset_map_rw_nolock(). Since no
pte_same() check is performed after pvmw->ptl is held here, we should
record pmdval and do a pmd_same() check under the lock to ensure the
stability of pvmw->pmd.

Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
---
 mm/page_vma_mapped.c | 24 ++++++++++++++++++++----
 1 file changed, 20 insertions(+), 4 deletions(-)
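
For readers following the series: the conversion boils down to a map,
lock, recheck pattern. A minimal sketch of that pattern in isolation is
shown below; the helper name map_and_lock_pte() is made up for this
example and is not part of the patch, and it assumes the
pte_offset_map_rw_nolock() semantics introduced earlier in the series
(map the PTE table without taking its lock, returning the pmd value
that was seen while mapping):

static pte_t *map_and_lock_pte(struct mm_struct *mm, pmd_t *pmd,
			       unsigned long addr, spinlock_t **ptlp)
{
	pmd_t pmdval;
	pte_t *pte;

again:
	/* Map the PTE table without its lock; record the pmd value seen. */
	pte = pte_offset_map_rw_nolock(mm, pmd, addr, &pmdval, ptlp);
	if (!pte)
		return NULL;	/* no PTE table here (any more) */

	spin_lock(*ptlp);
	/*
	 * The PTE table may have been freed or replaced (e.g. by
	 * khugepaged) between mapping and locking, so revalidate the
	 * pmd entry under the lock before trusting the mapping.
	 */
	if (unlikely(!pmd_same(pmdval, pmdp_get_lockless(pmd)))) {
		pte_unmap_unlock(pte, *ptlp);
		goto again;
	}
	return pte;	/* caller now holds *ptlp */
}

Without the pmd_same() recheck, the lock just taken could belong to a
PTE table that has already been disconnected from the pmd entry.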

Comments

Muchun Song Sept. 5, 2024, 12:07 p.m. UTC | #1
On 2024/9/4 16:40, Qi Zheng wrote:
> In the caller of map_pte(), we may modify pvmw->pte after acquiring
> pvmw->ptl, so convert it to use pte_offset_map_rw_nolock(). Since no
> pte_same() check is performed after pvmw->ptl is held here, we should
> record pmdval and do a pmd_same() check under the lock to ensure the
> stability of pvmw->pmd.
>
> Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
> ---
>   mm/page_vma_mapped.c | 24 ++++++++++++++++++++----
>   1 file changed, 20 insertions(+), 4 deletions(-)
>
> diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
> index ae5cc42aa2087..f1d73fd448708 100644
> --- a/mm/page_vma_mapped.c
> +++ b/mm/page_vma_mapped.c
> @@ -13,9 +13,11 @@ static inline bool not_found(struct page_vma_mapped_walk *pvmw)
>   	return false;
>   }
>   
> -static bool map_pte(struct page_vma_mapped_walk *pvmw, spinlock_t **ptlp)
> +static bool map_pte(struct page_vma_mapped_walk *pvmw, pmd_t *pmdvalp,
> +		    spinlock_t **ptlp)
>   {
>   	pte_t ptent;
> +	pmd_t pmdval;
>   
>   	if (pvmw->flags & PVMW_SYNC) {
>   		/* Use the stricter lookup */
> @@ -25,6 +27,7 @@ static bool map_pte(struct page_vma_mapped_walk *pvmw, spinlock_t **ptlp)
>   		return !!pvmw->pte;
>   	}
>   
> +again:
>   	/*
>   	 * It is important to return the ptl corresponding to pte,
>   	 * in case *pvmw->pmd changes underneath us; so we need to
> @@ -32,10 +35,11 @@ static bool map_pte(struct page_vma_mapped_walk *pvmw, spinlock_t **ptlp)
>   	 * proceeds to loop over next ptes, and finds a match later.
>   	 * Though, in most cases, page lock already protects this.
>   	 */
> -	pvmw->pte = pte_offset_map_nolock(pvmw->vma->vm_mm, pvmw->pmd,
> -					  pvmw->address, ptlp);
> +	pvmw->pte = pte_offset_map_rw_nolock(pvmw->vma->vm_mm, pvmw->pmd,
> +					     pvmw->address, &pmdval, ptlp);
>   	if (!pvmw->pte)
>   		return false;
> +	*pmdvalp = pmdval;
>   
>   	ptent = ptep_get(pvmw->pte);
>   
> @@ -69,6 +73,12 @@ static bool map_pte(struct page_vma_mapped_walk *pvmw, spinlock_t **ptlp)
>   	}
>   	pvmw->ptl = *ptlp;
>   	spin_lock(pvmw->ptl);
> +
> +	if (unlikely(!pmd_same(pmdval, pmdp_get_lockless(pvmw->pmd)))) {
> +		spin_unlock(pvmw->ptl);

Forgot to clear pvmw->ptl? Or how about moving its assignment to the
point where the pmd_same() check has succeeded?

> +		goto again;
> +	}
> +

Maybe here is the right place to assign pvmw->ptl.

Muchun,
Thanks.

>   	return true;
>   }
>   
> @@ -278,7 +288,7 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
>   			step_forward(pvmw, PMD_SIZE);
>   			continue;
>   		}
> -		if (!map_pte(pvmw, &ptl)) {
> +		if (!map_pte(pvmw, &pmde, &ptl)) {
>   			if (!pvmw->pte)
>   				goto restart;
>   			goto next_pte;
> @@ -307,6 +317,12 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
>   		if (!pvmw->ptl) {
>   			pvmw->ptl = ptl;
>   			spin_lock(pvmw->ptl);
> +			if (unlikely(!pmd_same(pmde, pmdp_get_lockless(pvmw->pmd)))) {
> +				pte_unmap_unlock(pvmw->pte, pvmw->ptl);
> +				pvmw->ptl = NULL;
> +				pvmw->pte = NULL;
> +				goto restart;
> +			}
>   		}
>   		goto this_pte;
>   	} while (pvmw->address < end);
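
The issue Muchun is pointing at: on the retry path above, map_pte()
does spin_unlock(pvmw->ptl) but leaves pvmw->ptl pointing at the lock
it just dropped, so an error path on a later iteration could unlock a
lock that is no longer held. One way to follow the suggestion (a
sketch against this v3 diff, not necessarily what v4 will do) is to
lock through *ptlp and publish pvmw->ptl only after pmd_same() has
succeeded:

	spin_lock(*ptlp);
	if (unlikely(!pmd_same(pmdval, pmdp_get_lockless(pvmw->pmd)))) {
		/* Stale PTE table: drop the mapping and the lock, retry. */
		pte_unmap_unlock(pvmw->pte, *ptlp);
		goto again;
	}
	/* Only now is *ptlp known to be the lock for this PTE table. */
	pvmw->ptl = *ptlp;
	return true;
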
Qi Zheng Sept. 12, 2024, 9:30 a.m. UTC | #2
On 2024/9/5 20:07, Muchun Song wrote:
> 
> 
> On 2024/9/4 16:40, Qi Zheng wrote:
>> In the caller of map_pte(), we may modify pvmw->pte after acquiring
>> pvmw->ptl, so convert it to use pte_offset_map_rw_nolock(). Since no
>> pte_same() check is performed after pvmw->ptl is held here, we should
>> record pmdval and do a pmd_same() check under the lock to ensure the
>> stability of pvmw->pmd.
>>
>> Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
>> ---
>>   mm/page_vma_mapped.c | 24 ++++++++++++++++++++----
>>   1 file changed, 20 insertions(+), 4 deletions(-)
>>
>> diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
>> index ae5cc42aa2087..f1d73fd448708 100644
>> --- a/mm/page_vma_mapped.c
>> +++ b/mm/page_vma_mapped.c
>> @@ -13,9 +13,11 @@ static inline bool not_found(struct page_vma_mapped_walk *pvmw)
>>       return false;
>>   }
>> -static bool map_pte(struct page_vma_mapped_walk *pvmw, spinlock_t **ptlp)
>> +static bool map_pte(struct page_vma_mapped_walk *pvmw, pmd_t *pmdvalp,
>> +            spinlock_t **ptlp)
>>   {
>>       pte_t ptent;
>> +    pmd_t pmdval;
>>       if (pvmw->flags & PVMW_SYNC) {
>>           /* Use the stricter lookup */
>> @@ -25,6 +27,7 @@ static bool map_pte(struct page_vma_mapped_walk *pvmw, spinlock_t **ptlp)
>>           return !!pvmw->pte;
>>       }
>> +again:
>>       /*
>>        * It is important to return the ptl corresponding to pte,
>>        * in case *pvmw->pmd changes underneath us; so we need to
>> @@ -32,10 +35,11 @@ static bool map_pte(struct page_vma_mapped_walk *pvmw, spinlock_t **ptlp)
>>        * proceeds to loop over next ptes, and finds a match later.
>>        * Though, in most cases, page lock already protects this.
>>        */
>> -    pvmw->pte = pte_offset_map_nolock(pvmw->vma->vm_mm, pvmw->pmd,
>> -                      pvmw->address, ptlp);
>> +    pvmw->pte = pte_offset_map_rw_nolock(pvmw->vma->vm_mm, pvmw->pmd,
>> +                         pvmw->address, &pmdval, ptlp);
>>       if (!pvmw->pte)
>>           return false;
>> +    *pmdvalp = pmdval;
>>       ptent = ptep_get(pvmw->pte);
>> @@ -69,6 +73,12 @@ static bool map_pte(struct page_vma_mapped_walk *pvmw, spinlock_t **ptlp)
>>       }
>>       pvmw->ptl = *ptlp;
>>       spin_lock(pvmw->ptl);
>> +
>> +    if (unlikely(!pmd_same(pmdval, pmdp_get_lockless(pvmw->pmd)))) {
>> +        spin_unlock(pvmw->ptl);
> 
> Forgot to clear pvmw->ptl? Or how about moving its assignment to the
> point where the pmd_same() check has succeeded?
> 
>> +        goto again;
>> +    }
>> +
> 
> Maybe here is the right place to assign pvmw->ptl.

Right, will do in the v4.

> 
> Muchun,
> Thanks.
> 
>>       return true;
>>   }
>> @@ -278,7 +288,7 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
>>               step_forward(pvmw, PMD_SIZE);
>>               continue;
>>           }
>> -        if (!map_pte(pvmw, &ptl)) {
>> +        if (!map_pte(pvmw, &pmde, &ptl)) {
>>               if (!pvmw->pte)
>>                   goto restart;
>>               goto next_pte;
>> @@ -307,6 +317,12 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
>>           if (!pvmw->ptl) {
>>               pvmw->ptl = ptl;
>>               spin_lock(pvmw->ptl);
>> +            if (unlikely(!pmd_same(pmde, pmdp_get_lockless(pvmw->pmd)))) {
>> +                pte_unmap_unlock(pvmw->pte, pvmw->ptl);
>> +                pvmw->ptl = NULL;
>> +                pvmw->pte = NULL;
>> +                goto restart;
>> +            }
>>           }
>>           goto this_pte;
>>       } while (pvmw->address < end);
>

Patch

diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
index ae5cc42aa2087..f1d73fd448708 100644
--- a/mm/page_vma_mapped.c
+++ b/mm/page_vma_mapped.c
@@ -13,9 +13,11 @@ static inline bool not_found(struct page_vma_mapped_walk *pvmw)
 	return false;
 }
 
-static bool map_pte(struct page_vma_mapped_walk *pvmw, spinlock_t **ptlp)
+static bool map_pte(struct page_vma_mapped_walk *pvmw, pmd_t *pmdvalp,
+		    spinlock_t **ptlp)
 {
 	pte_t ptent;
+	pmd_t pmdval;
 
 	if (pvmw->flags & PVMW_SYNC) {
 		/* Use the stricter lookup */
@@ -25,6 +27,7 @@ static bool map_pte(struct page_vma_mapped_walk *pvmw, spinlock_t **ptlp)
 		return !!pvmw->pte;
 	}
 
+again:
 	/*
 	 * It is important to return the ptl corresponding to pte,
 	 * in case *pvmw->pmd changes underneath us; so we need to
@@ -32,10 +35,11 @@ static bool map_pte(struct page_vma_mapped_walk *pvmw, spinlock_t **ptlp)
 	 * proceeds to loop over next ptes, and finds a match later.
 	 * Though, in most cases, page lock already protects this.
 	 */
-	pvmw->pte = pte_offset_map_nolock(pvmw->vma->vm_mm, pvmw->pmd,
-					  pvmw->address, ptlp);
+	pvmw->pte = pte_offset_map_rw_nolock(pvmw->vma->vm_mm, pvmw->pmd,
+					     pvmw->address, &pmdval, ptlp);
 	if (!pvmw->pte)
 		return false;
+	*pmdvalp = pmdval;
 
 	ptent = ptep_get(pvmw->pte);
 
@@ -69,6 +73,12 @@ static bool map_pte(struct page_vma_mapped_walk *pvmw, spinlock_t **ptlp)
 	}
 	pvmw->ptl = *ptlp;
 	spin_lock(pvmw->ptl);
+
+	if (unlikely(!pmd_same(pmdval, pmdp_get_lockless(pvmw->pmd)))) {
+		spin_unlock(pvmw->ptl);
+		goto again;
+	}
+
 	return true;
 }
 
@@ -278,7 +288,7 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
 			step_forward(pvmw, PMD_SIZE);
 			continue;
 		}
-		if (!map_pte(pvmw, &ptl)) {
+		if (!map_pte(pvmw, &pmde, &ptl)) {
 			if (!pvmw->pte)
 				goto restart;
 			goto next_pte;
@@ -307,6 +317,12 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
 		if (!pvmw->ptl) {
 			pvmw->ptl = ptl;
 			spin_lock(pvmw->ptl);
+			if (unlikely(!pmd_same(pmde, pmdp_get_lockless(pvmw->pmd)))) {
+				pte_unmap_unlock(pvmw->pte, pvmw->ptl);
+				pvmw->ptl = NULL;
+				pvmw->pte = NULL;
+				goto restart;
+			}
 		}
 		goto this_pte;
 	} while (pvmw->address < end);
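
As background on why map_pte() needs the _rw_ variant at all: callers
of page_vma_mapped_walk() may write through pvmw->pte while holding
pvmw->ptl. A loose sketch of such a caller, modeled on the
mkclean-style rmap walks (simplified for illustration, not copied from
any one function in mm/rmap.c):

static void wrprotect_folio_in_vma(struct folio *folio,
				   struct vm_area_struct *vma,
				   unsigned long address)
{
	struct page_vma_mapped_walk pvmw = {
		.pfn = folio_pfn(folio),
		.nr_pages = folio_nr_pages(folio),
		.vma = vma,
		.address = address,
		.flags = PVMW_SYNC,
	};

	while (page_vma_mapped_walk(&pvmw)) {
		pte_t entry;

		if (!pvmw.pte)
			continue;	/* pmd-mapped THP: not handled here */

		/* pvmw.ptl is held here, so rewriting the PTE is safe. */
		entry = ptep_clear_flush(vma, pvmw.address, pvmw.pte);
		entry = pte_wrprotect(entry);
		entry = pte_mkclean(entry);
		set_pte_at(vma->vm_mm, pvmw.address, pvmw.pte, entry);
	}
}

Because such callers may rewrite the PTE under pvmw->ptl, map_pte()
takes the _rw_ variant; and since, as the commit message notes, no
pte_same() check follows, the pmd_same() check under the lock is what
guarantees the PTE table is still the one that was mapped.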