[v3,09/14] mm: mremap: move_ptes() use pte_offset_map_rw_nolock()

Message ID: 20240904084022.32728-10-zhengqi.arch@bytedance.com (mailing list archive)
State: Handled Elsewhere
Series: introduce pte_offset_map_{ro|rw}_nolock()

Commit Message

Qi Zheng Sept. 4, 2024, 8:40 a.m. UTC
In move_ptes(), we may modify the new_pte after acquiring the new_ptl, so
convert it to use pte_offset_map_rw_nolock(). Since the PTE page may be
freed in retract_page_tables() without holding the read lock of mmap_lock,
we still need to do a pmd_same() check after acquiring the PTL.

Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
---
 mm/mremap.c | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)
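
The conversion follows the contract of the helper introduced earlier in
this series: pte_offset_map_rw_nolock() maps the PTE page and reports the
pmd value it saw, but takes no lock itself, so a caller that intends to
write must acquire the PTL and revalidate the pmd before touching any
PTEs. A minimal sketch of that pattern (the caller-side names here are
illustrative, not taken from the patch):

	pmd_t pmdval;
	spinlock_t *ptl;
	pte_t *pte;

	pte = pte_offset_map_rw_nolock(mm, pmd, addr, &pmdval, &ptl);
	if (!pte)
		return -EAGAIN;		/* no PTE page mapped here */

	spin_lock(ptl);
	/* The PTE page may have been freed and the pmd repopulated meanwhile. */
	if (unlikely(!pmd_same(pmdval, pmdp_get_lockless(pmd)))) {
		pte_unmap_unlock(pte, ptl);
		return -EAGAIN;
	}
	/* ... modify PTEs under ptl ... */
	pte_unmap_unlock(pte, ptl);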

Comments

Muchun Song Sept. 5, 2024, 9:25 a.m. UTC | #1
On 2024/9/4 16:40, Qi Zheng wrote:
> In move_ptes(), we may modify the new_pte after acquiring the new_ptl, so
> convert it to use pte_offset_map_rw_nolock(). Since the PTE page may be
> freed in retract_page_tables() without holding the read lock of mmap_lock,
> we still need to do a pmd_same() check after acquiring the PTL.

retract_page_tables() and move_ptes() are synchronized with
i_mmap_lock, right?

Muchun,
Thanks.

Qi Zheng Sept. 5, 2024, 10:56 a.m. UTC | #2
On 2024/9/5 17:25, Muchun Song wrote:
> 
> 
> On 2024/9/4 16:40, Qi Zheng wrote:
>> In move_ptes(), we may modify the new_pte after acquiring the new_ptl, so
>> convert it to use pte_offset_map_rw_nolock(). Since the PTE page may be
>> freed in retract_page_tables() without holding the read lock of mmap_lock,
>> we still need to do a pmd_same() check after acquiring the PTL.
> 
> retract_page_tables() and move_ptes() are synchronized with
> i_mmap_lock, right?

Right, will remove the pmd_same() check in v4. Thanks!
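
For reference, the serialization being confirmed here: retract_page_tables()
walks the mapping's rmap tree under i_mmap_lock_read(), while move_ptes()
takes the rmap locks (i_mmap_lock_write() for file VMAs) when need_rmap_locks
is set. Sketched from the mm/ sources of this era, heavily abbreviated:

	/* mm/khugepaged.c */
	static void retract_page_tables(struct address_space *mapping,
					pgoff_t pgoff)
	{
		struct vm_area_struct *vma;

		i_mmap_lock_read(mapping);
		vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
			/* ... withdraw the PTE page mapping this range ... */
		}
		i_mmap_unlock_read(mapping);
	}

	/* mm/mremap.c */
	static int move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
			     unsigned long old_addr, unsigned long old_end,
			     struct vm_area_struct *new_vma, pmd_t *new_pmd,
			     unsigned long new_addr, bool need_rmap_locks)
	{
		/* ... */
		if (need_rmap_locks)
			take_rmap_locks(vma);	/* i_mmap_lock_write() on file VMAs */
		/* ... move the PTEs ... */
	}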

Patch

diff --git a/mm/mremap.c b/mm/mremap.c
index 24712f8dbb6b5..16e54151395ad 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -143,6 +143,7 @@ static int move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
 	spinlock_t *old_ptl, *new_ptl;
 	bool force_flush = false;
 	unsigned long len = old_end - old_addr;
+	pmd_t pmdval;
 	int err = 0;
 
 	/*
@@ -175,14 +176,29 @@ static int move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
 		err = -EAGAIN;
 		goto out;
 	}
-	new_pte = pte_offset_map_nolock(mm, new_pmd, new_addr, &new_ptl);
+	/*
+	 * The PTE page may be freed in retract_page_tables() without
+	 * holding the read lock of mmap_lock, so we still need to do a
+	 * pmd_same() check after acquiring the PTL.
+	 */
+	new_pte = pte_offset_map_rw_nolock(mm, new_pmd, new_addr, &pmdval,
+					   &new_ptl);
 	if (!new_pte) {
 		pte_unmap_unlock(old_pte, old_ptl);
 		err = -EAGAIN;
 		goto out;
 	}
-	if (new_ptl != old_ptl)
+	if (new_ptl != old_ptl) {
 		spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
+
+		if (unlikely(!pmd_same(pmdval, pmdp_get_lockless(new_pmd)))) {
+			pte_unmap_unlock(new_pte, new_ptl);
+			pte_unmap_unlock(old_pte, old_ptl);
+			err = -EAGAIN;
+			goto out;
+		}
+	}
+
 	flush_tlb_batched_pending(vma->vm_mm);
 	arch_enter_lazy_mmu_mode();
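
Given that serialization, the v4 referred to above would presumably keep the
conversion but drop the recheck, leaving pmdval only to satisfy the helper's
interface. Roughly (a reconstruction from the reply above, not the actual v4):

	new_pte = pte_offset_map_rw_nolock(mm, new_pmd, new_addr, &pmdval,
					   &new_ptl);
	if (!new_pte) {
		pte_unmap_unlock(old_pte, old_ptl);
		err = -EAGAIN;
		goto out;
	}
	if (new_ptl != old_ptl)
		spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);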