Message ID | 20240904084022.32728-11-zhengqi.arch@bytedance.com (mailing list archive) |
---|---|
State | Handled Elsewhere |
Headers | show |
Series | introduce pte_offset_map_{ro|rw}_nolock() | expand |
On 2024/9/4 16:40, Qi Zheng wrote: > In the caller of map_pte(), we may modify the pvmw->pte after acquiring > the pvmw->ptl, so convert it to using pte_offset_map_rw_nolock(). At > this time, the pte_same() check is not performed after the pvmw->ptl held, > so we should get pmdval and do pmd_same() check to ensure the stability of > pvmw->pmd. > > Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com> > --- > mm/page_vma_mapped.c | 24 ++++++++++++++++++++---- > 1 file changed, 20 insertions(+), 4 deletions(-) > > diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c > index ae5cc42aa2087..f1d73fd448708 100644 > --- a/mm/page_vma_mapped.c > +++ b/mm/page_vma_mapped.c > @@ -13,9 +13,11 @@ static inline bool not_found(struct page_vma_mapped_walk *pvmw) > return false; > } > > -static bool map_pte(struct page_vma_mapped_walk *pvmw, spinlock_t **ptlp) > +static bool map_pte(struct page_vma_mapped_walk *pvmw, pmd_t *pmdvalp, > + spinlock_t **ptlp) > { > pte_t ptent; > + pmd_t pmdval; > > if (pvmw->flags & PVMW_SYNC) { > /* Use the stricter lookup */ > @@ -25,6 +27,7 @@ static bool map_pte(struct page_vma_mapped_walk *pvmw, spinlock_t **ptlp) > return !!pvmw->pte; > } > > +again: > /* > * It is important to return the ptl corresponding to pte, > * in case *pvmw->pmd changes underneath us; so we need to > @@ -32,10 +35,11 @@ static bool map_pte(struct page_vma_mapped_walk *pvmw, spinlock_t **ptlp) > * proceeds to loop over next ptes, and finds a match later. > * Though, in most cases, page lock already protects this. 
> */ > - pvmw->pte = pte_offset_map_nolock(pvmw->vma->vm_mm, pvmw->pmd, > - pvmw->address, ptlp); > + pvmw->pte = pte_offset_map_rw_nolock(pvmw->vma->vm_mm, pvmw->pmd, > + pvmw->address, &pmdval, ptlp); > if (!pvmw->pte) > return false; > + *pmdvalp = pmdval; > > ptent = ptep_get(pvmw->pte); > > @@ -69,6 +73,12 @@ static bool map_pte(struct page_vma_mapped_walk *pvmw, spinlock_t **ptlp) > } > pvmw->ptl = *ptlp; > spin_lock(pvmw->ptl); > + > + if (unlikely(!pmd_same(pmdval, pmdp_get_lockless(pvmw->pmd)))) { > + spin_unlock(pvmw->ptl); Forgot to clear pvmw->ptl? Or how about moving the assignment for it to the place where the pmd_same check is successful? > + goto again; > + } > + Maybe here is the right place to assign pvmw->ptl. Muchun, Thanks. > return true; > } > > @@ -278,7 +288,7 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw) > step_forward(pvmw, PMD_SIZE); > continue; > } > - if (!map_pte(pvmw, &ptl)) { > + if (!map_pte(pvmw, &pmde, &ptl)) { > if (!pvmw->pte) > goto restart; > goto next_pte; > @@ -307,6 +317,12 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw) > if (!pvmw->ptl) { > pvmw->ptl = ptl; > spin_lock(pvmw->ptl); > + if (unlikely(!pmd_same(pmde, pmdp_get_lockless(pvmw->pmd)))) { > + pte_unmap_unlock(pvmw->pte, pvmw->ptl); > + pvmw->ptl = NULL; > + pvmw->pte = NULL; > + goto restart; > + } > } > goto this_pte; > } while (pvmw->address < end);
On 2024/9/5 20:07, Muchun Song wrote: > > > On 2024/9/4 16:40, Qi Zheng wrote: >> In the caller of map_pte(), we may modify the pvmw->pte after acquiring >> the pvmw->ptl, so convert it to using pte_offset_map_rw_nolock(). At >> this time, the pte_same() check is not performed after the pvmw->ptl >> held, >> so we should get pmdval and do pmd_same() check to ensure the >> stability of >> pvmw->pmd. >> >> Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com> >> --- >> mm/page_vma_mapped.c | 24 ++++++++++++++++++++---- >> 1 file changed, 20 insertions(+), 4 deletions(-) >> >> diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c >> index ae5cc42aa2087..f1d73fd448708 100644 >> --- a/mm/page_vma_mapped.c >> +++ b/mm/page_vma_mapped.c >> @@ -13,9 +13,11 @@ static inline bool not_found(struct >> page_vma_mapped_walk *pvmw) >> return false; >> } >> -static bool map_pte(struct page_vma_mapped_walk *pvmw, spinlock_t >> **ptlp) >> +static bool map_pte(struct page_vma_mapped_walk *pvmw, pmd_t *pmdvalp, >> + spinlock_t **ptlp) >> { >> pte_t ptent; >> + pmd_t pmdval; >> if (pvmw->flags & PVMW_SYNC) { >> /* Use the stricter lookup */ >> @@ -25,6 +27,7 @@ static bool map_pte(struct page_vma_mapped_walk >> *pvmw, spinlock_t **ptlp) >> return !!pvmw->pte; >> } >> +again: >> /* >> * It is important to return the ptl corresponding to pte, >> * in case *pvmw->pmd changes underneath us; so we need to >> @@ -32,10 +35,11 @@ static bool map_pte(struct page_vma_mapped_walk >> *pvmw, spinlock_t **ptlp) >> * proceeds to loop over next ptes, and finds a match later. >> * Though, in most cases, page lock already protects this. 
>> */ >> - pvmw->pte = pte_offset_map_nolock(pvmw->vma->vm_mm, pvmw->pmd, >> - pvmw->address, ptlp); >> + pvmw->pte = pte_offset_map_rw_nolock(pvmw->vma->vm_mm, pvmw->pmd, >> + pvmw->address, &pmdval, ptlp); >> if (!pvmw->pte) >> return false; >> + *pmdvalp = pmdval; >> ptent = ptep_get(pvmw->pte); >> @@ -69,6 +73,12 @@ static bool map_pte(struct page_vma_mapped_walk >> *pvmw, spinlock_t **ptlp) >> } >> pvmw->ptl = *ptlp; >> spin_lock(pvmw->ptl); >> + >> + if (unlikely(!pmd_same(pmdval, pmdp_get_lockless(pvmw->pmd)))) { >> + spin_unlock(pvmw->ptl); > > Forgot to clear pvmw->ptl? Or how about moving the assignment for it > to the place where the pmd_same check is successful? > >> + goto again; >> + } >> + > > Maybe here is the right place to assign pvmw->ptl. Right, will do in the v4. > > Muchun, > Thanks. > >> return true; >> } >> @@ -278,7 +288,7 @@ bool page_vma_mapped_walk(struct >> page_vma_mapped_walk *pvmw) >> step_forward(pvmw, PMD_SIZE); >> continue; >> } >> - if (!map_pte(pvmw, &ptl)) { >> + if (!map_pte(pvmw, &pmde, &ptl)) { >> if (!pvmw->pte) >> goto restart; >> goto next_pte; >> @@ -307,6 +317,12 @@ bool page_vma_mapped_walk(struct >> page_vma_mapped_walk *pvmw) >> if (!pvmw->ptl) { >> pvmw->ptl = ptl; >> spin_lock(pvmw->ptl); >> + if (unlikely(!pmd_same(pmde, >> pmdp_get_lockless(pvmw->pmd)))) { >> + pte_unmap_unlock(pvmw->pte, pvmw->ptl); >> + pvmw->ptl = NULL; >> + pvmw->pte = NULL; >> + goto restart; >> + } >> } >> goto this_pte; >> } while (pvmw->address < end); >
diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c index ae5cc42aa2087..f1d73fd448708 100644 --- a/mm/page_vma_mapped.c +++ b/mm/page_vma_mapped.c @@ -13,9 +13,11 @@ static inline bool not_found(struct page_vma_mapped_walk *pvmw) return false; } -static bool map_pte(struct page_vma_mapped_walk *pvmw, spinlock_t **ptlp) +static bool map_pte(struct page_vma_mapped_walk *pvmw, pmd_t *pmdvalp, + spinlock_t **ptlp) { pte_t ptent; + pmd_t pmdval; if (pvmw->flags & PVMW_SYNC) { /* Use the stricter lookup */ @@ -25,6 +27,7 @@ static bool map_pte(struct page_vma_mapped_walk *pvmw, spinlock_t **ptlp) return !!pvmw->pte; } +again: /* * It is important to return the ptl corresponding to pte, * in case *pvmw->pmd changes underneath us; so we need to @@ -32,10 +35,11 @@ static bool map_pte(struct page_vma_mapped_walk *pvmw, spinlock_t **ptlp) * proceeds to loop over next ptes, and finds a match later. * Though, in most cases, page lock already protects this. */ - pvmw->pte = pte_offset_map_nolock(pvmw->vma->vm_mm, pvmw->pmd, - pvmw->address, ptlp); + pvmw->pte = pte_offset_map_rw_nolock(pvmw->vma->vm_mm, pvmw->pmd, + pvmw->address, &pmdval, ptlp); if (!pvmw->pte) return false; + *pmdvalp = pmdval; ptent = ptep_get(pvmw->pte); @@ -69,6 +73,12 @@ static bool map_pte(struct page_vma_mapped_walk *pvmw, spinlock_t **ptlp) } pvmw->ptl = *ptlp; spin_lock(pvmw->ptl); + + if (unlikely(!pmd_same(pmdval, pmdp_get_lockless(pvmw->pmd)))) { + spin_unlock(pvmw->ptl); + goto again; + } + return true; } @@ -278,7 +288,7 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw) step_forward(pvmw, PMD_SIZE); continue; } - if (!map_pte(pvmw, &ptl)) { + if (!map_pte(pvmw, &pmde, &ptl)) { if (!pvmw->pte) goto restart; goto next_pte; @@ -307,6 +317,12 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw) if (!pvmw->ptl) { pvmw->ptl = ptl; spin_lock(pvmw->ptl); + if (unlikely(!pmd_same(pmde, pmdp_get_lockless(pvmw->pmd)))) { + pte_unmap_unlock(pvmw->pte, pvmw->ptl); + pvmw->ptl = 
NULL; + pvmw->pte = NULL; + goto restart; + } } goto this_pte; } while (pvmw->address < end);
In the caller of map_pte(), we may modify pvmw->pte after acquiring pvmw->ptl, so convert it to use pte_offset_map_rw_nolock(). In this case, the pte_same() check is not performed after pvmw->ptl is held, so we should get pmdval and do a pmd_same() check to ensure the stability of pvmw->pmd. Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com> --- mm/page_vma_mapped.c | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-)