Message ID | 20171201201048.8883-1-cascardo@canonical.com |
---|---|
State | New |
Headers | show |
Series | [CVE-2017-1000405,artful] mm, thp: Do not make page table dirty unconditionally in touch_p[mu]d() | expand |
On 01.12.2017 21:10, Thadeu Lima de Souza Cascardo wrote: > From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com> > > Currently, we unconditionally make page table dirty in touch_pmd(). > It may result in false-positive can_follow_write_pmd(). > > We may avoid the situation, if we would only make the page table entry > dirty if caller asks for write access -- FOLL_WRITE. > > The patch also changes touch_pud() in the same way. > > Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> > Cc: Michal Hocko <mhocko@suse.com> > Cc: Hugh Dickins <hughd@google.com> > Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> > (cherry picked from commit a8f97366452ed491d13cf1e44241bc0b5740b1f0) > CVE-2017-1000405 > Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com> Acked-by: Stefan Bader <stefan.bader@canonical.com> > --- > > Reproducer has been tested. It "exploits" without the patch. With the patch, it > fails. Cherry pick and tested. > > --- > mm/huge_memory.c | 36 +++++++++++++----------------------- > 1 file changed, 13 insertions(+), 23 deletions(-) > > diff --git a/mm/huge_memory.c b/mm/huge_memory.c > index 90731e3b7e58..8b887db33383 100644 > --- a/mm/huge_memory.c > +++ b/mm/huge_memory.c > @@ -842,20 +842,15 @@ EXPORT_SYMBOL_GPL(vmf_insert_pfn_pud); > #endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */ > > static void touch_pmd(struct vm_area_struct *vma, unsigned long addr, > - pmd_t *pmd) > + pmd_t *pmd, int flags) > { > pmd_t _pmd; > > - /* > - * We should set the dirty bit only for FOLL_WRITE but for now > - * the dirty bit in the pmd is meaningless. And if the dirty > - * bit will become meaningful and we'll only set it with > - * FOLL_WRITE, an atomic set_bit will be required on the pmd to > - * set the young bit, instead of the current set_pmd_at. 
> - */ > - _pmd = pmd_mkyoung(pmd_mkdirty(*pmd)); > + _pmd = pmd_mkyoung(*pmd); > + if (flags & FOLL_WRITE) > + _pmd = pmd_mkdirty(_pmd); > if (pmdp_set_access_flags(vma, addr & HPAGE_PMD_MASK, > - pmd, _pmd, 1)) > + pmd, _pmd, flags & FOLL_WRITE)) > update_mmu_cache_pmd(vma, addr, pmd); > } > > @@ -884,7 +879,7 @@ struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr, > return NULL; > > if (flags & FOLL_TOUCH) > - touch_pmd(vma, addr, pmd); > + touch_pmd(vma, addr, pmd, flags); > > /* > * device mapped pages can only be returned if the > @@ -973,20 +968,15 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, > > #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD > static void touch_pud(struct vm_area_struct *vma, unsigned long addr, > - pud_t *pud) > + pud_t *pud, int flags) > { > pud_t _pud; > > - /* > - * We should set the dirty bit only for FOLL_WRITE but for now > - * the dirty bit in the pud is meaningless. And if the dirty > - * bit will become meaningful and we'll only set it with > - * FOLL_WRITE, an atomic set_bit will be required on the pud to > - * set the young bit, instead of the current set_pud_at. 
> - */ > - _pud = pud_mkyoung(pud_mkdirty(*pud)); > + _pud = pud_mkyoung(*pud); > + if (flags & FOLL_WRITE) > + _pud = pud_mkdirty(_pud); > if (pudp_set_access_flags(vma, addr & HPAGE_PUD_MASK, > - pud, _pud, 1)) > + pud, _pud, flags & FOLL_WRITE)) > update_mmu_cache_pud(vma, addr, pud); > } > > @@ -1009,7 +999,7 @@ struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr, > return NULL; > > if (flags & FOLL_TOUCH) > - touch_pud(vma, addr, pud); > + touch_pud(vma, addr, pud, flags); > > /* > * device mapped pages can only be returned if the > @@ -1371,7 +1361,7 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma, > page = pmd_page(*pmd); > VM_BUG_ON_PAGE(!PageHead(page) && !is_zone_device_page(page), page); > if (flags & FOLL_TOUCH) > - touch_pmd(vma, addr, pmd); > + touch_pmd(vma, addr, pmd, flags); > if ((flags & FOLL_MLOCK) && (vma->vm_flags & VM_LOCKED)) { > /* > * We don't mlock() pte-mapped THPs. This way we can avoid >
On 01/12/17 20:10, Thadeu Lima de Souza Cascardo wrote: > From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com> > > Currently, we unconditionally make page table dirty in touch_pmd(). > It may result in false-positive can_follow_write_pmd(). > > We may avoid the situation, if we would only make the page table entry > dirty if caller asks for write access -- FOLL_WRITE. > > The patch also changes touch_pud() in the same way. > > Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> > Cc: Michal Hocko <mhocko@suse.com> > Cc: Hugh Dickins <hughd@google.com> > Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> > (cherry picked from commit a8f97366452ed491d13cf1e44241bc0b5740b1f0) > CVE-2017-1000405 > Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com> > --- > > Reproducer has been tested. It "exploits" without the patch. With the patch, it > fails. > > --- > mm/huge_memory.c | 36 +++++++++++++----------------------- > 1 file changed, 13 insertions(+), 23 deletions(-) > > diff --git a/mm/huge_memory.c b/mm/huge_memory.c > index 90731e3b7e58..8b887db33383 100644 > --- a/mm/huge_memory.c > +++ b/mm/huge_memory.c > @@ -842,20 +842,15 @@ EXPORT_SYMBOL_GPL(vmf_insert_pfn_pud); > #endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */ > > static void touch_pmd(struct vm_area_struct *vma, unsigned long addr, > - pmd_t *pmd) > + pmd_t *pmd, int flags) > { > pmd_t _pmd; > > - /* > - * We should set the dirty bit only for FOLL_WRITE but for now > - * the dirty bit in the pmd is meaningless. And if the dirty > - * bit will become meaningful and we'll only set it with > - * FOLL_WRITE, an atomic set_bit will be required on the pmd to > - * set the young bit, instead of the current set_pmd_at. 
> - */ > - _pmd = pmd_mkyoung(pmd_mkdirty(*pmd)); > + _pmd = pmd_mkyoung(*pmd); > + if (flags & FOLL_WRITE) > + _pmd = pmd_mkdirty(_pmd); > if (pmdp_set_access_flags(vma, addr & HPAGE_PMD_MASK, > - pmd, _pmd, 1)) > + pmd, _pmd, flags & FOLL_WRITE)) > update_mmu_cache_pmd(vma, addr, pmd); > } > > @@ -884,7 +879,7 @@ struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr, > return NULL; > > if (flags & FOLL_TOUCH) > - touch_pmd(vma, addr, pmd); > + touch_pmd(vma, addr, pmd, flags); > > /* > * device mapped pages can only be returned if the > @@ -973,20 +968,15 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, > > #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD > static void touch_pud(struct vm_area_struct *vma, unsigned long addr, > - pud_t *pud) > + pud_t *pud, int flags) > { > pud_t _pud; > > - /* > - * We should set the dirty bit only for FOLL_WRITE but for now > - * the dirty bit in the pud is meaningless. And if the dirty > - * bit will become meaningful and we'll only set it with > - * FOLL_WRITE, an atomic set_bit will be required on the pud to > - * set the young bit, instead of the current set_pud_at. 
> - */ > - _pud = pud_mkyoung(pud_mkdirty(*pud)); > + _pud = pud_mkyoung(*pud); > + if (flags & FOLL_WRITE) > + _pud = pud_mkdirty(_pud); > if (pudp_set_access_flags(vma, addr & HPAGE_PUD_MASK, > - pud, _pud, 1)) > + pud, _pud, flags & FOLL_WRITE)) > update_mmu_cache_pud(vma, addr, pud); > } > > @@ -1009,7 +999,7 @@ struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr, > return NULL; > > if (flags & FOLL_TOUCH) > - touch_pud(vma, addr, pud); > + touch_pud(vma, addr, pud, flags); > > /* > * device mapped pages can only be returned if the > @@ -1371,7 +1361,7 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma, > page = pmd_page(*pmd); > VM_BUG_ON_PAGE(!PageHead(page) && !is_zone_device_page(page), page); > if (flags & FOLL_TOUCH) > - touch_pmd(vma, addr, pmd); > + touch_pmd(vma, addr, pmd, flags); > if ((flags & FOLL_MLOCK) && (vma->vm_flags & VM_LOCKED)) { > /* > * We don't mlock() pte-mapped THPs. This way we can avoid > Clean upstream cherry pick and positive test results. Acked-by: Colin Ian King <colin.king@canonical.com>
Applied to artful master-next branch.

Thanks.

Cascardo.

Applied-to: artful/master-next
```diff
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 90731e3b7e58..8b887db33383 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -842,20 +842,15 @@ EXPORT_SYMBOL_GPL(vmf_insert_pfn_pud);
 #endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
 
 static void touch_pmd(struct vm_area_struct *vma, unsigned long addr,
-		pmd_t *pmd)
+		pmd_t *pmd, int flags)
 {
 	pmd_t _pmd;
 
-	/*
-	 * We should set the dirty bit only for FOLL_WRITE but for now
-	 * the dirty bit in the pmd is meaningless. And if the dirty
-	 * bit will become meaningful and we'll only set it with
-	 * FOLL_WRITE, an atomic set_bit will be required on the pmd to
-	 * set the young bit, instead of the current set_pmd_at.
-	 */
-	_pmd = pmd_mkyoung(pmd_mkdirty(*pmd));
+	_pmd = pmd_mkyoung(*pmd);
+	if (flags & FOLL_WRITE)
+		_pmd = pmd_mkdirty(_pmd);
 	if (pmdp_set_access_flags(vma, addr & HPAGE_PMD_MASK,
-				pmd, _pmd, 1))
+				pmd, _pmd, flags & FOLL_WRITE))
 		update_mmu_cache_pmd(vma, addr, pmd);
 }
 
@@ -884,7 +879,7 @@ struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
 		return NULL;
 
 	if (flags & FOLL_TOUCH)
-		touch_pmd(vma, addr, pmd);
+		touch_pmd(vma, addr, pmd, flags);
 
 	/*
 	 * device mapped pages can only be returned if the
@@ -973,20 +968,15 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 
 #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
 static void touch_pud(struct vm_area_struct *vma, unsigned long addr,
-		pud_t *pud)
+		pud_t *pud, int flags)
 {
 	pud_t _pud;
 
-	/*
-	 * We should set the dirty bit only for FOLL_WRITE but for now
-	 * the dirty bit in the pud is meaningless. And if the dirty
-	 * bit will become meaningful and we'll only set it with
-	 * FOLL_WRITE, an atomic set_bit will be required on the pud to
-	 * set the young bit, instead of the current set_pud_at.
-	 */
-	_pud = pud_mkyoung(pud_mkdirty(*pud));
+	_pud = pud_mkyoung(*pud);
+	if (flags & FOLL_WRITE)
+		_pud = pud_mkdirty(_pud);
 	if (pudp_set_access_flags(vma, addr & HPAGE_PUD_MASK,
-				pud, _pud, 1))
+				pud, _pud, flags & FOLL_WRITE))
 		update_mmu_cache_pud(vma, addr, pud);
 }
 
@@ -1009,7 +999,7 @@ struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr,
 		return NULL;
 
 	if (flags & FOLL_TOUCH)
-		touch_pud(vma, addr, pud);
+		touch_pud(vma, addr, pud, flags);
 
 	/*
 	 * device mapped pages can only be returned if the
@@ -1371,7 +1361,7 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
 	page = pmd_page(*pmd);
 	VM_BUG_ON_PAGE(!PageHead(page) && !is_zone_device_page(page), page);
 	if (flags & FOLL_TOUCH)
-		touch_pmd(vma, addr, pmd);
+		touch_pmd(vma, addr, pmd, flags);
 	if ((flags & FOLL_MLOCK) && (vma->vm_flags & VM_LOCKED)) {
 		/*
 		 * We don't mlock() pte-mapped THPs. This way we can avoid
```