Message ID | 20170719044907.21703-2-benh@kernel.crashing.org (mailing list archive) |
---|---|
State | Accepted |
Commit | a46cc7a90fd8d95bfbb2b27080efe872a1a51db4 |
Headers | show |
Benjamin Herrenschmidt <benh@kernel.crashing.org> writes: > At the moment we have to rather sub-optimal flushing behaviours: > > - flush_tlb_mm() will flush the PWC which is unnecessary (for example > when doing a fork) > > - A large unmap will call flush_tlb_pwc() multiple times causing us > to perform that fairly expensive operation repeatedly. This happens > often in batches of 3 on every new process. > > So we change flush_tlb_mm() to only flush the TLB, and we use the > existing "need_flush_all" flag in struct mmu_gather to indicate > that the PWC needs flushing. > > Unfortunately, flush_tlb_range() still needs to do a full flush > for now as it's used by the THP collapsing. We will fix that later. > Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> > Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> > --- > .../powerpc/include/asm/book3s/64/tlbflush-radix.h | 4 +- > arch/powerpc/mm/tlb-radix.c | 66 +++++++++------------- > 2 files changed, 28 insertions(+), 42 deletions(-) > > diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h > index cc7fbde4f53c..7196999cdc82 100644 > --- a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h > +++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h > @@ -22,22 +22,20 @@ extern void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end > > extern void radix__local_flush_tlb_mm(struct mm_struct *mm); > extern void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr); > -extern void radix__local_flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr); > extern void radix__local_flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr, > int psize); > extern void radix__tlb_flush(struct mmu_gather *tlb); > #ifdef CONFIG_SMP > extern void radix__flush_tlb_mm(struct mm_struct *mm); > extern void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr); > -extern void 
radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr); > extern void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr, > int psize); > #else > #define radix__flush_tlb_mm(mm) radix__local_flush_tlb_mm(mm) > #define radix__flush_tlb_page(vma,addr) radix__local_flush_tlb_page(vma,addr) > #define radix__flush_tlb_page_psize(mm,addr,p) radix__local_flush_tlb_page_psize(mm,addr,p) > -#define radix__flush_tlb_pwc(tlb, addr) radix__local_flush_tlb_pwc(tlb, addr) > #endif > +extern void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr); > extern void radix__flush_tlb_lpid_va(unsigned long lpid, unsigned long gpa, > unsigned long page_size); > extern void radix__flush_tlb_lpid(unsigned long lpid); > diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c > index 2f2967a2db93..28f339cdd836 100644 > --- a/arch/powerpc/mm/tlb-radix.c > +++ b/arch/powerpc/mm/tlb-radix.c > @@ -68,17 +68,6 @@ static inline void _tlbiel_pid(unsigned long pid, unsigned long ric) > asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory"); > } > > -static inline void tlbiel_pwc(unsigned long pid) > -{ > - asm volatile("ptesync": : :"memory"); > - > - /* For PWC flush, we don't look at set number */ > - __tlbiel_pid(pid, 0, RIC_FLUSH_PWC); > - > - asm volatile("ptesync": : :"memory"); > - asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory"); > -} > - > static inline void _tlbie_pid(unsigned long pid, unsigned long ric) > { > unsigned long rb,rs,prs,r; > @@ -149,31 +138,23 @@ void radix__local_flush_tlb_mm(struct mm_struct *mm) > preempt_disable(); > pid = mm->context.id; > if (pid != MMU_NO_CONTEXT) > - _tlbiel_pid(pid, RIC_FLUSH_ALL); > + _tlbiel_pid(pid, RIC_FLUSH_TLB); > preempt_enable(); > } > EXPORT_SYMBOL(radix__local_flush_tlb_mm); > > -void radix__local_flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr) > +#ifndef CONFIG_SMP > +static void radix__local_flush_all_mm(struct mm_struct *mm) > { > unsigned long pid; > - 
struct mm_struct *mm = tlb->mm; > - /* > - * If we are doing a full mm flush, we will do a tlb flush > - * with RIC_FLUSH_ALL later. > - */ > - if (tlb->fullmm) > - return; > > preempt_disable(); > - > pid = mm->context.id; > if (pid != MMU_NO_CONTEXT) > - tlbiel_pwc(pid); > - > + _tlbiel_pid(pid, RIC_FLUSH_ALL); > preempt_enable(); > } > -EXPORT_SYMBOL(radix__local_flush_tlb_pwc); > +#endif /* CONFIG_SMP */ > > void radix__local_flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr, > int psize) > @@ -211,38 +192,35 @@ void radix__flush_tlb_mm(struct mm_struct *mm) > goto no_context; > > if (!mm_is_thread_local(mm)) > - _tlbie_pid(pid, RIC_FLUSH_ALL); > + _tlbie_pid(pid, RIC_FLUSH_TLB); > else > - _tlbiel_pid(pid, RIC_FLUSH_ALL); > + _tlbiel_pid(pid, RIC_FLUSH_TLB); > no_context: > preempt_enable(); > } > EXPORT_SYMBOL(radix__flush_tlb_mm); > > -void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr) > +static void radix__flush_all_mm(struct mm_struct *mm) > { > unsigned long pid; > - struct mm_struct *mm = tlb->mm; > > - /* > - * If we are doing a full mm flush, we will do a tlb flush > - * with RIC_FLUSH_ALL later. 
> - */ > - if (tlb->fullmm) > - return; > preempt_disable(); > - > pid = mm->context.id; > if (unlikely(pid == MMU_NO_CONTEXT)) > goto no_context; > > if (!mm_is_thread_local(mm)) > - _tlbie_pid(pid, RIC_FLUSH_PWC); > + _tlbie_pid(pid, RIC_FLUSH_ALL); > else > - tlbiel_pwc(pid); > + _tlbiel_pid(pid, RIC_FLUSH_ALL); > no_context: > preempt_enable(); > } > + > +void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr) > +{ > + tlb->need_flush_all = 1; > +} > EXPORT_SYMBOL(radix__flush_tlb_pwc); > > void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr, > @@ -274,6 +252,8 @@ void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) > } > EXPORT_SYMBOL(radix__flush_tlb_page); > > +#else /* CONFIG_SMP */ > +#define radix__flush_all_mm radix__local_flush_all_mm > #endif /* CONFIG_SMP */ > > void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end) > @@ -291,7 +271,12 @@ void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start, > > { > struct mm_struct *mm = vma->vm_mm; > - radix__flush_tlb_mm(mm); > + > + /* > + * This is currently used when collapsing THPs so we need to > + * flush the PWC. We should fix this. > + */ > + radix__flush_all_mm(mm); > } > EXPORT_SYMBOL(radix__flush_tlb_range); > > @@ -322,7 +307,10 @@ void radix__tlb_flush(struct mmu_gather *tlb) > */ > if (psize != -1 && !tlb->fullmm && !tlb->need_flush_all) > radix__flush_tlb_range_psize(mm, tlb->start, tlb->end, psize); > - else > + else if (tlb->need_flush_all) { > + tlb->need_flush_all = 0; > + radix__flush_all_mm(mm); > + } else > radix__flush_tlb_mm(mm); > } > > -- > 2.13.3
diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h index cc7fbde4f53c..7196999cdc82 100644 --- a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h +++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h @@ -22,22 +22,20 @@ extern void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end extern void radix__local_flush_tlb_mm(struct mm_struct *mm); extern void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr); -extern void radix__local_flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr); extern void radix__local_flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr, int psize); extern void radix__tlb_flush(struct mmu_gather *tlb); #ifdef CONFIG_SMP extern void radix__flush_tlb_mm(struct mm_struct *mm); extern void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr); -extern void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr); extern void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr, int psize); #else #define radix__flush_tlb_mm(mm) radix__local_flush_tlb_mm(mm) #define radix__flush_tlb_page(vma,addr) radix__local_flush_tlb_page(vma,addr) #define radix__flush_tlb_page_psize(mm,addr,p) radix__local_flush_tlb_page_psize(mm,addr,p) -#define radix__flush_tlb_pwc(tlb, addr) radix__local_flush_tlb_pwc(tlb, addr) #endif +extern void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr); extern void radix__flush_tlb_lpid_va(unsigned long lpid, unsigned long gpa, unsigned long page_size); extern void radix__flush_tlb_lpid(unsigned long lpid); diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c index 2f2967a2db93..28f339cdd836 100644 --- a/arch/powerpc/mm/tlb-radix.c +++ b/arch/powerpc/mm/tlb-radix.c @@ -68,17 +68,6 @@ static inline void _tlbiel_pid(unsigned long pid, unsigned long ric) asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory"); } -static 
inline void tlbiel_pwc(unsigned long pid) -{ - asm volatile("ptesync": : :"memory"); - - /* For PWC flush, we don't look at set number */ - __tlbiel_pid(pid, 0, RIC_FLUSH_PWC); - - asm volatile("ptesync": : :"memory"); - asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory"); -} - static inline void _tlbie_pid(unsigned long pid, unsigned long ric) { unsigned long rb,rs,prs,r; @@ -149,31 +138,23 @@ void radix__local_flush_tlb_mm(struct mm_struct *mm) preempt_disable(); pid = mm->context.id; if (pid != MMU_NO_CONTEXT) - _tlbiel_pid(pid, RIC_FLUSH_ALL); + _tlbiel_pid(pid, RIC_FLUSH_TLB); preempt_enable(); } EXPORT_SYMBOL(radix__local_flush_tlb_mm); -void radix__local_flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr) +#ifndef CONFIG_SMP +static void radix__local_flush_all_mm(struct mm_struct *mm) { unsigned long pid; - struct mm_struct *mm = tlb->mm; - /* - * If we are doing a full mm flush, we will do a tlb flush - * with RIC_FLUSH_ALL later. - */ - if (tlb->fullmm) - return; preempt_disable(); - pid = mm->context.id; if (pid != MMU_NO_CONTEXT) - tlbiel_pwc(pid); - + _tlbiel_pid(pid, RIC_FLUSH_ALL); preempt_enable(); } -EXPORT_SYMBOL(radix__local_flush_tlb_pwc); +#endif /* CONFIG_SMP */ void radix__local_flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr, int psize) @@ -211,38 +192,35 @@ void radix__flush_tlb_mm(struct mm_struct *mm) goto no_context; if (!mm_is_thread_local(mm)) - _tlbie_pid(pid, RIC_FLUSH_ALL); + _tlbie_pid(pid, RIC_FLUSH_TLB); else - _tlbiel_pid(pid, RIC_FLUSH_ALL); + _tlbiel_pid(pid, RIC_FLUSH_TLB); no_context: preempt_enable(); } EXPORT_SYMBOL(radix__flush_tlb_mm); -void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr) +static void radix__flush_all_mm(struct mm_struct *mm) { unsigned long pid; - struct mm_struct *mm = tlb->mm; - /* - * If we are doing a full mm flush, we will do a tlb flush - * with RIC_FLUSH_ALL later. 
- */ - if (tlb->fullmm) - return; preempt_disable(); - pid = mm->context.id; if (unlikely(pid == MMU_NO_CONTEXT)) goto no_context; if (!mm_is_thread_local(mm)) - _tlbie_pid(pid, RIC_FLUSH_PWC); + _tlbie_pid(pid, RIC_FLUSH_ALL); else - tlbiel_pwc(pid); + _tlbiel_pid(pid, RIC_FLUSH_ALL); no_context: preempt_enable(); } + +void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr) +{ + tlb->need_flush_all = 1; +} EXPORT_SYMBOL(radix__flush_tlb_pwc); void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr, @@ -274,6 +252,8 @@ void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) } EXPORT_SYMBOL(radix__flush_tlb_page); +#else /* CONFIG_SMP */ +#define radix__flush_all_mm radix__local_flush_all_mm #endif /* CONFIG_SMP */ void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end) @@ -291,7 +271,12 @@ void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start, { struct mm_struct *mm = vma->vm_mm; - radix__flush_tlb_mm(mm); + + /* + * This is currently used when collapsing THPs so we need to + * flush the PWC. We should fix this. + */ + radix__flush_all_mm(mm); } EXPORT_SYMBOL(radix__flush_tlb_range); @@ -322,7 +307,10 @@ void radix__tlb_flush(struct mmu_gather *tlb) */ if (psize != -1 && !tlb->fullmm && !tlb->need_flush_all) radix__flush_tlb_range_psize(mm, tlb->start, tlb->end, psize); - else + else if (tlb->need_flush_all) { + tlb->need_flush_all = 0; + radix__flush_all_mm(mm); + } else radix__flush_tlb_mm(mm); }
At the moment we have two rather sub-optimal flushing behaviours: - flush_tlb_mm() will flush the PWC which is unnecessary (for example when doing a fork) - A large unmap will call flush_tlb_pwc() multiple times causing us to perform that fairly expensive operation repeatedly. This happens often in batches of 3 on every new process. So we change flush_tlb_mm() to only flush the TLB, and we use the existing "need_flush_all" flag in struct mmu_gather to indicate that the PWC needs flushing. Unfortunately, flush_tlb_range() still needs to do a full flush for now as it's used by the THP collapsing. We will fix that later. Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> --- .../powerpc/include/asm/book3s/64/tlbflush-radix.h | 4 +- arch/powerpc/mm/tlb-radix.c | 66 +++++++++------------- 2 files changed, 28 insertions(+), 42 deletions(-)