Message ID | 1228960541-11407-1-git-send-email-yanok@emcraft.com (mailing list archive) |
---|---|
State | Accepted, archived |
Commit | ca9153a3a2a7556d091dfe080e42b0e67881fff6 |
Delegated to: | Benjamin Herrenschmidt |
Headers | show |
On Thu, Dec 11, 2008 at 04:55:41AM +0300, Ilya Yanok wrote: >This patch adds support for page sizes bigger than 4K (16K/64K) on >PPC 44x. >PGDIR table is much smaller than page in case of 16K/64K pages (512 >and 32 bytes resp.) so we allocate PGDIR with kzalloc() instead of >__get_free_pages(). >PTE table covers rather big memory area in case of 16K/64K pages >(32MB and 512MB resp.) so we can easily put FIXMAP and PKMAP in >area covered by one PTE table. > >Signed-off-by: Yuri Tikhonov <yur@emcraft.com> >Signed-off-by: Vladimir Panfilov <pvr@emcraft.com> >Signed-off-by: Ilya Yanok <yanok@emcraft.com> I tested this a bit today on a Bamboo board. Overall, it functioned well enough to not crash :). Note that I also included Hollis' memory size alignment patch which is required. The code looks pretty clean now. I think if we're going to include this patch it should go in now. Acked-by: Josh Boyer <jwboyer@linux.vnet.ibm.com> >--- > arch/powerpc/Kconfig | 58 ++++++++++++++++++++++++-------- > arch/powerpc/include/asm/highmem.h | 19 +++++++++- > arch/powerpc/include/asm/mmu-44x.h | 17 +++++++++ > arch/powerpc/include/asm/page.h | 13 ++++--- > arch/powerpc/include/asm/page_32.h | 7 +++- > arch/powerpc/kernel/asm-offsets.c | 4 ++ > arch/powerpc/kernel/head_44x.S | 23 ++++++++----- > arch/powerpc/kernel/misc_32.S | 12 +++--- > arch/powerpc/mm/pgtable_32.c | 23 ++++++++----- > arch/powerpc/platforms/Kconfig.cputype | 2 +- > 10 files changed, 130 insertions(+), 48 deletions(-) > >diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig >index 525c13a..cd8ff7c 100644 >--- a/arch/powerpc/Kconfig >+++ b/arch/powerpc/Kconfig >@@ -401,23 +401,53 @@ config PPC_HAS_HASH_64K > depends on PPC64 > default n > >-config PPC_64K_PAGES >- bool "64k page size" >- depends on PPC64 >- select PPC_HAS_HASH_64K >+choice >+ prompt "Page size" >+ default PPC_4K_PAGES > help >- This option changes the kernel logical page size to 64k. 
On machines >- without processor support for 64k pages, the kernel will simulate >- them by loading each individual 4k page on demand transparently, >- while on hardware with such support, it will be used to map >- normal application pages. >+ Select the kernel logical page size. Increasing the page size >+ will reduce software overhead at each page boundary, allow >+ hardware prefetch mechanisms to be more effective, and allow >+ larger dma transfers increasing IO efficiency and reducing >+ overhead. However the utilization of memory will increase. >+ For example, each cached file will using a multiple of the >+ page size to hold its contents and the difference between the >+ end of file and the end of page is wasted. >+ >+ Some dedicated systems, such as software raid serving with >+ accelerated calculations, have shown significant increases. >+ >+ If you configure a 64 bit kernel for 64k pages but the >+ processor does not support them, then the kernel will simulate >+ them with 4k pages, loading them on demand, but with the >+ reduced software overhead and larger internal fragmentation. >+ For the 32 bit kernel, a large page option will not be offered >+ unless it is supported by the configured processor. >+ >+ If unsure, choose 4K_PAGES. 
>+ >+config PPC_4K_PAGES >+ bool "4k page size" >+ >+config PPC_16K_PAGES >+ bool "16k page size" if 44x >+ >+config PPC_64K_PAGES >+ bool "64k page size" if 44x || PPC_STD_MMU_64 >+ select PPC_HAS_HASH_64K if PPC_STD_MMU_64 >+ >+endchoice > > config FORCE_MAX_ZONEORDER > int "Maximum zone order" >- range 9 64 if PPC_64K_PAGES >- default "9" if PPC_64K_PAGES >- range 13 64 if PPC64 && !PPC_64K_PAGES >- default "13" if PPC64 && !PPC_64K_PAGES >+ range 9 64 if PPC_STD_MMU_64 && PPC_64K_PAGES >+ default "9" if PPC_STD_MMU_64 && PPC_64K_PAGES >+ range 13 64 if PPC_STD_MMU_64 && !PPC_64K_PAGES >+ default "13" if PPC_STD_MMU_64 && !PPC_64K_PAGES >+ range 9 64 if PPC_STD_MMU_32 && PPC_16K_PAGES >+ default "9" if PPC_STD_MMU_32 && PPC_16K_PAGES >+ range 7 64 if PPC_STD_MMU_32 && PPC_64K_PAGES >+ default "7" if PPC_STD_MMU_32 && PPC_64K_PAGES > range 11 64 > default "11" > help >@@ -437,7 +467,7 @@ config FORCE_MAX_ZONEORDER > > config PPC_SUBPAGE_PROT > bool "Support setting protections for 4k subpages" >- depends on PPC_64K_PAGES >+ depends on PPC_STD_MMU_64 && PPC_64K_PAGES > help > This option adds support for a system call to allow user programs > to set access permissions (read/write, readonly, or no access) >diff --git a/arch/powerpc/include/asm/highmem.h b/arch/powerpc/include/asm/highmem.h >index 91c5895..7d6bb37 100644 >--- a/arch/powerpc/include/asm/highmem.h >+++ b/arch/powerpc/include/asm/highmem.h >@@ -38,9 +38,24 @@ extern pte_t *pkmap_page_table; > * easily, subsequent pte tables have to be allocated in one physical > * chunk of RAM. > */ >-#define LAST_PKMAP (1 << PTE_SHIFT) >-#define LAST_PKMAP_MASK (LAST_PKMAP-1) >+/* >+ * We use one full pte table with 4K pages. And with 16K/64K pages pte >+ * table covers enough memory (32MB and 512MB resp.) that both FIXMAP >+ * and PKMAP can be placed in single pte table. We use 1024 pages for >+ * PKMAP in case of 16K/64K pages. 
>+ */ >+#ifdef CONFIG_PPC_4K_PAGES >+#define PKMAP_ORDER PTE_SHIFT >+#else >+#define PKMAP_ORDER 10 >+#endif >+#define LAST_PKMAP (1 << PKMAP_ORDER) >+#ifndef CONFIG_PPC_4K_PAGES >+#define PKMAP_BASE (FIXADDR_START - PAGE_SIZE*(LAST_PKMAP + 1)) >+#else > #define PKMAP_BASE ((FIXADDR_START - PAGE_SIZE*(LAST_PKMAP + 1)) & PMD_MASK) >+#endif >+#define LAST_PKMAP_MASK (LAST_PKMAP-1) > #define PKMAP_NR(virt) ((virt-PKMAP_BASE) >> PAGE_SHIFT) > #define PKMAP_ADDR(nr) (PKMAP_BASE + ((nr) << PAGE_SHIFT)) > >diff --git a/arch/powerpc/include/asm/mmu-44x.h b/arch/powerpc/include/asm/mmu-44x.h >index a825524..73e1909 100644 >--- a/arch/powerpc/include/asm/mmu-44x.h >+++ b/arch/powerpc/include/asm/mmu-44x.h >@@ -4,6 +4,8 @@ > * PPC440 support > */ > >+#include <asm/page.h> >+ > #define PPC44x_MMUCR_TID 0x000000ff > #define PPC44x_MMUCR_STS 0x00010000 > >@@ -73,4 +75,19 @@ typedef struct { > /* Size of the TLBs used for pinning in lowmem */ > #define PPC_PIN_SIZE (1 << 28) /* 256M */ > >+#if (PAGE_SHIFT == 12) >+#define PPC44x_TLBE_SIZE PPC44x_TLB_4K >+#elif (PAGE_SHIFT == 14) >+#define PPC44x_TLBE_SIZE PPC44x_TLB_16K >+#elif (PAGE_SHIFT == 16) >+#define PPC44x_TLBE_SIZE PPC44x_TLB_64K >+#else >+#error "Unsupported PAGE_SIZE" >+#endif >+ >+#define PPC44x_PGD_OFF_SHIFT (32 - PGDIR_SHIFT + PGD_T_LOG2) >+#define PPC44x_PGD_OFF_MASK_BIT (PGDIR_SHIFT - PGD_T_LOG2) >+#define PPC44x_PTE_ADD_SHIFT (32 - PGDIR_SHIFT + PTE_SHIFT + PTE_T_LOG2) >+#define PPC44x_PTE_ADD_MASK_BIT (32 - PTE_T_LOG2 - PTE_SHIFT) >+ > #endif /* _ASM_POWERPC_MMU_44X_H_ */ >diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h >index c0b8d4a..197d569 100644 >--- a/arch/powerpc/include/asm/page.h >+++ b/arch/powerpc/include/asm/page.h >@@ -19,12 +19,15 @@ > #include <asm/kdump.h> > > /* >- * On PPC32 page size is 4K. For PPC64 we support either 4K or 64K software >+ * On regular PPC32 page size is 4K (but we support 4K/16K/64K pages >+ * on PPC44x). 
For PPC64 we support either 4K or 64K software > * page size. When using 64K pages however, whether we are really supporting > * 64K pages in HW or not is irrelevant to those definitions. > */ >-#ifdef CONFIG_PPC_64K_PAGES >+#if defined(CONFIG_PPC_64K_PAGES) > #define PAGE_SHIFT 16 >+#elif defined(CONFIG_PPC_16K_PAGES) >+#define PAGE_SHIFT 14 > #else > #define PAGE_SHIFT 12 > #endif >@@ -151,7 +154,7 @@ typedef struct { pte_basic_t pte; } pte_t; > /* 64k pages additionally define a bigger "real PTE" type that gathers > * the "second half" part of the PTE for pseudo 64k pages > */ >-#ifdef CONFIG_PPC_64K_PAGES >+#if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_PPC_STD_MMU_64) > typedef struct { pte_t pte; unsigned long hidx; } real_pte_t; > #else > typedef struct { pte_t pte; } real_pte_t; >@@ -191,10 +194,10 @@ typedef pte_basic_t pte_t; > #define pte_val(x) (x) > #define __pte(x) (x) > >-#ifdef CONFIG_PPC_64K_PAGES >+#if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_PPC_STD_MMU_64) > typedef struct { pte_t pte; unsigned long hidx; } real_pte_t; > #else >-typedef unsigned long real_pte_t; >+typedef pte_t real_pte_t; > #endif > > >diff --git a/arch/powerpc/include/asm/page_32.h b/arch/powerpc/include/asm/page_32.h >index d77072a..1458d95 100644 >--- a/arch/powerpc/include/asm/page_32.h >+++ b/arch/powerpc/include/asm/page_32.h >@@ -19,6 +19,8 @@ > #define PTE_FLAGS_OFFSET 0 > #endif > >+#define PTE_SHIFT (PAGE_SHIFT - PTE_T_LOG2) /* full page */ >+ > #ifndef __ASSEMBLY__ > /* > * The basic type of a PTE - 64 bits for those CPUs with > 32 bit >@@ -26,10 +28,8 @@ > */ > #ifdef CONFIG_PTE_64BIT > typedef unsigned long long pte_basic_t; >-#define PTE_SHIFT (PAGE_SHIFT - 3) /* 512 ptes per page */ > #else > typedef unsigned long pte_basic_t; >-#define PTE_SHIFT (PAGE_SHIFT - 2) /* 1024 ptes per page */ > #endif > > struct page; >@@ -39,6 +39,9 @@ extern void copy_page(void *to, void *from); > > #include <asm-generic/page.h> > >+#define PGD_T_LOG2 
(__builtin_ffs(sizeof(pgd_t)) - 1) >+#define PTE_T_LOG2 (__builtin_ffs(sizeof(pte_t)) - 1) >+ > #endif /* __ASSEMBLY__ */ > > #endif /* _ASM_POWERPC_PAGE_32_H */ >diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c >index 75c5dd0..0142318 100644 >--- a/arch/powerpc/kernel/asm-offsets.c >+++ b/arch/powerpc/kernel/asm-offsets.c >@@ -378,6 +378,10 @@ int main(void) > DEFINE(VCPU_FAULT_DEAR, offsetof(struct kvm_vcpu, arch.fault_dear)); > DEFINE(VCPU_FAULT_ESR, offsetof(struct kvm_vcpu, arch.fault_esr)); > #endif >+#ifdef CONFIG_44x >+ DEFINE(PGD_T_LOG2, PGD_T_LOG2); >+ DEFINE(PTE_T_LOG2, PTE_T_LOG2); >+#endif > > return 0; > } >diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S >index f3a1ea9..3bf6bd1 100644 >--- a/arch/powerpc/kernel/head_44x.S >+++ b/arch/powerpc/kernel/head_44x.S >@@ -391,12 +391,14 @@ interrupt_base: > rlwimi r13,r12,10,30,30 > > /* Load the PTE */ >- rlwinm r12, r10, 13, 19, 29 /* Compute pgdir/pmd offset */ >+ /* Compute pgdir/pmd offset */ >+ rlwinm r12, r10, PPC44x_PGD_OFF_SHIFT, PPC44x_PGD_OFF_MASK_BIT, 29 > lwzx r11, r12, r11 /* Get pgd/pmd entry */ > rlwinm. r12, r11, 0, 0, 20 /* Extract pt base address */ > beq 2f /* Bail if no table */ > >- rlwimi r12, r10, 23, 20, 28 /* Compute pte address */ >+ /* Compute pte address */ >+ rlwimi r12, r10, PPC44x_PTE_ADD_SHIFT, PPC44x_PTE_ADD_MASK_BIT, 28 > lwz r11, 0(r12) /* Get high word of pte entry */ > lwz r12, 4(r12) /* Get low word of pte entry */ > >@@ -485,12 +487,14 @@ tlb_44x_patch_hwater_D: > /* Make up the required permissions */ > li r13,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_HWEXEC > >- rlwinm r12, r10, 13, 19, 29 /* Compute pgdir/pmd offset */ >+ /* Compute pgdir/pmd offset */ >+ rlwinm r12, r10, PPC44x_PGD_OFF_SHIFT, PPC44x_PGD_OFF_MASK_BIT, 29 > lwzx r11, r12, r11 /* Get pgd/pmd entry */ > rlwinm. 
r12, r11, 0, 0, 20 /* Extract pt base address */ > beq 2f /* Bail if no table */ > >- rlwimi r12, r10, 23, 20, 28 /* Compute pte address */ >+ /* Compute pte address */ >+ rlwimi r12, r10, PPC44x_PTE_ADD_SHIFT, PPC44x_PTE_ADD_MASK_BIT, 28 > lwz r11, 0(r12) /* Get high word of pte entry */ > lwz r12, 4(r12) /* Get low word of pte entry */ > >@@ -554,15 +558,16 @@ tlb_44x_patch_hwater_I: > */ > finish_tlb_load: > /* Combine RPN & ERPN an write WS 0 */ >- rlwimi r11,r12,0,0,19 >+ rlwimi r11,r12,0,0,31-PAGE_SHIFT > tlbwe r11,r13,PPC44x_TLB_XLAT > > /* > * Create WS1. This is the faulting address (EPN), > * page size, and valid flag. > */ >- li r11,PPC44x_TLB_VALID | PPC44x_TLB_4K >- rlwimi r10,r11,0,20,31 /* Insert valid and page size*/ >+ li r11,PPC44x_TLB_VALID | PPC44x_TLBE_SIZE >+ /* Insert valid and page size */ >+ rlwimi r10,r11,0,PPC44x_PTE_ADD_MASK_BIT,31 > tlbwe r10,r13,PPC44x_TLB_PAGEID /* Write PAGEID */ > > /* And WS 2 */ >@@ -634,12 +639,12 @@ _GLOBAL(set_context) > * goes at the beginning of the data segment, which is page-aligned. > */ > .data >- .align 12 >+ .align PAGE_SHIFT > .globl sdata > sdata: > .globl empty_zero_page > empty_zero_page: >- .space 4096 >+ .space PAGE_SIZE > > /* > * To support >32-bit physical addresses, we use an 8KB pgdir. 
>diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S >index bdc8b0e..950b708 100644 >--- a/arch/powerpc/kernel/misc_32.S >+++ b/arch/powerpc/kernel/misc_32.S >@@ -647,8 +647,8 @@ _GLOBAL(__flush_dcache_icache) > BEGIN_FTR_SECTION > blr > END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) >- rlwinm r3,r3,0,0,19 /* Get page base address */ >- li r4,4096/L1_CACHE_BYTES /* Number of lines in a page */ >+ rlwinm r3,r3,0,0,31-PAGE_SHIFT /* Get page base address */ >+ li r4,PAGE_SIZE/L1_CACHE_BYTES /* Number of lines in a page */ > mtctr r4 > mr r6,r3 > 0: dcbst 0,r3 /* Write line to ram */ >@@ -688,8 +688,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) > rlwinm r0,r10,0,28,26 /* clear DR */ > mtmsr r0 > isync >- rlwinm r3,r3,0,0,19 /* Get page base address */ >- li r4,4096/L1_CACHE_BYTES /* Number of lines in a page */ >+ rlwinm r3,r3,0,0,31-PAGE_SHIFT /* Get page base address */ >+ li r4,PAGE_SIZE/L1_CACHE_BYTES /* Number of lines in a page */ > mtctr r4 > mr r6,r3 > 0: dcbst 0,r3 /* Write line to ram */ >@@ -713,7 +713,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) > * void clear_pages(void *page, int order) ; > */ > _GLOBAL(clear_pages) >- li r0,4096/L1_CACHE_BYTES >+ li r0,PAGE_SIZE/L1_CACHE_BYTES > slw r0,r0,r4 > mtctr r0 > #ifdef CONFIG_8xx >@@ -771,7 +771,7 @@ _GLOBAL(copy_page) > dcbt r5,r4 > li r11,L1_CACHE_BYTES+4 > #endif /* MAX_COPY_PREFETCH */ >- li r0,4096/L1_CACHE_BYTES - MAX_COPY_PREFETCH >+ li r0,PAGE_SIZE/L1_CACHE_BYTES - MAX_COPY_PREFETCH > crclr 4*cr0+eq > 2: > mtctr r0 >diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c >index c31d6d2..f966a5e 100644 >--- a/arch/powerpc/mm/pgtable_32.c >+++ b/arch/powerpc/mm/pgtable_32.c >@@ -72,24 +72,29 @@ extern unsigned long p_mapped_by_tlbcam(unsigned long pa); > #define p_mapped_by_tlbcam(x) (0UL) > #endif /* HAVE_TLBCAM */ > >-#ifdef CONFIG_PTE_64BIT >-/* Some processors use an 8kB pgdir because they have 8-byte Linux PTEs. 
*/ >-#define PGDIR_ORDER 1 >-#else >-#define PGDIR_ORDER 0 >-#endif >+#define PGDIR_ORDER (32 + PGD_T_LOG2 - PGDIR_SHIFT) > > pgd_t *pgd_alloc(struct mm_struct *mm) > { > pgd_t *ret; > >- ret = (pgd_t *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, PGDIR_ORDER); >+ /* pgdir take page or two with 4K pages and a page fraction otherwise */ >+#ifndef CONFIG_PPC_4K_PAGES >+ ret = (pgd_t *)kzalloc(1 << PGDIR_ORDER, GFP_KERNEL); >+#else >+ ret = (pgd_t *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, >+ PGDIR_ORDER - PAGE_SHIFT); >+#endif > return ret; > } > > void pgd_free(struct mm_struct *mm, pgd_t *pgd) > { >- free_pages((unsigned long)pgd, PGDIR_ORDER); >+#ifndef CONFIG_PPC_4K_PAGES >+ kfree((void *)pgd); >+#else >+ free_pages((unsigned long)pgd, PGDIR_ORDER - PAGE_SHIFT); >+#endif > } > > __init_refok pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) >@@ -400,7 +405,7 @@ void kernel_map_pages(struct page *page, int numpages, int enable) > #endif /* CONFIG_DEBUG_PAGEALLOC */ > > static int fixmaps; >-unsigned long FIXADDR_TOP = 0xfffff000; >+unsigned long FIXADDR_TOP = (-PAGE_SIZE); > EXPORT_SYMBOL(FIXADDR_TOP); > > void __set_fixmap (enum fixed_addresses idx, phys_addr_t phys, pgprot_t flags) >diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype >index 548efa5..51098bc 100644 >--- a/arch/powerpc/platforms/Kconfig.cputype >+++ b/arch/powerpc/platforms/Kconfig.cputype >@@ -204,7 +204,7 @@ config PPC_STD_MMU_32 > > config PPC_MM_SLICES > bool >- default y if HUGETLB_PAGE || PPC_64K_PAGES >+ default y if HUGETLB_PAGE || (PPC_STD_MMU_64 && PPC_64K_PAGES) > default n > > config VIRT_CPU_ACCOUNTING >-- >1.5.6.1 > >_______________________________________________ >Linuxppc-dev mailing list >Linuxppc-dev@ozlabs.org >https://ozlabs.org/mailman/listinfo/linuxppc-dev
On Wed, Dec 17, 2008 at 02:56:07PM -0500, Josh Boyer wrote: >On Thu, Dec 11, 2008 at 04:55:41AM +0300, Ilya Yanok wrote: >>This patch adds support for page sizes bigger than 4K (16K/64K) on >>PPC 44x. >>PGDIR table is much smaller than page in case of 16K/64K pages (512 >>and 32 bytes resp.) so we allocate PGDIR with kzalloc() instead of >>__get_free_pages(). >>PTE table covers rather big memory area in case of 16K/64K pages >>(32MB and 512MB resp.) so we can easily put FIXMAP and PKMAP in >>area covered by one PTE table. >> >>Signed-off-by: Yuri Tikhonov <yur@emcraft.com> >>Signed-off-by: Vladimir Panfilov <pvr@emcraft.com> >>Signed-off-by: Ilya Yanok <yanok@emcraft.com> > >I tested this a bit today on a Bamboo board. Overall, it functioned >well enough to not crash :). Note that I also included Hollis' >memory size alignment patch which is required. > >The code looks pretty clean now. I think if we're going to include >this patch it should go in now. > >Acked-by: Josh Boyer <jwboyer@linux.vnet.ibm.com> Ben, Paul, What else is needed to get this patch included? josh
> >The code looks pretty clean now. I think if we're going to include > >this patch it should go in now. > > > >Acked-by: Josh Boyer <jwboyer@linux.vnet.ibm.com> > > Ben, Paul, > > What else is needed to get this patch included? Can you remind us of the pre-req? Cheers, Ben.
On Sat, Dec 27, 2008 at 08:22:55AM +1100, Benjamin Herrenschmidt wrote: > >> >The code looks pretty clean now. I think if we're going to include >> >this patch it should go in now. >> > >> >Acked-by: Josh Boyer <jwboyer@linux.vnet.ibm.com> >> >> Ben, Paul, >> >> What else is needed to get this patch included? > >Can you remind us the pre-req ? http://patchwork.ozlabs.org/patch/10951/ josh
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 525c13a..cd8ff7c 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -401,23 +401,53 @@ config PPC_HAS_HASH_64K depends on PPC64 default n -config PPC_64K_PAGES - bool "64k page size" - depends on PPC64 - select PPC_HAS_HASH_64K +choice + prompt "Page size" + default PPC_4K_PAGES help - This option changes the kernel logical page size to 64k. On machines - without processor support for 64k pages, the kernel will simulate - them by loading each individual 4k page on demand transparently, - while on hardware with such support, it will be used to map - normal application pages. + Select the kernel logical page size. Increasing the page size + will reduce software overhead at each page boundary, allow + hardware prefetch mechanisms to be more effective, and allow + larger dma transfers increasing IO efficiency and reducing + overhead. However the utilization of memory will increase. + For example, each cached file will using a multiple of the + page size to hold its contents and the difference between the + end of file and the end of page is wasted. + + Some dedicated systems, such as software raid serving with + accelerated calculations, have shown significant increases. + + If you configure a 64 bit kernel for 64k pages but the + processor does not support them, then the kernel will simulate + them with 4k pages, loading them on demand, but with the + reduced software overhead and larger internal fragmentation. + For the 32 bit kernel, a large page option will not be offered + unless it is supported by the configured processor. + + If unsure, choose 4K_PAGES. 
+ +config PPC_4K_PAGES + bool "4k page size" + +config PPC_16K_PAGES + bool "16k page size" if 44x + +config PPC_64K_PAGES + bool "64k page size" if 44x || PPC_STD_MMU_64 + select PPC_HAS_HASH_64K if PPC_STD_MMU_64 + +endchoice config FORCE_MAX_ZONEORDER int "Maximum zone order" - range 9 64 if PPC_64K_PAGES - default "9" if PPC_64K_PAGES - range 13 64 if PPC64 && !PPC_64K_PAGES - default "13" if PPC64 && !PPC_64K_PAGES + range 9 64 if PPC_STD_MMU_64 && PPC_64K_PAGES + default "9" if PPC_STD_MMU_64 && PPC_64K_PAGES + range 13 64 if PPC_STD_MMU_64 && !PPC_64K_PAGES + default "13" if PPC_STD_MMU_64 && !PPC_64K_PAGES + range 9 64 if PPC_STD_MMU_32 && PPC_16K_PAGES + default "9" if PPC_STD_MMU_32 && PPC_16K_PAGES + range 7 64 if PPC_STD_MMU_32 && PPC_64K_PAGES + default "7" if PPC_STD_MMU_32 && PPC_64K_PAGES range 11 64 default "11" help @@ -437,7 +467,7 @@ config FORCE_MAX_ZONEORDER config PPC_SUBPAGE_PROT bool "Support setting protections for 4k subpages" - depends on PPC_64K_PAGES + depends on PPC_STD_MMU_64 && PPC_64K_PAGES help This option adds support for a system call to allow user programs to set access permissions (read/write, readonly, or no access) diff --git a/arch/powerpc/include/asm/highmem.h b/arch/powerpc/include/asm/highmem.h index 91c5895..7d6bb37 100644 --- a/arch/powerpc/include/asm/highmem.h +++ b/arch/powerpc/include/asm/highmem.h @@ -38,9 +38,24 @@ extern pte_t *pkmap_page_table; * easily, subsequent pte tables have to be allocated in one physical * chunk of RAM. */ -#define LAST_PKMAP (1 << PTE_SHIFT) -#define LAST_PKMAP_MASK (LAST_PKMAP-1) +/* + * We use one full pte table with 4K pages. And with 16K/64K pages pte + * table covers enough memory (32MB and 512MB resp.) that both FIXMAP + * and PKMAP can be placed in single pte table. We use 1024 pages for + * PKMAP in case of 16K/64K pages. 
+ */ +#ifdef CONFIG_PPC_4K_PAGES +#define PKMAP_ORDER PTE_SHIFT +#else +#define PKMAP_ORDER 10 +#endif +#define LAST_PKMAP (1 << PKMAP_ORDER) +#ifndef CONFIG_PPC_4K_PAGES +#define PKMAP_BASE (FIXADDR_START - PAGE_SIZE*(LAST_PKMAP + 1)) +#else #define PKMAP_BASE ((FIXADDR_START - PAGE_SIZE*(LAST_PKMAP + 1)) & PMD_MASK) +#endif +#define LAST_PKMAP_MASK (LAST_PKMAP-1) #define PKMAP_NR(virt) ((virt-PKMAP_BASE) >> PAGE_SHIFT) #define PKMAP_ADDR(nr) (PKMAP_BASE + ((nr) << PAGE_SHIFT)) diff --git a/arch/powerpc/include/asm/mmu-44x.h b/arch/powerpc/include/asm/mmu-44x.h index a825524..73e1909 100644 --- a/arch/powerpc/include/asm/mmu-44x.h +++ b/arch/powerpc/include/asm/mmu-44x.h @@ -4,6 +4,8 @@ * PPC440 support */ +#include <asm/page.h> + #define PPC44x_MMUCR_TID 0x000000ff #define PPC44x_MMUCR_STS 0x00010000 @@ -73,4 +75,19 @@ typedef struct { /* Size of the TLBs used for pinning in lowmem */ #define PPC_PIN_SIZE (1 << 28) /* 256M */ +#if (PAGE_SHIFT == 12) +#define PPC44x_TLBE_SIZE PPC44x_TLB_4K +#elif (PAGE_SHIFT == 14) +#define PPC44x_TLBE_SIZE PPC44x_TLB_16K +#elif (PAGE_SHIFT == 16) +#define PPC44x_TLBE_SIZE PPC44x_TLB_64K +#else +#error "Unsupported PAGE_SIZE" +#endif + +#define PPC44x_PGD_OFF_SHIFT (32 - PGDIR_SHIFT + PGD_T_LOG2) +#define PPC44x_PGD_OFF_MASK_BIT (PGDIR_SHIFT - PGD_T_LOG2) +#define PPC44x_PTE_ADD_SHIFT (32 - PGDIR_SHIFT + PTE_SHIFT + PTE_T_LOG2) +#define PPC44x_PTE_ADD_MASK_BIT (32 - PTE_T_LOG2 - PTE_SHIFT) + #endif /* _ASM_POWERPC_MMU_44X_H_ */ diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index c0b8d4a..197d569 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -19,12 +19,15 @@ #include <asm/kdump.h> /* - * On PPC32 page size is 4K. For PPC64 we support either 4K or 64K software + * On regular PPC32 page size is 4K (but we support 4K/16K/64K pages + * on PPC44x). For PPC64 we support either 4K or 64K software * page size. 
When using 64K pages however, whether we are really supporting * 64K pages in HW or not is irrelevant to those definitions. */ -#ifdef CONFIG_PPC_64K_PAGES +#if defined(CONFIG_PPC_64K_PAGES) #define PAGE_SHIFT 16 +#elif defined(CONFIG_PPC_16K_PAGES) +#define PAGE_SHIFT 14 #else #define PAGE_SHIFT 12 #endif @@ -151,7 +154,7 @@ typedef struct { pte_basic_t pte; } pte_t; /* 64k pages additionally define a bigger "real PTE" type that gathers * the "second half" part of the PTE for pseudo 64k pages */ -#ifdef CONFIG_PPC_64K_PAGES +#if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_PPC_STD_MMU_64) typedef struct { pte_t pte; unsigned long hidx; } real_pte_t; #else typedef struct { pte_t pte; } real_pte_t; @@ -191,10 +194,10 @@ typedef pte_basic_t pte_t; #define pte_val(x) (x) #define __pte(x) (x) -#ifdef CONFIG_PPC_64K_PAGES +#if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_PPC_STD_MMU_64) typedef struct { pte_t pte; unsigned long hidx; } real_pte_t; #else -typedef unsigned long real_pte_t; +typedef pte_t real_pte_t; #endif diff --git a/arch/powerpc/include/asm/page_32.h b/arch/powerpc/include/asm/page_32.h index d77072a..1458d95 100644 --- a/arch/powerpc/include/asm/page_32.h +++ b/arch/powerpc/include/asm/page_32.h @@ -19,6 +19,8 @@ #define PTE_FLAGS_OFFSET 0 #endif +#define PTE_SHIFT (PAGE_SHIFT - PTE_T_LOG2) /* full page */ + #ifndef __ASSEMBLY__ /* * The basic type of a PTE - 64 bits for those CPUs with > 32 bit @@ -26,10 +28,8 @@ */ #ifdef CONFIG_PTE_64BIT typedef unsigned long long pte_basic_t; -#define PTE_SHIFT (PAGE_SHIFT - 3) /* 512 ptes per page */ #else typedef unsigned long pte_basic_t; -#define PTE_SHIFT (PAGE_SHIFT - 2) /* 1024 ptes per page */ #endif struct page; @@ -39,6 +39,9 @@ extern void copy_page(void *to, void *from); #include <asm-generic/page.h> +#define PGD_T_LOG2 (__builtin_ffs(sizeof(pgd_t)) - 1) +#define PTE_T_LOG2 (__builtin_ffs(sizeof(pte_t)) - 1) + #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_PAGE_32_H */ diff --git 
a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 75c5dd0..0142318 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -378,6 +378,10 @@ int main(void) DEFINE(VCPU_FAULT_DEAR, offsetof(struct kvm_vcpu, arch.fault_dear)); DEFINE(VCPU_FAULT_ESR, offsetof(struct kvm_vcpu, arch.fault_esr)); #endif +#ifdef CONFIG_44x + DEFINE(PGD_T_LOG2, PGD_T_LOG2); + DEFINE(PTE_T_LOG2, PTE_T_LOG2); +#endif return 0; } diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S index f3a1ea9..3bf6bd1 100644 --- a/arch/powerpc/kernel/head_44x.S +++ b/arch/powerpc/kernel/head_44x.S @@ -391,12 +391,14 @@ interrupt_base: rlwimi r13,r12,10,30,30 /* Load the PTE */ - rlwinm r12, r10, 13, 19, 29 /* Compute pgdir/pmd offset */ + /* Compute pgdir/pmd offset */ + rlwinm r12, r10, PPC44x_PGD_OFF_SHIFT, PPC44x_PGD_OFF_MASK_BIT, 29 lwzx r11, r12, r11 /* Get pgd/pmd entry */ rlwinm. r12, r11, 0, 0, 20 /* Extract pt base address */ beq 2f /* Bail if no table */ - rlwimi r12, r10, 23, 20, 28 /* Compute pte address */ + /* Compute pte address */ + rlwimi r12, r10, PPC44x_PTE_ADD_SHIFT, PPC44x_PTE_ADD_MASK_BIT, 28 lwz r11, 0(r12) /* Get high word of pte entry */ lwz r12, 4(r12) /* Get low word of pte entry */ @@ -485,12 +487,14 @@ tlb_44x_patch_hwater_D: /* Make up the required permissions */ li r13,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_HWEXEC - rlwinm r12, r10, 13, 19, 29 /* Compute pgdir/pmd offset */ + /* Compute pgdir/pmd offset */ + rlwinm r12, r10, PPC44x_PGD_OFF_SHIFT, PPC44x_PGD_OFF_MASK_BIT, 29 lwzx r11, r12, r11 /* Get pgd/pmd entry */ rlwinm. 
r12, r11, 0, 0, 20 /* Extract pt base address */ beq 2f /* Bail if no table */ - rlwimi r12, r10, 23, 20, 28 /* Compute pte address */ + /* Compute pte address */ + rlwimi r12, r10, PPC44x_PTE_ADD_SHIFT, PPC44x_PTE_ADD_MASK_BIT, 28 lwz r11, 0(r12) /* Get high word of pte entry */ lwz r12, 4(r12) /* Get low word of pte entry */ @@ -554,15 +558,16 @@ tlb_44x_patch_hwater_I: */ finish_tlb_load: /* Combine RPN & ERPN an write WS 0 */ - rlwimi r11,r12,0,0,19 + rlwimi r11,r12,0,0,31-PAGE_SHIFT tlbwe r11,r13,PPC44x_TLB_XLAT /* * Create WS1. This is the faulting address (EPN), * page size, and valid flag. */ - li r11,PPC44x_TLB_VALID | PPC44x_TLB_4K - rlwimi r10,r11,0,20,31 /* Insert valid and page size*/ + li r11,PPC44x_TLB_VALID | PPC44x_TLBE_SIZE + /* Insert valid and page size */ + rlwimi r10,r11,0,PPC44x_PTE_ADD_MASK_BIT,31 tlbwe r10,r13,PPC44x_TLB_PAGEID /* Write PAGEID */ /* And WS 2 */ @@ -634,12 +639,12 @@ _GLOBAL(set_context) * goes at the beginning of the data segment, which is page-aligned. */ .data - .align 12 + .align PAGE_SHIFT .globl sdata sdata: .globl empty_zero_page empty_zero_page: - .space 4096 + .space PAGE_SIZE /* * To support >32-bit physical addresses, we use an 8KB pgdir. 
diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index bdc8b0e..950b708 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -647,8 +647,8 @@ _GLOBAL(__flush_dcache_icache) BEGIN_FTR_SECTION blr END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) - rlwinm r3,r3,0,0,19 /* Get page base address */ - li r4,4096/L1_CACHE_BYTES /* Number of lines in a page */ + rlwinm r3,r3,0,0,31-PAGE_SHIFT /* Get page base address */ + li r4,PAGE_SIZE/L1_CACHE_BYTES /* Number of lines in a page */ mtctr r4 mr r6,r3 0: dcbst 0,r3 /* Write line to ram */ @@ -688,8 +688,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) rlwinm r0,r10,0,28,26 /* clear DR */ mtmsr r0 isync - rlwinm r3,r3,0,0,19 /* Get page base address */ - li r4,4096/L1_CACHE_BYTES /* Number of lines in a page */ + rlwinm r3,r3,0,0,31-PAGE_SHIFT /* Get page base address */ + li r4,PAGE_SIZE/L1_CACHE_BYTES /* Number of lines in a page */ mtctr r4 mr r6,r3 0: dcbst 0,r3 /* Write line to ram */ @@ -713,7 +713,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) * void clear_pages(void *page, int order) ; */ _GLOBAL(clear_pages) - li r0,4096/L1_CACHE_BYTES + li r0,PAGE_SIZE/L1_CACHE_BYTES slw r0,r0,r4 mtctr r0 #ifdef CONFIG_8xx @@ -771,7 +771,7 @@ _GLOBAL(copy_page) dcbt r5,r4 li r11,L1_CACHE_BYTES+4 #endif /* MAX_COPY_PREFETCH */ - li r0,4096/L1_CACHE_BYTES - MAX_COPY_PREFETCH + li r0,PAGE_SIZE/L1_CACHE_BYTES - MAX_COPY_PREFETCH crclr 4*cr0+eq 2: mtctr r0 diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index c31d6d2..f966a5e 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -72,24 +72,29 @@ extern unsigned long p_mapped_by_tlbcam(unsigned long pa); #define p_mapped_by_tlbcam(x) (0UL) #endif /* HAVE_TLBCAM */ -#ifdef CONFIG_PTE_64BIT -/* Some processors use an 8kB pgdir because they have 8-byte Linux PTEs. 
*/ -#define PGDIR_ORDER 1 -#else -#define PGDIR_ORDER 0 -#endif +#define PGDIR_ORDER (32 + PGD_T_LOG2 - PGDIR_SHIFT) pgd_t *pgd_alloc(struct mm_struct *mm) { pgd_t *ret; - ret = (pgd_t *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, PGDIR_ORDER); + /* pgdir take page or two with 4K pages and a page fraction otherwise */ +#ifndef CONFIG_PPC_4K_PAGES + ret = (pgd_t *)kzalloc(1 << PGDIR_ORDER, GFP_KERNEL); +#else + ret = (pgd_t *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, + PGDIR_ORDER - PAGE_SHIFT); +#endif return ret; } void pgd_free(struct mm_struct *mm, pgd_t *pgd) { - free_pages((unsigned long)pgd, PGDIR_ORDER); +#ifndef CONFIG_PPC_4K_PAGES + kfree((void *)pgd); +#else + free_pages((unsigned long)pgd, PGDIR_ORDER - PAGE_SHIFT); +#endif } __init_refok pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) @@ -400,7 +405,7 @@ void kernel_map_pages(struct page *page, int numpages, int enable) #endif /* CONFIG_DEBUG_PAGEALLOC */ static int fixmaps; -unsigned long FIXADDR_TOP = 0xfffff000; +unsigned long FIXADDR_TOP = (-PAGE_SIZE); EXPORT_SYMBOL(FIXADDR_TOP); void __set_fixmap (enum fixed_addresses idx, phys_addr_t phys, pgprot_t flags) diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 548efa5..51098bc 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -204,7 +204,7 @@ config PPC_STD_MMU_32 config PPC_MM_SLICES bool - default y if HUGETLB_PAGE || PPC_64K_PAGES + default y if HUGETLB_PAGE || (PPC_STD_MMU_64 && PPC_64K_PAGES) default n config VIRT_CPU_ACCOUNTING