@@ -281,94 +281,6 @@ void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0,
}
EXPORT_SYMBOL_GPL(mmu_partition_table_set_entry);
-static pmd_t *get_pmd_from_cache(struct mm_struct *mm)
-{
- void *pmd_frag, *ret;
-
- if (PMD_FRAG_NR == 1)
- return NULL;
-
- spin_lock(&mm->page_table_lock);
- ret = mm->context.pmd_frag;
- if (ret) {
- pmd_frag = ret + PMD_FRAG_SIZE;
- /*
- * If we have taken up all the fragments mark PTE page NULL
- */
- if (((unsigned long)pmd_frag & ~PAGE_MASK) == 0)
- pmd_frag = NULL;
- mm->context.pmd_frag = pmd_frag;
- }
- spin_unlock(&mm->page_table_lock);
- return (pmd_t *)ret;
-}
-
-static pmd_t *__alloc_for_pmdcache(struct mm_struct *mm)
-{
- void *ret = NULL;
- struct page *page;
- gfp_t gfp = GFP_KERNEL_ACCOUNT | __GFP_ZERO;
-
- if (mm == &init_mm)
- gfp &= ~__GFP_ACCOUNT;
- page = alloc_page(gfp);
- if (!page)
- return NULL;
- if (!pgtable_pmd_page_ctor(page)) {
- __free_pages(page, 0);
- return NULL;
- }
-
- atomic_set(&page->pt_frag_refcount, 1);
-
- ret = page_address(page);
- /*
- * if we support only one fragment just return the
- * allocated page.
- */
- if (PMD_FRAG_NR == 1)
- return ret;
-
- spin_lock(&mm->page_table_lock);
- /*
- * If we find pgtable_page set, we return
- * the allocated page with single fragment
- * count.
- */
- if (likely(!mm->context.pmd_frag)) {
- atomic_set(&page->pt_frag_refcount, PMD_FRAG_NR);
- mm->context.pmd_frag = ret + PMD_FRAG_SIZE;
- }
- spin_unlock(&mm->page_table_lock);
-
- return (pmd_t *)ret;
-}
-
-pmd_t *pmd_fragment_alloc(struct mm_struct *mm, unsigned long vmaddr)
-{
- pmd_t *pmd;
-
- pmd = get_pmd_from_cache(mm);
- if (pmd)
- return pmd;
-
- return __alloc_for_pmdcache(mm);
-}
-
-void pmd_fragment_free(unsigned long *pmd)
-{
- struct page *page = virt_to_page(pmd);
-
- if (PageReserved(page))
- return free_reserved_page(page);
-
- BUG_ON(atomic_read(&page->pt_frag_refcount) <= 0);
- if (atomic_dec_and_test(&page->pt_frag_refcount)) {
- pgtable_pmd_page_dtor(page);
- __free_page(page);
- }
-}
-
static inline void pgtable_free(void *table, int index)
{
switch (index) {
@@ -150,6 +150,8 @@ void pgtable_cache_add(unsigned int shift)
}
EXPORT_SYMBOL_GPL(pgtable_cache_add); /* used by kvm_hv module */
+void pt_frag_init(void);
+
void pgtable_cache_init(void)
{
pgtable_cache_add(PGD_INDEX_SIZE);
@@ -163,4 +165,6 @@ void pgtable_cache_init(void)
*/
if (PUD_CACHE_INDEX)
pgtable_cache_add(PUD_CACHE_INDEX);
+
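+ /* Set up the per-CPU page table fragment caches */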
+ pt_frag_init();
}
@@ -15,108 +15,317 @@
#include <asm/tlbflush.h>
#include <asm/tlb.h>
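+/*
+ * Describes one page carved into page table fragments: the backing page,
+ * its place on a per-CPU freelist, the CPU whose cache owns it, a count
+ * of free fragments, a free list threaded through the free fragments
+ * themselves, and one split page table lock per fragment.
+ */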
+struct pt_frag {
+ struct page *page;
+ struct list_head list;
+ int cpu;
+ unsigned int nr_free;
+ void *free_ptr;
+ spinlock_t locks[];
+};
+
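+/*
+ * Per-CPU cache of pages with free page table fragments. Pages with at
+ * least one free fragment sit on @freelist; @nr_free counts the free
+ * fragments across all of them.
+ */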
+struct pt_frag_alloc {
+ /*
+ * The lock must disable bottom halves because pte frags can be freed
+ * from RCU callbacks, which run in softirq context.
+ */
+ spinlock_t lock;
+ size_t nr_free;
+ struct list_head freelist;
+ /* XXX: could make a remote freelist and only that needs locking,
+ * atomic nr_allocated and the first freer would be responsible
+ * for putting it on the correct queue
+ */
+};
+
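+/* Separate per-CPU caches for user PTE, kernel PTE and PMD fragments. */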
+static DEFINE_PER_CPU(struct pt_frag_alloc, pte_frag_alloc);
+static DEFINE_PER_CPU(struct pt_frag_alloc, pte_frag_alloc_kernel);
+static DEFINE_PER_CPU(struct pt_frag_alloc, pmd_frag_alloc);
+
void pte_frag_destroy(void *pte_frag)
{
- int count;
- struct page *page;
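+ /* Fragments are cached per CPU now, so there is no per-mm state to drop */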
+}
- page = virt_to_page(pte_frag);
- /* drop all the pending references */
- count = ((unsigned long)pte_frag & ~PAGE_MASK) >> PTE_FRAG_SIZE_SHIFT;
- /* We allow PTE_FRAG_NR fragments from a PTE page */
- if (atomic_sub_and_test(PTE_FRAG_NR - count, &page->pt_frag_refcount)) {
- pgtable_pte_page_dtor(page);
- __free_page(page);
+void pt_frag_init(void)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ struct pt_frag_alloc *alloc;
+
+ alloc = per_cpu_ptr(&pte_frag_alloc, cpu);
+ spin_lock_init(&alloc->lock);
+ INIT_LIST_HEAD(&alloc->freelist);
+
+ alloc = per_cpu_ptr(&pte_frag_alloc_kernel, cpu);
+ spin_lock_init(&alloc->lock);
+ INIT_LIST_HEAD(&alloc->freelist);
+
+ alloc = per_cpu_ptr(&pmd_frag_alloc, cpu);
+ spin_lock_init(&alloc->lock);
+ INIT_LIST_HEAD(&alloc->freelist);
}
}
-static pte_t *get_pte_from_cache(struct mm_struct *mm)
+static unsigned long pte_frag_idx(void *frag)
{
- void *pte_frag, *ret;
+ return ((unsigned long)frag & (PAGE_SIZE - 1)) >> PTE_FRAG_SIZE_SHIFT;
+}
- if (PTE_FRAG_NR == 1)
- return NULL;
+static unsigned long pmd_frag_idx(void *frag)
+{
+ return ((unsigned long)frag & (PAGE_SIZE - 1)) >> PMD_FRAG_SIZE_SHIFT;
+}
- spin_lock(&mm->page_table_lock);
- ret = pte_frag_get(&mm->context);
- if (ret) {
- pte_frag = ret + PTE_FRAG_SIZE;
- /*
- * If we have taken up all the fragments mark PTE page NULL
- */
- if (((unsigned long)pte_frag & ~PAGE_MASK) == 0)
- pte_frag = NULL;
- pte_frag_set(&mm->context, pte_frag);
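+/*
+ * Fast path: pop a fragment from this CPU's cache. Returns NULL when the
+ * cache is empty and a fresh page has to be allocated.
+ */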
+static void *get_pt_from_cache(struct mm_struct *mm, bool pte, bool kernel)
+{
+ struct pt_frag_alloc *alloc;
+
+ if (pte) {
+ if (kernel)
+ alloc = get_cpu_ptr(&pte_frag_alloc_kernel);
+ else
+ alloc = get_cpu_ptr(&pte_frag_alloc);
+ } else {
+ alloc = get_cpu_ptr(&pmd_frag_alloc);
}
- spin_unlock(&mm->page_table_lock);
- return (pte_t *)ret;
+
+ spin_lock_bh(&alloc->lock);
+ if (!list_empty(&alloc->freelist)) {
+ struct pt_frag *pt_frag = list_first_entry(&alloc->freelist,
+ struct pt_frag, list);
+ void *frag;
+
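+ /*
+ * Each free fragment holds the address of the next free fragment in
+ * its first word; pop the head of that list.
+ */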
+ frag = pt_frag->free_ptr;
+ pt_frag->free_ptr = *((void **)frag);
+ *((void **)frag) = NULL;
+
+ pt_frag->nr_free--;
+ if (pt_frag->nr_free == 0)
+ list_del(&pt_frag->list);
+ alloc->nr_free--;
+ spin_unlock_bh(&alloc->lock);
+ put_cpu_ptr(alloc);
+
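+ /* Initialize this fragment's split page table lock before handing it out */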
+ if (pte)
+ spin_lock_init(&pt_frag->locks[pte_frag_idx(frag)]);
+ else
+ spin_lock_init(&pt_frag->locks[pmd_frag_idx(frag)]);
+
+ return frag;
+ }
+
+ spin_unlock_bh(&alloc->lock);
+ put_cpu_ptr(alloc);
+
+ return NULL;
}
-static pte_t *__alloc_for_ptecache(struct mm_struct *mm, int kernel)
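+/*
+ * Slow path: allocate a new page, link its remaining fragments into this
+ * CPU's cache and return the first fragment.
+ */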
+static void *__alloc_for_ptcache(struct mm_struct *mm, bool pte, bool kernel)
{
- void *ret = NULL;
+ size_t frag_size, frag_nr;
+ struct pt_frag_alloc *alloc;
+ void *frag;
struct page *page;
+ struct pt_frag *pt_frag;
+ unsigned long i;
+
+ if (pte) {
+ frag_size = PTE_FRAG_SIZE;
+ frag_nr = PTE_FRAG_NR;
+
+ if (!kernel) {
+ page = alloc_page(PGALLOC_GFP | __GFP_ACCOUNT);
+ if (!page)
+ return NULL;
+ if (!pgtable_pte_page_ctor(page)) {
+ __free_page(page);
+ return NULL;
+ }
+ } else {
+ page = alloc_page(PGALLOC_GFP);
+ if (!page)
+ return NULL;
+ }
- if (!kernel) {
- page = alloc_page(PGALLOC_GFP | __GFP_ACCOUNT);
+ } else {
+ /*
+ * The PMD path historically builds its GFP flags differently from
+ * the PTE path; preserve that here.
+ */
+ gfp_t gfp = GFP_KERNEL_ACCOUNT | __GFP_ZERO;
+
+ frag_size = PMD_FRAG_SIZE;
+ frag_nr = PMD_FRAG_NR;
+
+ if (kernel)
+ gfp &= ~__GFP_ACCOUNT;
+ page = alloc_page(gfp);
if (!page)
return NULL;
- if (!pgtable_pte_page_ctor(page)) {
+ if (!pgtable_pmd_page_ctor(page)) {
__free_page(page);
return NULL;
}
- } else {
- page = alloc_page(PGALLOC_GFP);
- if (!page)
- return NULL;
}
- atomic_set(&page->pt_frag_refcount, 1);
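+ /*
+ * The fragment descriptor carries one split page table lock per
+ * fragment in its trailing flexible array.
+ */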
+ pt_frag = kmalloc(sizeof(struct pt_frag) + sizeof(spinlock_t) * frag_nr, GFP_KERNEL);
+ if (!pt_frag) {
+ if (!pte)
+ pgtable_pmd_page_dtor(page);
+ else if (!kernel)
+ pgtable_pte_page_dtor(page);
+ __free_page(page);
+ return NULL;
+ }
- ret = page_address(page);
- /*
- * if we support only one fragment just return the
- * allocated page.
- */
- if (PTE_FRAG_NR == 1)
- return ret;
- spin_lock(&mm->page_table_lock);
- /*
- * If we find pgtable_page set, we return
- * the allocated page with single fragment
- * count.
- */
- if (likely(!pte_frag_get(&mm->context))) {
- atomic_set(&page->pt_frag_refcount, PTE_FRAG_NR);
- pte_frag_set(&mm->context, ret + PTE_FRAG_SIZE);
+ pt_frag->page = page;
+ pt_frag->nr_free = frag_nr - 1;
+
+ frag = page_address(page);
+
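+ /*
+ * Thread the free list through fragments 1..frag_nr-1; fragment 0 is
+ * handed straight back to the caller.
+ */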
+ for (i = frag_size; i < PAGE_SIZE - frag_size; i += frag_size)
+ *((void **)(frag + i)) = frag + i + frag_size;
+ /* The last fragment's link stays NULL; the page was allocated zeroed */
+
+ pt_frag->free_ptr = frag + frag_size;
+
+ page->pt_frag = pt_frag;
+
+ if (pte) {
+ if (kernel)
+ alloc = get_cpu_ptr(&pte_frag_alloc_kernel);
+ else
+ alloc = get_cpu_ptr(&pte_frag_alloc);
+ } else {
+ alloc = get_cpu_ptr(&pmd_frag_alloc);
}
- spin_unlock(&mm->page_table_lock);
- return (pte_t *)ret;
+ /* XXX: Confirm CPU (or at least node) here */
+
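+ /* Remember the owning CPU so frees go back to this CPU's cache */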
+ pt_frag->cpu = smp_processor_id();
+
+ spin_lock_bh(&alloc->lock);
+ alloc->nr_free += frag_nr - 1;
+ list_add_tail(&pt_frag->list, &alloc->freelist);
+ spin_unlock_bh(&alloc->lock);
+
+ put_cpu_ptr(alloc);
+
+ spin_lock_init(&pt_frag->locks[0]);
+
+ return frag;
}
-pte_t *pte_fragment_alloc(struct mm_struct *mm, int kernel)
+static void *pt_fragment_alloc(struct mm_struct *mm, bool pte, bool kernel)
{
- pte_t *pte;
-
- pte = get_pte_from_cache(mm);
- if (pte)
- return pte;
+ void *pt;
- return __alloc_for_ptecache(mm, kernel);
+ pt = get_pt_from_cache(mm, pte, kernel);
+ if (unlikely(!pt))
+ pt = __alloc_for_ptcache(mm, pte, kernel);
+ return pt;
}
-void pte_fragment_free(unsigned long *table, int kernel)
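+/*
+ * Return a fragment to the cache of the CPU that allocated its page, and
+ * free the page once all of its fragments are free and the cache has spares.
+ */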
+static void pt_fragment_free(void *frag, bool pte, bool kernel)
{
- struct page *page = virt_to_page(table);
+ struct pt_frag_alloc *alloc;
+ struct page *page;
+ struct pt_frag *pt_frag;
+ size_t frag_nr;
+
+ page = virt_to_page(frag);
+ pt_frag = page->pt_frag;
+
+ if (pte) {
+ frag_nr = PTE_FRAG_NR;
+
+ if (unlikely(PageReserved(page)))
+ return free_reserved_page(page);
- if (PageReserved(page))
- return free_reserved_page(page);
+ if (kernel)
+ alloc = per_cpu_ptr(&pte_frag_alloc_kernel, pt_frag->cpu);
+ else
+ alloc = per_cpu_ptr(&pte_frag_alloc, pt_frag->cpu);
+ } else {
+ frag_nr = PMD_FRAG_NR;
+
+ alloc = per_cpu_ptr(&pmd_frag_alloc, pt_frag->cpu);
+ }
+
+ spin_lock_bh(&alloc->lock);
+
+ if (pt_frag->nr_free == 0)
+ list_add_tail(&pt_frag->list, &alloc->freelist);
+
+ pt_frag->nr_free++;
+
+ *((void **)frag) = pt_frag->free_ptr;
+ pt_frag->free_ptr = frag;
- BUG_ON(atomic_read(&page->pt_frag_refcount) <= 0);
- if (atomic_dec_and_test(&page->pt_frag_refcount)) {
- if (!kernel)
+ alloc->nr_free++;
+
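+ /*
+ * Free the backing page once every one of its fragments is free and
+ * the cache would still keep at least a page worth of free fragments.
+ */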
+ if (alloc->nr_free >= frag_nr * 2 && pt_frag->nr_free == frag_nr) {
+ list_del(&pt_frag->list);
+ alloc->nr_free -= frag_nr;
+ spin_unlock_bh(&alloc->lock);
+ if (!pte)
+ pgtable_pmd_page_dtor(page);
+ else if (!kernel)
pgtable_pte_page_dtor(page);
__free_page(page);
+ kfree(pt_frag);
+ } else {
+ spin_unlock_bh(&alloc->lock);
}
}
+
+pte_t *pte_fragment_alloc(struct mm_struct *mm, int kernel)
+{
+ return pt_fragment_alloc(mm, true, !!kernel);
+}
+
+void pte_fragment_free(unsigned long *pte, int kernel)
+{
+ pt_fragment_free(pte, true, !!kernel);
+}
+
+pmd_t *pmd_fragment_alloc(struct mm_struct *mm, unsigned long vmaddr)
+{
+ bool kernel = (mm == &init_mm);
+
+ return pt_fragment_alloc(mm, false, kernel);
+}
+
+void pmd_fragment_free(unsigned long *pmd)
+{
+ pt_fragment_free(pmd, false, false /* XXX? */);
+}
+
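+/*
+ * Split page table locks live in the pt_frag descriptor of the backing
+ * page, one per fragment, indexed by the fragment's offset in the page.
+ */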
+spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd)
+{
+ struct page *page;
+ struct pt_frag *pt_frag;
+ void *frag;
+
+ frag = (void *)pmd_page_vaddr(*pmd);
+ page = virt_to_page(frag);
+ pt_frag = page->pt_frag;
+
+ return &pt_frag->locks[pte_frag_idx(frag)];
+}
+
+spinlock_t *pmd_lockptr(struct mm_struct *mm, pmd_t *pmd)
+{
+ struct page *page;
+ struct pt_frag *pt_frag;
+ void *frag;
+
+ frag = (void *)pmd;
+ page = pmd_to_page(pmd);
+ pt_frag = page->pt_frag;
+
+ return &pt_frag->locks[pmd_frag_idx(frag)];
+}
+
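+/* Per-fragment locks are initialized when the fragment is allocated */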
+bool ptlock_init(struct page *page)
+{
+ return true;
+}
@@ -2286,6 +2286,10 @@ static inline spinlock_t *ptlock_ptr(struct page *page)
}
#endif /* ALLOC_SPLIT_PTLOCKS */
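+/*
+ * Out-of-line lookups so an architecture can supply per-fragment locks;
+ * the generic inline versions below are compiled out for this prototype.
+ */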
+spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd);
+bool ptlock_init(struct page *page);
+
+#if 0
static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd)
{
return ptlock_ptr(pmd_page(*pmd));
@@ -2306,6 +2310,7 @@ static inline bool ptlock_init(struct page *page)
spin_lock_init(ptlock_ptr(page));
return true;
}
+#endif
#else /* !USE_SPLIT_PTE_PTLOCKS */
/*
@@ -2371,16 +2376,19 @@ static inline void pgtable_pte_page_dtor(struct page *page)
#if USE_SPLIT_PMD_PTLOCKS
-static struct page *pmd_to_page(pmd_t *pmd)
+static inline struct page *pmd_to_page(pmd_t *pmd)
{
unsigned long mask = ~(PTRS_PER_PMD * sizeof(pmd_t) - 1);
return virt_to_page((void *)((unsigned long) pmd & mask));
}
+#if 0
static inline spinlock_t *pmd_lockptr(struct mm_struct *mm, pmd_t *pmd)
{
return ptlock_ptr(pmd_to_page(pmd));
}
+#endif
+spinlock_t *pmd_lockptr(struct mm_struct *mm, pmd_t *pmd);
static inline bool pmd_ptlock_init(struct page *page)
{
@@ -152,6 +152,7 @@ struct page {
unsigned long _pt_pad_2; /* mapping */
union {
struct mm_struct *pt_mm; /* x86 pgds only */
+ void *pt_frag; /* powerpc */
atomic_t pt_frag_refcount; /* powerpc */
};
#if ALLOC_SPLIT_PTLOCKS