Message ID | 1379575763-2091-7-git-send-email-Bharat.Bhushan@freescale.com (mailing list archive) |
---|---|
State | Not Applicable |
Headers | show |
On Thu, 2013-09-19 at 12:59 +0530, Bharat Bhushan wrote: > Some function defined in vfio_iommu_type1.c were common and > we want to use these for FSL IOMMU (PAMU) and iommu-none driver. > So some of them are moved to vfio_iommu_common.c > > I think we can do more of that but we will take this step by step. > > Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com> > --- > drivers/vfio/Makefile | 4 +- > drivers/vfio/vfio_iommu_common.c | 235 ++++++++++++++++++++++++++++++++++++++ > drivers/vfio/vfio_iommu_common.h | 30 +++++ > drivers/vfio/vfio_iommu_type1.c | 206 +--------------------------------- > 4 files changed, 268 insertions(+), 207 deletions(-) > create mode 100644 drivers/vfio/vfio_iommu_common.c > create mode 100644 drivers/vfio/vfio_iommu_common.h > > diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile > index 72bfabc..c5792ec 100644 > --- a/drivers/vfio/Makefile > +++ b/drivers/vfio/Makefile > @@ -1,4 +1,4 @@ > obj-$(CONFIG_VFIO) += vfio.o > -obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o > -obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_spapr_tce.o > +obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_common.o vfio_iommu_type1.o > +obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_common.o vfio_iommu_spapr_tce.o > obj-$(CONFIG_VFIO_PCI) += pci/ > diff --git a/drivers/vfio/vfio_iommu_common.c b/drivers/vfio/vfio_iommu_common.c > new file mode 100644 > index 0000000..8bdc0ea > --- /dev/null > +++ b/drivers/vfio/vfio_iommu_common.c > @@ -0,0 +1,235 @@ > +/* > + * VFIO: Common code for vfio IOMMU support > + * > + * Copyright (C) 2012 Red Hat, Inc. All rights reserved. > + * Author: Alex Williamson <alex.williamson@redhat.com> > + * Author: Bharat Bhushan <bharat.bhushan@freescale.com> > + * > + * This program is free software; you can redistribute it and/or modify > + * it under the terms of the GNU General Public License version 2 as > + * published by the Free Software Foundation. > + * > + * Derived from original vfio: > + * Copyright 2010 Cisco Systems, Inc. All rights reserved. > + * Author: Tom Lyon, pugs@cisco.com > + */ > + > +#include <linux/compat.h> > +#include <linux/device.h> > +#include <linux/fs.h> > +#include <linux/iommu.h> > +#include <linux/module.h> > +#include <linux/mm.h> > +#include <linux/pci.h> /* pci_bus_type */ > +#include <linux/rbtree.h> > +#include <linux/sched.h> > +#include <linux/slab.h> > +#include <linux/uaccess.h> > +#include <linux/vfio.h> > +#include <linux/workqueue.h> Please cleanup includes on both the source and target files. You obviously don't need linux/pci.h here for one. > + > +static bool disable_hugepages; > +module_param_named(disable_hugepages, > + disable_hugepages, bool, S_IRUGO | S_IWUSR); > +MODULE_PARM_DESC(disable_hugepages, > + "Disable VFIO IOMMU support for IOMMU hugepages."); > + > +struct vwork { > + struct mm_struct *mm; > + long npage; > + struct work_struct work; > +}; > + > +/* delayed decrement/increment for locked_vm */ > +void vfio_lock_acct_bg(struct work_struct *work) > +{ > + struct vwork *vwork = container_of(work, struct vwork, work); > + struct mm_struct *mm; > + > + mm = vwork->mm; > + down_write(&mm->mmap_sem); > + mm->locked_vm += vwork->npage; > + up_write(&mm->mmap_sem); > + mmput(mm); > + kfree(vwork); > +} > + > +void vfio_lock_acct(long npage) > +{ > + struct vwork *vwork; > + struct mm_struct *mm; > + > + if (!current->mm || !npage) > + return; /* process exited or nothing to do */ > + > + if (down_write_trylock(¤t->mm->mmap_sem)) { > + current->mm->locked_vm += npage; > + up_write(¤t->mm->mmap_sem); > + return; > + } > + > + /* > + * Couldn't get mmap_sem lock, so must setup to update > + * mm->locked_vm later. If locked_vm were atomic, we > + * wouldn't need this silliness > + */ > + vwork = kmalloc(sizeof(struct vwork), GFP_KERNEL); > + if (!vwork) > + return; > + mm = get_task_mm(current); > + if (!mm) { > + kfree(vwork); > + return; > + } > + INIT_WORK(&vwork->work, vfio_lock_acct_bg); > + vwork->mm = mm; > + vwork->npage = npage; > + schedule_work(&vwork->work); > +} > + > +/* > + * Some mappings aren't backed by a struct page, for example an mmap'd > + * MMIO range for our own or another device. These use a different > + * pfn conversion and shouldn't be tracked as locked pages. > + */ > +bool is_invalid_reserved_pfn(unsigned long pfn) > +{ > + if (pfn_valid(pfn)) { > + bool reserved; > + struct page *tail = pfn_to_page(pfn); > + struct page *head = compound_trans_head(tail); > + reserved = !!(PageReserved(head)); > + if (head != tail) { > + /* > + * "head" is not a dangling pointer > + * (compound_trans_head takes care of that) > + * but the hugepage may have been split > + * from under us (and we may not hold a > + * reference count on the head page so it can > + * be reused before we run PageReferenced), so > + * we've to check PageTail before returning > + * what we just read. > + */ > + smp_rmb(); > + if (PageTail(tail)) > + return reserved; > + } > + return PageReserved(tail); > + } > + > + return true; > +} > + > +int put_pfn(unsigned long pfn, int prot) > +{ > + if (!is_invalid_reserved_pfn(pfn)) { > + struct page *page = pfn_to_page(pfn); > + if (prot & IOMMU_WRITE) > + SetPageDirty(page); > + put_page(page); > + return 1; > + } > + return 0; > +} > + > +static int vaddr_get_pfn(unsigned long vaddr, int prot, unsigned long *pfn) > +{ > + struct page *page[1]; > + struct vm_area_struct *vma; > + int ret = -EFAULT; > + > + if (get_user_pages_fast(vaddr, 1, !!(prot & IOMMU_WRITE), page) == 1) { > + *pfn = page_to_pfn(page[0]); > + return 0; > + } > + > + printk("via vma\n"); > + down_read(¤t->mm->mmap_sem); > + > + vma = find_vma_intersection(current->mm, vaddr, vaddr + 1); > + > + if (vma && vma->vm_flags & VM_PFNMAP) { > + *pfn = ((vaddr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; > + if (is_invalid_reserved_pfn(*pfn)) > + ret = 0; > + } > + > + up_read(¤t->mm->mmap_sem); > + > + return ret; > +} > + > +/* > + * Attempt to pin pages. We really don't want to track all the pfns and > + * the iommu can only map chunks of consecutive pfns anyway, so get the > + * first page and all consecutive pages with the same locking. > + */ > +long vfio_pin_pages(unsigned long vaddr, long npage, > + int prot, unsigned long *pfn_base) > +{ > + unsigned long limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; > + bool lock_cap = capable(CAP_IPC_LOCK); > + long ret, i; > + > + if (!current->mm) > + return -ENODEV; > + > + ret = vaddr_get_pfn(vaddr, prot, pfn_base); > + if (ret) > + return ret; > + > + if (is_invalid_reserved_pfn(*pfn_base)) > + return 1; > + > + if (!lock_cap && current->mm->locked_vm + 1 > limit) { > + put_pfn(*pfn_base, prot); > + pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n", __func__, > + limit << PAGE_SHIFT); > + return -ENOMEM; > + } > + > + if (unlikely(disable_hugepages)) { > + vfio_lock_acct(1); > + return 1; > + } > + > + /* Lock all the consecutive pages from pfn_base */ > + for (i = 1, vaddr += PAGE_SIZE; i < npage; i++, vaddr += PAGE_SIZE) { > + unsigned long pfn = 0; > + > + ret = vaddr_get_pfn(vaddr, prot, &pfn); > + if (ret) > + break; > + > + if (pfn != *pfn_base + i || is_invalid_reserved_pfn(pfn)) { > + put_pfn(pfn, prot); > + break; > + } > + > + if (!lock_cap && current->mm->locked_vm + i + 1 > limit) { > + put_pfn(pfn, prot); > + pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n", > + __func__, limit << PAGE_SHIFT); > + break; > + } > + } > + > + vfio_lock_acct(i); > + > + return i; > +} > + > +long vfio_unpin_pages(unsigned long pfn, long npage, > + int prot, bool do_accounting) > +{ > + unsigned long unlocked = 0; > + long i; > + > + for (i = 0; i < npage; i++) > + unlocked += put_pfn(pfn++, prot); > + > + if (do_accounting) > + vfio_lock_acct(-unlocked); > + > + return unlocked; > +} > diff --git a/drivers/vfio/vfio_iommu_common.h b/drivers/vfio/vfio_iommu_common.h > new file mode 100644 > index 0000000..4738391 > --- /dev/null > +++ b/drivers/vfio/vfio_iommu_common.h > @@ -0,0 +1,30 @@ > +/* > + * Copyright (C) 2012 Red Hat, Inc. All rights reserved. > + * Copyright (C) 2013 Freescale Semiconductor, Inc. > + * > + * This program is free software; you can redistribute it and/or modify it > + * under the terms of the GNU General Public License version 2 as published > + * by the Free Software Foundation. > + * > + * This program is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > + * GNU General Public License for more details. > + * > + * You should have received a copy of the GNU General Public License > + * along with this program; if not, write to the Free Software > + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA > + */ > + > +#ifndef _VFIO_IOMMU_COMMON_H > +#define _VFIO_IOMMU_COMMON_H > + > +void vfio_lock_acct_bg(struct work_struct *work); Does this need to be exposed? > +void vfio_lock_acct(long npage); > +bool is_invalid_reserved_pfn(unsigned long pfn); > +int put_pfn(unsigned long pfn, int prot); Why are these exposed, they only seem to be used by functions in the new common file. > +long vfio_pin_pages(unsigned long vaddr, long npage, int prot, > + unsigned long *pfn_base); > +long vfio_unpin_pages(unsigned long pfn, long npage, > + int prot, bool do_accounting); Can we get by with just these two and vfio_lock_acct()? > +#endif > diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c > index a9807de..e9a58fa 100644 > --- a/drivers/vfio/vfio_iommu_type1.c > +++ b/drivers/vfio/vfio_iommu_type1.c > @@ -37,6 +37,7 @@ > #include <linux/uaccess.h> > #include <linux/vfio.h> > #include <linux/workqueue.h> > +#include "vfio_iommu_common.h" > > #define DRIVER_VERSION "0.2" > #define DRIVER_AUTHOR "Alex Williamson <alex.williamson@redhat.com>" > @@ -48,12 +49,6 @@ module_param_named(allow_unsafe_interrupts, > MODULE_PARM_DESC(allow_unsafe_interrupts, > "Enable VFIO IOMMU support for on platforms without interrupt remapping support."); > > -static bool disable_hugepages; > -module_param_named(disable_hugepages, > - disable_hugepages, bool, S_IRUGO | S_IWUSR); > -MODULE_PARM_DESC(disable_hugepages, > - "Disable VFIO IOMMU support for IOMMU hugepages."); > - > struct vfio_iommu { > struct iommu_domain *domain; > struct mutex lock; > @@ -123,205 +118,6 @@ static void vfio_remove_dma(struct vfio_iommu *iommu, struct vfio_dma *old) > rb_erase(&old->node, &iommu->dma_list); > } > > -struct vwork { > - struct mm_struct *mm; > - long npage; > - struct work_struct work; > -}; > - > -/* delayed decrement/increment for locked_vm */ > -static void vfio_lock_acct_bg(struct work_struct *work) > -{ > - struct vwork *vwork = container_of(work, struct vwork, work); > - struct mm_struct *mm; > - > - mm = vwork->mm; > - down_write(&mm->mmap_sem); > - mm->locked_vm += vwork->npage; > - up_write(&mm->mmap_sem); > - mmput(mm); > - kfree(vwork); > -} > - > -static void vfio_lock_acct(long npage) > -{ > - struct vwork *vwork; > - struct mm_struct *mm; > - > - if (!current->mm || !npage) > - return; /* process exited or nothing to do */ > - > - if (down_write_trylock(¤t->mm->mmap_sem)) { > - current->mm->locked_vm += npage; > - up_write(¤t->mm->mmap_sem); > - return; > - } > - > - /* > - * Couldn't get mmap_sem lock, so must setup to update > - * mm->locked_vm later. If locked_vm were atomic, we > - * wouldn't need this silliness > - */ > - vwork = kmalloc(sizeof(struct vwork), GFP_KERNEL); > - if (!vwork) > - return; > - mm = get_task_mm(current); > - if (!mm) { > - kfree(vwork); > - return; > - } > - INIT_WORK(&vwork->work, vfio_lock_acct_bg); > - vwork->mm = mm; > - vwork->npage = npage; > - schedule_work(&vwork->work); > -} > - > -/* > - * Some mappings aren't backed by a struct page, for example an mmap'd > - * MMIO range for our own or another device. These use a different > - * pfn conversion and shouldn't be tracked as locked pages. > - */ > -static bool is_invalid_reserved_pfn(unsigned long pfn) > -{ > - if (pfn_valid(pfn)) { > - bool reserved; > - struct page *tail = pfn_to_page(pfn); > - struct page *head = compound_trans_head(tail); > - reserved = !!(PageReserved(head)); > - if (head != tail) { > - /* > - * "head" is not a dangling pointer > - * (compound_trans_head takes care of that) > - * but the hugepage may have been split > - * from under us (and we may not hold a > - * reference count on the head page so it can > - * be reused before we run PageReferenced), so > - * we've to check PageTail before returning > - * what we just read. > - */ > - smp_rmb(); > - if (PageTail(tail)) > - return reserved; > - } > - return PageReserved(tail); > - } > - > - return true; > -} > - > -static int put_pfn(unsigned long pfn, int prot) > -{ > - if (!is_invalid_reserved_pfn(pfn)) { > - struct page *page = pfn_to_page(pfn); > - if (prot & IOMMU_WRITE) > - SetPageDirty(page); > - put_page(page); > - return 1; > - } > - return 0; > -} > - > -static int vaddr_get_pfn(unsigned long vaddr, int prot, unsigned long *pfn) > -{ > - struct page *page[1]; > - struct vm_area_struct *vma; > - int ret = -EFAULT; > - > - if (get_user_pages_fast(vaddr, 1, !!(prot & IOMMU_WRITE), page) == 1) { > - *pfn = page_to_pfn(page[0]); > - return 0; > - } > - > - down_read(¤t->mm->mmap_sem); > - > - vma = find_vma_intersection(current->mm, vaddr, vaddr + 1); > - > - if (vma && vma->vm_flags & VM_PFNMAP) { > - *pfn = ((vaddr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; > - if (is_invalid_reserved_pfn(*pfn)) > - ret = 0; > - } > - > - up_read(¤t->mm->mmap_sem); > - > - return ret; > -} > - > -/* > - * Attempt to pin pages. We really don't want to track all the pfns and > - * the iommu can only map chunks of consecutive pfns anyway, so get the > - * first page and all consecutive pages with the same locking. > - */ > -static long vfio_pin_pages(unsigned long vaddr, long npage, > - int prot, unsigned long *pfn_base) > -{ > - unsigned long limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; > - bool lock_cap = capable(CAP_IPC_LOCK); > - long ret, i; > - > - if (!current->mm) > - return -ENODEV; > - > - ret = vaddr_get_pfn(vaddr, prot, pfn_base); > - if (ret) > - return ret; > - > - if (is_invalid_reserved_pfn(*pfn_base)) > - return 1; > - > - if (!lock_cap && current->mm->locked_vm + 1 > limit) { > - put_pfn(*pfn_base, prot); > - pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n", __func__, > - limit << PAGE_SHIFT); > - return -ENOMEM; > - } > - > - if (unlikely(disable_hugepages)) { > - vfio_lock_acct(1); > - return 1; > - } > - > - /* Lock all the consecutive pages from pfn_base */ > - for (i = 1, vaddr += PAGE_SIZE; i < npage; i++, vaddr += PAGE_SIZE) { > - unsigned long pfn = 0; > - > - ret = vaddr_get_pfn(vaddr, prot, &pfn); > - if (ret) > - break; > - > - if (pfn != *pfn_base + i || is_invalid_reserved_pfn(pfn)) { > - put_pfn(pfn, prot); > - break; > - } > - > - if (!lock_cap && current->mm->locked_vm + i + 1 > limit) { > - put_pfn(pfn, prot); > - pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n", > - __func__, limit << PAGE_SHIFT); > - break; > - } > - } > - > - vfio_lock_acct(i); > - > - return i; > -} > - > -static long vfio_unpin_pages(unsigned long pfn, long npage, > - int prot, bool do_accounting) > -{ > - unsigned long unlocked = 0; > - long i; > - > - for (i = 0; i < npage; i++) > - unlocked += put_pfn(pfn++, prot); > - > - if (do_accounting) > - vfio_lock_acct(-unlocked); > - > - return unlocked; > -} > - > static int vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma, > dma_addr_t iova, size_t *size) > {
> -----Original Message----- > From: linux-pci-owner@vger.kernel.org [mailto:linux-pci-owner@vger.kernel.org] > On Behalf Of Alex Williamson > Sent: Wednesday, September 25, 2013 10:33 PM > To: Bhushan Bharat-R65777 > Cc: joro@8bytes.org; benh@kernel.crashing.org; galak@kernel.crashing.org; linux- > kernel@vger.kernel.org; linuxppc-dev@lists.ozlabs.org; linux- > pci@vger.kernel.org; agraf@suse.de; Wood Scott-B07421; iommu@lists.linux- > foundation.org; Bhushan Bharat-R65777 > Subject: Re: [PATCH 6/7] vfio: moving some functions in common file > > On Thu, 2013-09-19 at 12:59 +0530, Bharat Bhushan wrote: > > Some function defined in vfio_iommu_type1.c were common and we want to > > use these for FSL IOMMU (PAMU) and iommu-none driver. > > So some of them are moved to vfio_iommu_common.c > > > > I think we can do more of that but we will take this step by step. > > > > Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com> > > --- > > drivers/vfio/Makefile | 4 +- > > drivers/vfio/vfio_iommu_common.c | 235 > ++++++++++++++++++++++++++++++++++++++ > > drivers/vfio/vfio_iommu_common.h | 30 +++++ > > drivers/vfio/vfio_iommu_type1.c | 206 > > +--------------------------------- > > 4 files changed, 268 insertions(+), 207 deletions(-) create mode > > 100644 drivers/vfio/vfio_iommu_common.c create mode 100644 > > drivers/vfio/vfio_iommu_common.h > > > > diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile index > > 72bfabc..c5792ec 100644 > > --- a/drivers/vfio/Makefile > > +++ b/drivers/vfio/Makefile > > @@ -1,4 +1,4 @@ > > obj-$(CONFIG_VFIO) += vfio.o > > -obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o > > -obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_spapr_tce.o > > +obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_common.o > > +vfio_iommu_type1.o > > +obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_common.o > > +vfio_iommu_spapr_tce.o > > obj-$(CONFIG_VFIO_PCI) += pci/ > > diff --git a/drivers/vfio/vfio_iommu_common.c > > b/drivers/vfio/vfio_iommu_common.c > > new file mode 100644 > > index 0000000..8bdc0ea > > --- /dev/null > > +++ b/drivers/vfio/vfio_iommu_common.c > > @@ -0,0 +1,235 @@ > > +/* > > + * VFIO: Common code for vfio IOMMU support > > + * > > + * Copyright (C) 2012 Red Hat, Inc. All rights reserved. > > + * Author: Alex Williamson <alex.williamson@redhat.com> > > + * Author: Bharat Bhushan <bharat.bhushan@freescale.com> > > + * > > + * This program is free software; you can redistribute it and/or > > +modify > > + * it under the terms of the GNU General Public License version 2 as > > + * published by the Free Software Foundation. > > + * > > + * Derived from original vfio: > > + * Copyright 2010 Cisco Systems, Inc. All rights reserved. > > + * Author: Tom Lyon, pugs@cisco.com > > + */ > > + > > +#include <linux/compat.h> > > +#include <linux/device.h> > > +#include <linux/fs.h> > > +#include <linux/iommu.h> > > +#include <linux/module.h> > > +#include <linux/mm.h> > > +#include <linux/pci.h> /* pci_bus_type */ > > +#include <linux/rbtree.h> > > +#include <linux/sched.h> > > +#include <linux/slab.h> > > +#include <linux/uaccess.h> > > +#include <linux/vfio.h> > > +#include <linux/workqueue.h> > > Please cleanup includes on both the source and target files. You obviously > don't need linux/pci.h here for one. Will do. > > > + > > +static bool disable_hugepages; > > +module_param_named(disable_hugepages, > > + disable_hugepages, bool, S_IRUGO | S_IWUSR); > > +MODULE_PARM_DESC(disable_hugepages, > > + "Disable VFIO IOMMU support for IOMMU hugepages."); > > + > > +struct vwork { > > + struct mm_struct *mm; > > + long npage; > > + struct work_struct work; > > +}; > > + > > +/* delayed decrement/increment for locked_vm */ void > > +vfio_lock_acct_bg(struct work_struct *work) { > > + struct vwork *vwork = container_of(work, struct vwork, work); > > + struct mm_struct *mm; > > + > > + mm = vwork->mm; > > + down_write(&mm->mmap_sem); > > + mm->locked_vm += vwork->npage; > > + up_write(&mm->mmap_sem); > > + mmput(mm); > > + kfree(vwork); > > +} > > + > > +void vfio_lock_acct(long npage) > > +{ > > + struct vwork *vwork; > > + struct mm_struct *mm; > > + > > + if (!current->mm || !npage) > > + return; /* process exited or nothing to do */ > > + > > + if (down_write_trylock(¤t->mm->mmap_sem)) { > > + current->mm->locked_vm += npage; > > + up_write(¤t->mm->mmap_sem); > > + return; > > + } > > + > > + /* > > + * Couldn't get mmap_sem lock, so must setup to update > > + * mm->locked_vm later. If locked_vm were atomic, we > > + * wouldn't need this silliness > > + */ > > + vwork = kmalloc(sizeof(struct vwork), GFP_KERNEL); > > + if (!vwork) > > + return; > > + mm = get_task_mm(current); > > + if (!mm) { > > + kfree(vwork); > > + return; > > + } > > + INIT_WORK(&vwork->work, vfio_lock_acct_bg); > > + vwork->mm = mm; > > + vwork->npage = npage; > > + schedule_work(&vwork->work); > > +} > > + > > +/* > > + * Some mappings aren't backed by a struct page, for example an > > +mmap'd > > + * MMIO range for our own or another device. These use a different > > + * pfn conversion and shouldn't be tracked as locked pages. > > + */ > > +bool is_invalid_reserved_pfn(unsigned long pfn) { > > + if (pfn_valid(pfn)) { > > + bool reserved; > > + struct page *tail = pfn_to_page(pfn); > > + struct page *head = compound_trans_head(tail); > > + reserved = !!(PageReserved(head)); > > + if (head != tail) { > > + /* > > + * "head" is not a dangling pointer > > + * (compound_trans_head takes care of that) > > + * but the hugepage may have been split > > + * from under us (and we may not hold a > > + * reference count on the head page so it can > > + * be reused before we run PageReferenced), so > > + * we've to check PageTail before returning > > + * what we just read. > > + */ > > + smp_rmb(); > > + if (PageTail(tail)) > > + return reserved; > > + } > > + return PageReserved(tail); > > + } > > + > > + return true; > > +} > > + > > +int put_pfn(unsigned long pfn, int prot) { > > + if (!is_invalid_reserved_pfn(pfn)) { > > + struct page *page = pfn_to_page(pfn); > > + if (prot & IOMMU_WRITE) > > + SetPageDirty(page); > > + put_page(page); > > + return 1; > > + } > > + return 0; > > +} > > + > > +static int vaddr_get_pfn(unsigned long vaddr, int prot, unsigned long > > +*pfn) { > > + struct page *page[1]; > > + struct vm_area_struct *vma; > > + int ret = -EFAULT; > > + > > + if (get_user_pages_fast(vaddr, 1, !!(prot & IOMMU_WRITE), page) == 1) { > > + *pfn = page_to_pfn(page[0]); > > + return 0; > > + } > > + > > + printk("via vma\n"); > > + down_read(¤t->mm->mmap_sem); > > + > > + vma = find_vma_intersection(current->mm, vaddr, vaddr + 1); > > + > > + if (vma && vma->vm_flags & VM_PFNMAP) { > > + *pfn = ((vaddr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; > > + if (is_invalid_reserved_pfn(*pfn)) > > + ret = 0; > > + } > > + > > + up_read(¤t->mm->mmap_sem); > > + > > + return ret; > > +} > > + > > +/* > > + * Attempt to pin pages. We really don't want to track all the pfns > > +and > > + * the iommu can only map chunks of consecutive pfns anyway, so get > > +the > > + * first page and all consecutive pages with the same locking. > > + */ > > +long vfio_pin_pages(unsigned long vaddr, long npage, > > + int prot, unsigned long *pfn_base) { > > + unsigned long limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; > > + bool lock_cap = capable(CAP_IPC_LOCK); > > + long ret, i; > > + > > + if (!current->mm) > > + return -ENODEV; > > + > > + ret = vaddr_get_pfn(vaddr, prot, pfn_base); > > + if (ret) > > + return ret; > > + > > + if (is_invalid_reserved_pfn(*pfn_base)) > > + return 1; > > + > > + if (!lock_cap && current->mm->locked_vm + 1 > limit) { > > + put_pfn(*pfn_base, prot); > > + pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n", __func__, > > + limit << PAGE_SHIFT); > > + return -ENOMEM; > > + } > > + > > + if (unlikely(disable_hugepages)) { > > + vfio_lock_acct(1); > > + return 1; > > + } > > + > > + /* Lock all the consecutive pages from pfn_base */ > > + for (i = 1, vaddr += PAGE_SIZE; i < npage; i++, vaddr += PAGE_SIZE) { > > + unsigned long pfn = 0; > > + > > + ret = vaddr_get_pfn(vaddr, prot, &pfn); > > + if (ret) > > + break; > > + > > + if (pfn != *pfn_base + i || is_invalid_reserved_pfn(pfn)) { > > + put_pfn(pfn, prot); > > + break; > > + } > > + > > + if (!lock_cap && current->mm->locked_vm + i + 1 > limit) { > > + put_pfn(pfn, prot); > > + pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n", > > + __func__, limit << PAGE_SHIFT); > > + break; > > + } > > + } > > + > > + vfio_lock_acct(i); > > + > > + return i; > > +} > > + > > +long vfio_unpin_pages(unsigned long pfn, long npage, > > + int prot, bool do_accounting) { > > + unsigned long unlocked = 0; > > + long i; > > + > > + for (i = 0; i < npage; i++) > > + unlocked += put_pfn(pfn++, prot); > > + > > + if (do_accounting) > > + vfio_lock_acct(-unlocked); > > + > > + return unlocked; > > +} > > diff --git a/drivers/vfio/vfio_iommu_common.h > > b/drivers/vfio/vfio_iommu_common.h > > new file mode 100644 > > index 0000000..4738391 > > --- /dev/null > > +++ b/drivers/vfio/vfio_iommu_common.h > > @@ -0,0 +1,30 @@ > > +/* > > + * Copyright (C) 2012 Red Hat, Inc. All rights reserved. > > + * Copyright (C) 2013 Freescale Semiconductor, Inc. > > + * > > + * This program is free software; you can redistribute it and/or > > +modify it > > + * under the terms of the GNU General Public License version 2 as > > +published > > + * by the Free Software Foundation. > > + * > > + * This program is distributed in the hope that it will be useful, > > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > > + * GNU General Public License for more details. > > + * > > + * You should have received a copy of the GNU General Public License > > + * along with this program; if not, write to the Free Software > > + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA > > +02111-1307 USA */ > > + > > +#ifndef _VFIO_IOMMU_COMMON_H > > +#define _VFIO_IOMMU_COMMON_H > > + > > +void vfio_lock_acct_bg(struct work_struct *work); > > Does this need to be exposed? No, will make this and other which does not need to be exposed as static to this file Thanks -Bharat > > > +void vfio_lock_acct(long npage); > > +bool is_invalid_reserved_pfn(unsigned long pfn); int put_pfn(unsigned > > +long pfn, int prot); > > Why are these exposed, they only seem to be used by functions in the new common > file. > > > +long vfio_pin_pages(unsigned long vaddr, long npage, int prot, > > + unsigned long *pfn_base); > > +long vfio_unpin_pages(unsigned long pfn, long npage, > > + int prot, bool do_accounting); > > Can we get by with just these two and vfio_lock_acct()? > > > +#endif > > diff --git a/drivers/vfio/vfio_iommu_type1.c > > b/drivers/vfio/vfio_iommu_type1.c index a9807de..e9a58fa 100644 > > --- a/drivers/vfio/vfio_iommu_type1.c > > +++ b/drivers/vfio/vfio_iommu_type1.c > > @@ -37,6 +37,7 @@ > > #include <linux/uaccess.h> > > #include <linux/vfio.h> > > #include <linux/workqueue.h> > > +#include "vfio_iommu_common.h" > > > > #define DRIVER_VERSION "0.2" > > #define DRIVER_AUTHOR "Alex Williamson <alex.williamson@redhat.com>" > > @@ -48,12 +49,6 @@ module_param_named(allow_unsafe_interrupts, > > MODULE_PARM_DESC(allow_unsafe_interrupts, > > "Enable VFIO IOMMU support for on platforms without interrupt > > remapping support."); > > > > -static bool disable_hugepages; > > -module_param_named(disable_hugepages, > > - disable_hugepages, bool, S_IRUGO | S_IWUSR); > > -MODULE_PARM_DESC(disable_hugepages, > > - "Disable VFIO IOMMU support for IOMMU hugepages."); > > - > > struct vfio_iommu { > > struct iommu_domain *domain; > > struct mutex lock; > > @@ -123,205 +118,6 @@ static void vfio_remove_dma(struct vfio_iommu *iommu, > struct vfio_dma *old) > > rb_erase(&old->node, &iommu->dma_list); } > > > > -struct vwork { > > - struct mm_struct *mm; > > - long npage; > > - struct work_struct work; > > -}; > > - > > -/* delayed decrement/increment for locked_vm */ -static void > > vfio_lock_acct_bg(struct work_struct *work) -{ > > - struct vwork *vwork = container_of(work, struct vwork, work); > > - struct mm_struct *mm; > > - > > - mm = vwork->mm; > > - down_write(&mm->mmap_sem); > > - mm->locked_vm += vwork->npage; > > - up_write(&mm->mmap_sem); > > - mmput(mm); > > - kfree(vwork); > > -} > > - > > -static void vfio_lock_acct(long npage) -{ > > - struct vwork *vwork; > > - struct mm_struct *mm; > > - > > - if (!current->mm || !npage) > > - return; /* process exited or nothing to do */ > > - > > - if (down_write_trylock(¤t->mm->mmap_sem)) { > > - current->mm->locked_vm += npage; > > - up_write(¤t->mm->mmap_sem); > > - return; > > - } > > - > > - /* > > - * Couldn't get mmap_sem lock, so must setup to update > > - * mm->locked_vm later. If locked_vm were atomic, we > > - * wouldn't need this silliness > > - */ > > - vwork = kmalloc(sizeof(struct vwork), GFP_KERNEL); > > - if (!vwork) > > - return; > > - mm = get_task_mm(current); > > - if (!mm) { > > - kfree(vwork); > > - return; > > - } > > - INIT_WORK(&vwork->work, vfio_lock_acct_bg); > > - vwork->mm = mm; > > - vwork->npage = npage; > > - schedule_work(&vwork->work); > > -} > > - > > -/* > > - * Some mappings aren't backed by a struct page, for example an > > mmap'd > > - * MMIO range for our own or another device. These use a different > > - * pfn conversion and shouldn't be tracked as locked pages. > > - */ > > -static bool is_invalid_reserved_pfn(unsigned long pfn) -{ > > - if (pfn_valid(pfn)) { > > - bool reserved; > > - struct page *tail = pfn_to_page(pfn); > > - struct page *head = compound_trans_head(tail); > > - reserved = !!(PageReserved(head)); > > - if (head != tail) { > > - /* > > - * "head" is not a dangling pointer > > - * (compound_trans_head takes care of that) > > - * but the hugepage may have been split > > - * from under us (and we may not hold a > > - * reference count on the head page so it can > > - * be reused before we run PageReferenced), so > > - * we've to check PageTail before returning > > - * what we just read. > > - */ > > - smp_rmb(); > > - if (PageTail(tail)) > > - return reserved; > > - } > > - return PageReserved(tail); > > - } > > - > > - return true; > > -} > > - > > -static int put_pfn(unsigned long pfn, int prot) -{ > > - if (!is_invalid_reserved_pfn(pfn)) { > > - struct page *page = pfn_to_page(pfn); > > - if (prot & IOMMU_WRITE) > > - SetPageDirty(page); > > - put_page(page); > > - return 1; > > - } > > - return 0; > > -} > > - > > -static int vaddr_get_pfn(unsigned long vaddr, int prot, unsigned long > > *pfn) -{ > > - struct page *page[1]; > > - struct vm_area_struct *vma; > > - int ret = -EFAULT; > > - > > - if (get_user_pages_fast(vaddr, 1, !!(prot & IOMMU_WRITE), page) == 1) { > > - *pfn = page_to_pfn(page[0]); > > - return 0; > > - } > > - > > - down_read(¤t->mm->mmap_sem); > > - > > - vma = find_vma_intersection(current->mm, vaddr, vaddr + 1); > > - > > - if (vma && vma->vm_flags & VM_PFNMAP) { > > - *pfn = ((vaddr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; > > - if (is_invalid_reserved_pfn(*pfn)) > > - ret = 0; > > - } > > - > > - up_read(¤t->mm->mmap_sem); > > - > > - return ret; > > -} > > - > > -/* > > - * Attempt to pin pages. We really don't want to track all the pfns > > and > > - * the iommu can only map chunks of consecutive pfns anyway, so get > > the > > - * first page and all consecutive pages with the same locking. > > - */ > > -static long vfio_pin_pages(unsigned long vaddr, long npage, > > - int prot, unsigned long *pfn_base) > > -{ > > - unsigned long limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; > > - bool lock_cap = capable(CAP_IPC_LOCK); > > - long ret, i; > > - > > - if (!current->mm) > > - return -ENODEV; > > - > > - ret = vaddr_get_pfn(vaddr, prot, pfn_base); > > - if (ret) > > - return ret; > > - > > - if (is_invalid_reserved_pfn(*pfn_base)) > > - return 1; > > - > > - if (!lock_cap && current->mm->locked_vm + 1 > limit) { > > - put_pfn(*pfn_base, prot); > > - pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n", __func__, > > - limit << PAGE_SHIFT); > > - return -ENOMEM; > > - } > > - > > - if (unlikely(disable_hugepages)) { > > - vfio_lock_acct(1); > > - return 1; > > - } > > - > > - /* Lock all the consecutive pages from pfn_base */ > > - for (i = 1, vaddr += PAGE_SIZE; i < npage; i++, vaddr += PAGE_SIZE) { > > - unsigned long pfn = 0; > > - > > - ret = vaddr_get_pfn(vaddr, prot, &pfn); > > - if (ret) > > - break; > > - > > - if (pfn != *pfn_base + i || is_invalid_reserved_pfn(pfn)) { > > - put_pfn(pfn, prot); > > - break; > > - } > > - > > - if (!lock_cap && current->mm->locked_vm + i + 1 > limit) { > > - put_pfn(pfn, prot); > > - pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n", > > - __func__, limit << PAGE_SHIFT); > > - break; > > - } > > - } > > - > > - vfio_lock_acct(i); > > - > > - return i; > > -} > > - > > -static long vfio_unpin_pages(unsigned long pfn, long npage, > > - int prot, bool do_accounting) > > -{ > > - unsigned long unlocked = 0; > > - long i; > > - > > - for (i = 0; i < npage; i++) > > - unlocked += put_pfn(pfn++, prot); > > - > > - if (do_accounting) > > - vfio_lock_acct(-unlocked); > > - > > - return unlocked; > > -} > > - > > static int vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma, > > dma_addr_t iova, size_t *size) { > > > > -- > To unsubscribe from this list: send the line "unsubscribe linux-pci" in the body > of a message to majordomo@vger.kernel.org More majordomo info at > http://vger.kernel.org/majordomo-info.html
diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile index 72bfabc..c5792ec 100644 --- a/drivers/vfio/Makefile +++ b/drivers/vfio/Makefile @@ -1,4 +1,4 @@ obj-$(CONFIG_VFIO) += vfio.o -obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o -obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_spapr_tce.o +obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_common.o vfio_iommu_type1.o +obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_common.o vfio_iommu_spapr_tce.o obj-$(CONFIG_VFIO_PCI) += pci/ diff --git a/drivers/vfio/vfio_iommu_common.c b/drivers/vfio/vfio_iommu_common.c new file mode 100644 index 0000000..8bdc0ea --- /dev/null +++ b/drivers/vfio/vfio_iommu_common.c @@ -0,0 +1,235 @@ +/* + * VFIO: Common code for vfio IOMMU support + * + * Copyright (C) 2012 Red Hat, Inc. All rights reserved. + * Author: Alex Williamson <alex.williamson@redhat.com> + * Author: Bharat Bhushan <bharat.bhushan@freescale.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Derived from original vfio: + * Copyright 2010 Cisco Systems, Inc. All rights reserved. + * Author: Tom Lyon, pugs@cisco.com + */ + +#include <linux/compat.h> +#include <linux/device.h> +#include <linux/fs.h> +#include <linux/iommu.h> +#include <linux/module.h> +#include <linux/mm.h> +#include <linux/pci.h> /* pci_bus_type */ +#include <linux/rbtree.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/uaccess.h> +#include <linux/vfio.h> +#include <linux/workqueue.h> + +static bool disable_hugepages; +module_param_named(disable_hugepages, + disable_hugepages, bool, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(disable_hugepages, + "Disable VFIO IOMMU support for IOMMU hugepages."); + +struct vwork { + struct mm_struct *mm; + long npage; + struct work_struct work; +}; + +/* delayed decrement/increment for locked_vm */ +void vfio_lock_acct_bg(struct work_struct *work) +{ + struct vwork *vwork = container_of(work, struct vwork, work); + struct mm_struct *mm; + + mm = vwork->mm; + down_write(&mm->mmap_sem); + mm->locked_vm += vwork->npage; + up_write(&mm->mmap_sem); + mmput(mm); + kfree(vwork); +} + +void vfio_lock_acct(long npage) +{ + struct vwork *vwork; + struct mm_struct *mm; + + if (!current->mm || !npage) + return; /* process exited or nothing to do */ + + if (down_write_trylock(¤t->mm->mmap_sem)) { + current->mm->locked_vm += npage; + up_write(¤t->mm->mmap_sem); + return; + } + + /* + * Couldn't get mmap_sem lock, so must setup to update + * mm->locked_vm later. If locked_vm were atomic, we + * wouldn't need this silliness + */ + vwork = kmalloc(sizeof(struct vwork), GFP_KERNEL); + if (!vwork) + return; + mm = get_task_mm(current); + if (!mm) { + kfree(vwork); + return; + } + INIT_WORK(&vwork->work, vfio_lock_acct_bg); + vwork->mm = mm; + vwork->npage = npage; + schedule_work(&vwork->work); +} + +/* + * Some mappings aren't backed by a struct page, for example an mmap'd + * MMIO range for our own or another device. These use a different + * pfn conversion and shouldn't be tracked as locked pages. + */ +bool is_invalid_reserved_pfn(unsigned long pfn) +{ + if (pfn_valid(pfn)) { + bool reserved; + struct page *tail = pfn_to_page(pfn); + struct page *head = compound_trans_head(tail); + reserved = !!(PageReserved(head)); + if (head != tail) { + /* + * "head" is not a dangling pointer + * (compound_trans_head takes care of that) + * but the hugepage may have been split + * from under us (and we may not hold a + * reference count on the head page so it can + * be reused before we run PageReferenced), so + * we've to check PageTail before returning + * what we just read. + */ + smp_rmb(); + if (PageTail(tail)) + return reserved; + } + return PageReserved(tail); + } + + return true; +} + +int put_pfn(unsigned long pfn, int prot) +{ + if (!is_invalid_reserved_pfn(pfn)) { + struct page *page = pfn_to_page(pfn); + if (prot & IOMMU_WRITE) + SetPageDirty(page); + put_page(page); + return 1; + } + return 0; +} + +static int vaddr_get_pfn(unsigned long vaddr, int prot, unsigned long *pfn) +{ + struct page *page[1]; + struct vm_area_struct *vma; + int ret = -EFAULT; + + if (get_user_pages_fast(vaddr, 1, !!(prot & IOMMU_WRITE), page) == 1) { + *pfn = page_to_pfn(page[0]); + return 0; + } + + printk("via vma\n"); + down_read(¤t->mm->mmap_sem); + + vma = find_vma_intersection(current->mm, vaddr, vaddr + 1); + + if (vma && vma->vm_flags & VM_PFNMAP) { + *pfn = ((vaddr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; + if (is_invalid_reserved_pfn(*pfn)) + ret = 0; + } + + up_read(¤t->mm->mmap_sem); + + return ret; +} + +/* + * Attempt to pin pages. We really don't want to track all the pfns and + * the iommu can only map chunks of consecutive pfns anyway, so get the + * first page and all consecutive pages with the same locking. + */ +long vfio_pin_pages(unsigned long vaddr, long npage, + int prot, unsigned long *pfn_base) +{ + unsigned long limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; + bool lock_cap = capable(CAP_IPC_LOCK); + long ret, i; + + if (!current->mm) + return -ENODEV; + + ret = vaddr_get_pfn(vaddr, prot, pfn_base); + if (ret) + return ret; + + if (is_invalid_reserved_pfn(*pfn_base)) + return 1; + + if (!lock_cap && current->mm->locked_vm + 1 > limit) { + put_pfn(*pfn_base, prot); + pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n", __func__, + limit << PAGE_SHIFT); + return -ENOMEM; + } + + if (unlikely(disable_hugepages)) { + vfio_lock_acct(1); + return 1; + } + + /* Lock all the consecutive pages from pfn_base */ + for (i = 1, vaddr += PAGE_SIZE; i < npage; i++, vaddr += PAGE_SIZE) { + unsigned long pfn = 0; + + ret = vaddr_get_pfn(vaddr, prot, &pfn); + if (ret) + break; + + if (pfn != *pfn_base + i || is_invalid_reserved_pfn(pfn)) { + put_pfn(pfn, prot); + break; + } + + if (!lock_cap && current->mm->locked_vm + i + 1 > limit) { + put_pfn(pfn, prot); + pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n", + __func__, limit << PAGE_SHIFT); + break; + } + } + + vfio_lock_acct(i); + + return i; +} + +long vfio_unpin_pages(unsigned long pfn, long npage, + int prot, bool do_accounting) +{ + unsigned long unlocked = 0; + long i; + + for (i = 0; i < npage; i++) + unlocked += put_pfn(pfn++, prot); + + if (do_accounting) + vfio_lock_acct(-unlocked); + + return unlocked; +} diff --git a/drivers/vfio/vfio_iommu_common.h b/drivers/vfio/vfio_iommu_common.h new file mode 100644 index 0000000..4738391 --- /dev/null +++ b/drivers/vfio/vfio_iommu_common.h @@ -0,0 +1,30 @@ +/* + * Copyright (C) 2012 Red Hat, Inc. All rights reserved. + * Copyright (C) 2013 Freescale Semiconductor, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _VFIO_IOMMU_COMMON_H +#define _VFIO_IOMMU_COMMON_H + +void vfio_lock_acct_bg(struct work_struct *work); +void vfio_lock_acct(long npage); +bool is_invalid_reserved_pfn(unsigned long pfn); +int put_pfn(unsigned long pfn, int prot); +long vfio_pin_pages(unsigned long vaddr, long npage, int prot, + unsigned long *pfn_base); +long vfio_unpin_pages(unsigned long pfn, long npage, + int prot, bool do_accounting); +#endif diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index a9807de..e9a58fa 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -37,6 +37,7 @@ #include <linux/uaccess.h> #include <linux/vfio.h> #include <linux/workqueue.h> +#include "vfio_iommu_common.h" #define DRIVER_VERSION "0.2" #define DRIVER_AUTHOR "Alex Williamson <alex.williamson@redhat.com>" @@ -48,12 +49,6 @@ module_param_named(allow_unsafe_interrupts, MODULE_PARM_DESC(allow_unsafe_interrupts, "Enable VFIO IOMMU support for on platforms without interrupt remapping support."); -static bool disable_hugepages; -module_param_named(disable_hugepages, - disable_hugepages, bool, S_IRUGO | S_IWUSR); -MODULE_PARM_DESC(disable_hugepages, - "Disable VFIO IOMMU support for IOMMU hugepages."); - struct vfio_iommu { struct iommu_domain *domain; struct mutex lock; @@ -123,205 +118,6 @@ static void vfio_remove_dma(struct vfio_iommu *iommu, struct vfio_dma *old) rb_erase(&old->node, &iommu->dma_list); } -struct vwork { - struct mm_struct *mm; - long npage; - struct work_struct work; -}; - -/* delayed decrement/increment for locked_vm */ -static void vfio_lock_acct_bg(struct work_struct *work) -{ - struct vwork *vwork = container_of(work, struct vwork, work); - struct mm_struct *mm; - - mm = vwork->mm; - down_write(&mm->mmap_sem); - mm->locked_vm += vwork->npage; - up_write(&mm->mmap_sem); - mmput(mm); - kfree(vwork); -} - -static void vfio_lock_acct(long npage) -{ - struct vwork *vwork; - struct mm_struct *mm; - - if (!current->mm || !npage) - return; /* process exited or nothing to do */ - - if (down_write_trylock(¤t->mm->mmap_sem)) { - current->mm->locked_vm += npage; - up_write(¤t->mm->mmap_sem); - return; - } - - /* - * Couldn't get mmap_sem lock, so must setup to update - * mm->locked_vm later. If locked_vm were atomic, we - * wouldn't need this silliness - */ - vwork = kmalloc(sizeof(struct vwork), GFP_KERNEL); - if (!vwork) - return; - mm = get_task_mm(current); - if (!mm) { - kfree(vwork); - return; - } - INIT_WORK(&vwork->work, vfio_lock_acct_bg); - vwork->mm = mm; - vwork->npage = npage; - schedule_work(&vwork->work); -} - -/* - * Some mappings aren't backed by a struct page, for example an mmap'd - * MMIO range for our own or another device. These use a different - * pfn conversion and shouldn't be tracked as locked pages. - */ -static bool is_invalid_reserved_pfn(unsigned long pfn) -{ - if (pfn_valid(pfn)) { - bool reserved; - struct page *tail = pfn_to_page(pfn); - struct page *head = compound_trans_head(tail); - reserved = !!(PageReserved(head)); - if (head != tail) { - /* - * "head" is not a dangling pointer - * (compound_trans_head takes care of that) - * but the hugepage may have been split - * from under us (and we may not hold a - * reference count on the head page so it can - * be reused before we run PageReferenced), so - * we've to check PageTail before returning - * what we just read. - */ - smp_rmb(); - if (PageTail(tail)) - return reserved; - } - return PageReserved(tail); - } - - return true; -} - -static int put_pfn(unsigned long pfn, int prot) -{ - if (!is_invalid_reserved_pfn(pfn)) { - struct page *page = pfn_to_page(pfn); - if (prot & IOMMU_WRITE) - SetPageDirty(page); - put_page(page); - return 1; - } - return 0; -} - -static int vaddr_get_pfn(unsigned long vaddr, int prot, unsigned long *pfn) -{ - struct page *page[1]; - struct vm_area_struct *vma; - int ret = -EFAULT; - - if (get_user_pages_fast(vaddr, 1, !!(prot & IOMMU_WRITE), page) == 1) { - *pfn = page_to_pfn(page[0]); - return 0; - } - - down_read(¤t->mm->mmap_sem); - - vma = find_vma_intersection(current->mm, vaddr, vaddr + 1); - - if (vma && vma->vm_flags & VM_PFNMAP) { - *pfn = ((vaddr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; - if (is_invalid_reserved_pfn(*pfn)) - ret = 0; - } - - up_read(¤t->mm->mmap_sem); - - return ret; -} - -/* - * Attempt to pin pages. We really don't want to track all the pfns and - * the iommu can only map chunks of consecutive pfns anyway, so get the - * first page and all consecutive pages with the same locking. - */ -static long vfio_pin_pages(unsigned long vaddr, long npage, - int prot, unsigned long *pfn_base) -{ - unsigned long limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; - bool lock_cap = capable(CAP_IPC_LOCK); - long ret, i; - - if (!current->mm) - return -ENODEV; - - ret = vaddr_get_pfn(vaddr, prot, pfn_base); - if (ret) - return ret; - - if (is_invalid_reserved_pfn(*pfn_base)) - return 1; - - if (!lock_cap && current->mm->locked_vm + 1 > limit) { - put_pfn(*pfn_base, prot); - pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n", __func__, - limit << PAGE_SHIFT); - return -ENOMEM; - } - - if (unlikely(disable_hugepages)) { - vfio_lock_acct(1); - return 1; - } - - /* Lock all the consecutive pages from pfn_base */ - for (i = 1, vaddr += PAGE_SIZE; i < npage; i++, vaddr += PAGE_SIZE) { - unsigned long pfn = 0; - - ret = vaddr_get_pfn(vaddr, prot, &pfn); - if (ret) - break; - - if (pfn != *pfn_base + i || is_invalid_reserved_pfn(pfn)) { - put_pfn(pfn, prot); - break; - } - - if (!lock_cap && current->mm->locked_vm + i + 1 > limit) { - put_pfn(pfn, prot); - pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n", - __func__, limit << PAGE_SHIFT); - break; - } - } - - vfio_lock_acct(i); - - return i; -} - -static long vfio_unpin_pages(unsigned long pfn, long npage, - int prot, bool do_accounting) -{ - unsigned long unlocked = 0; - long i; - - for (i = 0; i < npage; i++) - unlocked += put_pfn(pfn++, prot); - - if (do_accounting) - vfio_lock_acct(-unlocked); - - return unlocked; -} - static int vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma, dma_addr_t iova, size_t *size) {
Some function defined in vfio_iommu_type1.c were common and we want to use these for FSL IOMMU (PAMU) and iommu-none driver. So some of them are moved to vfio_iommu_common.c I think we can do more of that but we will take this step by step. Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com> --- drivers/vfio/Makefile | 4 +- drivers/vfio/vfio_iommu_common.c | 235 ++++++++++++++++++++++++++++++++++++++ drivers/vfio/vfio_iommu_common.h | 30 +++++ drivers/vfio/vfio_iommu_type1.c | 206 +--------------------------------- 4 files changed, 268 insertions(+), 207 deletions(-) create mode 100644 drivers/vfio/vfio_iommu_common.c create mode 100644 drivers/vfio/vfio_iommu_common.h