diff mbox

[v13,11/22] vfio iommu: Add blocking notifier to notify DMA_UNMAP

Message ID 20161115151950.1e8ab7d6@t450s.home
State New
Headers show

Commit Message

Alex Williamson Nov. 15, 2016, 10:19 p.m. UTC
On Tue, 15 Nov 2016 20:59:54 +0530
Kirti Wankhede <kwankhede@nvidia.com> wrote:

> Added blocking notifier to IOMMU TYPE1 driver to notify vendor drivers
> about DMA_UNMAP.
> Exported two APIs vfio_register_notifier() and vfio_unregister_notifier().
> Notifier should be registered, if external user wants to use
> vfio_pin_pages()/vfio_unpin_pages() APIs to pin/unpin pages.
> Vendor driver should use VFIO_IOMMU_NOTIFY_DMA_UNMAP action to invalidate
> mappings.
> 
> Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
> Signed-off-by: Neo Jia <cjia@nvidia.com>
> Change-Id: I5910d0024d6be87f3e8d3e0ca0eaeaaa0b17f271
> ---
>  drivers/vfio/vfio.c             | 73 +++++++++++++++++++++++++++++++++++++++++
>  drivers/vfio/vfio_iommu_type1.c | 63 +++++++++++++++++++++++++++++------
>  include/linux/vfio.h            | 11 +++++++
>  3 files changed, 137 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
> index 3bf8a01bf67b..fa121d983991 100644
> --- a/drivers/vfio/vfio.c
> +++ b/drivers/vfio/vfio.c
> @@ -1902,6 +1902,79 @@ err_unpin_pages:
>  }
>  EXPORT_SYMBOL(vfio_unpin_pages);
>  
> +int vfio_register_notifier(struct device *dev, struct notifier_block *nb)
> +{
> +	struct vfio_container *container;
> +	struct vfio_group *group;
> +	struct vfio_iommu_driver *driver;
> +	ssize_t ret;
> +
> +	if (!dev || !nb)
> +		return -EINVAL;
> +
> +	group = vfio_group_get_from_dev(dev);
> +	if (IS_ERR(group))
> +		return PTR_ERR(group);
> +
> +	ret = vfio_group_add_container_user(group);
> +	if (ret)
> +		goto err_register_nb;
> +
> +	container = group->container;
> +	down_read(&container->group_lock);
> +
> +	driver = container->iommu_driver;
> +	if (likely(driver && driver->ops->register_notifier))
> +		ret = driver->ops->register_notifier(container->iommu_data, nb);
> +	else
> +		ret = -ENOTTY;
> +
> +	up_read(&container->group_lock);
> +	vfio_group_try_dissolve_container(group);
> +
> +err_register_nb:
> +	vfio_group_put(group);
> +	return ret;
> +}
> +EXPORT_SYMBOL(vfio_register_notifier);
> +
> +int vfio_unregister_notifier(struct device *dev, struct notifier_block *nb)
> +{
> +	struct vfio_container *container;
> +	struct vfio_group *group;
> +	struct vfio_iommu_driver *driver;
> +	ssize_t ret;
> +
> +	if (!dev || !nb)
> +		return -EINVAL;
> +
> +	group = vfio_group_get_from_dev(dev);
> +	if (IS_ERR(group))
> +		return PTR_ERR(group);
> +
> +	ret = vfio_group_add_container_user(group);
> +	if (ret)
> +		goto err_unregister_nb;
> +
> +	container = group->container;
> +	down_read(&container->group_lock);
> +
> +	driver = container->iommu_driver;
> +	if (likely(driver && driver->ops->unregister_notifier))
> +		ret = driver->ops->unregister_notifier(container->iommu_data,
> +						       nb);
> +	else
> +		ret = -ENOTTY;
> +
> +	up_read(&container->group_lock);
> +	vfio_group_try_dissolve_container(group);
> +
> +err_unregister_nb:
> +	vfio_group_put(group);
> +	return ret;
> +}
> +EXPORT_SYMBOL(vfio_unregister_notifier);
> +
>  /**
>   * Module/class support
>   */
> diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
> index 0de7c20f66b1..c45a4822784e 100644
> --- a/drivers/vfio/vfio_iommu_type1.c
> +++ b/drivers/vfio/vfio_iommu_type1.c
> @@ -38,6 +38,7 @@
>  #include <linux/workqueue.h>
>  #include <linux/pid_namespace.h>
>  #include <linux/mdev.h>
> +#include <linux/notifier.h>
>  
>  #define DRIVER_VERSION  "0.2"
>  #define DRIVER_AUTHOR   "Alex Williamson <alex.williamson@redhat.com>"
> @@ -60,6 +61,7 @@ struct vfio_iommu {
>  	struct vfio_domain	*external_domain; /* domain for external user */
>  	struct mutex		lock;
>  	struct rb_root		dma_list;
> +	struct blocking_notifier_head notifier;
>  	bool			v2;
>  	bool			nesting;
>  };
> @@ -571,7 +573,8 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data,
>  
>  	mutex_lock(&iommu->lock);
>  
> -	if (!iommu->external_domain) {
> +	/* Fail if notifier list is empty */
> +	if ((!iommu->external_domain) || (!iommu->notifier.head)) {
>  		ret = -EINVAL;
>  		goto pin_done;
>  	}
> @@ -854,7 +857,28 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
>  		 */
>  		if (dma->task->mm != current->mm)
>  			break;
> +
>  		unmapped += dma->size;
> +
> +		if (iommu->external_domain && !RB_EMPTY_ROOT(&dma->pfn_list)) {
> +			struct vfio_iommu_type1_dma_unmap nb_unmap;
> +
> +			nb_unmap.iova = dma->iova;
> +			nb_unmap.size = dma->size;
> +
> +			/*
> +			 * Notifier callback would call vfio_unpin_pages() which
> +			 * would acquire iommu->lock. Release lock here and
> +			 * reacquire it again.
> +			 */
> +			mutex_unlock(&iommu->lock);
> +			blocking_notifier_call_chain(&iommu->notifier,
> +						    VFIO_IOMMU_NOTIFY_DMA_UNMAP,
> +						    &nb_unmap);
> +			mutex_lock(&iommu->lock);
> +			if (WARN_ON(!RB_EMPTY_ROOT(&dma->pfn_list)))
> +				break;
> +		}


Why exactly do we need to notify per vfio_dma rather than per unmap
request?  If we do the latter we can send the notify first, limiting us
to races where a page is pinned between the notify and the locking,
whereas here, even our dma pointer is suspect once we re-acquire the
lock, we don't technically know if another unmap could have removed
that already.  Perhaps something like this (untested):



>  		vfio_remove_dma(iommu, dma);
>  	}
>  
> @@ -1439,6 +1463,7 @@ static void *vfio_iommu_type1_open(unsigned long arg)
>  	INIT_LIST_HEAD(&iommu->domain_list);
>  	iommu->dma_list = RB_ROOT;
>  	mutex_init(&iommu->lock);
> +	BLOCKING_INIT_NOTIFIER_HEAD(&iommu->notifier);
>  
>  	return iommu;
>  }
> @@ -1574,16 +1599,34 @@ static long vfio_iommu_type1_ioctl(void *iommu_data,
>  	return -ENOTTY;
>  }
>  
> +static int vfio_iommu_type1_register_notifier(void *iommu_data,
> +					      struct notifier_block *nb)
> +{
> +	struct vfio_iommu *iommu = iommu_data;
> +
> +	return blocking_notifier_chain_register(&iommu->notifier, nb);
> +}
> +
> +static int vfio_iommu_type1_unregister_notifier(void *iommu_data,
> +						struct notifier_block *nb)
> +{
> +	struct vfio_iommu *iommu = iommu_data;
> +
> +	return blocking_notifier_chain_unregister(&iommu->notifier, nb);
> +}
> +
>  static const struct vfio_iommu_driver_ops vfio_iommu_driver_ops_type1 = {
> -	.name		= "vfio-iommu-type1",
> -	.owner		= THIS_MODULE,
> -	.open		= vfio_iommu_type1_open,
> -	.release	= vfio_iommu_type1_release,
> -	.ioctl		= vfio_iommu_type1_ioctl,
> -	.attach_group	= vfio_iommu_type1_attach_group,
> -	.detach_group	= vfio_iommu_type1_detach_group,
> -	.pin_pages	= vfio_iommu_type1_pin_pages,
> -	.unpin_pages	= vfio_iommu_type1_unpin_pages,
> +	.name			= "vfio-iommu-type1",
> +	.owner			= THIS_MODULE,
> +	.open			= vfio_iommu_type1_open,
> +	.release		= vfio_iommu_type1_release,
> +	.ioctl			= vfio_iommu_type1_ioctl,
> +	.attach_group		= vfio_iommu_type1_attach_group,
> +	.detach_group		= vfio_iommu_type1_detach_group,
> +	.pin_pages		= vfio_iommu_type1_pin_pages,
> +	.unpin_pages		= vfio_iommu_type1_unpin_pages,
> +	.register_notifier	= vfio_iommu_type1_register_notifier,
> +	.unregister_notifier	= vfio_iommu_type1_unregister_notifier,
>  };
>  
>  static int __init vfio_iommu_type1_init(void)
> diff --git a/include/linux/vfio.h b/include/linux/vfio.h
> index 420cdc928786..997442398c09 100644
> --- a/include/linux/vfio.h
> +++ b/include/linux/vfio.h
> @@ -80,6 +80,10 @@ struct vfio_iommu_driver_ops {
>  				     unsigned long *phys_pfn);
>  	int		(*unpin_pages)(void *iommu_data,
>  				       unsigned long *user_pfn, int npage);
> +	int		(*register_notifier)(void *iommu_data,
> +					     struct notifier_block *nb);
> +	int		(*unregister_notifier)(void *iommu_data,
> +					       struct notifier_block *nb);
>  };
>  
>  extern int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops);
> @@ -139,6 +143,13 @@ extern int vfio_pin_pages(struct device *dev, unsigned long *user_pfn,
>  extern int vfio_unpin_pages(struct device *dev, unsigned long *user_pfn,
>  			    int npage);
>  
> +#define VFIO_IOMMU_NOTIFY_DMA_UNMAP	1
> +
> +extern int vfio_register_notifier(struct device *dev,
> +				  struct notifier_block *nb);
> +
> +extern int vfio_unregister_notifier(struct device *dev,
> +				    struct notifier_block *nb);
>  /*
>   * IRQfd - generic
>   */

Comments

Kirti Wankhede Nov. 16, 2016, 2:46 a.m. UTC | #1
On 11/16/2016 3:49 AM, Alex Williamson wrote:
> On Tue, 15 Nov 2016 20:59:54 +0530
> Kirti Wankhede <kwankhede@nvidia.com> wrote:
> 
...

>> @@ -854,7 +857,28 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
>>  		 */
>>  		if (dma->task->mm != current->mm)
>>  			break;
>> +
>>  		unmapped += dma->size;
>> +
>> +		if (iommu->external_domain && !RB_EMPTY_ROOT(&dma->pfn_list)) {
>> +			struct vfio_iommu_type1_dma_unmap nb_unmap;
>> +
>> +			nb_unmap.iova = dma->iova;
>> +			nb_unmap.size = dma->size;
>> +
>> +			/*
>> +			 * Notifier callback would call vfio_unpin_pages() which
>> +			 * would acquire iommu->lock. Release lock here and
>> +			 * reacquire it again.
>> +			 */
>> +			mutex_unlock(&iommu->lock);
>> +			blocking_notifier_call_chain(&iommu->notifier,
>> +						    VFIO_IOMMU_NOTIFY_DMA_UNMAP,
>> +						    &nb_unmap);
>> +			mutex_lock(&iommu->lock);
>> +			if (WARN_ON(!RB_EMPTY_ROOT(&dma->pfn_list)))
>> +				break;
>> +		}
> 
> 
> Why exactly do we need to notify per vfio_dma rather than per unmap
> request?  If we do the latter we can send the notify first, limiting us
> to races where a page is pinned between the notify and the locking,
> whereas here, even our dma pointer is suspect once we re-acquire the
> lock, we don't technically know if another unmap could have removed
> that already.  Perhaps something like this (untested):
> 

There are checks to validate unmap request, like v2 check and who is
calling unmap and is it allowed for that task to unmap. Before these
checks its not sure that unmap region range which asked for would be
unmapped all. Notify call should be at the place where its sure that the
range provided to notify call is definitely going to be removed. My
change do that.

Thanks,
Kirti


> diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
> index ee9a680..8504501 100644
> --- a/drivers/vfio/vfio_iommu_type1.c
> +++ b/drivers/vfio/vfio_iommu_type1.c
> @@ -785,6 +785,8 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
>  	struct vfio_dma *dma;
>  	size_t unmapped = 0;
>  	int ret = 0;
> +	struct vfio_iommu_type1_dma_unmap nb_unmap = { .iova = unmap->iova,
> +						       .size = unmap->size };
>  
>  	mask = ((uint64_t)1 << __ffs(vfio_pgsize_bitmap(iommu))) - 1;
>  
> @@ -795,6 +797,14 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
>  
>  	WARN_ON(mask & PAGE_MASK);
>  
> +	/*
> +	 * Notify anyone (mdev vendor drivers) to invalidate and unmap
> +	 * iovas within the range we're about to unmap.  Vendor drivers MUST
> +	 * unpin pages in response to an invalidation.
> +	 */
> +	blocking_notifier_call_chain(&iommu->notifier,
> +				     VFIO_IOMMU_NOTIFY_DMA_UNMAP, &nb_unmap);
> +
>  	mutex_lock(&iommu->lock);
>  
>  	/*
> @@ -853,25 +863,8 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
>  
>  		unmapped += dma->size;
>  
> -		if (iommu->external_domain && !RB_EMPTY_ROOT(&dma->pfn_list)) {
> -			struct vfio_iommu_type1_dma_unmap nb_unmap;
> +		WARN_ON(!RB_EMPTY_ROOT(&dma->pfn_list));
>  
> -			nb_unmap.iova = dma->iova;
> -			nb_unmap.size = dma->size;
> -
> -			/*
> -			 * Notifier callback would call vfio_unpin_pages() which
> -			 * would acquire iommu->lock. Release lock here and
> -			 * reacquire it again.
> -			 */
> -			mutex_unlock(&iommu->lock);
> -			blocking_notifier_call_chain(&iommu->notifier,
> -						    VFIO_IOMMU_NOTIFY_DMA_UNMAP,
> -						    &nb_unmap);
> -			mutex_lock(&iommu->lock);
> -			if (WARN_ON(!RB_EMPTY_ROOT(&dma->pfn_list)))
> -				break;
> -		}
>  		vfio_remove_dma(iommu, dma);
>  	}
>  
> 
> 
>>  		vfio_remove_dma(iommu, dma);
>>  	}
>>
diff mbox

Patch

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index ee9a680..8504501 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -785,6 +785,8 @@  static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
 	struct vfio_dma *dma;
 	size_t unmapped = 0;
 	int ret = 0;
+	struct vfio_iommu_type1_dma_unmap nb_unmap = { .iova = unmap->iova,
+						       .size = unmap->size };
 
 	mask = ((uint64_t)1 << __ffs(vfio_pgsize_bitmap(iommu))) - 1;
 
@@ -795,6 +797,14 @@  static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
 
 	WARN_ON(mask & PAGE_MASK);
 
+	/*
+	 * Notify anyone (mdev vendor drivers) to invalidate and unmap
+	 * iovas within the range we're about to unmap.  Vendor drivers MUST
+	 * unpin pages in response to an invalidation.
+	 */
+	blocking_notifier_call_chain(&iommu->notifier,
+				     VFIO_IOMMU_NOTIFY_DMA_UNMAP, &nb_unmap);
+
 	mutex_lock(&iommu->lock);
 
 	/*
@@ -853,25 +863,8 @@  static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
 
 		unmapped += dma->size;
 
-		if (iommu->external_domain && !RB_EMPTY_ROOT(&dma->pfn_list)) {
-			struct vfio_iommu_type1_dma_unmap nb_unmap;
+		WARN_ON(!RB_EMPTY_ROOT(&dma->pfn_list));
 
-			nb_unmap.iova = dma->iova;
-			nb_unmap.size = dma->size;
-
-			/*
-			 * Notifier callback would call vfio_unpin_pages() which
-			 * would acquire iommu->lock. Release lock here and
-			 * reacquire it again.
-			 */
-			mutex_unlock(&iommu->lock);
-			blocking_notifier_call_chain(&iommu->notifier,
-						    VFIO_IOMMU_NOTIFY_DMA_UNMAP,
-						    &nb_unmap);
-			mutex_lock(&iommu->lock);
-			if (WARN_ON(!RB_EMPTY_ROOT(&dma->pfn_list)))
-				break;
-		}
 		vfio_remove_dma(iommu, dma);
 	}