diff mbox

[v8,6/6] Add common functions for SET_IRQS and GET_REGION_INFO ioctls

Message ID 1476131317-358-7-git-send-email-kwankhede@nvidia.com
State New
Headers show

Commit Message

Kirti Wankhede Oct. 10, 2016, 8:28 p.m. UTC
Add common functions for SET_IRQS and to add capability buffer for
GET_REGION_INFO ioctls

Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
Signed-off-by: Neo Jia <cjia@nvidia.com>
Change-Id: Id9e976a2c08b9b2b37da77dac4365ae8f6024b4a
---
 drivers/vfio/pci/vfio_pci.c | 103 +++++++++++++++------------------------
 drivers/vfio/vfio.c         | 116 ++++++++++++++++++++++++++++++++++++++++++++
 include/linux/vfio.h        |   7 +++
 3 files changed, 162 insertions(+), 64 deletions(-)

Comments

Alex Williamson Oct. 11, 2016, 11:18 p.m. UTC | #1
On Tue, 11 Oct 2016 01:58:37 +0530
Kirti Wankhede <kwankhede@nvidia.com> wrote:

> Add common functions for SET_IRQS and to add capability buffer for
> GET_REGION_INFO ioctls

Clearly should be two (or more) separate patches since SET_IRQS and
REGION_INFO are unrelated changes.  Each of the two capabilities handled
could possibly be separate patches as well.

 
> Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
> Signed-off-by: Neo Jia <cjia@nvidia.com>
> Change-Id: Id9e976a2c08b9b2b37da77dac4365ae8f6024b4a
> ---
>  drivers/vfio/pci/vfio_pci.c | 103 +++++++++++++++------------------------
>  drivers/vfio/vfio.c         | 116 ++++++++++++++++++++++++++++++++++++++++++++
>  include/linux/vfio.h        |   7 +++
>  3 files changed, 162 insertions(+), 64 deletions(-)
> 
> diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
> index 188b1ff03f5f..f312cbb0eebc 100644
> --- a/drivers/vfio/pci/vfio_pci.c
> +++ b/drivers/vfio/pci/vfio_pci.c
> @@ -478,12 +478,12 @@ static int vfio_pci_for_each_slot_or_bus(struct pci_dev *pdev,
>  }
>  
>  static int msix_sparse_mmap_cap(struct vfio_pci_device *vdev,
> +				struct vfio_region_info *info,
>  				struct vfio_info_cap *caps)
>  {
> -	struct vfio_info_cap_header *header;
>  	struct vfio_region_info_cap_sparse_mmap *sparse;
>  	size_t end, size;
> -	int nr_areas = 2, i = 0;
> +	int nr_areas = 2, i = 0, ret;
>  
>  	end = pci_resource_len(vdev->pdev, vdev->msix_bar);
>  
> @@ -494,13 +494,10 @@ static int msix_sparse_mmap_cap(struct vfio_pci_device *vdev,
>  
>  	size = sizeof(*sparse) + (nr_areas * sizeof(*sparse->areas));
>  
> -	header = vfio_info_cap_add(caps, size,
> -				   VFIO_REGION_INFO_CAP_SPARSE_MMAP, 1);
> -	if (IS_ERR(header))
> -		return PTR_ERR(header);
> +	sparse = kzalloc(size, GFP_KERNEL);
> +	if (!sparse)
> +		return -ENOMEM;
>  
> -	sparse = container_of(header,
> -			      struct vfio_region_info_cap_sparse_mmap, header);
>  	sparse->nr_areas = nr_areas;
>  
>  	if (vdev->msix_offset & PAGE_MASK) {
> @@ -516,24 +513,14 @@ static int msix_sparse_mmap_cap(struct vfio_pci_device *vdev,
>  		i++;
>  	}
>  
> -	return 0;
> -}
> -
> -static int region_type_cap(struct vfio_pci_device *vdev,
> -			   struct vfio_info_cap *caps,
> -			   unsigned int type, unsigned int subtype)
> -{
> -	struct vfio_info_cap_header *header;
> -	struct vfio_region_info_cap_type *cap;
> +	info->flags |= VFIO_REGION_INFO_FLAG_CAPS;
>  
> -	header = vfio_info_cap_add(caps, sizeof(*cap),
> -				   VFIO_REGION_INFO_CAP_TYPE, 1);
> -	if (IS_ERR(header))
> -		return PTR_ERR(header);
> +	ret = vfio_info_add_capability(info, caps,
> +				      VFIO_REGION_INFO_CAP_SPARSE_MMAP, sparse);
> +	kfree(sparse);
>  
> -	cap = container_of(header, struct vfio_region_info_cap_type, header);
> -	cap->type = type;
> -	cap->subtype = subtype;
> +	if (ret)
> +		return ret;
>  
>  	return 0;

Just: return ret;

>  }
> @@ -628,7 +615,8 @@ static long vfio_pci_ioctl(void *device_data,
>  			    IORESOURCE_MEM && info.size >= PAGE_SIZE) {
>  				info.flags |= VFIO_REGION_INFO_FLAG_MMAP;
>  				if (info.index == vdev->msix_bar) {
> -					ret = msix_sparse_mmap_cap(vdev, &caps);
> +					ret = msix_sparse_mmap_cap(vdev, &info,
> +								   &caps);
>  					if (ret)
>  						return ret;
>  				}
> @@ -676,6 +664,9 @@ static long vfio_pci_ioctl(void *device_data,
>  
>  			break;
>  		default:
> +		{
> +			struct vfio_region_info_cap_type cap_type;
> +
>  			if (info.index >=
>  			    VFIO_PCI_NUM_REGIONS + vdev->num_regions)
>  				return -EINVAL;
> @@ -684,29 +675,26 @@ static long vfio_pci_ioctl(void *device_data,
>  
>  			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
>  			info.size = vdev->region[i].size;
> -			info.flags = vdev->region[i].flags;
> +			info.flags = vdev->region[i].flags |
> +				     VFIO_REGION_INFO_FLAG_CAPS;
>  
> -			ret = region_type_cap(vdev, &caps,
> -					      vdev->region[i].type,
> -					      vdev->region[i].subtype);
> +			cap_type.type = vdev->region[i].type;
> +			cap_type.subtype = vdev->region[i].subtype;
> +
> +			ret = vfio_info_add_capability(&info, &caps,
> +						      VFIO_REGION_INFO_CAP_TYPE,
> +						      &cap_type);
>  			if (ret)
>  				return ret;
> +
> +		}
>  		}
>  
> -		if (caps.size) {
> -			info.flags |= VFIO_REGION_INFO_FLAG_CAPS;
> -			if (info.argsz < sizeof(info) + caps.size) {
> -				info.argsz = sizeof(info) + caps.size;
> -				info.cap_offset = 0;
> -			} else {
> -				vfio_info_cap_shift(&caps, sizeof(info));
> -				if (copy_to_user((void __user *)arg +
> -						  sizeof(info), caps.buf,
> -						  caps.size)) {
> -					kfree(caps.buf);
> -					return -EFAULT;
> -				}
> -				info.cap_offset = sizeof(info);
> +		if (info.cap_offset) {
> +			if (copy_to_user((void __user *)arg + info.cap_offset,
> +					 caps.buf, caps.size)) {
> +				kfree(caps.buf);
> +				return -EFAULT;
>  			}
>  
>  			kfree(caps.buf);
> @@ -754,35 +742,22 @@ static long vfio_pci_ioctl(void *device_data,
>  	} else if (cmd == VFIO_DEVICE_SET_IRQS) {
>  		struct vfio_irq_set hdr;
>  		u8 *data = NULL;
> -		int ret = 0;
> +		int max, ret = 0, data_size = 0;
>  
>  		minsz = offsetofend(struct vfio_irq_set, count);
>  
>  		if (copy_from_user(&hdr, (void __user *)arg, minsz))
>  			return -EFAULT;
>  
> -		if (hdr.argsz < minsz || hdr.index >= VFIO_PCI_NUM_IRQS ||
> -		    hdr.flags & ~(VFIO_IRQ_SET_DATA_TYPE_MASK |
> -				  VFIO_IRQ_SET_ACTION_TYPE_MASK))
> -			return -EINVAL;
> -
> -		if (!(hdr.flags & VFIO_IRQ_SET_DATA_NONE)) {
> -			size_t size;
> -			int max = vfio_pci_get_irq_count(vdev, hdr.index);
> +		max = vfio_pci_get_irq_count(vdev, hdr.index);
>  
> -			if (hdr.flags & VFIO_IRQ_SET_DATA_BOOL)
> -				size = sizeof(uint8_t);
> -			else if (hdr.flags & VFIO_IRQ_SET_DATA_EVENTFD)
> -				size = sizeof(int32_t);
> -			else
> -				return -EINVAL;
> -
> -			if (hdr.argsz - minsz < hdr.count * size ||
> -			    hdr.start >= max || hdr.start + hdr.count > max)
> -				return -EINVAL;


vfio_platform has very similar code that would also need to be updated.

> +		ret = vfio_set_irqs_validate_and_prepare(&hdr, max, &data_size);
> +		if (ret)
> +			return ret;
>  
> +		if (data_size) {
>  			data = memdup_user((void __user *)(arg + minsz),
> -					   hdr.count * size);
> +					    data_size);
>  			if (IS_ERR(data))
>  				return PTR_ERR(data);
>  		}
> @@ -790,7 +765,7 @@ static long vfio_pci_ioctl(void *device_data,
>  		mutex_lock(&vdev->igate);
>  
>  		ret = vfio_pci_set_irqs_ioctl(vdev, hdr.flags, hdr.index,
> -					      hdr.start, hdr.count, data);
> +				hdr.start, hdr.count, data);

White space bogosity.

>  
>  		mutex_unlock(&vdev->igate);
>  		kfree(data);
> diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
> index e3e342861e04..0185d5fb2c85 100644
> --- a/drivers/vfio/vfio.c
> +++ b/drivers/vfio/vfio.c
> @@ -1782,6 +1782,122 @@ void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset)
>  }
>  EXPORT_SYMBOL_GPL(vfio_info_cap_shift);
>  
> +static int sparse_mmap_cap(struct vfio_info_cap *caps, void *cap_type)
> +{
> +	struct vfio_info_cap_header *header;
> +	struct vfio_region_info_cap_sparse_mmap *sparse_cap, *sparse = cap_type;
> +	size_t size;
> +
> +	size = sizeof(*sparse) + sparse->nr_areas *  sizeof(*sparse->areas);
> +	header = vfio_info_cap_add(caps, size,
> +				   VFIO_REGION_INFO_CAP_SPARSE_MMAP, 1);
> +	if (IS_ERR(header))
> +		return PTR_ERR(header);
> +
> +	sparse_cap = container_of(header,
> +			struct vfio_region_info_cap_sparse_mmap, header);
> +	sparse_cap->nr_areas = sparse->nr_areas;
> +	memcpy(sparse_cap->areas, sparse->areas,
> +	       sparse->nr_areas * sizeof(*sparse->areas));
> +	return 0;
> +}
> +
> +static int region_type_cap(struct vfio_info_cap *caps, void *cap_type)
> +{
> +	struct vfio_info_cap_header *header;
> +	struct vfio_region_info_cap_type *type_cap, *cap = cap_type;
> +
> +	header = vfio_info_cap_add(caps, sizeof(*cap),
> +				   VFIO_REGION_INFO_CAP_TYPE, 1);
> +	if (IS_ERR(header))
> +		return PTR_ERR(header);
> +
> +	type_cap = container_of(header, struct vfio_region_info_cap_type,
> +				header);
> +	type_cap->type = cap->type;
> +	type_cap->subtype = cap->subtype;
> +	return 0;
> +}

Why can't we just do a memcpy of all the data past the header?  Do we
need separate functions for these?

vfio_info_cap_add() should now be static and unexported, right?

> +
> +int vfio_info_add_capability(struct vfio_region_info *info,
> +			     struct vfio_info_cap *caps,
> +			     int cap_type_id,
> +			     void *cap_type)
> +{
> +	int ret;
> +
> +	if (!(info->flags & VFIO_REGION_INFO_FLAG_CAPS) || !cap_type)

Why make the caller set flags, seems rather arbitrary since this
function controls the cap_offset and whether we actually end up copying
the data.

> +		return 0;
> +
> +	switch (cap_type_id) {
> +	case VFIO_REGION_INFO_CAP_SPARSE_MMAP:
> +		ret = sparse_mmap_cap(caps, cap_type);
> +		if (ret)
> +			return ret;
> +		break;
> +
> +	case VFIO_REGION_INFO_CAP_TYPE:
> +		ret = region_type_cap(caps, cap_type);
> +		if (ret)
> +			return ret;
> +		break;
> +	default:
> +		return -EINVAL;
> +	}
> +
> +	if (caps->size) {
> +		if (info->argsz < sizeof(*info) + caps->size) {
> +			info->argsz = sizeof(*info) + caps->size;
> +			info->cap_offset = 0;
> +		} else {
> +			vfio_info_cap_shift(caps, sizeof(*info));
> +			info->cap_offset = sizeof(*info);
> +		}
> +	}
> +	return 0;
> +}
> +EXPORT_SYMBOL(vfio_info_add_capability);
> +
> +int vfio_set_irqs_validate_and_prepare(struct vfio_irq_set *hdr, int num_irqs,
> +				       int *data_size)
> +{
> +	unsigned long minsz;
> +
> +	minsz = offsetofend(struct vfio_irq_set, count);
> +
> +	if ((hdr->argsz < minsz) || (hdr->index >= VFIO_PCI_NUM_IRQS) ||
> +	    (hdr->flags & ~(VFIO_IRQ_SET_DATA_TYPE_MASK |
> +				VFIO_IRQ_SET_ACTION_TYPE_MASK)))
> +		return -EINVAL;
> +
> +	if (data_size)
> +		*data_size = 0;
> +
> +	if (!(hdr->flags & VFIO_IRQ_SET_DATA_NONE)) {
> +		size_t size;
> +
> +		if (hdr->flags & VFIO_IRQ_SET_DATA_BOOL)
> +			size = sizeof(uint8_t);
> +		else if (hdr->flags & VFIO_IRQ_SET_DATA_EVENTFD)
> +			size = sizeof(int32_t);
> +		else
> +			return -EINVAL;
> +
> +		if ((hdr->argsz - minsz < hdr->count * size) ||
> +		    (hdr->start >= num_irqs) ||
> +		    (hdr->start + hdr->count > num_irqs))
> +			return -EINVAL;
> +
> +		if (!data_size)
> +			return -EINVAL;
> +
> +		*data_size = hdr->count * size;
> +	}
> +
> +	return 0;
> +}
> +EXPORT_SYMBOL(vfio_set_irqs_validate_and_prepare);
> +
>  static struct vfio_group *vfio_group_from_dev(struct device *dev)
>  {
>  	struct vfio_device *device;
> diff --git a/include/linux/vfio.h b/include/linux/vfio.h
> index 0bd25ba6223d..5641dab72ded 100644
> --- a/include/linux/vfio.h
> +++ b/include/linux/vfio.h
> @@ -108,6 +108,13 @@ extern struct vfio_info_cap_header *vfio_info_cap_add(
>  		struct vfio_info_cap *caps, size_t size, u16 id, u16 version);
>  extern void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset);
>  
> +extern int vfio_info_add_capability(struct vfio_region_info *info,
> +				    struct vfio_info_cap *caps,
> +				    int cap_type_id, void *cap_type);
> +
> +extern int vfio_set_irqs_validate_and_prepare(struct vfio_irq_set *hdr,
> +					      int num_irqs, int *data_size);
> +
>  struct pci_dev;
>  #ifdef CONFIG_EEH
>  extern void vfio_spapr_pci_eeh_open(struct pci_dev *pdev);
Kirti Wankhede Oct. 12, 2016, 7:37 p.m. UTC | #2
On 10/12/2016 4:48 AM, Alex Williamson wrote:
> On Tue, 11 Oct 2016 01:58:37 +0530
> Kirti Wankhede <kwankhede@nvidia.com> wrote:
> 
>> Add common functions for SET_IRQS and to add capability buffer for
>> GET_REGION_INFO ioctls
> 
> Clearly should be two (or more) separate patches since SET_IRQS and
> REGION_INFO are unrelated changes.  Each of the two capabilities handled
> could possibly be separate patches as well.
> 

Ok. I'll have the two separated.

>  
...

>> @@ -754,35 +742,22 @@ static long vfio_pci_ioctl(void *device_data,
>>  	} else if (cmd == VFIO_DEVICE_SET_IRQS) {
>>  		struct vfio_irq_set hdr;
>>  		u8 *data = NULL;
>> -		int ret = 0;
>> +		int max, ret = 0, data_size = 0;
>>  
>>  		minsz = offsetofend(struct vfio_irq_set, count);
>>  
>>  		if (copy_from_user(&hdr, (void __user *)arg, minsz))
>>  			return -EFAULT;
>>  
>> -		if (hdr.argsz < minsz || hdr.index >= VFIO_PCI_NUM_IRQS ||
>> -		    hdr.flags & ~(VFIO_IRQ_SET_DATA_TYPE_MASK |
>> -				  VFIO_IRQ_SET_ACTION_TYPE_MASK))
>> -			return -EINVAL;
>> -
>> -		if (!(hdr.flags & VFIO_IRQ_SET_DATA_NONE)) {
>> -			size_t size;
>> -			int max = vfio_pci_get_irq_count(vdev, hdr.index);
>> +		max = vfio_pci_get_irq_count(vdev, hdr.index);
>>  
>> -			if (hdr.flags & VFIO_IRQ_SET_DATA_BOOL)
>> -				size = sizeof(uint8_t);
>> -			else if (hdr.flags & VFIO_IRQ_SET_DATA_EVENTFD)
>> -				size = sizeof(int32_t);
>> -			else
>> -				return -EINVAL;
>> -
>> -			if (hdr.argsz - minsz < hdr.count * size ||
>> -			    hdr.start >= max || hdr.start + hdr.count > max)
>> -				return -EINVAL;
> 
> 
> vfio_platform has very similar code that would also need to be updated.
>

Ok. Thanks for pointing that out. I'll update that too.


>> +		ret = vfio_set_irqs_validate_and_prepare(&hdr, max, &data_size);
>> +		if (ret)
>> +			return ret;
>>  
>> +		if (data_size) {
>>  			data = memdup_user((void __user *)(arg + minsz),
>> -					   hdr.count * size);
>> +					    data_size);
>>  			if (IS_ERR(data))
>>  				return PTR_ERR(data);
>>  		}
>> @@ -790,7 +765,7 @@ static long vfio_pci_ioctl(void *device_data,
>>  		mutex_lock(&vdev->igate);
>>  
>>  		ret = vfio_pci_set_irqs_ioctl(vdev, hdr.flags, hdr.index,
>> -					      hdr.start, hdr.count, data);
>> +				hdr.start, hdr.count, data);
> 
> White space bogosity.
> 
>>  
>>  		mutex_unlock(&vdev->igate);
>>  		kfree(data);
>> diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
>> index e3e342861e04..0185d5fb2c85 100644
>> --- a/drivers/vfio/vfio.c
>> +++ b/drivers/vfio/vfio.c
>> @@ -1782,6 +1782,122 @@ void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset)
>>  }
>>  EXPORT_SYMBOL_GPL(vfio_info_cap_shift);
>>  
>> +static int sparse_mmap_cap(struct vfio_info_cap *caps, void *cap_type)
>> +{
>> +	struct vfio_info_cap_header *header;
>> +	struct vfio_region_info_cap_sparse_mmap *sparse_cap, *sparse = cap_type;
>> +	size_t size;
>> +
>> +	size = sizeof(*sparse) + sparse->nr_areas *  sizeof(*sparse->areas);
>> +	header = vfio_info_cap_add(caps, size,
>> +				   VFIO_REGION_INFO_CAP_SPARSE_MMAP, 1);
>> +	if (IS_ERR(header))
>> +		return PTR_ERR(header);
>> +
>> +	sparse_cap = container_of(header,
>> +			struct vfio_region_info_cap_sparse_mmap, header);
>> +	sparse_cap->nr_areas = sparse->nr_areas;
>> +	memcpy(sparse_cap->areas, sparse->areas,
>> +	       sparse->nr_areas * sizeof(*sparse->areas));
>> +	return 0;
>> +}
>> +
>> +static int region_type_cap(struct vfio_info_cap *caps, void *cap_type)
>> +{
>> +	struct vfio_info_cap_header *header;
>> +	struct vfio_region_info_cap_type *type_cap, *cap = cap_type;
>> +
>> +	header = vfio_info_cap_add(caps, sizeof(*cap),
>> +				   VFIO_REGION_INFO_CAP_TYPE, 1);
>> +	if (IS_ERR(header))
>> +		return PTR_ERR(header);
>> +
>> +	type_cap = container_of(header, struct vfio_region_info_cap_type,
>> +				header);
>> +	type_cap->type = cap->type;
>> +	type_cap->subtype = cap->subtype;
>> +	return 0;
>> +}
> 
> Why can't we just do a memcpy of all the data past the header?  Do we
> need separate functions for these?
> 

In case of sparse_cap, data past header is variable, depends on
nr_areas. For region_type_cap, data is fixed. For both capabilities,
structures are different and id are different. I think we need seperate
functions.

> vfio_info_cap_add() should now be static and unexported, right?
> 

Yes.

>> +
>> +int vfio_info_add_capability(struct vfio_region_info *info,
>> +			     struct vfio_info_cap *caps,
>> +			     int cap_type_id,
>> +			     void *cap_type)
>> +{
>> +	int ret;
>> +
>> +	if (!(info->flags & VFIO_REGION_INFO_FLAG_CAPS) || !cap_type)
> 
> Why make the caller set flags, seems rather arbitrary since this
> function controls the cap_offset and whether we actually end up copying
> the data.
> 

Kept this flag to be set at caller side so that if caller sets this flag
it should also fill cap_type.
Yes, it could be moved in here, so in that case sanity check will be
only on !cap_type and based on this cap_type flag would be set.

Thanks,
Kirti
diff mbox

Patch

diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index 188b1ff03f5f..f312cbb0eebc 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -478,12 +478,12 @@  static int vfio_pci_for_each_slot_or_bus(struct pci_dev *pdev,
 }
 
 static int msix_sparse_mmap_cap(struct vfio_pci_device *vdev,
+				struct vfio_region_info *info,
 				struct vfio_info_cap *caps)
 {
-	struct vfio_info_cap_header *header;
 	struct vfio_region_info_cap_sparse_mmap *sparse;
 	size_t end, size;
-	int nr_areas = 2, i = 0;
+	int nr_areas = 2, i = 0, ret;
 
 	end = pci_resource_len(vdev->pdev, vdev->msix_bar);
 
@@ -494,13 +494,10 @@  static int msix_sparse_mmap_cap(struct vfio_pci_device *vdev,
 
 	size = sizeof(*sparse) + (nr_areas * sizeof(*sparse->areas));
 
-	header = vfio_info_cap_add(caps, size,
-				   VFIO_REGION_INFO_CAP_SPARSE_MMAP, 1);
-	if (IS_ERR(header))
-		return PTR_ERR(header);
+	sparse = kzalloc(size, GFP_KERNEL);
+	if (!sparse)
+		return -ENOMEM;
 
-	sparse = container_of(header,
-			      struct vfio_region_info_cap_sparse_mmap, header);
 	sparse->nr_areas = nr_areas;
 
 	if (vdev->msix_offset & PAGE_MASK) {
@@ -516,24 +513,14 @@  static int msix_sparse_mmap_cap(struct vfio_pci_device *vdev,
 		i++;
 	}
 
-	return 0;
-}
-
-static int region_type_cap(struct vfio_pci_device *vdev,
-			   struct vfio_info_cap *caps,
-			   unsigned int type, unsigned int subtype)
-{
-	struct vfio_info_cap_header *header;
-	struct vfio_region_info_cap_type *cap;
+	info->flags |= VFIO_REGION_INFO_FLAG_CAPS;
 
-	header = vfio_info_cap_add(caps, sizeof(*cap),
-				   VFIO_REGION_INFO_CAP_TYPE, 1);
-	if (IS_ERR(header))
-		return PTR_ERR(header);
+	ret = vfio_info_add_capability(info, caps,
+				      VFIO_REGION_INFO_CAP_SPARSE_MMAP, sparse);
+	kfree(sparse);
 
-	cap = container_of(header, struct vfio_region_info_cap_type, header);
-	cap->type = type;
-	cap->subtype = subtype;
+	if (ret)
+		return ret;
 
 	return 0;
 }
@@ -628,7 +615,8 @@  static long vfio_pci_ioctl(void *device_data,
 			    IORESOURCE_MEM && info.size >= PAGE_SIZE) {
 				info.flags |= VFIO_REGION_INFO_FLAG_MMAP;
 				if (info.index == vdev->msix_bar) {
-					ret = msix_sparse_mmap_cap(vdev, &caps);
+					ret = msix_sparse_mmap_cap(vdev, &info,
+								   &caps);
 					if (ret)
 						return ret;
 				}
@@ -676,6 +664,9 @@  static long vfio_pci_ioctl(void *device_data,
 
 			break;
 		default:
+		{
+			struct vfio_region_info_cap_type cap_type;
+
 			if (info.index >=
 			    VFIO_PCI_NUM_REGIONS + vdev->num_regions)
 				return -EINVAL;
@@ -684,29 +675,26 @@  static long vfio_pci_ioctl(void *device_data,
 
 			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
 			info.size = vdev->region[i].size;
-			info.flags = vdev->region[i].flags;
+			info.flags = vdev->region[i].flags |
+				     VFIO_REGION_INFO_FLAG_CAPS;
 
-			ret = region_type_cap(vdev, &caps,
-					      vdev->region[i].type,
-					      vdev->region[i].subtype);
+			cap_type.type = vdev->region[i].type;
+			cap_type.subtype = vdev->region[i].subtype;
+
+			ret = vfio_info_add_capability(&info, &caps,
+						      VFIO_REGION_INFO_CAP_TYPE,
+						      &cap_type);
 			if (ret)
 				return ret;
+
+		}
 		}
 
-		if (caps.size) {
-			info.flags |= VFIO_REGION_INFO_FLAG_CAPS;
-			if (info.argsz < sizeof(info) + caps.size) {
-				info.argsz = sizeof(info) + caps.size;
-				info.cap_offset = 0;
-			} else {
-				vfio_info_cap_shift(&caps, sizeof(info));
-				if (copy_to_user((void __user *)arg +
-						  sizeof(info), caps.buf,
-						  caps.size)) {
-					kfree(caps.buf);
-					return -EFAULT;
-				}
-				info.cap_offset = sizeof(info);
+		if (info.cap_offset) {
+			if (copy_to_user((void __user *)arg + info.cap_offset,
+					 caps.buf, caps.size)) {
+				kfree(caps.buf);
+				return -EFAULT;
 			}
 
 			kfree(caps.buf);
@@ -754,35 +742,22 @@  static long vfio_pci_ioctl(void *device_data,
 	} else if (cmd == VFIO_DEVICE_SET_IRQS) {
 		struct vfio_irq_set hdr;
 		u8 *data = NULL;
-		int ret = 0;
+		int max, ret = 0, data_size = 0;
 
 		minsz = offsetofend(struct vfio_irq_set, count);
 
 		if (copy_from_user(&hdr, (void __user *)arg, minsz))
 			return -EFAULT;
 
-		if (hdr.argsz < minsz || hdr.index >= VFIO_PCI_NUM_IRQS ||
-		    hdr.flags & ~(VFIO_IRQ_SET_DATA_TYPE_MASK |
-				  VFIO_IRQ_SET_ACTION_TYPE_MASK))
-			return -EINVAL;
-
-		if (!(hdr.flags & VFIO_IRQ_SET_DATA_NONE)) {
-			size_t size;
-			int max = vfio_pci_get_irq_count(vdev, hdr.index);
+		max = vfio_pci_get_irq_count(vdev, hdr.index);
 
-			if (hdr.flags & VFIO_IRQ_SET_DATA_BOOL)
-				size = sizeof(uint8_t);
-			else if (hdr.flags & VFIO_IRQ_SET_DATA_EVENTFD)
-				size = sizeof(int32_t);
-			else
-				return -EINVAL;
-
-			if (hdr.argsz - minsz < hdr.count * size ||
-			    hdr.start >= max || hdr.start + hdr.count > max)
-				return -EINVAL;
+		ret = vfio_set_irqs_validate_and_prepare(&hdr, max, &data_size);
+		if (ret)
+			return ret;
 
+		if (data_size) {
 			data = memdup_user((void __user *)(arg + minsz),
-					   hdr.count * size);
+					    data_size);
 			if (IS_ERR(data))
 				return PTR_ERR(data);
 		}
@@ -790,7 +765,7 @@  static long vfio_pci_ioctl(void *device_data,
 		mutex_lock(&vdev->igate);
 
 		ret = vfio_pci_set_irqs_ioctl(vdev, hdr.flags, hdr.index,
-					      hdr.start, hdr.count, data);
+				hdr.start, hdr.count, data);
 
 		mutex_unlock(&vdev->igate);
 		kfree(data);
diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index e3e342861e04..0185d5fb2c85 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -1782,6 +1782,122 @@  void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset)
 }
 EXPORT_SYMBOL_GPL(vfio_info_cap_shift);
 
+static int sparse_mmap_cap(struct vfio_info_cap *caps, void *cap_type)
+{
+	struct vfio_info_cap_header *header;
+	struct vfio_region_info_cap_sparse_mmap *sparse_cap, *sparse = cap_type;
+	size_t size;
+
+	size = sizeof(*sparse) + sparse->nr_areas *  sizeof(*sparse->areas);
+	header = vfio_info_cap_add(caps, size,
+				   VFIO_REGION_INFO_CAP_SPARSE_MMAP, 1);
+	if (IS_ERR(header))
+		return PTR_ERR(header);
+
+	sparse_cap = container_of(header,
+			struct vfio_region_info_cap_sparse_mmap, header);
+	sparse_cap->nr_areas = sparse->nr_areas;
+	memcpy(sparse_cap->areas, sparse->areas,
+	       sparse->nr_areas * sizeof(*sparse->areas));
+	return 0;
+}
+
+static int region_type_cap(struct vfio_info_cap *caps, void *cap_type)
+{
+	struct vfio_info_cap_header *header;
+	struct vfio_region_info_cap_type *type_cap, *cap = cap_type;
+
+	header = vfio_info_cap_add(caps, sizeof(*cap),
+				   VFIO_REGION_INFO_CAP_TYPE, 1);
+	if (IS_ERR(header))
+		return PTR_ERR(header);
+
+	type_cap = container_of(header, struct vfio_region_info_cap_type,
+				header);
+	type_cap->type = cap->type;
+	type_cap->subtype = cap->subtype;
+	return 0;
+}
+
+int vfio_info_add_capability(struct vfio_region_info *info,
+			     struct vfio_info_cap *caps,
+			     int cap_type_id,
+			     void *cap_type)
+{
+	int ret;
+
+	if (!(info->flags & VFIO_REGION_INFO_FLAG_CAPS) || !cap_type)
+		return 0;
+
+	switch (cap_type_id) {
+	case VFIO_REGION_INFO_CAP_SPARSE_MMAP:
+		ret = sparse_mmap_cap(caps, cap_type);
+		if (ret)
+			return ret;
+		break;
+
+	case VFIO_REGION_INFO_CAP_TYPE:
+		ret = region_type_cap(caps, cap_type);
+		if (ret)
+			return ret;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (caps->size) {
+		if (info->argsz < sizeof(*info) + caps->size) {
+			info->argsz = sizeof(*info) + caps->size;
+			info->cap_offset = 0;
+		} else {
+			vfio_info_cap_shift(caps, sizeof(*info));
+			info->cap_offset = sizeof(*info);
+		}
+	}
+	return 0;
+}
+EXPORT_SYMBOL(vfio_info_add_capability);
+
+int vfio_set_irqs_validate_and_prepare(struct vfio_irq_set *hdr, int num_irqs,
+				       int *data_size)
+{
+	unsigned long minsz;
+
+	minsz = offsetofend(struct vfio_irq_set, count);
+
+	if ((hdr->argsz < minsz) || (hdr->index >= VFIO_PCI_NUM_IRQS) ||
+	    (hdr->flags & ~(VFIO_IRQ_SET_DATA_TYPE_MASK |
+				VFIO_IRQ_SET_ACTION_TYPE_MASK)))
+		return -EINVAL;
+
+	if (data_size)
+		*data_size = 0;
+
+	if (!(hdr->flags & VFIO_IRQ_SET_DATA_NONE)) {
+		size_t size;
+
+		if (hdr->flags & VFIO_IRQ_SET_DATA_BOOL)
+			size = sizeof(uint8_t);
+		else if (hdr->flags & VFIO_IRQ_SET_DATA_EVENTFD)
+			size = sizeof(int32_t);
+		else
+			return -EINVAL;
+
+		if ((hdr->argsz - minsz < hdr->count * size) ||
+		    (hdr->start >= num_irqs) ||
+		    (hdr->start + hdr->count > num_irqs))
+			return -EINVAL;
+
+		if (!data_size)
+			return -EINVAL;
+
+		*data_size = hdr->count * size;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(vfio_set_irqs_validate_and_prepare);
+
 static struct vfio_group *vfio_group_from_dev(struct device *dev)
 {
 	struct vfio_device *device;
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index 0bd25ba6223d..5641dab72ded 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -108,6 +108,13 @@  extern struct vfio_info_cap_header *vfio_info_cap_add(
 		struct vfio_info_cap *caps, size_t size, u16 id, u16 version);
 extern void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset);
 
+extern int vfio_info_add_capability(struct vfio_region_info *info,
+				    struct vfio_info_cap *caps,
+				    int cap_type_id, void *cap_type);
+
+extern int vfio_set_irqs_validate_and_prepare(struct vfio_irq_set *hdr,
+					      int num_irqs, int *data_size);
+
 struct pci_dev;
 #ifdef CONFIG_EEH
 extern void vfio_spapr_pci_eeh_open(struct pci_dev *pdev);