diff mbox series

[v6,rdma-next,1/6] RDMA/core: Create mmap database and cookie helper functions

Message ID 20190709141735.19193-2-michal.kalderon@marvell.com
State Awaiting Upstream
Delegated to: David Miller
Series RDMA/qedr: Use the doorbell overflow recovery mechanism for RDMA

Commit Message

Michal Kalderon July 9, 2019, 2:17 p.m. UTC
Create some common APIs for adding entries to an mmap xarray,
searching for an entry and freeing one.

The code was copied from the efa driver almost as is, with functions
renamed to be generic rather than efa specific.
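
A driver is expected to use the helpers roughly like this (a sketch; the
qedr-style names q, db_phys_addr, QEDR_MMAP_FLAG_DB and db_offset are
illustrative, not part of this patch):

	/* at object creation: register the mapping and hand the key to user */
	key = rdma_user_mmap_entry_insert(ucontext, q, q->db_phys_addr,
					  PAGE_SIZE, QEDR_MMAP_FLAG_DB);
	if (key == RDMA_USER_MMAP_INVALID)
		return -ENOMEM;
	uresp.db_offset = key;	/* userspace passes this as the mmap() offset */

	/* in the ->mmap verb: look the entry back up from the offset */
	entry = rdma_user_mmap_entry_get(ucontext, vma->vm_pgoff << PAGE_SHIFT,
					 vma->vm_end - vma->vm_start);
	if (!entry)
		return -EINVAL;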

Signed-off-by: Ariel Elior <ariel.elior@marvell.com>
Signed-off-by: Michal Kalderon <michal.kalderon@marvell.com>
---
 drivers/infiniband/core/device.c      |   1 +
 drivers/infiniband/core/rdma_core.c   |   1 +
 drivers/infiniband/core/uverbs_cmd.c  |   1 +
 drivers/infiniband/core/uverbs_main.c | 135 ++++++++++++++++++++++++++++++++++
 include/rdma/ib_verbs.h               |  46 ++++++++++++
 5 files changed, 184 insertions(+)

Comments

Gal Pressman July 10, 2019, 12:19 p.m. UTC | #1
On 09/07/2019 17:17, Michal Kalderon wrote:
> Create some common API's for adding entries to a xa_mmap.
> Searching for an entry and freeing one.
> 
> The code was copied from the efa driver almost as is, just renamed
> function to be generic and not efa specific.
> 
> Signed-off-by: Ariel Elior <ariel.elior@marvell.com>
> Signed-off-by: Michal Kalderon <michal.kalderon@marvell.com>

Reviewed-by: Gal Pressman <galpress@amazon.com>
Jason Gunthorpe July 25, 2019, 5:55 p.m. UTC | #2
On Tue, Jul 09, 2019 at 05:17:30PM +0300, Michal Kalderon wrote:
> Create some common API's for adding entries to a xa_mmap.
> Searching for an entry and freeing one.
> 
> The code was copied from the efa driver almost as is, just renamed
> function to be generic and not efa specific.
> 
> Signed-off-by: Ariel Elior <ariel.elior@marvell.com>
> Signed-off-by: Michal Kalderon <michal.kalderon@marvell.com>
>  drivers/infiniband/core/device.c      |   1 +
>  drivers/infiniband/core/rdma_core.c   |   1 +
>  drivers/infiniband/core/uverbs_cmd.c  |   1 +
>  drivers/infiniband/core/uverbs_main.c | 135 ++++++++++++++++++++++++++++++++++
>  include/rdma/ib_verbs.h               |  46 ++++++++++++
>  5 files changed, 184 insertions(+)
> 
> diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
> index 8a6ccb936dfe..a830c2c5d691 100644
> +++ b/drivers/infiniband/core/device.c
> @@ -2521,6 +2521,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
>  	SET_DEVICE_OP(dev_ops, map_mr_sg_pi);
>  	SET_DEVICE_OP(dev_ops, map_phys_fmr);
>  	SET_DEVICE_OP(dev_ops, mmap);
> +	SET_DEVICE_OP(dev_ops, mmap_free);
>  	SET_DEVICE_OP(dev_ops, modify_ah);
>  	SET_DEVICE_OP(dev_ops, modify_cq);
>  	SET_DEVICE_OP(dev_ops, modify_device);
> diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c
> index ccf4d069c25c..1ed01b02401f 100644
> +++ b/drivers/infiniband/core/rdma_core.c
> @@ -816,6 +816,7 @@ static void ufile_destroy_ucontext(struct ib_uverbs_file *ufile,
>  
>  	rdma_restrack_del(&ucontext->res);
>  
> +	rdma_user_mmap_entries_remove_free(ucontext);
>  	ib_dev->ops.dealloc_ucontext(ucontext);
>  	kfree(ucontext);
>  
> diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
> index 7ddd0e5bc6b3..44c0600245e4 100644
> +++ b/drivers/infiniband/core/uverbs_cmd.c
> @@ -254,6 +254,7 @@ static int ib_uverbs_get_context(struct uverbs_attr_bundle *attrs)
>  
>  	mutex_init(&ucontext->per_mm_list_lock);
>  	INIT_LIST_HEAD(&ucontext->per_mm_list);
> +	xa_init(&ucontext->mmap_xa);
>  
>  	ret = get_unused_fd_flags(O_CLOEXEC);
>  	if (ret < 0)
> diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
> index 11c13c1381cf..4b909d7b97de 100644
> +++ b/drivers/infiniband/core/uverbs_main.c
> @@ -965,6 +965,141 @@ int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma,
>  }
>  EXPORT_SYMBOL(rdma_user_mmap_io);
>  
> +static inline u64
> +rdma_user_mmap_get_key(const struct rdma_user_mmap_entry *entry)
> +{
> +	return (u64)entry->mmap_page << PAGE_SHIFT;
> +}
> +
> +/**
> + * rdma_user_mmap_entry_get() - Get an entry from the mmap_xa.
> + *
> + * @ucontext: associated user context.
> + * @key: The key received from rdma_user_mmap_entry_insert which
> + *     is provided by user as the address to map.
> + * @len: The length the user wants to map
> + *
> + * This function is called when a user tries to mmap a key it
> + * initially received from the driver. They key was created by
> + * the function rdma_user_mmap_entry_insert.
> + *
> + * Return an entry if exists or NULL if there is no match.
> + */
> +struct rdma_user_mmap_entry *
> +rdma_user_mmap_entry_get(struct ib_ucontext *ucontext, u64 key, u64 len)
> +{
> +	struct rdma_user_mmap_entry *entry;
> +	u64 mmap_page;
> +
> +	mmap_page = key >> PAGE_SHIFT;
> +	if (mmap_page > U32_MAX)
> +		return NULL;
> +
> +	entry = xa_load(&ucontext->mmap_xa, mmap_page);
> +	if (!entry || entry->length != len)
> +		return NULL;
> +
> +	ibdev_dbg(ucontext->device,
> +		  "mmap: obj[0x%p] key[%#llx] addr[%#llx] len[%#llx] removed\n",
> +		  entry->obj, key, entry->address, entry->length);
> +
> +	return entry;
> +}
> +EXPORT_SYMBOL(rdma_user_mmap_entry_get);

It is a mistake we keep making, and maybe the war is hopelessly lost
now, but functions called from a driver should not be part of the
ib_uverbs module - ideally uverbs is an optional module. They should
be in ib_core.

Maybe put this in ib_core_uverbs.c ?
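
Moving these helpers would look roughly like this in
drivers/infiniband/core/Makefile (the file name is assumed from the
suggestion above):

	ib_core-y += ib_core_uverbs.o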

Kamal, you've been tackling various cleanups, maybe making ib_uverbs
unloadable again is something you'd be keen on?

> +/**
> + * rdma_user_mmap_entry_insert() - Allocate and insert an entry to the mmap_xa.
> + *
> + * @ucontext: associated user context.
> + * @obj: opaque driver object that will be stored in the entry.
> + * @address: The address that will be mmapped to the user
> + * @length: Length of the address that will be mmapped
> + * @mmap_flag: opaque driver flags related to the address (For
> + *           example could be used for cachability)
> + *
> + * This function should be called by drivers that use the rdma_user_mmap
> + * interface for handling user mmapped addresses. The database is handled in
> + * the core and helper functions are provided to insert entries into the
> + * database and extract entries when the user call mmap with the given key.
> + * The function returns a unique key that should be provided to user, the user
> + * will use the key to map the given address.
> + *
> + * Note this locking scheme cannot support removal of entries,
> + * except during ucontext destruction when the core code
> + * guarentees no concurrency.
> + *
> + * Return: unique key or RDMA_USER_MMAP_INVALID if entry was not added.
> + */
> +u64 rdma_user_mmap_entry_insert(struct ib_ucontext *ucontext, void *obj,
> +				u64 address, u64 length, u8 mmap_flag)
> +{
> +	struct rdma_user_mmap_entry *entry;
> +	u32 next_mmap_page;
> +	int err;
> +
> +	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
> +	if (!entry)
> +		return RDMA_USER_MMAP_INVALID;
> +
> +	entry->obj = obj;
> +	entry->address = address;
> +	entry->length = length;
> +	entry->mmap_flag = mmap_flag;
> +
> +	xa_lock(&ucontext->mmap_xa);
> +	if (check_add_overflow(ucontext->mmap_xa_page,
> +			       (u32)(length >> PAGE_SHIFT),

Should this be divide round up ?

> +			       &next_mmap_page))
> +		goto err_unlock;

I still don't like that this algorithm latches into a permanent
failure when the xa_page wraps.

It seems worth spending a bit more time here to tidy this.. Keep using
the mmap_xa_page scheme, but instead do something like

alloc_cyclic_range():

while (1) {
   // Find the first empty element in a cyclic way
   xa_page_first = mmap_xa_page;
   xa_find(xa, &xa_page_first, U32_MAX, XA_FREE_MARK);

   // Is there enough room to hold the range?
   if (check_add_overflow(xa_page_first, npages, &xa_page_end)) {
      mmap_xa_page = 0;
      continue;
   }

   // See if an existing element intersects the range
   elm = xa_find(xa, &zero, xa_page_end, 0);
   if (elm && intersects(xa_page_first, xa_page_end - 1, elm->first, elm->last)) {
      mmap_xa_page = elm->last + 1;
      continue;
   }

   // xa_page_first -> xa_page_end should now be free
   xa_insert(xa, xa_page_first, entry);
   mmap_xa_page = xa_page_end + 1;
   return xa_page_first;
}

Approximately, please check it.
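
(The intersects() helper above is assumed; as a sketch, two inclusive
page ranges overlap when:)

	static bool intersects(u32 first1, u32 last1, u32 first2, u32 last2)
	{
		return first1 <= last2 && first2 <= last1;
	}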

> @@ -2199,6 +2201,17 @@ struct iw_cm_conn_param;
>  
>  #define DECLARE_RDMA_OBJ_SIZE(ib_struct) size_t size_##ib_struct
>  
> +#define RDMA_USER_MMAP_FLAG_SHIFT 56
> +#define RDMA_USER_MMAP_PAGE_MASK GENMASK(RDMA_USER_MMAP_FLAG_SHIFT - 1, 0)
> +#define RDMA_USER_MMAP_INVALID U64_MAX
> +struct rdma_user_mmap_entry {
> +	void *obj;
> +	u64 address;
> +	u64 length;
> +	u32 mmap_page;
> +	u8 mmap_flag;
> +};
> +
>  /**
>   * struct ib_device_ops - InfiniBand device operations
>   * This structure defines all the InfiniBand device operations, providers will
> @@ -2311,6 +2324,19 @@ struct ib_device_ops {
>  			      struct ib_udata *udata);
>  	void (*dealloc_ucontext)(struct ib_ucontext *context);
>  	int (*mmap)(struct ib_ucontext *context, struct vm_area_struct *vma);
> +	/**
> +	 * Memory that is mapped to the user can only be freed once the
> +	 * ucontext of the application is destroyed. This is for
> +	 * security reasons where we don't want an application to have a
> +	 * mapping to phyiscal memory that is freed and allocated to
> +	 * another application. For this reason, all the entries are
> +	 * stored in ucontext and once ucontext is freed mmap_free is
> +	 * called on each of the entries. They type of the memory that

They -> the

> +	 * was mapped may differ between entries and is opaque to the
> +	 * rdma_user_mmap interface. Therefore needs to be implemented
> +	 * by the driver in mmap_free.
> +	 */
> +	void (*mmap_free)(struct rdma_user_mmap_entry *entry);
>  	void (*disassociate_ucontext)(struct ib_ucontext *ibcontext);
>  	int (*alloc_pd)(struct ib_pd *pd, struct ib_udata *udata);
>  	void (*dealloc_pd)(struct ib_pd *pd, struct ib_udata *udata);
> @@ -2709,6 +2735,11 @@ void ib_set_device_ops(struct ib_device *device,
>  #if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
>  int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma,
>  		      unsigned long pfn, unsigned long size, pgprot_t prot);
> +u64 rdma_user_mmap_entry_insert(struct ib_ucontext *ucontext, void *obj,
> +				u64 address, u64 length, u8 mmap_flag);
> +struct rdma_user_mmap_entry *
> +rdma_user_mmap_entry_get(struct ib_ucontext *ucontext, u64 key, u64 len);
> +void rdma_user_mmap_entries_remove_free(struct ib_ucontext
> *ucontext);

Should remove_free be in the core-priv header?

Jason
Michal Kalderon July 25, 2019, 7:34 p.m. UTC | #3
> From: linux-rdma-owner@vger.kernel.org <linux-rdma-
> owner@vger.kernel.org> On Behalf Of Jason Gunthorpe
> 
> On Tue, Jul 09, 2019 at 05:17:30PM +0300, Michal Kalderon wrote:
> > Create some common API's for adding entries to a xa_mmap.
> > Searching for an entry and freeing one.
> >
> > The code was copied from the efa driver almost as is, just renamed
> > function to be generic and not efa specific.
> >
> > Signed-off-by: Ariel Elior <ariel.elior@marvell.com>
> > Signed-off-by: Michal Kalderon <michal.kalderon@marvell.com>
> >  drivers/infiniband/core/device.c      |   1 +
> >  drivers/infiniband/core/rdma_core.c   |   1 +
> >  drivers/infiniband/core/uverbs_cmd.c  |   1 +
> >  drivers/infiniband/core/uverbs_main.c | 135
> ++++++++++++++++++++++++++++++++++
> >  include/rdma/ib_verbs.h               |  46 ++++++++++++
> >  5 files changed, 184 insertions(+)
> >
> > diff --git a/drivers/infiniband/core/device.c
> > b/drivers/infiniband/core/device.c
> > index 8a6ccb936dfe..a830c2c5d691 100644
> > +++ b/drivers/infiniband/core/device.c
> > @@ -2521,6 +2521,7 @@ void ib_set_device_ops(struct ib_device *dev,
> const struct ib_device_ops *ops)
> >  	SET_DEVICE_OP(dev_ops, map_mr_sg_pi);
> >  	SET_DEVICE_OP(dev_ops, map_phys_fmr);
> >  	SET_DEVICE_OP(dev_ops, mmap);
> > +	SET_DEVICE_OP(dev_ops, mmap_free);
> >  	SET_DEVICE_OP(dev_ops, modify_ah);
> >  	SET_DEVICE_OP(dev_ops, modify_cq);
> >  	SET_DEVICE_OP(dev_ops, modify_device); diff --git
> > a/drivers/infiniband/core/rdma_core.c
> > b/drivers/infiniband/core/rdma_core.c
> > index ccf4d069c25c..1ed01b02401f 100644
> > +++ b/drivers/infiniband/core/rdma_core.c
> > @@ -816,6 +816,7 @@ static void ufile_destroy_ucontext(struct
> > ib_uverbs_file *ufile,
> >
> >  	rdma_restrack_del(&ucontext->res);
> >
> > +	rdma_user_mmap_entries_remove_free(ucontext);
> >  	ib_dev->ops.dealloc_ucontext(ucontext);
> >  	kfree(ucontext);
> >
> > diff --git a/drivers/infiniband/core/uverbs_cmd.c
> > b/drivers/infiniband/core/uverbs_cmd.c
> > index 7ddd0e5bc6b3..44c0600245e4 100644
> > +++ b/drivers/infiniband/core/uverbs_cmd.c
> > @@ -254,6 +254,7 @@ static int ib_uverbs_get_context(struct
> > uverbs_attr_bundle *attrs)
> >
> >  	mutex_init(&ucontext->per_mm_list_lock);
> >  	INIT_LIST_HEAD(&ucontext->per_mm_list);
> > +	xa_init(&ucontext->mmap_xa);
> >
> >  	ret = get_unused_fd_flags(O_CLOEXEC);
> >  	if (ret < 0)
> > diff --git a/drivers/infiniband/core/uverbs_main.c
> > b/drivers/infiniband/core/uverbs_main.c
> > index 11c13c1381cf..4b909d7b97de 100644
> > +++ b/drivers/infiniband/core/uverbs_main.c
> > @@ -965,6 +965,141 @@ int rdma_user_mmap_io(struct ib_ucontext
> > *ucontext, struct vm_area_struct *vma,  }
> > EXPORT_SYMBOL(rdma_user_mmap_io);
> >
> > +static inline u64
> > +rdma_user_mmap_get_key(const struct rdma_user_mmap_entry
> *entry) {
> > +	return (u64)entry->mmap_page << PAGE_SHIFT; }
> > +
> > +/**
> > + * rdma_user_mmap_entry_get() - Get an entry from the mmap_xa.
> > + *
> > + * @ucontext: associated user context.
> > + * @key: The key received from rdma_user_mmap_entry_insert which
> > + *     is provided by user as the address to map.
> > + * @len: The length the user wants to map
> > + *
> > + * This function is called when a user tries to mmap a key it
> > + * initially received from the driver. They key was created by
> > + * the function rdma_user_mmap_entry_insert.
> > + *
> > + * Return an entry if exists or NULL if there is no match.
> > + */
> > +struct rdma_user_mmap_entry *
> > +rdma_user_mmap_entry_get(struct ib_ucontext *ucontext, u64 key, u64
> > +len) {
> > +	struct rdma_user_mmap_entry *entry;
> > +	u64 mmap_page;
> > +
> > +	mmap_page = key >> PAGE_SHIFT;
> > +	if (mmap_page > U32_MAX)
> > +		return NULL;
> > +
> > +	entry = xa_load(&ucontext->mmap_xa, mmap_page);
> > +	if (!entry || entry->length != len)
> > +		return NULL;
> > +
> > +	ibdev_dbg(ucontext->device,
> > +		  "mmap: obj[0x%p] key[%#llx] addr[%#llx] len[%#llx]
> removed\n",
> > +		  entry->obj, key, entry->address, entry->length);
> > +
> > +	return entry;
> > +}
> > +EXPORT_SYMBOL(rdma_user_mmap_entry_get);
> 
> It is a mistake we keep making, and maybe the war is hopelessly lost now,
> but functions called from a driver should not be part of the ib_uverbs module
> - ideally uverbs is an optional module. They should be in ib_core.
> 
> Maybe put this in ib_core_uverbs.c ?
But if there isn't ib_uverbs, user apps can't be run, right? And then these
functions won't get called anyway?


> 
> Kamal, you've been tackling various cleanups, maybe making ib_uverbs
> unloadable again is something you'd be keen on?
> 
> > +/**
> > + * rdma_user_mmap_entry_insert() - Allocate and insert an entry to the
> mmap_xa.
> > + *
> > + * @ucontext: associated user context.
> > + * @obj: opaque driver object that will be stored in the entry.
> > + * @address: The address that will be mmapped to the user
> > + * @length: Length of the address that will be mmapped
> > + * @mmap_flag: opaque driver flags related to the address (For
> > + *           example could be used for cachability)
> > + *
> > + * This function should be called by drivers that use the
> > +rdma_user_mmap
> > + * interface for handling user mmapped addresses. The database is
> > +handled in
> > + * the core and helper functions are provided to insert entries into
> > +the
> > + * database and extract entries when the user call mmap with the given
> key.
> > + * The function returns a unique key that should be provided to user,
> > +the user
> > + * will use the key to map the given address.
> > + *
> > + * Note this locking scheme cannot support removal of entries,
> > + * except during ucontext destruction when the core code
> > + * guarentees no concurrency.
> > + *
> > + * Return: unique key or RDMA_USER_MMAP_INVALID if entry was not
> added.
> > + */
> > +u64 rdma_user_mmap_entry_insert(struct ib_ucontext *ucontext, void
> *obj,
> > +				u64 address, u64 length, u8 mmap_flag) {
> > +	struct rdma_user_mmap_entry *entry;
> > +	u32 next_mmap_page;
> > +	int err;
> > +
> > +	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
> > +	if (!entry)
> > +		return RDMA_USER_MMAP_INVALID;
> > +
> > +	entry->obj = obj;
> > +	entry->address = address;
> > +	entry->length = length;
> > +	entry->mmap_flag = mmap_flag;
> > +
> > +	xa_lock(&ucontext->mmap_xa);
> > +	if (check_add_overflow(ucontext->mmap_xa_page,
> > +			       (u32)(length >> PAGE_SHIFT),
> 
> Should this be divide round up ?
For cases where the length is not page-aligned?

> 
> > +			       &next_mmap_page))
> > +		goto err_unlock;
> 
> I still don't like that this algorithm latches into a permanent failure when the
> xa_page wraps.
> 
> It seems worth spending a bit more time here to tidy this.. Keep using the
> mmap_xa_page scheme, but instead do something like
> 
> alloc_cyclic_range():
> 
> while () {
>    // Find first empty element in a cyclic way
>    xa_page_first = mmap_xa_page;
>    xa_find(xa, &xa_page_first, U32_MAX, XA_FREE_MARK)
> 
>    // Is there a enough room to have the range?
>    if (check_add_overflow(xa_page_first, npages, &xa_page_end)) {
>       mmap_xa_page = 0;
>       continue;
>    }
> 
>    // See if the element before intersects
>    elm = xa_find(xa, &zero, xa_page_end, 0);
>    if (elm && intersects(xa_page_first, xa_page_last, elm->first, elm->last)) {
>       mmap_xa_page = elm->last + 1;
>       continue
>    }
> 
>    // xa_page_first -> xa_page_end should now be free
>    xa_insert(xa, xa_page_start, entry);
>    mmap_xa_page = xa_page_end + 1;
>    return xa_page_start;
> }
> 
> Approximately, please check it.
But we don't free entries from the xa_array (only when the ucontext is
destroyed), so how will there be an empty element after we wrap?

> 
> > @@ -2199,6 +2201,17 @@ struct iw_cm_conn_param;
> >
> >  #define DECLARE_RDMA_OBJ_SIZE(ib_struct) size_t size_##ib_struct
> >
> > +#define RDMA_USER_MMAP_FLAG_SHIFT 56
> > +#define RDMA_USER_MMAP_PAGE_MASK
> GENMASK(EFA_MMAP_FLAG_SHIFT - 1, 0)
> > +#define RDMA_USER_MMAP_INVALID U64_MAX struct
> rdma_user_mmap_entry {
> > +	void *obj;
> > +	u64 address;
> > +	u64 length;
> > +	u32 mmap_page;
> > +	u8 mmap_flag;
> > +};
> > +
> >  /**
> >   * struct ib_device_ops - InfiniBand device operations
> >   * This structure defines all the InfiniBand device operations,
> > providers will @@ -2311,6 +2324,19 @@ struct ib_device_ops {
> >  			      struct ib_udata *udata);
> >  	void (*dealloc_ucontext)(struct ib_ucontext *context);
> >  	int (*mmap)(struct ib_ucontext *context, struct vm_area_struct
> > *vma);
> > +	/**
> > +	 * Memory that is mapped to the user can only be freed once the
> > +	 * ucontext of the application is destroyed. This is for
> > +	 * security reasons where we don't want an application to have a
> > +	 * mapping to phyiscal memory that is freed and allocated to
> > +	 * another application. For this reason, all the entries are
> > +	 * stored in ucontext and once ucontext is freed mmap_free is
> > +	 * called on each of the entries. They type of the memory that
> 
> They -> the
ok
> 
> > +	 * was mapped may differ between entries and is opaque to the
> > +	 * rdma_user_mmap interface. Therefore needs to be implemented
> > +	 * by the driver in mmap_free.
> > +	 */
> > +	void (*mmap_free)(struct rdma_user_mmap_entry *entry);
> >  	void (*disassociate_ucontext)(struct ib_ucontext *ibcontext);
> >  	int (*alloc_pd)(struct ib_pd *pd, struct ib_udata *udata);
> >  	void (*dealloc_pd)(struct ib_pd *pd, struct ib_udata *udata); @@
> > -2709,6 +2735,11 @@ void ib_set_device_ops(struct ib_device *device,
> > #if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
> >  int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct
> vm_area_struct *vma,
> >  		      unsigned long pfn, unsigned long size, pgprot_t prot);
> > +u64 rdma_user_mmap_entry_insert(struct ib_ucontext *ucontext, void
> *obj,
> > +				u64 address, u64 length, u8 mmap_flag);
> struct
> > +rdma_user_mmap_entry * rdma_user_mmap_entry_get(struct
> ib_ucontext
> > +*ucontext, u64 key, u64 len); void
> > +rdma_user_mmap_entries_remove_free(struct ib_ucontext
> > *ucontext);
> 
> Should remove_free should be in the core-priv header?
Yes, thanks.
> 
> Jason
Jason Gunthorpe July 25, 2019, 7:52 p.m. UTC | #4
On Thu, Jul 25, 2019 at 07:34:15PM +0000, Michal Kalderon wrote:
> > > +	ibdev_dbg(ucontext->device,
> > > +		  "mmap: obj[0x%p] key[%#llx] addr[%#llx] len[%#llx]
> > removed\n",
> > > +		  entry->obj, key, entry->address, entry->length);
> > > +
> > > +	return entry;
> > > +}
> > > +EXPORT_SYMBOL(rdma_user_mmap_entry_get);
> > 
> > It is a mistake we keep making, and maybe the war is hopelessly lost now,
> > but functions called from a driver should not be part of the ib_uverbs module
> > - ideally uverbs is an optional module. They should be in ib_core.
> > 
> > Maybe put this in ib_core_uverbs.c ?

> But if there isn't ib_uverbs user apps can't be run right ? and then
> these functions Won't get called anyway ?

Right, but we don't want loading the driver to force creating
/dev/infiniband/uverbs - so the driver support component of uverbs
should live in ib_core, and the /dev/ component should be in ib_uverbs.

> > > +	xa_lock(&ucontext->mmap_xa);
> > > +	if (check_add_overflow(ucontext->mmap_xa_page,
> > > +			       (u32)(length >> PAGE_SHIFT),
> > 
> > Should this be divide round up ?

> For cases that length is not rounded to PAGE_SHIFT? 

It should never happen, but yes
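
i.e., as a sketch using the kernel's DIV_ROUND_UP helper:

	if (check_add_overflow(ucontext->mmap_xa_page,
			       (u32)DIV_ROUND_UP(length, PAGE_SIZE),
			       &next_mmap_page))
		goto err_unlock;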
 
> > 
> > > +			       &next_mmap_page))
> > > +		goto err_unlock;
> > 
> > I still don't like that this algorithm latches into a permanent failure when the
> > xa_page wraps.
> > 
> > It seems worth spending a bit more time here to tidy this.. Keep using the
> > mmap_xa_page scheme, but instead do something like
> > 
> > alloc_cyclic_range():
> > 
> > while () {
> >    // Find first empty element in a cyclic way
> >    xa_page_first = mmap_xa_page;
> >    xa_find(xa, &xa_page_first, U32_MAX, XA_FREE_MARK)
> > 
> >    // Is there a enough room to have the range?
> >    if (check_add_overflow(xa_page_first, npages, &xa_page_end)) {
> >       mmap_xa_page = 0;
> >       continue;
> >    }
> > 
> >    // See if the element before intersects
> >    elm = xa_find(xa, &zero, xa_page_end, 0);
> >    if (elm && intersects(xa_page_first, xa_page_last, elm->first, elm->last)) {
> >       mmap_xa_page = elm->last + 1;
> >       continue
> >    }
> > 
> >    // xa_page_first -> xa_page_end should now be free
> >    xa_insert(xa, xa_page_start, entry);
> >    mmap_xa_page = xa_page_end + 1;
> >    return xa_page_start;
> > }
> > 
> > Approximately, please check it.

> But we don't free entires from the xa_array ( only when ucontext is destroyed) so how will 
> There be an empty element after we wrap ?  

Oh!

That should be fixed up too; in the general case, if a user is
creating/destroying driver objects in a loop we don't want memory usage
to be unbounded.

The rdma_user_mmap stuff has VMA ops that can refcount the xa entry,
and now that this is core code it is easy enough to harmonize the two
things and track the xa side from the struct rdma_umap_priv.

The question is, does EFA or qedr have a use model for this that
allows a userspace verb to create/destroy in a loop? I.e. do we need to
fix this right now?

Jason
Michal Kalderon July 26, 2019, 8:42 a.m. UTC | #5
> From: linux-rdma-owner@vger.kernel.org <linux-rdma-
> owner@vger.kernel.org> On Behalf Of Jason Gunthorpe
> 
> On Thu, Jul 25, 2019 at 07:34:15PM +0000, Michal Kalderon wrote:
> > > > +	ibdev_dbg(ucontext->device,
> > > > +		  "mmap: obj[0x%p] key[%#llx] addr[%#llx] len[%#llx]
> > > removed\n",
> > > > +		  entry->obj, key, entry->address, entry->length);
> > > > +
> > > > +	return entry;
> > > > +}
> > > > +EXPORT_SYMBOL(rdma_user_mmap_entry_get);
> > >
> > > It is a mistake we keep making, and maybe the war is hopelessly lost
> > > now, but functions called from a driver should not be part of the
> > > ib_uverbs module
> > > - ideally uverbs is an optional module. They should be in ib_core.
> > >
> > > Maybe put this in ib_core_uverbs.c ?
> 
> > But if there isn't ib_uverbs user apps can't be run right ? and then
> > these functions Won't get called anyway ?
> 
> Right, but, we don't want loading the driver to force creating
> /dev/infiniband/uverbs - so the driver support component of uverbs should
> live in ib_core, and the /dev/ component should be in ib_uverbs
> 
> > > > +	xa_lock(&ucontext->mmap_xa);
> > > > +	if (check_add_overflow(ucontext->mmap_xa_page,
> > > > +			       (u32)(length >> PAGE_SHIFT),
> > >
> > > Should this be divide round up ?
> 
> > For cases that length is not rounded to PAGE_SHIFT?
> 
> It should never happen, but yes
ok
> 
> > >
> > > > +			       &next_mmap_page))
> > > > +		goto err_unlock;
> > >
> > > I still don't like that this algorithm latches into a permanent
> > > failure when the xa_page wraps.
> > >
> > > It seems worth spending a bit more time here to tidy this.. Keep
> > > using the mmap_xa_page scheme, but instead do something like
> > >
> > > alloc_cyclic_range():
> > >
> > > while () {
> > >    // Find first empty element in a cyclic way
> > >    xa_page_first = mmap_xa_page;
> > >    xa_find(xa, &xa_page_first, U32_MAX, XA_FREE_MARK)
> > >
> > >    // Is there a enough room to have the range?
> > >    if (check_add_overflow(xa_page_first, npages, &xa_page_end)) {
> > >       mmap_xa_page = 0;
> > >       continue;
> > >    }
> > >
> > >    // See if the element before intersects
> > >    elm = xa_find(xa, &zero, xa_page_end, 0);
> > >    if (elm && intersects(xa_page_first, xa_page_last, elm->first, elm-
> >last)) {
> > >       mmap_xa_page = elm->last + 1;
> > >       continue
> > >    }
> > >
> > >    // xa_page_first -> xa_page_end should now be free
> > >    xa_insert(xa, xa_page_start, entry);
> > >    mmap_xa_page = xa_page_end + 1;
> > >    return xa_page_start;
> > > }
> > >
> > > Approximately, please check it.
> 
> > But we don't free entires from the xa_array ( only when ucontext is
> > destroyed) so how will There be an empty element after we wrap ?
> 
> Oh!
> 
> That should be fixed up too, in the general case if a user is
> creating/destroying driver objects in loop we don't want memory usage to
> be unbounded.
> 
> The rdma_user_mmap stuff has VMA ops that can refcount the xa entry and
> now that this is core code it is easy enough to harmonize the two things and
> track the xa side from the struct rdma_umap_priv
> 
> The question is, does EFA or qedr have a use model for this that allows a
> userspace verb to create/destroy in a loop? ie do we need to fix this right
> now?
The mapping occurs for every qp and cq creation, so yes.
So do you mean add a refcount to the xarray entry, and from umap decrease
the refcount and free?

> 
> Jason
Jason Gunthorpe July 26, 2019, 1:23 p.m. UTC | #6
On Fri, Jul 26, 2019 at 08:42:07AM +0000, Michal Kalderon wrote:

> > > But we don't free entires from the xa_array ( only when ucontext is
> > > destroyed) so how will There be an empty element after we wrap ?
> > 
> > Oh!
> > 
> > That should be fixed up too, in the general case if a user is
> > creating/destroying driver objects in loop we don't want memory usage to
> > be unbounded.
> > 
> > The rdma_user_mmap stuff has VMA ops that can refcount the xa entry and
> > now that this is core code it is easy enough to harmonize the two things and
> > track the xa side from the struct rdma_umap_priv
> > 
> > The question is, does EFA or qedr have a use model for this that allows a
> > userspace verb to create/destroy in a loop? ie do we need to fix this right
> > now?

> The mapping occurs for every qp and cq creation. So yes.
>
> So do you mean add a ref-cnt to the xarray entry and from umap
> decrease the refcnt and free?

Yes, free the entry (release the HW resource) and release the xa_array
ID.

Then we may as well not use cyclic allocation for the xa; just the
algorithm above would be OK.

The zap should also clear the refs, and then when the ucontext is
destroyed we can just WARN_ON the xarray is empty. Either all the vmas
were destroyed or all were zapped.
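
As a rough sketch of what refcounting the entry could look like (the
ucontext back-pointer and the helper name are assumed, not from this
patch):

	struct rdma_user_mmap_entry {
		struct kref ref;
		struct ib_ucontext *ucontext;	/* assumed back-pointer */
		void *obj;
		u64 address;
		u64 length;
		u32 mmap_page;
		u8 mmap_flag;
	};

	static void rdma_user_mmap_entry_release(struct kref *ref)
	{
		struct rdma_user_mmap_entry *entry =
			container_of(ref, struct rdma_user_mmap_entry, ref);

		/* remove the entry from the xarray, then let the driver
		 * free the HW resource via mmap_free
		 */
		xa_erase(&entry->ucontext->mmap_xa, entry->mmap_page);
		if (entry->ucontext->device->ops.mmap_free)
			entry->ucontext->device->ops.mmap_free(entry);
		kfree(entry);
	}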

Jason
Gal Pressman July 28, 2019, 8:45 a.m. UTC | #7
On 26/07/2019 16:23, Jason Gunthorpe wrote:
> On Fri, Jul 26, 2019 at 08:42:07AM +0000, Michal Kalderon wrote:
> 
>>>> But we don't free entires from the xa_array ( only when ucontext is
>>>> destroyed) so how will There be an empty element after we wrap ?
>>>
>>> Oh!
>>>
>>> That should be fixed up too, in the general case if a user is
>>> creating/destroying driver objects in loop we don't want memory usage to
>>> be unbounded.
>>>
>>> The rdma_user_mmap stuff has VMA ops that can refcount the xa entry and
>>> now that this is core code it is easy enough to harmonize the two things and
>>> track the xa side from the struct rdma_umap_priv
>>>
>>> The question is, does EFA or qedr have a use model for this that allows a
>>> userspace verb to create/destroy in a loop? ie do we need to fix this right
>>> now?
> 
>> The mapping occurs for every qp and cq creation. So yes.
>>
>> So do you mean add a ref-cnt to the xarray entry and from umap
>> decrease the refcnt and free?
> 
> Yes, free the entry (release the HW resource) and release the xa_array
> ID.

This is a bit tricky for EFA.
The UAR BAR resources (LLQ for example) aren't cleaned up until the UAR is
deallocated, so many of the entries won't really be freed when the refcount
reaches zero (i.e. the HW considers these entries as refcounted as long as the
UAR exists). The best we can do is free the DMA buffers for appropriate entries.
Kamal Heib July 28, 2019, 9:30 a.m. UTC | #8
On Thu, Jul 25, 2019 at 02:55:40PM -0300, Jason Gunthorpe wrote:
> On Tue, Jul 09, 2019 at 05:17:30PM +0300, Michal Kalderon wrote:
> > Create some common API's for adding entries to a xa_mmap.
> > Searching for an entry and freeing one.
> > 
> > The code was copied from the efa driver almost as is, just renamed
> > function to be generic and not efa specific.
> > 
> > Signed-off-by: Ariel Elior <ariel.elior@marvell.com>
> > Signed-off-by: Michal Kalderon <michal.kalderon@marvell.com>
> >  drivers/infiniband/core/device.c      |   1 +
> >  drivers/infiniband/core/rdma_core.c   |   1 +
> >  drivers/infiniband/core/uverbs_cmd.c  |   1 +
> >  drivers/infiniband/core/uverbs_main.c | 135 ++++++++++++++++++++++++++++++++++
> >  include/rdma/ib_verbs.h               |  46 ++++++++++++
> >  5 files changed, 184 insertions(+)
> > 
> > diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
> > index 8a6ccb936dfe..a830c2c5d691 100644
> > +++ b/drivers/infiniband/core/device.c
> > @@ -2521,6 +2521,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
> >  	SET_DEVICE_OP(dev_ops, map_mr_sg_pi);
> >  	SET_DEVICE_OP(dev_ops, map_phys_fmr);
> >  	SET_DEVICE_OP(dev_ops, mmap);
> > +	SET_DEVICE_OP(dev_ops, mmap_free);
> >  	SET_DEVICE_OP(dev_ops, modify_ah);
> >  	SET_DEVICE_OP(dev_ops, modify_cq);
> >  	SET_DEVICE_OP(dev_ops, modify_device);
> > diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c
> > index ccf4d069c25c..1ed01b02401f 100644
> > +++ b/drivers/infiniband/core/rdma_core.c
> > @@ -816,6 +816,7 @@ static void ufile_destroy_ucontext(struct ib_uverbs_file *ufile,
> >  
> >  	rdma_restrack_del(&ucontext->res);
> >  
> > +	rdma_user_mmap_entries_remove_free(ucontext);
> >  	ib_dev->ops.dealloc_ucontext(ucontext);
> >  	kfree(ucontext);
> >  
> > diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
> > index 7ddd0e5bc6b3..44c0600245e4 100644
> > +++ b/drivers/infiniband/core/uverbs_cmd.c
> > @@ -254,6 +254,7 @@ static int ib_uverbs_get_context(struct uverbs_attr_bundle *attrs)
> >  
> >  	mutex_init(&ucontext->per_mm_list_lock);
> >  	INIT_LIST_HEAD(&ucontext->per_mm_list);
> > +	xa_init(&ucontext->mmap_xa);
> >  
> >  	ret = get_unused_fd_flags(O_CLOEXEC);
> >  	if (ret < 0)
> > diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
> > index 11c13c1381cf..4b909d7b97de 100644
> > +++ b/drivers/infiniband/core/uverbs_main.c
> > @@ -965,6 +965,141 @@ int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma,
> >  }
> >  EXPORT_SYMBOL(rdma_user_mmap_io);
> >  
> > +static inline u64
> > +rdma_user_mmap_get_key(const struct rdma_user_mmap_entry *entry)
> > +{
> > +	return (u64)entry->mmap_page << PAGE_SHIFT;
> > +}
> > +
> > +/**
> > + * rdma_user_mmap_entry_get() - Get an entry from the mmap_xa.
> > + *
> > + * @ucontext: associated user context.
> > + * @key: The key received from rdma_user_mmap_entry_insert which
> > + *     is provided by user as the address to map.
> > + * @len: The length the user wants to map
> > + *
> > + * This function is called when a user tries to mmap a key it
> > + * initially received from the driver. They key was created by
> > + * the function rdma_user_mmap_entry_insert.
> > + *
> > + * Return an entry if exists or NULL if there is no match.
> > + */
> > +struct rdma_user_mmap_entry *
> > +rdma_user_mmap_entry_get(struct ib_ucontext *ucontext, u64 key, u64 len)
> > +{
> > +	struct rdma_user_mmap_entry *entry;
> > +	u64 mmap_page;
> > +
> > +	mmap_page = key >> PAGE_SHIFT;
> > +	if (mmap_page > U32_MAX)
> > +		return NULL;
> > +
> > +	entry = xa_load(&ucontext->mmap_xa, mmap_page);
> > +	if (!entry || entry->length != len)
> > +		return NULL;
> > +
> > +	ibdev_dbg(ucontext->device,
> > +		  "mmap: obj[0x%p] key[%#llx] addr[%#llx] len[%#llx] removed\n",
> > +		  entry->obj, key, entry->address, entry->length);
> > +
> > +	return entry;
> > +}
> > +EXPORT_SYMBOL(rdma_user_mmap_entry_get);
> 
> It is a mistake we keep making, and maybe the war is hopelessly lost
> now, but functions called from a driver should not be part of the
> ib_uverbs module - ideally uverbs is an optional module. They should
> be in ib_core.
> 
> Maybe put this in ib_core_uverbs.c ?
> 
> Kamal, you've been tackling various cleanups, maybe making ib_uverbs
> unloadable again is something you'd be keen on?
>

Yes, could you please give some background on that?


> > +/**
> > + * rdma_user_mmap_entry_insert() - Allocate and insert an entry to the mmap_xa.
> > + *
> > + * @ucontext: associated user context.
> > + * @obj: opaque driver object that will be stored in the entry.
> > + * @address: The address that will be mmapped to the user
> > + * @length: Length of the address that will be mmapped
> > + * @mmap_flag: opaque driver flags related to the address (For
> > + *           example could be used for cachability)
> > + *
> > + * This function should be called by drivers that use the rdma_user_mmap
> > + * interface for handling user mmapped addresses. The database is handled in
> > + * the core and helper functions are provided to insert entries into the
> > + * database and extract entries when the user call mmap with the given key.
> > + * The function returns a unique key that should be provided to user, the user
> > + * will use the key to map the given address.
> > + *
> > + * Note this locking scheme cannot support removal of entries,
> > + * except during ucontext destruction when the core code
> > + * guarentees no concurrency.
> > + *
> > + * Return: unique key or RDMA_USER_MMAP_INVALID if entry was not added.
> > + */
> > +u64 rdma_user_mmap_entry_insert(struct ib_ucontext *ucontext, void *obj,
> > +				u64 address, u64 length, u8 mmap_flag)
> > +{
> > +	struct rdma_user_mmap_entry *entry;
> > +	u32 next_mmap_page;
> > +	int err;
> > +
> > +	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
> > +	if (!entry)
> > +		return RDMA_USER_MMAP_INVALID;
> > +
> > +	entry->obj = obj;
> > +	entry->address = address;
> > +	entry->length = length;
> > +	entry->mmap_flag = mmap_flag;
> > +
> > +	xa_lock(&ucontext->mmap_xa);
> > +	if (check_add_overflow(ucontext->mmap_xa_page,
> > +			       (u32)(length >> PAGE_SHIFT),
> 
> Should this be divide round up ?
> 
> > +			       &next_mmap_page))
> > +		goto err_unlock;
> 
> I still don't like that this algorithm latches into a permanent
> failure when the xa_page wraps.
> 
> It seems worth spending a bit more time here to tidy this.. Keep using
> the mmap_xa_page scheme, but instead do something like
> 
> alloc_cyclic_range():
> 
> while () {
>    // Find first empty element in a cyclic way
>    xa_page_first = mmap_xa_page;
>    xa_find(xa, &xa_page_first, U32_MAX, XA_FREE_MARK)
> 
>    // Is there a enough room to have the range?
>    if (check_add_overflow(xa_page_first, npages, &xa_page_end)) {
>       mmap_xa_page = 0;
>       continue;
>    }
> 
>    // See if the element before intersects 
>    elm = xa_find(xa, &zero, xa_page_end, 0);
>    if (elm && intersects(xa_page_first, xa_page_last, elm->first, elm->last)) {
>       mmap_xa_page = elm->last + 1;
>       continue
>    }
>   
>    // xa_page_first -> xa_page_end should now be free
>    xa_insert(xa, xa_page_start, entry);
>    mmap_xa_page = xa_page_end + 1;
>    return xa_page_start;
> }
> 
> Approximately, please check it.
> 
> > @@ -2199,6 +2201,17 @@ struct iw_cm_conn_param;
> >  
> >  #define DECLARE_RDMA_OBJ_SIZE(ib_struct) size_t size_##ib_struct
> >  
> > +#define RDMA_USER_MMAP_FLAG_SHIFT 56
> > +#define RDMA_USER_MMAP_PAGE_MASK GENMASK(EFA_MMAP_FLAG_SHIFT - 1, 0)
> > +#define RDMA_USER_MMAP_INVALID U64_MAX
> > +struct rdma_user_mmap_entry {
> > +	void *obj;
> > +	u64 address;
> > +	u64 length;
> > +	u32 mmap_page;
> > +	u8 mmap_flag;
> > +};
> > +
> >  /**
> >   * struct ib_device_ops - InfiniBand device operations
> >   * This structure defines all the InfiniBand device operations, providers will
> > @@ -2311,6 +2324,19 @@ struct ib_device_ops {
> >  			      struct ib_udata *udata);
> >  	void (*dealloc_ucontext)(struct ib_ucontext *context);
> >  	int (*mmap)(struct ib_ucontext *context, struct vm_area_struct *vma);
> > +	/**
> > +	 * Memory that is mapped to the user can only be freed once the
> > +	 * ucontext of the application is destroyed. This is for
> > +	 * security reasons where we don't want an application to have a
> > +	 * mapping to phyiscal memory that is freed and allocated to
> > +	 * another application. For this reason, all the entries are
> > +	 * stored in ucontext and once ucontext is freed mmap_free is
> > +	 * called on each of the entries. They type of the memory that
> 
> They -> the
> 
> > +	 * was mapped may differ between entries and is opaque to the
> > +	 * rdma_user_mmap interface. Therefore needs to be implemented
> > +	 * by the driver in mmap_free.
> > +	 */
> > +	void (*mmap_free)(struct rdma_user_mmap_entry *entry);
> >  	void (*disassociate_ucontext)(struct ib_ucontext *ibcontext);
> >  	int (*alloc_pd)(struct ib_pd *pd, struct ib_udata *udata);
> >  	void (*dealloc_pd)(struct ib_pd *pd, struct ib_udata *udata);
> > @@ -2709,6 +2735,11 @@ void ib_set_device_ops(struct ib_device *device,
> >  #if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
> >  int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma,
> >  		      unsigned long pfn, unsigned long size, pgprot_t prot);
> > +u64 rdma_user_mmap_entry_insert(struct ib_ucontext *ucontext, void *obj,
> > +				u64 address, u64 length, u8 mmap_flag);
> > +struct rdma_user_mmap_entry *
> > +rdma_user_mmap_entry_get(struct ib_ucontext *ucontext, u64 key, u64 len);
> > +void rdma_user_mmap_entries_remove_free(struct ib_ucontext
> > *ucontext);
> 
> Should remove_free should be in the core-priv header?
> 
> Jason
Michal Kalderon July 29, 2019, 12:58 p.m. UTC | #9
> From: linux-rdma-owner@vger.kernel.org <linux-rdma-
> owner@vger.kernel.org> On Behalf Of Jason Gunthorpe
> 
> > +	xa_lock(&ucontext->mmap_xa);
> > +	if (check_add_overflow(ucontext->mmap_xa_page,
> > +			       (u32)(length >> PAGE_SHIFT),
> > +			       &next_mmap_page))
> > +		goto err_unlock;
> 
> I still don't like that this algorithm latches into a permanent failure when the
> xa_page wraps.
> 
> It seems worth spending a bit more time here to tidy this.. Keep using the
> mmap_xa_page scheme, but instead do something like
> 
> alloc_cyclic_range():
> 
> while () {
>    // Find first empty element in a cyclic way
>    xa_page_first = mmap_xa_page;
>    xa_find(xa, &xa_page_first, U32_MAX, XA_FREE_MARK)
> 
>    // Is there a enough room to have the range?
>    if (check_add_overflow(xa_page_first, npages, &xa_page_end)) {
>       mmap_xa_page = 0;
>       continue;
>    }
> 
>    // See if the element before intersects
>    elm = xa_find(xa, &zero, xa_page_end, 0);
>    if (elm && intersects(xa_page_first, xa_page_last, elm->first, elm->last)) {
>       mmap_xa_page = elm->last + 1;
>       continue
>    }
> 
>    // xa_page_first -> xa_page_end should now be free
>    xa_insert(xa, xa_page_start, entry);
>    mmap_xa_page = xa_page_end + 1;
>    return xa_page_start;
> }
> 
> Approximately, please check it.
Gal & Jason, 

Coming back to the mmap_xa_page algorithm, I couldn't find any background on this.
Why does the length need to be represented in the mmap_xa_page?
Why not simply use xa_alloc_cyclic (like in siw)?
This is simply a key to an mmap object...

Thanks,
Michal
Gal Pressman July 29, 2019, 1:53 p.m. UTC | #10
On 29/07/2019 15:58, Michal Kalderon wrote:
>> From: linux-rdma-owner@vger.kernel.org <linux-rdma-
>> owner@vger.kernel.org> On Behalf Of Jason Gunthorpe
>>
>>> +	xa_lock(&ucontext->mmap_xa);
>>> +	if (check_add_overflow(ucontext->mmap_xa_page,
>>> +			       (u32)(length >> PAGE_SHIFT),
>>> +			       &next_mmap_page))
>>> +		goto err_unlock;
>>
>> I still don't like that this algorithm latches into a permanent failure when the
>> xa_page wraps.
>>
>> It seems worth spending a bit more time here to tidy this.. Keep using the
>> mmap_xa_page scheme, but instead do something like
>>
>> alloc_cyclic_range():
>>
>> while () {
>>    // Find first empty element in a cyclic way
>>    xa_page_first = mmap_xa_page;
>>    xa_find(xa, &xa_page_first, U32_MAX, XA_FREE_MARK)
>>
>>    // Is there a enough room to have the range?
>>    if (check_add_overflow(xa_page_first, npages, &xa_page_end)) {
>>       mmap_xa_page = 0;
>>       continue;
>>    }
>>
>>    // See if the element before intersects
>>    elm = xa_find(xa, &zero, xa_page_end, 0);
>>    if (elm && intersects(xa_page_first, xa_page_last, elm->first, elm->last)) {
>>       mmap_xa_page = elm->last + 1;
>>       continue
>>    }
>>
>>    // xa_page_first -> xa_page_end should now be free
>>    xa_insert(xa, xa_page_start, entry);
>>    mmap_xa_page = xa_page_end + 1;
>>    return xa_page_start;
>> }
>>
>> Approximately, please check it.
> Gal & Jason, 
> 
> Coming back to the mmap_xa_page algorithm. I couldn't find some background on this. 
> Why do you need the length to be represented in the mmap_xa_page ?  
> Why not simply use xa_alloc_cyclic ( like in siw ) 
> This is simply a key to a mmap object... 

The intention was that the entry would "occupy" a number of xarray elements
according to its size (in pages). It wasn't initially like this, but IIRC this
was preferred by Jason.
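
For example (illustrative numbers, 4K pages): a 3-page entry allocated at
mmap_xa_page == 8 occupies indices 8..10 and returns key 8 << 12 == 0x8000,
advancing mmap_xa_page to 11, so the next entry's key starts at 0xb000 and
the two mmap ranges cannot overlap.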
Jason Gunthorpe July 29, 2019, 2:04 p.m. UTC | #11
On Mon, Jul 29, 2019 at 04:53:38PM +0300, Gal Pressman wrote:
> On 29/07/2019 15:58, Michal Kalderon wrote:
> >> From: linux-rdma-owner@vger.kernel.org <linux-rdma-
> >> owner@vger.kernel.org> On Behalf Of Jason Gunthorpe
> >>
> >>> +	xa_lock(&ucontext->mmap_xa);
> >>> +	if (check_add_overflow(ucontext->mmap_xa_page,
> >>> +			       (u32)(length >> PAGE_SHIFT),
> >>> +			       &next_mmap_page))
> >>> +		goto err_unlock;
> >>
> >> I still don't like that this algorithm latches into a permanent failure when the
> >> xa_page wraps.
> >>
> >> It seems worth spending a bit more time here to tidy this.. Keep using the
> >> mmap_xa_page scheme, but instead do something like
> >>
> >> alloc_cyclic_range():
> >>
> >> while () {
> >>    // Find first empty element in a cyclic way
> >>    xa_page_first = mmap_xa_page;
> >>    xa_find(xa, &xa_page_first, U32_MAX, XA_FREE_MARK)
> >>
> >>    // Is there a enough room to have the range?
> >>    if (check_add_overflow(xa_page_first, npages, &xa_page_end)) {
> >>       mmap_xa_page = 0;
> >>       continue;
> >>    }
> >>
> >>    // See if the element before intersects
> >>    elm = xa_find(xa, &zero, xa_page_end, 0);
> >>    if (elm && intersects(xa_page_first, xa_page_last, elm->first, elm->last)) {
> >>       mmap_xa_page = elm->last + 1;
> >>       continue
> >>    }
> >>
> >>    // xa_page_first -> xa_page_end should now be free
> >>    xa_insert(xa, xa_page_start, entry);
> >>    mmap_xa_page = xa_page_end + 1;
> >>    return xa_page_start;
> >> }
> >>
> >> Approximately, please check it.
> > Gal & Jason, 
> > 
> > Coming back to the mmap_xa_page algorithm. I couldn't find some background on this. 
> > Why do you need the length to be represented in the mmap_xa_page ?  
> > Why not simply use xa_alloc_cyclic ( like in siw )

I think siw is dealing only with PAGE_SIZE objects; efa had variable-sized
ones.

> > This is simply a key to a mmap object... 
> 
> The intention was that the entry would "occupy" number of xarray elements
> according to its size (in pages). It wasn't initially like this, but IIRC this
> was preferred by Jason.

It is not so critical; maybe we could drop it if it is really
simplifying. But it doesn't look so hard to make an xa algorithm that
will be OK.

The offset/length is shown in things like lsof and whatnot, and from
a debugging perspective it makes a lot more sense if the offset/length
are sensible, i.e. they should not overlap.

Jason
Jason Gunthorpe July 29, 2019, 2:06 p.m. UTC | #12
On Sun, Jul 28, 2019 at 11:45:56AM +0300, Gal Pressman wrote:
> On 26/07/2019 16:23, Jason Gunthorpe wrote:
> > On Fri, Jul 26, 2019 at 08:42:07AM +0000, Michal Kalderon wrote:
> > 
> >>>> But we don't free entires from the xa_array ( only when ucontext is
> >>>> destroyed) so how will There be an empty element after we wrap ?
> >>>
> >>> Oh!
> >>>
> >>> That should be fixed up too, in the general case if a user is
> >>> creating/destroying driver objects in loop we don't want memory usage to
> >>> be unbounded.
> >>>
> >>> The rdma_user_mmap stuff has VMA ops that can refcount the xa entry and
> >>> now that this is core code it is easy enough to harmonize the two things and
> >>> track the xa side from the struct rdma_umap_priv
> >>>
> >>> The question is, does EFA or qedr have a use model for this that allows a
> >>> userspace verb to create/destroy in a loop? ie do we need to fix this right
> >>> now?
> > 
> >> The mapping occurs for every qp and cq creation. So yes.
> >>
> >> So do you mean add a ref-cnt to the xarray entry and from umap
> >> decrease the refcnt and free?
> > 
> > Yes, free the entry (release the HW resource) and release the xa_array
> > ID.
> 
> This is a bit tricky for EFA.
> The UAR BAR resources (LLQ for example) aren't cleaned up until the UAR is
> deallocated, so many of the entries won't really be freed when the refcount
> reaches zero (i.e the HW considers these entries as refcounted as long as the
> UAR exists). The best we can do is free the DMA buffers for appropriate entries.

Drivers can still defer HW destruction until the ucontext destroys,
but this gives an option to move it sooner, which looks like the other
drivers do need as they can allocate these things in userspace loops.
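
For the entries that can be freed early, a driver's mmap_free could be as
simple as this sketch (the buffer struct and its fields are illustrative):

	static void my_mmap_free(struct rdma_user_mmap_entry *entry)
	{
		struct my_user_db_rec *db = entry->obj;	/* assumed */

		dma_free_coherent(db->dev, entry->length, db->kva,
				  entry->address);
		kfree(db);
	}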

Jason
Michal Kalderon July 29, 2019, 2:07 p.m. UTC | #13
> From: Gal Pressman <galpress@amazon.com>
> Sent: Monday, July 29, 2019 4:54 PM
> 
> On 29/07/2019 15:58, Michal Kalderon wrote:
> >> From: linux-rdma-owner@vger.kernel.org <linux-rdma-
> >> owner@vger.kernel.org> On Behalf Of Jason Gunthorpe
> >>
> >>> +	xa_lock(&ucontext->mmap_xa);
> >>> +	if (check_add_overflow(ucontext->mmap_xa_page,
> >>> +			       (u32)(length >> PAGE_SHIFT),
> >>> +			       &next_mmap_page))
> >>> +		goto err_unlock;
> >>
> >> I still don't like that this algorithm latches into a permanent
> >> failure when the xa_page wraps.
> >>
> >> It seems worth spending a bit more time here to tidy this.. Keep
> >> using the mmap_xa_page scheme, but instead do something like
> >>
> >> alloc_cyclic_range():
> >>
> >> while () {
> >>    // Find first empty element in a cyclic way
> >>    xa_page_first = mmap_xa_page;
> >>    xa_find(xa, &xa_page_first, U32_MAX, XA_FREE_MARK)
> >>
> >>    // Is there a enough room to have the range?
> >>    if (check_add_overflow(xa_page_first, npages, &xa_page_end)) {
> >>       mmap_xa_page = 0;
> >>       continue;
> >>    }
> >>
> >>    // See if the element before intersects
> >>    elm = xa_find(xa, &zero, xa_page_end, 0);
> >>    if (elm && intersects(xa_page_first, xa_page_last, elm->first, elm-
> >last)) {
> >>       mmap_xa_page = elm->last + 1;
> >>       continue
> >>    }
> >>
> >>    // xa_page_first -> xa_page_end should now be free
> >>    xa_insert(xa, xa_page_start, entry);
> >>    mmap_xa_page = xa_page_end + 1;
> >>    return xa_page_start;
> >> }
> >>
> >> Approximately, please check it.
> > Gal & Jason,
> >
> > Coming back to the mmap_xa_page algorithm. I couldn't find some
> background on this.
> > Why do you need the length to be represented in the mmap_xa_page ?
> > Why not simply use xa_alloc_cyclic ( like in siw ) This is simply a
> > key to a mmap object...
> 
> The intention was that the entry would "occupy" number of xarray elements
> according to its size (in pages). It wasn't initially like this, but IIRC this was
> preferred by Jason.

Thanks. So Jason, if we're now freeing the objects, can we simply use xa_alloc_cyclic instead?
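
For reference, a sketch of the siw-style scheme (assuming the xarray is
initialized with xa_init_flags(&ucontext->mmap_xa, XA_FLAGS_ALLOC) and
every entry occupies a single index):

	u32 id;
	int err;

	err = xa_alloc_cyclic(&ucontext->mmap_xa, &id, entry, xa_limit_32b,
			      &ucontext->mmap_xa_page, GFP_KERNEL);
	if (err < 0)
		goto err_free;	/* illustrative error path */
	key = (u64)id << PAGE_SHIFT;
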
Thanks,
Michal
Jason Gunthorpe July 29, 2019, 2:11 p.m. UTC | #14
On Sun, Jul 28, 2019 at 12:30:51PM +0300, Kamal Heib wrote:
> > Maybe put this in ib_core_uverbs.c ?
> > 
> > Kamal, you've been tackling various cleanups, maybe making ib_uverbs
> > unloadable again is something you'd be keen on?
> >
> 
> Yes, Could you please give some background on that?

Most of it is my fault from being too careless, but the general notion
is that all of these

$ grep EXPORT_SYMBOL uverbs_main.c uverbs_cmd.c  uverbs_marshall.c  rdma_core.c uverbs_std_types*.c uverbs_uapi.c 
uverbs_main.c:EXPORT_SYMBOL(ib_uverbs_get_ucontext_file);
uverbs_main.c:EXPORT_SYMBOL(rdma_user_mmap_io);
uverbs_cmd.c:EXPORT_SYMBOL(flow_resources_alloc);
uverbs_cmd.c:EXPORT_SYMBOL(ib_uverbs_flow_resources_free);
uverbs_cmd.c:EXPORT_SYMBOL(flow_resources_add);
uverbs_marshall.c:EXPORT_SYMBOL(ib_copy_ah_attr_to_user);
uverbs_marshall.c:EXPORT_SYMBOL(ib_copy_qp_attr_to_user);
uverbs_marshall.c:EXPORT_SYMBOL(ib_copy_path_rec_to_user);
uverbs_marshall.c:EXPORT_SYMBOL(ib_copy_path_rec_from_user);
rdma_core.c:EXPORT_SYMBOL(uverbs_idr_class);
rdma_core.c:EXPORT_SYMBOL(uverbs_close_fd);
rdma_core.c:EXPORT_SYMBOL(uverbs_fd_class);
uverbs_std_types.c:EXPORT_SYMBOL(uverbs_destroy_def_handler);

Need to go into some 'ib_core uverbs support' .c file in ib_core, be
moved to a header inline, or be otherwise relocated.

Maybe it is now unrealistic given that the uapi is so complicated; i.e.
uverbs_close_fd is just not easy to fix up.

Maybe the only ones that need fixing are ib_uverbs_get_ucontext_file and
rdma_user_mmap_io, as a lot of drivers are entangled with those now.

The other stuff is much harder..

Jason
Michal Kalderon July 29, 2019, 3:26 p.m. UTC | #15
> From: Jason Gunthorpe <jgg@ziepe.ca>
> Sent: Monday, July 29, 2019 5:05 PM
> 
> On Mon, Jul 29, 2019 at 04:53:38PM +0300, Gal Pressman wrote:
> > On 29/07/2019 15:58, Michal Kalderon wrote:
> > >> From: linux-rdma-owner@vger.kernel.org <linux-rdma-
> > >> owner@vger.kernel.org> On Behalf Of Jason Gunthorpe
> > >>
> > >>> +	xa_lock(&ucontext->mmap_xa);
> > >>> +	if (check_add_overflow(ucontext->mmap_xa_page,
> > >>> +			       (u32)(length >> PAGE_SHIFT),
> > >>> +			       &next_mmap_page))
> > >>> +		goto err_unlock;
> > >>
> > >> I still don't like that this algorithm latches into a permanent
> > >> failure when the xa_page wraps.
> > >>
> > >> It seems worth spending a bit more time here to tidy this.. Keep
> > >> using the mmap_xa_page scheme, but instead do something like
> > >>
> > >> alloc_cyclic_range():
> > >>
> > >> while () {
> > >>    // Find first empty element in a cyclic way
> > >>    xa_page_first = mmap_xa_page;
> > >>    xa_find(xa, &xa_page_first, U32_MAX, XA_FREE_MARK)
> > >>
> > >>    // Is there a enough room to have the range?
> > >>    if (check_add_overflow(xa_page_first, npages, &xa_page_end)) {
> > >>       mmap_xa_page = 0;
> > >>       continue;
> > >>    }
> > >>
> > >>    // See if the element before intersects
> > >>    elm = xa_find(xa, &zero, xa_page_end, 0);
> > >>    if (elm && intersects(xa_page_first, xa_page_last, elm->first, elm-
> >last)) {
> > >>       mmap_xa_page = elm->last + 1;
> > >>       continue
> > >>    }
> > >>
> > >>    // xa_page_first -> xa_page_end should now be free
> > >>    xa_insert(xa, xa_page_start, entry);
> > >>    mmap_xa_page = xa_page_end + 1;
> > >>    return xa_page_start;
> > >> }
> > >>
> > >> Approximately, please check it.
> > > Gal & Jason,
> > >
> > > Coming back to the mmap_xa_page algorithm. I couldn't find some
> background on this.
> > > Why do you need the length to be represented in the mmap_xa_page ?
> > > Why not simply use xa_alloc_cyclic ( like in siw )
> 
> I think siw is dealing with only PAGE_SIZE objects, efa had variable sized
> ones.
> 
> > > This is simply a key to a mmap object...
> >
> > The intention was that the entry would "occupy" number of xarray
> > elements according to its size (in pages). It wasn't initially like
> > this, but IIRC this was preferred by Jason.
> 
> It is not so critical, maybe we could drop it if it is really simplifiying. But it
> doesn't look so hard to make an xa algorithm that will be OK.
> 
> The offset/length is shown in things like lsof and what not, and from a
> debugging perspective it makes a lot more sense if the offset/length are
> sensible, ie they should not overlap.
> 
Thanks for the clarification 

> Jason

Patch

diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 8a6ccb936dfe..a830c2c5d691 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -2521,6 +2521,7 @@  void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
 	SET_DEVICE_OP(dev_ops, map_mr_sg_pi);
 	SET_DEVICE_OP(dev_ops, map_phys_fmr);
 	SET_DEVICE_OP(dev_ops, mmap);
+	SET_DEVICE_OP(dev_ops, mmap_free);
 	SET_DEVICE_OP(dev_ops, modify_ah);
 	SET_DEVICE_OP(dev_ops, modify_cq);
 	SET_DEVICE_OP(dev_ops, modify_device);
diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c
index ccf4d069c25c..1ed01b02401f 100644
--- a/drivers/infiniband/core/rdma_core.c
+++ b/drivers/infiniband/core/rdma_core.c
@@ -816,6 +816,7 @@  static void ufile_destroy_ucontext(struct ib_uverbs_file *ufile,
 
 	rdma_restrack_del(&ucontext->res);
 
+	rdma_user_mmap_entries_remove_free(ucontext);
 	ib_dev->ops.dealloc_ucontext(ucontext);
 	kfree(ucontext);
 
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 7ddd0e5bc6b3..44c0600245e4 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -254,6 +254,7 @@  static int ib_uverbs_get_context(struct uverbs_attr_bundle *attrs)
 
 	mutex_init(&ucontext->per_mm_list_lock);
 	INIT_LIST_HEAD(&ucontext->per_mm_list);
+	xa_init(&ucontext->mmap_xa);
 
 	ret = get_unused_fd_flags(O_CLOEXEC);
 	if (ret < 0)
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 11c13c1381cf..4b909d7b97de 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -965,6 +965,141 @@  int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma,
 }
 EXPORT_SYMBOL(rdma_user_mmap_io);
 
+static inline u64
+rdma_user_mmap_get_key(const struct rdma_user_mmap_entry *entry)
+{
+	return (u64)entry->mmap_page << PAGE_SHIFT;
+}
+
+/**
+ * rdma_user_mmap_entry_get() - Get an entry from the mmap_xa.
+ *
+ * @ucontext: associated user context.
+ * @key: The key received from rdma_user_mmap_entry_insert which
+ *     is provided by the user as the address to map.
+ * @len: The length the user wants to map
+ *
+ * This function is called when a user tries to mmap a key it
+ * initially received from the driver. The key was created by
+ * the function rdma_user_mmap_entry_insert.
+ *
+ * Return an entry if one exists or NULL if there is no match.
+ */
+struct rdma_user_mmap_entry *
+rdma_user_mmap_entry_get(struct ib_ucontext *ucontext, u64 key, u64 len)
+{
+	struct rdma_user_mmap_entry *entry;
+	u64 mmap_page;
+
+	mmap_page = key >> PAGE_SHIFT;
+	if (mmap_page > U32_MAX)
+		return NULL;
+
+	entry = xa_load(&ucontext->mmap_xa, mmap_page);
+	if (!entry || entry->length != len)
+		return NULL;
+
+	ibdev_dbg(ucontext->device,
+		  "mmap: obj[0x%p] key[%#llx] addr[%#llx] len[%#llx] removed\n",
+		  entry->obj, key, entry->address, entry->length);
+
+	return entry;
+}
+EXPORT_SYMBOL(rdma_user_mmap_entry_get);
+
+/**
+ * rdma_user_mmap_entry_insert() - Allocate and insert an entry to the mmap_xa.
+ *
+ * @ucontext: associated user context.
+ * @obj: opaque driver object that will be stored in the entry.
+ * @address: The address that will be mmapped to the user
+ * @length: Length of the address that will be mmapped
+ * @mmap_flag: opaque driver flags related to the address (for
+ *           example, could be used for cacheability)
+ *
+ * This function should be called by drivers that use the rdma_user_mmap
+ * interface for handling user mmapped addresses. The database is handled in
+ * the core and helper functions are provided to insert entries into the
+ * database and extract entries when the user calls mmap with the given key.
+ * The function returns a unique key that should be provided to user, the user
+ * will use the key to map the given address.
+ *
+ * Note this locking scheme cannot support removal of entries,
+ * except during ucontext destruction when the core code
+ * guarantees no concurrency.
+ *
+ * Return: unique key or RDMA_USER_MMAP_INVALID if entry was not added.
+ */
+u64 rdma_user_mmap_entry_insert(struct ib_ucontext *ucontext, void *obj,
+				u64 address, u64 length, u8 mmap_flag)
+{
+	struct rdma_user_mmap_entry *entry;
+	u32 next_mmap_page;
+	int err;
+
+	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+	if (!entry)
+		return RDMA_USER_MMAP_INVALID;
+
+	entry->obj = obj;
+	entry->address = address;
+	entry->length = length;
+	entry->mmap_flag = mmap_flag;
+
+	xa_lock(&ucontext->mmap_xa);
+	if (check_add_overflow(ucontext->mmap_xa_page,
+			       (u32)(length >> PAGE_SHIFT),
+			       &next_mmap_page))
+		goto err_unlock;
+
+	entry->mmap_page = ucontext->mmap_xa_page;
+	ucontext->mmap_xa_page = next_mmap_page;
+	err = __xa_insert(&ucontext->mmap_xa, entry->mmap_page, entry,
+			  GFP_KERNEL);
+	if (err)
+		goto err_unlock;
+
+	xa_unlock(&ucontext->mmap_xa);
+
+	ibdev_dbg(ucontext->device,
+		  "mmap: obj[0x%p] addr[%#llx], len[%#llx], key[%#llx] inserted\n",
+		  entry->obj, entry->address, entry->length,
+		  rdma_user_mmap_get_key(entry));
+
+	return rdma_user_mmap_get_key(entry);
+
+err_unlock:
+	xa_unlock(&ucontext->mmap_xa);
+	kfree(entry);
+	return RDMA_USER_MMAP_INVALID;
+}
+EXPORT_SYMBOL(rdma_user_mmap_entry_insert);
+
+/*
+ * This is only called when the ucontext is destroyed and there can be no
+ * concurrent query via mmap or allocate on the xarray, thus we can be sure no
+ * other thread is using the entry pointer. We also know that all the BAR
+ * pages have either been zap'd or munmaped at this point.  Normal pages are
+ * refcounted and will be freed at the proper time.
+ */
+void rdma_user_mmap_entries_remove_free(struct ib_ucontext *ucontext)
+{
+	struct rdma_user_mmap_entry *entry;
+	unsigned long mmap_page;
+
+	xa_for_each(&ucontext->mmap_xa, mmap_page, entry) {
+		xa_erase(&ucontext->mmap_xa, mmap_page);
+
+		ibdev_dbg(ucontext->device,
+			  "mmap: obj[0x%p] key[%#llx] addr[%#llx] len[%#llx] removed\n",
+			  entry->obj, rdma_user_mmap_get_key(entry),
+			  entry->address, entry->length);
+		if (ucontext->device->ops.mmap_free)
+			ucontext->device->ops.mmap_free(entry);
+		kfree(entry);
+	}
+}
+
 void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile)
 {
 	struct rdma_umap_priv *priv, *next_priv;
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 26e9c2594913..1ba29a00f584 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -1425,6 +1425,8 @@  struct ib_ucontext {
 	 * Implementation details of the RDMA core, don't use in drivers:
 	 */
 	struct rdma_restrack_entry res;
+	struct xarray mmap_xa;
+	u32 mmap_xa_page;
 };
 
 struct ib_uobject {
@@ -2199,6 +2201,17 @@  struct iw_cm_conn_param;
 
 #define DECLARE_RDMA_OBJ_SIZE(ib_struct) size_t size_##ib_struct
 
+#define RDMA_USER_MMAP_FLAG_SHIFT 56
+#define RDMA_USER_MMAP_PAGE_MASK GENMASK(RDMA_USER_MMAP_FLAG_SHIFT - 1, 0)
+#define RDMA_USER_MMAP_INVALID U64_MAX
+struct rdma_user_mmap_entry {
+	void *obj;
+	u64 address;
+	u64 length;
+	u32 mmap_page;
+	u8 mmap_flag;
+};
+
 /**
  * struct ib_device_ops - InfiniBand device operations
  * This structure defines all the InfiniBand device operations, providers will
@@ -2311,6 +2324,19 @@  struct ib_device_ops {
 			      struct ib_udata *udata);
 	void (*dealloc_ucontext)(struct ib_ucontext *context);
 	int (*mmap)(struct ib_ucontext *context, struct vm_area_struct *vma);
+	/**
+	 * Memory that is mapped to the user can only be freed once the
+	 * ucontext of the application is destroyed. This is for
+	 * security reasons where we don't want an application to have a
+	 * mapping to physical memory that is freed and allocated to
+	 * another application. For this reason, all the entries are
+	 * stored in the ucontext and once the ucontext is freed mmap_free
+	 * is called on each of the entries. The type of the memory that
+	 * was mapped may differ between entries and is opaque to the
+	 * rdma_user_mmap interface. Therefore the freeing needs to be
+	 * implemented by the driver in mmap_free.
+	 */
+	void (*mmap_free)(struct rdma_user_mmap_entry *entry);
 	void (*disassociate_ucontext)(struct ib_ucontext *ibcontext);
 	int (*alloc_pd)(struct ib_pd *pd, struct ib_udata *udata);
 	void (*dealloc_pd)(struct ib_pd *pd, struct ib_udata *udata);
@@ -2709,6 +2735,11 @@  void ib_set_device_ops(struct ib_device *device,
 #if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
 int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma,
 		      unsigned long pfn, unsigned long size, pgprot_t prot);
+u64 rdma_user_mmap_entry_insert(struct ib_ucontext *ucontext, void *obj,
+				u64 address, u64 length, u8 mmap_flag);
+struct rdma_user_mmap_entry *
+rdma_user_mmap_entry_get(struct ib_ucontext *ucontext, u64 key, u64 len);
+void rdma_user_mmap_entries_remove_free(struct ib_ucontext *ucontext);
 #else
 static inline int rdma_user_mmap_io(struct ib_ucontext *ucontext,
 				    struct vm_area_struct *vma,
@@ -2717,6 +2748,21 @@  static inline int rdma_user_mmap_io(struct ib_ucontext *ucontext,
 {
 	return -EINVAL;
 }
+
+static inline u64 rdma_user_mmap_entry_insert(struct ib_ucontext *ucontext,
+				void *obj, u64 address, u64 length, u8 mmap_flag)
+{
+	return RDMA_USER_MMAP_INVALID;
+}
+
+static inline struct rdma_user_mmap_entry *
+rdma_user_mmap_entry_get(struct ib_ucontext *ucontext, u64 key, u64 len)
+{
+	return NULL;
+}
+
+static inline void rdma_user_mmap_entries_remove_free(struct ib_ucontext *ucontext) {}
+
 #endif
 
 static inline int ib_copy_from_udata(void *dest, struct ib_udata *udata, size_t len)
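For completeness, the userspace side of this contract is just an mmap()
call whose offset is the key the driver returned; a hedged illustration
(cmd_fd, key and len would come from the driver's uverbs response in a
real flow):

#include <err.h>
#include <stdint.h>
#include <sys/mman.h>

/* Hypothetical userspace counterpart: pass the key as the mmap offset */
static void *map_rdma_key(int cmd_fd, uint64_t key, size_t len)
{
	void *p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED,
		       cmd_fd, (off_t)key);

	if (p == MAP_FAILED)
		err(1, "mmap of key 0x%llx failed", (unsigned long long)key);
	return p;
}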