diff mbox series

[RFC,bpf-next,4/4] libbpf: don't remove eBPF resources when other xsks are present

Message ID 20190603131907.13395-5-maciej.fijalkowski@intel.com
State RFC
Delegated to: BPF Maintainers
Headers show
Series [RFC,bpf-next,1/4] libbpf: fill the AF_XDP fill queue before bind() call | expand

Commit Message

Maciej Fijalkowski June 3, 2019, 1:19 p.m. UTC
In case where multiple xsk sockets are attached to a single interface
and one of them gets detached, the eBPF maps and program are removed.
This should not happen as the rest of xsksocks are still using these
resources.

In order to fix that, let's have an additional eBPF map with a single
entry that will be used as a xsks count. During the xsk_socket__delete,
remove the resources only when this count is equal to 0.  This map is
not being accessed from eBPF program, so the verifier is not associating
it with the prog, which in turn makes bpf_obj_get_info_by_fd not
reporting this map in nr_map_ids field of struct bpf_prog_info. The
described behaviour brings the need to have this map pinned, so in
case when socket is being created and the libbpf detects the presence of
bpf resources, it will be able to access that map.

Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
---
 tools/lib/bpf/xsk.c | 59 +++++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 51 insertions(+), 8 deletions(-)

Comments

Jonathan Lemon June 3, 2019, 6:26 p.m. UTC | #1
On 3 Jun 2019, at 6:19, Maciej Fijalkowski wrote:

> In case where multiple xsk sockets are attached to a single interface
> and one of them gets detached, the eBPF maps and program are removed.
> This should not happen as the rest of xsksocks are still using these
> resources.

I'm not seeing that behavior - each xsk holds its own reference to
xsks_maps, so when the map descriptor is closed, it doesn't necessarily
delete the map.

There's no refcount on the bpf program though; so the socket should not
be trying to remove the program - that should be done by the application.
Björn Töpel June 4, 2019, 8:08 a.m. UTC | #2
On 2019-06-03 15:19, Maciej Fijalkowski wrote:
> In case where multiple xsk sockets are attached to a single interface
> and one of them gets detached, the eBPF maps and program are removed.
> This should not happen as the rest of xsksocks are still using these
> resources.
> 
> In order to fix that, let's have an additional eBPF map with a single
> entry that will be used as a xsks count. During the xsk_socket__delete,
> remove the resources only when this count is equal to 0.  This map is
> not being accessed from eBPF program, so the verifier is not associating
> it with the prog, which in turn makes bpf_obj_get_info_by_fd not
> reporting this map in nr_map_ids field of struct bpf_prog_info. The
> described behaviour brings the need to have this map pinned, so in
> case when socket is being created and the libbpf detects the presence of
> bpf resources, it will be able to access that map.
>

This commit is only needed after #3 is applied, right? So, this is a way 
of refcounting XDP socks?


> Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
> ---
>   tools/lib/bpf/xsk.c | 59 +++++++++++++++++++++++++++++++++++++++++++++--------
>   1 file changed, 51 insertions(+), 8 deletions(-)
> 
> diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c
> index e28bedb0b078..88d2c931ad14 100644
> --- a/tools/lib/bpf/xsk.c
> +++ b/tools/lib/bpf/xsk.c
> @@ -44,6 +44,8 @@
>    #define PF_XDP AF_XDP
>   #endif
>   
> +#define XSKS_CNT_MAP_PATH "/sys/fs/bpf/xsks_cnt_map"
> +
>   struct xsk_umem {
>   	struct xsk_ring_prod *fill;
>   	struct xsk_ring_cons *comp;
> @@ -65,6 +67,7 @@ struct xsk_socket {
>   	int prog_fd;
>   	int qidconf_map_fd;
>   	int xsks_map_fd;
> +	int xsks_cnt_map_fd;
>   	__u32 queue_id;
>   	char ifname[IFNAMSIZ];
>   };
> @@ -372,7 +375,7 @@ static int xsk_get_max_queues(struct xsk_socket *xsk)
>   static int xsk_create_bpf_maps(struct xsk_socket *xsk)
>   {
>   	int max_queues;
> -	int fd;
> +	int fd, ret;
>   
>   	max_queues = xsk_get_max_queues(xsk);
>   	if (max_queues < 0)
> @@ -392,6 +395,24 @@ static int xsk_create_bpf_maps(struct xsk_socket *xsk)
>   	}
>   	xsk->xsks_map_fd = fd;
>   
> +	fd = bpf_create_map_name(BPF_MAP_TYPE_ARRAY, "xsks_cnt_map",
> +				 sizeof(int), sizeof(int), 1, 0);
> +	if (fd < 0) {
> +		close(xsk->qidconf_map_fd);
> +		close(xsk->xsks_map_fd);
> +		return fd;
> +	}
> +
> +	ret = bpf_obj_pin(fd, XSKS_CNT_MAP_PATH);
> +	if (ret < 0) {
> +		pr_warning("pinning map failed; is bpffs mounted?\n");
> +		close(xsk->qidconf_map_fd);
> +		close(xsk->xsks_map_fd);
> +		close(fd);
> +		return ret;
> +	}
> +	xsk->xsks_cnt_map_fd = fd;
> +
>   	return 0;
>   }
>   
> @@ -456,8 +477,10 @@ static int xsk_lookup_bpf_maps(struct xsk_socket *xsk)
>   		close(fd);
>   	}
>   
> +	xsk->xsks_cnt_map_fd = bpf_obj_get(XSKS_CNT_MAP_PATH);
>   	err = 0;
> -	if (xsk->qidconf_map_fd < 0 || xsk->xsks_map_fd < 0) {
> +	if (xsk->qidconf_map_fd < 0 || xsk->xsks_map_fd < 0 ||
> +	    xsk->xsks_cnt_map_fd < 0) {
>   		err = -ENOENT;
>   		xsk_delete_bpf_maps(xsk);
>   	}
> @@ -467,17 +490,25 @@ static int xsk_lookup_bpf_maps(struct xsk_socket *xsk)
>   	return err;
>   }
>   
> -static void xsk_clear_bpf_maps(struct xsk_socket *xsk)
> +static void xsk_clear_bpf_maps(struct xsk_socket *xsk, long *xsks_cnt_ptr)
>   {
> +	long xsks_cnt, key = 0;
>   	int qid = false;
>   
>   	bpf_map_update_elem(xsk->qidconf_map_fd, &xsk->queue_id, &qid, 0);
>   	bpf_map_delete_elem(xsk->xsks_map_fd, &xsk->queue_id);
> +	bpf_map_lookup_elem(xsk->xsks_cnt_map_fd, &key, &xsks_cnt);
> +	if (xsks_cnt)
> +		xsks_cnt--;
> +	bpf_map_update_elem(xsk->xsks_cnt_map_fd, &key, &xsks_cnt, 0);
> +	if (xsks_cnt_ptr)
> +		*xsks_cnt_ptr = xsks_cnt;

This refcount scheme will not work; There's no synchronization between 
the updates (cross process)!

>   }
>   
>   static int xsk_set_bpf_maps(struct xsk_socket *xsk)
>   {
>   	int qid = true, fd = xsk->fd, err;
> +	long xsks_cnt, key = 0;
>   
>   	err = bpf_map_update_elem(xsk->qidconf_map_fd, &xsk->queue_id, &qid, 0);
>   	if (err)
> @@ -487,9 +518,18 @@ static int xsk_set_bpf_maps(struct xsk_socket *xsk)
>   	if (err)
>   		goto out;
>   
> +	err = bpf_map_lookup_elem(xsk->xsks_cnt_map_fd, &key, &xsks_cnt);
> +	if (err)
> +		goto out;
> +
> +	xsks_cnt++;
> +	err = bpf_map_update_elem(xsk->xsks_cnt_map_fd, &key, &xsks_cnt, 0);
> +	if (err)
> +		goto out;
> +

Ditto.

>   	return 0;
>   out:
> -	xsk_clear_bpf_maps(xsk);
> +	xsk_clear_bpf_maps(xsk, NULL);
>   	return err;
>   }
>   
> @@ -752,13 +792,18 @@ void xsk_socket__delete(struct xsk_socket *xsk)
>   	size_t desc_sz = sizeof(struct xdp_desc);
>   	struct xdp_mmap_offsets off;
>   	socklen_t optlen;
> +	long xsks_cnt;
>   	int err;
>   
>   	if (!xsk)
>   		return;
>   
> -	xsk_clear_bpf_maps(xsk);
> -	xsk_delete_bpf_maps(xsk);
> +	xsk_clear_bpf_maps(xsk, &xsks_cnt);
> +	unlink(XSKS_CNT_MAP_PATH);
> +	if (!xsks_cnt) {
> +		xsk_delete_bpf_maps(xsk);
> +		xsk_remove_xdp_prog(xsk);
> +	}
>   
>   	optlen = sizeof(off);
>   	err = getsockopt(xsk->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen);
> @@ -774,8 +819,6 @@ void xsk_socket__delete(struct xsk_socket *xsk)
>   
>   	}
>   
> -	xsk_remove_xdp_prog(xsk);
> -
>   	xsk->umem->refcount--;
>   	/* Do not close an fd that also has an associated umem connected
>   	 * to it.
>
Maciej Fijalkowski June 4, 2019, 3:07 p.m. UTC | #3
On Tue, 4 Jun 2019 10:08:03 +0200
Björn Töpel <bjorn.topel@intel.com> wrote:

> On 2019-06-03 15:19, Maciej Fijalkowski wrote:
> > In case where multiple xsk sockets are attached to a single interface
> > and one of them gets detached, the eBPF maps and program are removed.
> > This should not happen as the rest of xsksocks are still using these
> > resources.
> > 
> > In order to fix that, let's have an additional eBPF map with a single
> > entry that will be used as a xsks count. During the xsk_socket__delete,
> > remove the resources only when this count is equal to 0.  This map is
> > not being accessed from eBPF program, so the verifier is not associating
> > it with the prog, which in turn makes bpf_obj_get_info_by_fd not
> > reporting this map in nr_map_ids field of struct bpf_prog_info. The
> > described behaviour brings the need to have this map pinned, so in
> > case when socket is being created and the libbpf detects the presence of
> > bpf resources, it will be able to access that map.
> >
> 
> This commit is only needed after #3 is applied, right? So, this is a way 
> of refcounting XDP socks?

Yes, but as you pointed out it needs synchronization.

> 
> 
> > Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
> > ---
> >   tools/lib/bpf/xsk.c | 59 +++++++++++++++++++++++++++++++++++++++++++++--------
> >   1 file changed, 51 insertions(+), 8 deletions(-)
> > 
> > diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c
> > index e28bedb0b078..88d2c931ad14 100644
> > --- a/tools/lib/bpf/xsk.c
> > +++ b/tools/lib/bpf/xsk.c
> > @@ -44,6 +44,8 @@
> >    #define PF_XDP AF_XDP
> >   #endif
> >   
> > +#define XSKS_CNT_MAP_PATH "/sys/fs/bpf/xsks_cnt_map"
> > +
> >   struct xsk_umem {
> >   	struct xsk_ring_prod *fill;
> >   	struct xsk_ring_cons *comp;
> > @@ -65,6 +67,7 @@ struct xsk_socket {
> >   	int prog_fd;
> >   	int qidconf_map_fd;
> >   	int xsks_map_fd;
> > +	int xsks_cnt_map_fd;
> >   	__u32 queue_id;
> >   	char ifname[IFNAMSIZ];
> >   };
> > @@ -372,7 +375,7 @@ static int xsk_get_max_queues(struct xsk_socket *xsk)
> >   static int xsk_create_bpf_maps(struct xsk_socket *xsk)
> >   {
> >   	int max_queues;
> > -	int fd;
> > +	int fd, ret;
> >   
> >   	max_queues = xsk_get_max_queues(xsk);
> >   	if (max_queues < 0)
> > @@ -392,6 +395,24 @@ static int xsk_create_bpf_maps(struct xsk_socket *xsk)
> >   	}
> >   	xsk->xsks_map_fd = fd;
> >   
> > +	fd = bpf_create_map_name(BPF_MAP_TYPE_ARRAY, "xsks_cnt_map",
> > +				 sizeof(int), sizeof(int), 1, 0);
> > +	if (fd < 0) {
> > +		close(xsk->qidconf_map_fd);
> > +		close(xsk->xsks_map_fd);
> > +		return fd;
> > +	}
> > +
> > +	ret = bpf_obj_pin(fd, XSKS_CNT_MAP_PATH);
> > +	if (ret < 0) {
> > +		pr_warning("pinning map failed; is bpffs mounted?\n");
> > +		close(xsk->qidconf_map_fd);
> > +		close(xsk->xsks_map_fd);
> > +		close(fd);
> > +		return ret;
> > +	}
> > +	xsk->xsks_cnt_map_fd = fd;
> > +
> >   	return 0;
> >   }
> >   
> > @@ -456,8 +477,10 @@ static int xsk_lookup_bpf_maps(struct xsk_socket *xsk)
> >   		close(fd);
> >   	}
> >   
> > +	xsk->xsks_cnt_map_fd = bpf_obj_get(XSKS_CNT_MAP_PATH);
> >   	err = 0;
> > -	if (xsk->qidconf_map_fd < 0 || xsk->xsks_map_fd < 0) {
> > +	if (xsk->qidconf_map_fd < 0 || xsk->xsks_map_fd < 0 ||
> > +	    xsk->xsks_cnt_map_fd < 0) {
> >   		err = -ENOENT;
> >   		xsk_delete_bpf_maps(xsk);
> >   	}
> > @@ -467,17 +490,25 @@ static int xsk_lookup_bpf_maps(struct xsk_socket *xsk)
> >   	return err;
> >   }
> >   
> > -static void xsk_clear_bpf_maps(struct xsk_socket *xsk)
> > +static void xsk_clear_bpf_maps(struct xsk_socket *xsk, long *xsks_cnt_ptr)
> >   {
> > +	long xsks_cnt, key = 0;
> >   	int qid = false;
> >   
> >   	bpf_map_update_elem(xsk->qidconf_map_fd, &xsk->queue_id, &qid, 0);
> >   	bpf_map_delete_elem(xsk->xsks_map_fd, &xsk->queue_id);
> > +	bpf_map_lookup_elem(xsk->xsks_cnt_map_fd, &key, &xsks_cnt);
> > +	if (xsks_cnt)
> > +		xsks_cnt--;
> > +	bpf_map_update_elem(xsk->xsks_cnt_map_fd, &key, &xsks_cnt, 0);
> > +	if (xsks_cnt_ptr)
> > +		*xsks_cnt_ptr = xsks_cnt;
> 
> This refcount scheme will not work; There's no synchronization between 
> the updates (cross process)!

Ugh, shame. Let me fix this in v2 :(

> 
> >   }
> >   
> >   static int xsk_set_bpf_maps(struct xsk_socket *xsk)
> >   {
> >   	int qid = true, fd = xsk->fd, err;
> > +	long xsks_cnt, key = 0;
> >   
> >   	err = bpf_map_update_elem(xsk->qidconf_map_fd, &xsk->queue_id, &qid, 0);
> >   	if (err)
> > @@ -487,9 +518,18 @@ static int xsk_set_bpf_maps(struct xsk_socket *xsk)
> >   	if (err)
> >   		goto out;
> >   
> > +	err = bpf_map_lookup_elem(xsk->xsks_cnt_map_fd, &key, &xsks_cnt);
> > +	if (err)
> > +		goto out;
> > +
> > +	xsks_cnt++;
> > +	err = bpf_map_update_elem(xsk->xsks_cnt_map_fd, &key, &xsks_cnt, 0);
> > +	if (err)
> > +		goto out;
> > +
> 
> Ditto.
> 
> >   	return 0;
> >   out:
> > -	xsk_clear_bpf_maps(xsk);
> > +	xsk_clear_bpf_maps(xsk, NULL);
> >   	return err;
> >   }
> >   
> > @@ -752,13 +792,18 @@ void xsk_socket__delete(struct xsk_socket *xsk)
> >   	size_t desc_sz = sizeof(struct xdp_desc);
> >   	struct xdp_mmap_offsets off;
> >   	socklen_t optlen;
> > +	long xsks_cnt;
> >   	int err;
> >   
> >   	if (!xsk)
> >   		return;
> >   
> > -	xsk_clear_bpf_maps(xsk);
> > -	xsk_delete_bpf_maps(xsk);
> > +	xsk_clear_bpf_maps(xsk, &xsks_cnt);
> > +	unlink(XSKS_CNT_MAP_PATH);
> > +	if (!xsks_cnt) {
> > +		xsk_delete_bpf_maps(xsk);
> > +		xsk_remove_xdp_prog(xsk);
> > +	}
> >   
> >   	optlen = sizeof(off);
> >   	err = getsockopt(xsk->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen);
> > @@ -774,8 +819,6 @@ void xsk_socket__delete(struct xsk_socket *xsk)
> >   
> >   	}
> >   
> > -	xsk_remove_xdp_prog(xsk);
> > -
> >   	xsk->umem->refcount--;
> >   	/* Do not close an fd that also has an associated umem connected
> >   	 * to it.
> >
Björn Töpel June 5, 2019, 9:26 a.m. UTC | #4
On Tue, 4 Jun 2019 at 17:07, Maciej Fijalkowski
<maciejromanfijalkowski@gmail.com> wrote:
>
> On Tue, 4 Jun 2019 10:08:03 +0200
> Björn Töpel <bjorn.topel@intel.com> wrote:
>
> > On 2019-06-03 15:19, Maciej Fijalkowski wrote:
> > > In case where multiple xsk sockets are attached to a single interface
> > > and one of them gets detached, the eBPF maps and program are removed.
> > > This should not happen as the rest of xsksocks are still using these
> > > resources.
> > >
> > > In order to fix that, let's have an additional eBPF map with a single
> > > entry that will be used as a xsks count. During the xsk_socket__delete,
> > > remove the resources only when this count is equal to 0.  This map is
> > > not being accessed from eBPF program, so the verifier is not associating
> > > it with the prog, which in turn makes bpf_obj_get_info_by_fd not
> > > reporting this map in nr_map_ids field of struct bpf_prog_info. The
> > > described behaviour brings the need to have this map pinned, so in
> > > case when socket is being created and the libbpf detects the presence of
> > > bpf resources, it will be able to access that map.
> > >
> >
> > This commit is only needed after #3 is applied, right? So, this is a way
> > of refcounting XDP socks?
>
> Yes, but as you pointed out it needs synchronization.
>
> >
> >
> > > Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
> > > ---
> > >   tools/lib/bpf/xsk.c | 59 +++++++++++++++++++++++++++++++++++++++++++++--------
> > >   1 file changed, 51 insertions(+), 8 deletions(-)
> > >
> > > diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c
> > > index e28bedb0b078..88d2c931ad14 100644
> > > --- a/tools/lib/bpf/xsk.c
> > > +++ b/tools/lib/bpf/xsk.c
> > > @@ -44,6 +44,8 @@
> > >    #define PF_XDP AF_XDP
> > >   #endif
> > >
> > > +#define XSKS_CNT_MAP_PATH "/sys/fs/bpf/xsks_cnt_map"
> > > +
> > >   struct xsk_umem {
> > >     struct xsk_ring_prod *fill;
> > >     struct xsk_ring_cons *comp;
> > > @@ -65,6 +67,7 @@ struct xsk_socket {
> > >     int prog_fd;
> > >     int qidconf_map_fd;
> > >     int xsks_map_fd;
> > > +   int xsks_cnt_map_fd;
> > >     __u32 queue_id;
> > >     char ifname[IFNAMSIZ];
> > >   };
> > > @@ -372,7 +375,7 @@ static int xsk_get_max_queues(struct xsk_socket *xsk)
> > >   static int xsk_create_bpf_maps(struct xsk_socket *xsk)
> > >   {
> > >     int max_queues;
> > > -   int fd;
> > > +   int fd, ret;
> > >
> > >     max_queues = xsk_get_max_queues(xsk);
> > >     if (max_queues < 0)
> > > @@ -392,6 +395,24 @@ static int xsk_create_bpf_maps(struct xsk_socket *xsk)
> > >     }
> > >     xsk->xsks_map_fd = fd;
> > >
> > > +   fd = bpf_create_map_name(BPF_MAP_TYPE_ARRAY, "xsks_cnt_map",
> > > +                            sizeof(int), sizeof(int), 1, 0);
> > > +   if (fd < 0) {
> > > +           close(xsk->qidconf_map_fd);
> > > +           close(xsk->xsks_map_fd);
> > > +           return fd;
> > > +   }
> > > +
> > > +   ret = bpf_obj_pin(fd, XSKS_CNT_MAP_PATH);
> > > +   if (ret < 0) {
> > > +           pr_warning("pinning map failed; is bpffs mounted?\n");
> > > +           close(xsk->qidconf_map_fd);
> > > +           close(xsk->xsks_map_fd);
> > > +           close(fd);
> > > +           return ret;
> > > +   }
> > > +   xsk->xsks_cnt_map_fd = fd;
> > > +
> > >     return 0;
> > >   }
> > >
> > > @@ -456,8 +477,10 @@ static int xsk_lookup_bpf_maps(struct xsk_socket *xsk)
> > >             close(fd);
> > >     }
> > >
> > > +   xsk->xsks_cnt_map_fd = bpf_obj_get(XSKS_CNT_MAP_PATH);
> > >     err = 0;
> > > -   if (xsk->qidconf_map_fd < 0 || xsk->xsks_map_fd < 0) {
> > > +   if (xsk->qidconf_map_fd < 0 || xsk->xsks_map_fd < 0 ||
> > > +       xsk->xsks_cnt_map_fd < 0) {
> > >             err = -ENOENT;
> > >             xsk_delete_bpf_maps(xsk);
> > >     }
> > > @@ -467,17 +490,25 @@ static int xsk_lookup_bpf_maps(struct xsk_socket *xsk)
> > >     return err;
> > >   }
> > >
> > > -static void xsk_clear_bpf_maps(struct xsk_socket *xsk)
> > > +static void xsk_clear_bpf_maps(struct xsk_socket *xsk, long *xsks_cnt_ptr)
> > >   {
> > > +   long xsks_cnt, key = 0;
> > >     int qid = false;
> > >
> > >     bpf_map_update_elem(xsk->qidconf_map_fd, &xsk->queue_id, &qid, 0);
> > >     bpf_map_delete_elem(xsk->xsks_map_fd, &xsk->queue_id);
> > > +   bpf_map_lookup_elem(xsk->xsks_cnt_map_fd, &key, &xsks_cnt);
> > > +   if (xsks_cnt)
> > > +           xsks_cnt--;
> > > +   bpf_map_update_elem(xsk->xsks_cnt_map_fd, &key, &xsks_cnt, 0);
> > > +   if (xsks_cnt_ptr)
> > > +           *xsks_cnt_ptr = xsks_cnt;
> >
> > This refcount scheme will not work; There's no synchronization between
> > the updates (cross process)!
>
> Ugh, shame. Let me fix this in v2 :(
>

Ok!

Hmm, there's no way of doing lock_xadds from bpf syscall, right? Any
thoughts how to do this? Maybe there's a need for a sock_ops prog for
XDP sockets...


Björn



> >
> > >   }
> > >
> > >   static int xsk_set_bpf_maps(struct xsk_socket *xsk)
> > >   {
> > >     int qid = true, fd = xsk->fd, err;
> > > +   long xsks_cnt, key = 0;
> > >
> > >     err = bpf_map_update_elem(xsk->qidconf_map_fd, &xsk->queue_id, &qid, 0);
> > >     if (err)
> > > @@ -487,9 +518,18 @@ static int xsk_set_bpf_maps(struct xsk_socket *xsk)
> > >     if (err)
> > >             goto out;
> > >
> > > +   err = bpf_map_lookup_elem(xsk->xsks_cnt_map_fd, &key, &xsks_cnt);
> > > +   if (err)
> > > +           goto out;
> > > +
> > > +   xsks_cnt++;
> > > +   err = bpf_map_update_elem(xsk->xsks_cnt_map_fd, &key, &xsks_cnt, 0);
> > > +   if (err)
> > > +           goto out;
> > > +
> >
> > Ditto.
> >
> > >     return 0;
> > >   out:
> > > -   xsk_clear_bpf_maps(xsk);
> > > +   xsk_clear_bpf_maps(xsk, NULL);
> > >     return err;
> > >   }
> > >
> > > @@ -752,13 +792,18 @@ void xsk_socket__delete(struct xsk_socket *xsk)
> > >     size_t desc_sz = sizeof(struct xdp_desc);
> > >     struct xdp_mmap_offsets off;
> > >     socklen_t optlen;
> > > +   long xsks_cnt;
> > >     int err;
> > >
> > >     if (!xsk)
> > >             return;
> > >
> > > -   xsk_clear_bpf_maps(xsk);
> > > -   xsk_delete_bpf_maps(xsk);
> > > +   xsk_clear_bpf_maps(xsk, &xsks_cnt);
> > > +   unlink(XSKS_CNT_MAP_PATH);
> > > +   if (!xsks_cnt) {
> > > +           xsk_delete_bpf_maps(xsk);
> > > +           xsk_remove_xdp_prog(xsk);
> > > +   }
> > >
> > >     optlen = sizeof(off);
> > >     err = getsockopt(xsk->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen);
> > > @@ -774,8 +819,6 @@ void xsk_socket__delete(struct xsk_socket *xsk)
> > >
> > >     }
> > >
> > > -   xsk_remove_xdp_prog(xsk);
> > > -
> > >     xsk->umem->refcount--;
> > >     /* Do not close an fd that also has an associated umem connected
> > >      * to it.
> > >
>
diff mbox series

Patch

diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c
index e28bedb0b078..88d2c931ad14 100644
--- a/tools/lib/bpf/xsk.c
+++ b/tools/lib/bpf/xsk.c
@@ -44,6 +44,8 @@ 
  #define PF_XDP AF_XDP
 #endif
 
+#define XSKS_CNT_MAP_PATH "/sys/fs/bpf/xsks_cnt_map"
+
 struct xsk_umem {
 	struct xsk_ring_prod *fill;
 	struct xsk_ring_cons *comp;
@@ -65,6 +67,7 @@  struct xsk_socket {
 	int prog_fd;
 	int qidconf_map_fd;
 	int xsks_map_fd;
+	int xsks_cnt_map_fd;
 	__u32 queue_id;
 	char ifname[IFNAMSIZ];
 };
@@ -372,7 +375,7 @@  static int xsk_get_max_queues(struct xsk_socket *xsk)
 static int xsk_create_bpf_maps(struct xsk_socket *xsk)
 {
 	int max_queues;
-	int fd;
+	int fd, ret;
 
 	max_queues = xsk_get_max_queues(xsk);
 	if (max_queues < 0)
@@ -392,6 +395,24 @@  static int xsk_create_bpf_maps(struct xsk_socket *xsk)
 	}
 	xsk->xsks_map_fd = fd;
 
+	fd = bpf_create_map_name(BPF_MAP_TYPE_ARRAY, "xsks_cnt_map",
+				 sizeof(int), sizeof(int), 1, 0);
+	if (fd < 0) {
+		close(xsk->qidconf_map_fd);
+		close(xsk->xsks_map_fd);
+		return fd;
+	}
+
+	ret = bpf_obj_pin(fd, XSKS_CNT_MAP_PATH);
+	if (ret < 0) {
+		pr_warning("pinning map failed; is bpffs mounted?\n");
+		close(xsk->qidconf_map_fd);
+		close(xsk->xsks_map_fd);
+		close(fd);
+		return ret;
+	}
+	xsk->xsks_cnt_map_fd = fd;
+
 	return 0;
 }
 
@@ -456,8 +477,10 @@  static int xsk_lookup_bpf_maps(struct xsk_socket *xsk)
 		close(fd);
 	}
 
+	xsk->xsks_cnt_map_fd = bpf_obj_get(XSKS_CNT_MAP_PATH);
 	err = 0;
-	if (xsk->qidconf_map_fd < 0 || xsk->xsks_map_fd < 0) {
+	if (xsk->qidconf_map_fd < 0 || xsk->xsks_map_fd < 0 ||
+	    xsk->xsks_cnt_map_fd < 0) {
 		err = -ENOENT;
 		xsk_delete_bpf_maps(xsk);
 	}
@@ -467,17 +490,25 @@  static int xsk_lookup_bpf_maps(struct xsk_socket *xsk)
 	return err;
 }
 
-static void xsk_clear_bpf_maps(struct xsk_socket *xsk)
+static void xsk_clear_bpf_maps(struct xsk_socket *xsk, long *xsks_cnt_ptr)
 {
+	long xsks_cnt, key = 0;
 	int qid = false;
 
 	bpf_map_update_elem(xsk->qidconf_map_fd, &xsk->queue_id, &qid, 0);
 	bpf_map_delete_elem(xsk->xsks_map_fd, &xsk->queue_id);
+	bpf_map_lookup_elem(xsk->xsks_cnt_map_fd, &key, &xsks_cnt);
+	if (xsks_cnt)
+		xsks_cnt--;
+	bpf_map_update_elem(xsk->xsks_cnt_map_fd, &key, &xsks_cnt, 0);
+	if (xsks_cnt_ptr)
+		*xsks_cnt_ptr = xsks_cnt;
 }
 
 static int xsk_set_bpf_maps(struct xsk_socket *xsk)
 {
 	int qid = true, fd = xsk->fd, err;
+	long xsks_cnt, key = 0;
 
 	err = bpf_map_update_elem(xsk->qidconf_map_fd, &xsk->queue_id, &qid, 0);
 	if (err)
@@ -487,9 +518,18 @@  static int xsk_set_bpf_maps(struct xsk_socket *xsk)
 	if (err)
 		goto out;
 
+	err = bpf_map_lookup_elem(xsk->xsks_cnt_map_fd, &key, &xsks_cnt);
+	if (err)
+		goto out;
+
+	xsks_cnt++;
+	err = bpf_map_update_elem(xsk->xsks_cnt_map_fd, &key, &xsks_cnt, 0);
+	if (err)
+		goto out;
+
 	return 0;
 out:
-	xsk_clear_bpf_maps(xsk);
+	xsk_clear_bpf_maps(xsk, NULL);
 	return err;
 }
 
@@ -752,13 +792,18 @@  void xsk_socket__delete(struct xsk_socket *xsk)
 	size_t desc_sz = sizeof(struct xdp_desc);
 	struct xdp_mmap_offsets off;
 	socklen_t optlen;
+	long xsks_cnt;
 	int err;
 
 	if (!xsk)
 		return;
 
-	xsk_clear_bpf_maps(xsk);
-	xsk_delete_bpf_maps(xsk);
+	xsk_clear_bpf_maps(xsk, &xsks_cnt);
+	unlink(XSKS_CNT_MAP_PATH);
+	if (!xsks_cnt) {
+		xsk_delete_bpf_maps(xsk);
+		xsk_remove_xdp_prog(xsk);
+	}
 
 	optlen = sizeof(off);
 	err = getsockopt(xsk->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen);
@@ -774,8 +819,6 @@  void xsk_socket__delete(struct xsk_socket *xsk)
 
 	}
 
-	xsk_remove_xdp_prog(xsk);
-
 	xsk->umem->refcount--;
 	/* Do not close an fd that also has an associated umem connected
 	 * to it.