diff mbox series

[v5,1/5] ebpf: Added eBPF map update through mmap.

Message ID 20230802204125.33688-2-andrew@daynix.com
State New
Headers show
Series eBPF RSS through QMP support. | expand

Commit Message

Andrew Melnichenko Aug. 2, 2023, 8:41 p.m. UTC
Changed eBPF map updates through mmaped array.
Mmaped arrays provide direct access to map data.
It should omit using bpf_map_update_elem() call,
which may require capabilities that are not present.

Signed-off-by: Andrew Melnychenko <andrew@daynix.com>
---
 ebpf/ebpf_rss.c | 117 ++++++++++++++++++++++++++++++++++++++----------
 ebpf/ebpf_rss.h |   5 +++
 2 files changed, 99 insertions(+), 23 deletions(-)

Comments

Jason Wang Aug. 8, 2023, 2:39 a.m. UTC | #1
On Thu, Aug 3, 2023 at 5:01 AM Andrew Melnychenko <andrew@daynix.com> wrote:
>
> Changed eBPF map updates through mmaped array.
> Mmaped arrays provide direct access to map data.
> It should omit using bpf_map_update_elem() call,
> which may require capabilities that are not present.
>
> Signed-off-by: Andrew Melnychenko <andrew@daynix.com>
> ---
>  ebpf/ebpf_rss.c | 117 ++++++++++++++++++++++++++++++++++++++----------
>  ebpf/ebpf_rss.h |   5 +++
>  2 files changed, 99 insertions(+), 23 deletions(-)
>
> diff --git a/ebpf/ebpf_rss.c b/ebpf/ebpf_rss.c
> index cee658c158b..247f5eee1b6 100644
> --- a/ebpf/ebpf_rss.c
> +++ b/ebpf/ebpf_rss.c
> @@ -27,19 +27,83 @@ void ebpf_rss_init(struct EBPFRSSContext *ctx)
>  {
>      if (ctx != NULL) {
>          ctx->obj = NULL;
> +        ctx->program_fd = -1;
> +        ctx->map_configuration = -1;
> +        ctx->map_toeplitz_key = -1;
> +        ctx->map_indirections_table = -1;
> +
> +        ctx->mmap_configuration = NULL;
> +        ctx->mmap_toeplitz_key = NULL;
> +        ctx->mmap_indirections_table = NULL;
>      }
>  }
>
>  bool ebpf_rss_is_loaded(struct EBPFRSSContext *ctx)
>  {
> -    return ctx != NULL && ctx->obj != NULL;
> +    return ctx != NULL && (ctx->obj != NULL || ctx->program_fd != -1);
> +}
> +
> +static bool ebpf_rss_mmap(struct EBPFRSSContext *ctx)
> +{
> +    if (!ebpf_rss_is_loaded(ctx)) {
> +        return false;
> +    }
> +
> +    ctx->mmap_configuration = mmap(NULL, qemu_real_host_page_size(),
> +                                   PROT_READ | PROT_WRITE, MAP_SHARED,
> +                                   ctx->map_configuration, 0);
> +    if (ctx->mmap_configuration == MAP_FAILED) {
> +        trace_ebpf_error("eBPF RSS", "can not mmap eBPF configuration array");
> +        return false;
> +    }
> +    ctx->mmap_toeplitz_key = mmap(NULL, qemu_real_host_page_size(),
> +                                   PROT_READ | PROT_WRITE, MAP_SHARED,
> +                                   ctx->map_toeplitz_key, 0);
> +    if (ctx->mmap_toeplitz_key == MAP_FAILED) {
> +        trace_ebpf_error("eBPF RSS", "can not mmap eBPF toeplitz key");
> +        goto toeplitz_fail;
> +    }
> +    ctx->mmap_indirections_table = mmap(NULL, qemu_real_host_page_size(),
> +                                   PROT_READ | PROT_WRITE, MAP_SHARED,
> +                                   ctx->map_indirections_table, 0);
> +    if (ctx->mmap_indirections_table == MAP_FAILED) {
> +        trace_ebpf_error("eBPF RSS", "can not mmap eBPF indirection table");
> +        goto indirection_fail;
> +    }
> +
> +    return true;
> +
> +indirection_fail:
> +    munmap(ctx->mmap_toeplitz_key, qemu_real_host_page_size());
> +toeplitz_fail:
> +    munmap(ctx->mmap_configuration, qemu_real_host_page_size());
> +
> +    ctx->mmap_configuration = NULL;
> +    ctx->mmap_toeplitz_key = NULL;
> +    ctx->mmap_indirections_table = NULL;
> +    return false;
> +}
> +
> +static void ebpf_rss_munmap(struct EBPFRSSContext *ctx)
> +{
> +    if (!ebpf_rss_is_loaded(ctx)) {
> +        return;
> +    }
> +
> +    munmap(ctx->mmap_indirections_table, qemu_real_host_page_size());
> +    munmap(ctx->mmap_toeplitz_key, qemu_real_host_page_size());
> +    munmap(ctx->mmap_configuration, qemu_real_host_page_size());
> +
> +    ctx->mmap_configuration = NULL;
> +    ctx->mmap_toeplitz_key = NULL;
> +    ctx->mmap_indirections_table = NULL;
>  }
>
>  bool ebpf_rss_load(struct EBPFRSSContext *ctx)
>  {
>      struct rss_bpf *rss_bpf_ctx;
>
> -    if (ctx == NULL) {
> +    if (ctx == NULL || ebpf_rss_is_loaded(ctx)) {
>          return false;
>      }
>
> @@ -66,10 +130,18 @@ bool ebpf_rss_load(struct EBPFRSSContext *ctx)
>      ctx->map_toeplitz_key = bpf_map__fd(
>              rss_bpf_ctx->maps.tap_rss_map_toeplitz_key);
>
> +    if (!ebpf_rss_mmap(ctx)) {
> +        goto error;
> +    }
> +
>      return true;
>  error:
>      rss_bpf__destroy(rss_bpf_ctx);
>      ctx->obj = NULL;
> +    ctx->program_fd = -1;
> +    ctx->map_configuration = -1;
> +    ctx->map_toeplitz_key = -1;
> +    ctx->map_indirections_table = -1;
>
>      return false;
>  }
> @@ -77,15 +149,11 @@ error:
>  static bool ebpf_rss_set_config(struct EBPFRSSContext *ctx,
>                                  struct EBPFRSSConfig *config)
>  {
> -    uint32_t map_key = 0;
> -
>      if (!ebpf_rss_is_loaded(ctx)) {
>          return false;
>      }
> -    if (bpf_map_update_elem(ctx->map_configuration,
> -                            &map_key, config, 0) < 0) {
> -        return false;
> -    }
> +
> +    memcpy(ctx->mmap_configuration, config, sizeof(*config));
>      return true;
>  }
>
> @@ -93,27 +161,19 @@ static bool ebpf_rss_set_indirections_table(struct EBPFRSSContext *ctx,
>                                              uint16_t *indirections_table,
>                                              size_t len)
>  {
> -    uint32_t i = 0;
> -
>      if (!ebpf_rss_is_loaded(ctx) || indirections_table == NULL ||
>         len > VIRTIO_NET_RSS_MAX_TABLE_LEN) {
>          return false;
>      }
>
> -    for (; i < len; ++i) {
> -        if (bpf_map_update_elem(ctx->map_indirections_table, &i,
> -                                indirections_table + i, 0) < 0) {
> -            return false;
> -        }
> -    }
> +    memcpy(ctx->mmap_indirections_table, indirections_table,
> +            sizeof(*indirections_table) * len);

As discussed, should we stick the compatibility on the host without
bpf mmap support?

If we don't, we need at least probe BPF mmap and disable ebpf rss? If
yes, we should track if the map is mmaped and switch between memcpy
and syscall.

Thanks

>      return true;
>  }
>
>  static bool ebpf_rss_set_toepliz_key(struct EBPFRSSContext *ctx,
>                                       uint8_t *toeplitz_key)
>  {
> -    uint32_t map_key = 0;
> -
>      /* prepare toeplitz key */
>      uint8_t toe[VIRTIO_NET_RSS_MAX_KEY_SIZE] = {};
>
> @@ -123,10 +183,7 @@ static bool ebpf_rss_set_toepliz_key(struct EBPFRSSContext *ctx,
>      memcpy(toe, toeplitz_key, VIRTIO_NET_RSS_MAX_KEY_SIZE);
>      *(uint32_t *)toe = ntohl(*(uint32_t *)toe);
>
> -    if (bpf_map_update_elem(ctx->map_toeplitz_key, &map_key, toe,
> -                            0) < 0) {
> -        return false;
> -    }
> +    memcpy(ctx->mmap_toeplitz_key, toe, VIRTIO_NET_RSS_MAX_KEY_SIZE);
>      return true;
>  }
>
> @@ -160,6 +217,20 @@ void ebpf_rss_unload(struct EBPFRSSContext *ctx)
>          return;
>      }
>
> -    rss_bpf__destroy(ctx->obj);
> +    ebpf_rss_munmap(ctx);
> +
> +    if (ctx->obj) {
> +        rss_bpf__destroy(ctx->obj);
> +    } else {
> +        close(ctx->program_fd);
> +        close(ctx->map_configuration);
> +        close(ctx->map_toeplitz_key);
> +        close(ctx->map_indirections_table);
> +    }
> +
>      ctx->obj = NULL;
> +    ctx->program_fd = -1;
> +    ctx->map_configuration = -1;
> +    ctx->map_toeplitz_key = -1;
> +    ctx->map_indirections_table = -1;
>  }
> diff --git a/ebpf/ebpf_rss.h b/ebpf/ebpf_rss.h
> index bf3f2572c7c..ab08a7266d0 100644
> --- a/ebpf/ebpf_rss.h
> +++ b/ebpf/ebpf_rss.h
> @@ -20,6 +20,11 @@ struct EBPFRSSContext {
>      int map_configuration;
>      int map_toeplitz_key;
>      int map_indirections_table;
> +
> +    /* mapped eBPF maps for direct access to omit bpf_map_update_elem() */
> +    void *mmap_configuration;
> +    void *mmap_toeplitz_key;
> +    void *mmap_indirections_table;
>  };
>
>  struct EBPFRSSConfig {
> --
> 2.41.0
>
Andrew Melnichenko Aug. 8, 2023, 11:15 p.m. UTC | #2
Hi all,

On Tue, Aug 8, 2023 at 5:39 AM Jason Wang <jasowang@redhat.com> wrote:
>
> On Thu, Aug 3, 2023 at 5:01 AM Andrew Melnychenko <andrew@daynix.com> wrote:
> >
> > Changed eBPF map updates through mmaped array.
> > Mmaped arrays provide direct access to map data.
> > It should omit using bpf_map_update_elem() call,
> > which may require capabilities that are not present.
> >
> > Signed-off-by: Andrew Melnychenko <andrew@daynix.com>
> > ---
> >  ebpf/ebpf_rss.c | 117 ++++++++++++++++++++++++++++++++++++++----------
> >  ebpf/ebpf_rss.h |   5 +++
> >  2 files changed, 99 insertions(+), 23 deletions(-)
> >
> > diff --git a/ebpf/ebpf_rss.c b/ebpf/ebpf_rss.c
> > index cee658c158b..247f5eee1b6 100644
> > --- a/ebpf/ebpf_rss.c
> > +++ b/ebpf/ebpf_rss.c
> > @@ -27,19 +27,83 @@ void ebpf_rss_init(struct EBPFRSSContext *ctx)
> >  {
> >      if (ctx != NULL) {
> >          ctx->obj = NULL;
> > +        ctx->program_fd = -1;
> > +        ctx->map_configuration = -1;
> > +        ctx->map_toeplitz_key = -1;
> > +        ctx->map_indirections_table = -1;
> > +
> > +        ctx->mmap_configuration = NULL;
> > +        ctx->mmap_toeplitz_key = NULL;
> > +        ctx->mmap_indirections_table = NULL;
> >      }
> >  }
> >
> >  bool ebpf_rss_is_loaded(struct EBPFRSSContext *ctx)
> >  {
> > -    return ctx != NULL && ctx->obj != NULL;
> > +    return ctx != NULL && (ctx->obj != NULL || ctx->program_fd != -1);
> > +}
> > +
> > +static bool ebpf_rss_mmap(struct EBPFRSSContext *ctx)
> > +{
> > +    if (!ebpf_rss_is_loaded(ctx)) {
> > +        return false;
> > +    }
> > +
> > +    ctx->mmap_configuration = mmap(NULL, qemu_real_host_page_size(),
> > +                                   PROT_READ | PROT_WRITE, MAP_SHARED,
> > +                                   ctx->map_configuration, 0);
> > +    if (ctx->mmap_configuration == MAP_FAILED) {
> > +        trace_ebpf_error("eBPF RSS", "can not mmap eBPF configuration array");
> > +        return false;
> > +    }
> > +    ctx->mmap_toeplitz_key = mmap(NULL, qemu_real_host_page_size(),
> > +                                   PROT_READ | PROT_WRITE, MAP_SHARED,
> > +                                   ctx->map_toeplitz_key, 0);
> > +    if (ctx->mmap_toeplitz_key == MAP_FAILED) {
> > +        trace_ebpf_error("eBPF RSS", "can not mmap eBPF toeplitz key");
> > +        goto toeplitz_fail;
> > +    }
> > +    ctx->mmap_indirections_table = mmap(NULL, qemu_real_host_page_size(),
> > +                                   PROT_READ | PROT_WRITE, MAP_SHARED,
> > +                                   ctx->map_indirections_table, 0);
> > +    if (ctx->mmap_indirections_table == MAP_FAILED) {
> > +        trace_ebpf_error("eBPF RSS", "can not mmap eBPF indirection table");
> > +        goto indirection_fail;
> > +    }
> > +
> > +    return true;
> > +
> > +indirection_fail:
> > +    munmap(ctx->mmap_toeplitz_key, qemu_real_host_page_size());
> > +toeplitz_fail:
> > +    munmap(ctx->mmap_configuration, qemu_real_host_page_size());
> > +
> > +    ctx->mmap_configuration = NULL;
> > +    ctx->mmap_toeplitz_key = NULL;
> > +    ctx->mmap_indirections_table = NULL;
> > +    return false;
> > +}
> > +
> > +static void ebpf_rss_munmap(struct EBPFRSSContext *ctx)
> > +{
> > +    if (!ebpf_rss_is_loaded(ctx)) {
> > +        return;
> > +    }
> > +
> > +    munmap(ctx->mmap_indirections_table, qemu_real_host_page_size());
> > +    munmap(ctx->mmap_toeplitz_key, qemu_real_host_page_size());
> > +    munmap(ctx->mmap_configuration, qemu_real_host_page_size());
> > +
> > +    ctx->mmap_configuration = NULL;
> > +    ctx->mmap_toeplitz_key = NULL;
> > +    ctx->mmap_indirections_table = NULL;
> >  }
> >
> >  bool ebpf_rss_load(struct EBPFRSSContext *ctx)
> >  {
> >      struct rss_bpf *rss_bpf_ctx;
> >
> > -    if (ctx == NULL) {
> > +    if (ctx == NULL || ebpf_rss_is_loaded(ctx)) {
> >          return false;
> >      }
> >
> > @@ -66,10 +130,18 @@ bool ebpf_rss_load(struct EBPFRSSContext *ctx)
> >      ctx->map_toeplitz_key = bpf_map__fd(
> >              rss_bpf_ctx->maps.tap_rss_map_toeplitz_key);
> >
> > +    if (!ebpf_rss_mmap(ctx)) {
> > +        goto error;
> > +    }
> > +
> >      return true;
> >  error:
> >      rss_bpf__destroy(rss_bpf_ctx);
> >      ctx->obj = NULL;
> > +    ctx->program_fd = -1;
> > +    ctx->map_configuration = -1;
> > +    ctx->map_toeplitz_key = -1;
> > +    ctx->map_indirections_table = -1;
> >
> >      return false;
> >  }
> > @@ -77,15 +149,11 @@ error:
> >  static bool ebpf_rss_set_config(struct EBPFRSSContext *ctx,
> >                                  struct EBPFRSSConfig *config)
> >  {
> > -    uint32_t map_key = 0;
> > -
> >      if (!ebpf_rss_is_loaded(ctx)) {
> >          return false;
> >      }
> > -    if (bpf_map_update_elem(ctx->map_configuration,
> > -                            &map_key, config, 0) < 0) {
> > -        return false;
> > -    }
> > +
> > +    memcpy(ctx->mmap_configuration, config, sizeof(*config));
> >      return true;
> >  }
> >
> > @@ -93,27 +161,19 @@ static bool ebpf_rss_set_indirections_table(struct EBPFRSSContext *ctx,
> >                                              uint16_t *indirections_table,
> >                                              size_t len)
> >  {
> > -    uint32_t i = 0;
> > -
> >      if (!ebpf_rss_is_loaded(ctx) || indirections_table == NULL ||
> >         len > VIRTIO_NET_RSS_MAX_TABLE_LEN) {
> >          return false;
> >      }
> >
> > -    for (; i < len; ++i) {
> > -        if (bpf_map_update_elem(ctx->map_indirections_table, &i,
> > -                                indirections_table + i, 0) < 0) {
> > -            return false;
> > -        }
> > -    }
> > +    memcpy(ctx->mmap_indirections_table, indirections_table,
> > +            sizeof(*indirections_table) * len);
>
> As discussed, should we stick the compatibility on the host without
> bpf mmap support?
>
> If we don't, we need at least probe BPF mmap and disable ebpf rss? If
> yes, we should track if the map is mmaped and switch between memcpy
> and syscall.
>
> Thanks

I've made some tests.
I've checked eBPF program on kernels 5.4, 5.5, and 6.3 with libbpf
1.0.1, 1.1.0, and last 1.2.0.
Overall, eBPF program requires explicit declaration of BPF_F_MAPPABLE map_flags.
Without this flag, the program can be loaded on every tested
kernel/libbpf configuration but Qemu can't mmap() the eBPF
fds(obviously).
Alternative to mmap() is bpf_map_update_elem() syscall/method which
would require capabilities for Qemu.
With this flag, kernel 5.4 + libbpf can't load eBPF object.
So, compatibility would require 2 different eBPF objects or some kind
of hack, also it would require additional capability for Qemu.
I don't think that we need checks for disabling eBPF RSS. It wouldn't
brake anything on the old kernel and after an update, it should work
ok.

>
> >      return true;
> >  }
> >
> >  static bool ebpf_rss_set_toepliz_key(struct EBPFRSSContext *ctx,
> >                                       uint8_t *toeplitz_key)
> >  {
> > -    uint32_t map_key = 0;
> > -
> >      /* prepare toeplitz key */
> >      uint8_t toe[VIRTIO_NET_RSS_MAX_KEY_SIZE] = {};
> >
> > @@ -123,10 +183,7 @@ static bool ebpf_rss_set_toepliz_key(struct EBPFRSSContext *ctx,
> >      memcpy(toe, toeplitz_key, VIRTIO_NET_RSS_MAX_KEY_SIZE);
> >      *(uint32_t *)toe = ntohl(*(uint32_t *)toe);
> >
> > -    if (bpf_map_update_elem(ctx->map_toeplitz_key, &map_key, toe,
> > -                            0) < 0) {
> > -        return false;
> > -    }
> > +    memcpy(ctx->mmap_toeplitz_key, toe, VIRTIO_NET_RSS_MAX_KEY_SIZE);
> >      return true;
> >  }
> >
> > @@ -160,6 +217,20 @@ void ebpf_rss_unload(struct EBPFRSSContext *ctx)
> >          return;
> >      }
> >
> > -    rss_bpf__destroy(ctx->obj);
> > +    ebpf_rss_munmap(ctx);
> > +
> > +    if (ctx->obj) {
> > +        rss_bpf__destroy(ctx->obj);
> > +    } else {
> > +        close(ctx->program_fd);
> > +        close(ctx->map_configuration);
> > +        close(ctx->map_toeplitz_key);
> > +        close(ctx->map_indirections_table);
> > +    }
> > +
> >      ctx->obj = NULL;
> > +    ctx->program_fd = -1;
> > +    ctx->map_configuration = -1;
> > +    ctx->map_toeplitz_key = -1;
> > +    ctx->map_indirections_table = -1;
> >  }
> > diff --git a/ebpf/ebpf_rss.h b/ebpf/ebpf_rss.h
> > index bf3f2572c7c..ab08a7266d0 100644
> > --- a/ebpf/ebpf_rss.h
> > +++ b/ebpf/ebpf_rss.h
> > @@ -20,6 +20,11 @@ struct EBPFRSSContext {
> >      int map_configuration;
> >      int map_toeplitz_key;
> >      int map_indirections_table;
> > +
> > +    /* mapped eBPF maps for direct access to omit bpf_map_update_elem() */
> > +    void *mmap_configuration;
> > +    void *mmap_toeplitz_key;
> > +    void *mmap_indirections_table;
> >  };
> >
> >  struct EBPFRSSConfig {
> > --
> > 2.41.0
> >
>
Jason Wang Aug. 9, 2023, 2:21 a.m. UTC | #3
On Wed, Aug 9, 2023 at 7:15 AM Andrew Melnichenko <andrew@daynix.com> wrote:
>
> Hi all,
>
> On Tue, Aug 8, 2023 at 5:39 AM Jason Wang <jasowang@redhat.com> wrote:
> >
> > On Thu, Aug 3, 2023 at 5:01 AM Andrew Melnychenko <andrew@daynix.com> wrote:
> > >
> > > Changed eBPF map updates through mmaped array.
> > > Mmaped arrays provide direct access to map data.
> > > It should omit using bpf_map_update_elem() call,
> > > which may require capabilities that are not present.
> > >
> > > Signed-off-by: Andrew Melnychenko <andrew@daynix.com>
> > > ---
> > >  ebpf/ebpf_rss.c | 117 ++++++++++++++++++++++++++++++++++++++----------
> > >  ebpf/ebpf_rss.h |   5 +++
> > >  2 files changed, 99 insertions(+), 23 deletions(-)
> > >
> > > diff --git a/ebpf/ebpf_rss.c b/ebpf/ebpf_rss.c
> > > index cee658c158b..247f5eee1b6 100644
> > > --- a/ebpf/ebpf_rss.c
> > > +++ b/ebpf/ebpf_rss.c
> > > @@ -27,19 +27,83 @@ void ebpf_rss_init(struct EBPFRSSContext *ctx)
> > >  {
> > >      if (ctx != NULL) {
> > >          ctx->obj = NULL;
> > > +        ctx->program_fd = -1;
> > > +        ctx->map_configuration = -1;
> > > +        ctx->map_toeplitz_key = -1;
> > > +        ctx->map_indirections_table = -1;
> > > +
> > > +        ctx->mmap_configuration = NULL;
> > > +        ctx->mmap_toeplitz_key = NULL;
> > > +        ctx->mmap_indirections_table = NULL;
> > >      }
> > >  }
> > >
> > >  bool ebpf_rss_is_loaded(struct EBPFRSSContext *ctx)
> > >  {
> > > -    return ctx != NULL && ctx->obj != NULL;
> > > +    return ctx != NULL && (ctx->obj != NULL || ctx->program_fd != -1);
> > > +}
> > > +
> > > +static bool ebpf_rss_mmap(struct EBPFRSSContext *ctx)
> > > +{
> > > +    if (!ebpf_rss_is_loaded(ctx)) {
> > > +        return false;
> > > +    }
> > > +
> > > +    ctx->mmap_configuration = mmap(NULL, qemu_real_host_page_size(),
> > > +                                   PROT_READ | PROT_WRITE, MAP_SHARED,
> > > +                                   ctx->map_configuration, 0);
> > > +    if (ctx->mmap_configuration == MAP_FAILED) {
> > > +        trace_ebpf_error("eBPF RSS", "can not mmap eBPF configuration array");
> > > +        return false;
> > > +    }
> > > +    ctx->mmap_toeplitz_key = mmap(NULL, qemu_real_host_page_size(),
> > > +                                   PROT_READ | PROT_WRITE, MAP_SHARED,
> > > +                                   ctx->map_toeplitz_key, 0);
> > > +    if (ctx->mmap_toeplitz_key == MAP_FAILED) {
> > > +        trace_ebpf_error("eBPF RSS", "can not mmap eBPF toeplitz key");
> > > +        goto toeplitz_fail;
> > > +    }
> > > +    ctx->mmap_indirections_table = mmap(NULL, qemu_real_host_page_size(),
> > > +                                   PROT_READ | PROT_WRITE, MAP_SHARED,
> > > +                                   ctx->map_indirections_table, 0);
> > > +    if (ctx->mmap_indirections_table == MAP_FAILED) {
> > > +        trace_ebpf_error("eBPF RSS", "can not mmap eBPF indirection table");
> > > +        goto indirection_fail;
> > > +    }
> > > +
> > > +    return true;
> > > +
> > > +indirection_fail:
> > > +    munmap(ctx->mmap_toeplitz_key, qemu_real_host_page_size());
> > > +toeplitz_fail:
> > > +    munmap(ctx->mmap_configuration, qemu_real_host_page_size());
> > > +
> > > +    ctx->mmap_configuration = NULL;
> > > +    ctx->mmap_toeplitz_key = NULL;
> > > +    ctx->mmap_indirections_table = NULL;
> > > +    return false;
> > > +}
> > > +
> > > +static void ebpf_rss_munmap(struct EBPFRSSContext *ctx)
> > > +{
> > > +    if (!ebpf_rss_is_loaded(ctx)) {
> > > +        return;
> > > +    }
> > > +
> > > +    munmap(ctx->mmap_indirections_table, qemu_real_host_page_size());
> > > +    munmap(ctx->mmap_toeplitz_key, qemu_real_host_page_size());
> > > +    munmap(ctx->mmap_configuration, qemu_real_host_page_size());
> > > +
> > > +    ctx->mmap_configuration = NULL;
> > > +    ctx->mmap_toeplitz_key = NULL;
> > > +    ctx->mmap_indirections_table = NULL;
> > >  }
> > >
> > >  bool ebpf_rss_load(struct EBPFRSSContext *ctx)
> > >  {
> > >      struct rss_bpf *rss_bpf_ctx;
> > >
> > > -    if (ctx == NULL) {
> > > +    if (ctx == NULL || ebpf_rss_is_loaded(ctx)) {
> > >          return false;
> > >      }
> > >
> > > @@ -66,10 +130,18 @@ bool ebpf_rss_load(struct EBPFRSSContext *ctx)
> > >      ctx->map_toeplitz_key = bpf_map__fd(
> > >              rss_bpf_ctx->maps.tap_rss_map_toeplitz_key);
> > >
> > > +    if (!ebpf_rss_mmap(ctx)) {
> > > +        goto error;
> > > +    }
> > > +
> > >      return true;
> > >  error:
> > >      rss_bpf__destroy(rss_bpf_ctx);
> > >      ctx->obj = NULL;
> > > +    ctx->program_fd = -1;
> > > +    ctx->map_configuration = -1;
> > > +    ctx->map_toeplitz_key = -1;
> > > +    ctx->map_indirections_table = -1;
> > >
> > >      return false;
> > >  }
> > > @@ -77,15 +149,11 @@ error:
> > >  static bool ebpf_rss_set_config(struct EBPFRSSContext *ctx,
> > >                                  struct EBPFRSSConfig *config)
> > >  {
> > > -    uint32_t map_key = 0;
> > > -
> > >      if (!ebpf_rss_is_loaded(ctx)) {
> > >          return false;
> > >      }
> > > -    if (bpf_map_update_elem(ctx->map_configuration,
> > > -                            &map_key, config, 0) < 0) {
> > > -        return false;
> > > -    }
> > > +
> > > +    memcpy(ctx->mmap_configuration, config, sizeof(*config));
> > >      return true;
> > >  }
> > >
> > > @@ -93,27 +161,19 @@ static bool ebpf_rss_set_indirections_table(struct EBPFRSSContext *ctx,
> > >                                              uint16_t *indirections_table,
> > >                                              size_t len)
> > >  {
> > > -    uint32_t i = 0;
> > > -
> > >      if (!ebpf_rss_is_loaded(ctx) || indirections_table == NULL ||
> > >         len > VIRTIO_NET_RSS_MAX_TABLE_LEN) {
> > >          return false;
> > >      }
> > >
> > > -    for (; i < len; ++i) {
> > > -        if (bpf_map_update_elem(ctx->map_indirections_table, &i,
> > > -                                indirections_table + i, 0) < 0) {
> > > -            return false;
> > > -        }
> > > -    }
> > > +    memcpy(ctx->mmap_indirections_table, indirections_table,
> > > +            sizeof(*indirections_table) * len);
> >
> > As discussed, should we stick the compatibility on the host without
> > bpf mmap support?
> >
> > If we don't, we need at least probe BPF mmap and disable ebpf rss? If
> > yes, we should track if the map is mmaped and switch between memcpy
> > and syscall.
> >
> > Thanks
>
> I've made some tests.
> I've checked eBPF program on kernels 5.4, 5.5, and 6.3 with libbpf
> 1.0.1, 1.1.0, and last 1.2.0.

It looks to me we don't check the libbpf version now. Do we need to do
that (e.g the bpf mmap support for libbpf is not supported from the
start).

> Overall, eBPF program requires explicit declaration of BPF_F_MAPPABLE map_flags.
> Without this flag, the program can be loaded on every tested
> kernel/libbpf configuration but Qemu can't mmap() the eBPF
> fds(obviously).

Exactly, and that's why I'm asking whether or not we need to probe mmap here.

This is because, without this series, Qemu can work without BPF mmap
(but with privilege). And this doesn't work after this series.

> Alternative to mmap() is bpf_map_update_elem() syscall/method which
> would require capabilities for Qemu.

Yes, but that's a different "problem". I think we should keep Qemu
work in that setup. (privilege + syscall updating).

> With this flag, kernel 5.4 + libbpf can't load eBPF object.
> So, compatibility would require 2 different eBPF objects or some kind
> of hack, also it would require additional capability for Qemu.
> I don't think that we need checks for disabling eBPF RSS. It wouldn't
> brake anything on the old kernel and after an update, it should work
> ok.

Even on old kernel + old libbpf without bpf mmap support?

Thanks

>
> >
> > >      return true;
> > >  }
> > >
> > >  static bool ebpf_rss_set_toepliz_key(struct EBPFRSSContext *ctx,
> > >                                       uint8_t *toeplitz_key)
> > >  {
> > > -    uint32_t map_key = 0;
> > > -
> > >      /* prepare toeplitz key */
> > >      uint8_t toe[VIRTIO_NET_RSS_MAX_KEY_SIZE] = {};
> > >
> > > @@ -123,10 +183,7 @@ static bool ebpf_rss_set_toepliz_key(struct EBPFRSSContext *ctx,
> > >      memcpy(toe, toeplitz_key, VIRTIO_NET_RSS_MAX_KEY_SIZE);
> > >      *(uint32_t *)toe = ntohl(*(uint32_t *)toe);
> > >
> > > -    if (bpf_map_update_elem(ctx->map_toeplitz_key, &map_key, toe,
> > > -                            0) < 0) {
> > > -        return false;
> > > -    }
> > > +    memcpy(ctx->mmap_toeplitz_key, toe, VIRTIO_NET_RSS_MAX_KEY_SIZE);
> > >      return true;
> > >  }
> > >
> > > @@ -160,6 +217,20 @@ void ebpf_rss_unload(struct EBPFRSSContext *ctx)
> > >          return;
> > >      }
> > >
> > > -    rss_bpf__destroy(ctx->obj);
> > > +    ebpf_rss_munmap(ctx);
> > > +
> > > +    if (ctx->obj) {
> > > +        rss_bpf__destroy(ctx->obj);
> > > +    } else {
> > > +        close(ctx->program_fd);
> > > +        close(ctx->map_configuration);
> > > +        close(ctx->map_toeplitz_key);
> > > +        close(ctx->map_indirections_table);
> > > +    }
> > > +
> > >      ctx->obj = NULL;
> > > +    ctx->program_fd = -1;
> > > +    ctx->map_configuration = -1;
> > > +    ctx->map_toeplitz_key = -1;
> > > +    ctx->map_indirections_table = -1;
> > >  }
> > > diff --git a/ebpf/ebpf_rss.h b/ebpf/ebpf_rss.h
> > > index bf3f2572c7c..ab08a7266d0 100644
> > > --- a/ebpf/ebpf_rss.h
> > > +++ b/ebpf/ebpf_rss.h
> > > @@ -20,6 +20,11 @@ struct EBPFRSSContext {
> > >      int map_configuration;
> > >      int map_toeplitz_key;
> > >      int map_indirections_table;
> > > +
> > > +    /* mapped eBPF maps for direct access to omit bpf_map_update_elem() */
> > > +    void *mmap_configuration;
> > > +    void *mmap_toeplitz_key;
> > > +    void *mmap_indirections_table;
> > >  };
> > >
> > >  struct EBPFRSSConfig {
> > > --
> > > 2.41.0
> > >
> >
>
Andrew Melnichenko Aug. 14, 2023, 8:36 a.m. UTC | #4
Hi, all.

I've researched an issue a bit. And what can we do?
In the case of an "old" kernel 5.4, we need to load RSS eBPF without
BPF_F_MAPPABLE
and use bpf syscall to update the maps. This requires additional capabilities,
and the libvirtd will never give any capabilities to Qemu.
So, the only case for "fallback" is running Qemu manually with
capabilities(or with root) on kernel 5.4.

We can add hack/fallback to RSS ebpf loading routine with additional
checks and modify for BPF_F_MAPPABLE.
And we can add a fallback for mmap/syscall ebpf access.

The problem is that we need kernel 5.5 with BPF_F_MAPPABLE in headers
to compile Qemu with fallback,
or move macro to the Qemu headers.

It can be implemented something like this:
RSS eBPF open/load:
 * open the skeleton.
 * load the skeleton as is - it would fail because of an unknown BPF_F_MAPPABLE.
 * hack/modify map_flags for skeleton and try to reload.
RSS eBPF map update(this is straightforward):
 * check the mem pointer if null, use bpf syscall

The advantage of hacks in Qemu is that we are aware of the eBPF context.
I suggest creating different series of patches that would implement
the hack/fallback,
If we really want to support eBPF on old kernels.

On Wed, Aug 9, 2023 at 5:21 AM Jason Wang <jasowang@redhat.com> wrote:
>
> On Wed, Aug 9, 2023 at 7:15 AM Andrew Melnichenko <andrew@daynix.com> wrote:
> >
> > Hi all,
> >
> > On Tue, Aug 8, 2023 at 5:39 AM Jason Wang <jasowang@redhat.com> wrote:
> > >
> > > On Thu, Aug 3, 2023 at 5:01 AM Andrew Melnychenko <andrew@daynix.com> wrote:
> > > >
> > > > Changed eBPF map updates through mmaped array.
> > > > Mmaped arrays provide direct access to map data.
> > > > It should omit using bpf_map_update_elem() call,
> > > > which may require capabilities that are not present.
> > > >
> > > > Signed-off-by: Andrew Melnychenko <andrew@daynix.com>
> > > > ---
> > > >  ebpf/ebpf_rss.c | 117 ++++++++++++++++++++++++++++++++++++++----------
> > > >  ebpf/ebpf_rss.h |   5 +++
> > > >  2 files changed, 99 insertions(+), 23 deletions(-)
> > > >
> > > > diff --git a/ebpf/ebpf_rss.c b/ebpf/ebpf_rss.c
> > > > index cee658c158b..247f5eee1b6 100644
> > > > --- a/ebpf/ebpf_rss.c
> > > > +++ b/ebpf/ebpf_rss.c
> > > > @@ -27,19 +27,83 @@ void ebpf_rss_init(struct EBPFRSSContext *ctx)
> > > >  {
> > > >      if (ctx != NULL) {
> > > >          ctx->obj = NULL;
> > > > +        ctx->program_fd = -1;
> > > > +        ctx->map_configuration = -1;
> > > > +        ctx->map_toeplitz_key = -1;
> > > > +        ctx->map_indirections_table = -1;
> > > > +
> > > > +        ctx->mmap_configuration = NULL;
> > > > +        ctx->mmap_toeplitz_key = NULL;
> > > > +        ctx->mmap_indirections_table = NULL;
> > > >      }
> > > >  }
> > > >
> > > >  bool ebpf_rss_is_loaded(struct EBPFRSSContext *ctx)
> > > >  {
> > > > -    return ctx != NULL && ctx->obj != NULL;
> > > > +    return ctx != NULL && (ctx->obj != NULL || ctx->program_fd != -1);
> > > > +}
> > > > +
> > > > +static bool ebpf_rss_mmap(struct EBPFRSSContext *ctx)
> > > > +{
> > > > +    if (!ebpf_rss_is_loaded(ctx)) {
> > > > +        return false;
> > > > +    }
> > > > +
> > > > +    ctx->mmap_configuration = mmap(NULL, qemu_real_host_page_size(),
> > > > +                                   PROT_READ | PROT_WRITE, MAP_SHARED,
> > > > +                                   ctx->map_configuration, 0);
> > > > +    if (ctx->mmap_configuration == MAP_FAILED) {
> > > > +        trace_ebpf_error("eBPF RSS", "can not mmap eBPF configuration array");
> > > > +        return false;
> > > > +    }
> > > > +    ctx->mmap_toeplitz_key = mmap(NULL, qemu_real_host_page_size(),
> > > > +                                   PROT_READ | PROT_WRITE, MAP_SHARED,
> > > > +                                   ctx->map_toeplitz_key, 0);
> > > > +    if (ctx->mmap_toeplitz_key == MAP_FAILED) {
> > > > +        trace_ebpf_error("eBPF RSS", "can not mmap eBPF toeplitz key");
> > > > +        goto toeplitz_fail;
> > > > +    }
> > > > +    ctx->mmap_indirections_table = mmap(NULL, qemu_real_host_page_size(),
> > > > +                                   PROT_READ | PROT_WRITE, MAP_SHARED,
> > > > +                                   ctx->map_indirections_table, 0);
> > > > +    if (ctx->mmap_indirections_table == MAP_FAILED) {
> > > > +        trace_ebpf_error("eBPF RSS", "can not mmap eBPF indirection table");
> > > > +        goto indirection_fail;
> > > > +    }
> > > > +
> > > > +    return true;
> > > > +
> > > > +indirection_fail:
> > > > +    munmap(ctx->mmap_toeplitz_key, qemu_real_host_page_size());
> > > > +toeplitz_fail:
> > > > +    munmap(ctx->mmap_configuration, qemu_real_host_page_size());
> > > > +
> > > > +    ctx->mmap_configuration = NULL;
> > > > +    ctx->mmap_toeplitz_key = NULL;
> > > > +    ctx->mmap_indirections_table = NULL;
> > > > +    return false;
> > > > +}
> > > > +
> > > > +static void ebpf_rss_munmap(struct EBPFRSSContext *ctx)
> > > > +{
> > > > +    if (!ebpf_rss_is_loaded(ctx)) {
> > > > +        return;
> > > > +    }
> > > > +
> > > > +    munmap(ctx->mmap_indirections_table, qemu_real_host_page_size());
> > > > +    munmap(ctx->mmap_toeplitz_key, qemu_real_host_page_size());
> > > > +    munmap(ctx->mmap_configuration, qemu_real_host_page_size());
> > > > +
> > > > +    ctx->mmap_configuration = NULL;
> > > > +    ctx->mmap_toeplitz_key = NULL;
> > > > +    ctx->mmap_indirections_table = NULL;
> > > >  }
> > > >
> > > >  bool ebpf_rss_load(struct EBPFRSSContext *ctx)
> > > >  {
> > > >      struct rss_bpf *rss_bpf_ctx;
> > > >
> > > > -    if (ctx == NULL) {
> > > > +    if (ctx == NULL || ebpf_rss_is_loaded(ctx)) {
> > > >          return false;
> > > >      }
> > > >
> > > > @@ -66,10 +130,18 @@ bool ebpf_rss_load(struct EBPFRSSContext *ctx)
> > > >      ctx->map_toeplitz_key = bpf_map__fd(
> > > >              rss_bpf_ctx->maps.tap_rss_map_toeplitz_key);
> > > >
> > > > +    if (!ebpf_rss_mmap(ctx)) {
> > > > +        goto error;
> > > > +    }
> > > > +
> > > >      return true;
> > > >  error:
> > > >      rss_bpf__destroy(rss_bpf_ctx);
> > > >      ctx->obj = NULL;
> > > > +    ctx->program_fd = -1;
> > > > +    ctx->map_configuration = -1;
> > > > +    ctx->map_toeplitz_key = -1;
> > > > +    ctx->map_indirections_table = -1;
> > > >
> > > >      return false;
> > > >  }
> > > > @@ -77,15 +149,11 @@ error:
> > > >  static bool ebpf_rss_set_config(struct EBPFRSSContext *ctx,
> > > >                                  struct EBPFRSSConfig *config)
> > > >  {
> > > > -    uint32_t map_key = 0;
> > > > -
> > > >      if (!ebpf_rss_is_loaded(ctx)) {
> > > >          return false;
> > > >      }
> > > > -    if (bpf_map_update_elem(ctx->map_configuration,
> > > > -                            &map_key, config, 0) < 0) {
> > > > -        return false;
> > > > -    }
> > > > +
> > > > +    memcpy(ctx->mmap_configuration, config, sizeof(*config));
> > > >      return true;
> > > >  }
> > > >
> > > > @@ -93,27 +161,19 @@ static bool ebpf_rss_set_indirections_table(struct EBPFRSSContext *ctx,
> > > >                                              uint16_t *indirections_table,
> > > >                                              size_t len)
> > > >  {
> > > > -    uint32_t i = 0;
> > > > -
> > > >      if (!ebpf_rss_is_loaded(ctx) || indirections_table == NULL ||
> > > >         len > VIRTIO_NET_RSS_MAX_TABLE_LEN) {
> > > >          return false;
> > > >      }
> > > >
> > > > -    for (; i < len; ++i) {
> > > > -        if (bpf_map_update_elem(ctx->map_indirections_table, &i,
> > > > -                                indirections_table + i, 0) < 0) {
> > > > -            return false;
> > > > -        }
> > > > -    }
> > > > +    memcpy(ctx->mmap_indirections_table, indirections_table,
> > > > +            sizeof(*indirections_table) * len);
> > >
> > > As discussed, should we stick the compatibility on the host without
> > > bpf mmap support?
> > >
> > > If we don't, we need at least probe BPF mmap and disable ebpf rss? If
> > > yes, we should track if the map is mmaped and switch between memcpy
> > > and syscall.
> > >
> > > Thanks
> >
> > I've made some tests.
> > I've checked eBPF program on kernels 5.4, 5.5, and 6.3 with libbpf
> > 1.0.1, 1.1.0, and last 1.2.0.
>
> It looks to me we don't check the libbpf version now. Do we need to do
> that (e.g the bpf mmap support for libbpf is not supported from the
> start).
>
> > Overall, eBPF program requires explicit declaration of BPF_F_MAPPABLE map_flags.
> > Without this flag, the program can be loaded on every tested
> > kernel/libbpf configuration but Qemu can't mmap() the eBPF
> > fds(obviously).
>
> Exactly, and that's why I'm asking whether or not we need to probe mmap here.
>
> This is because, without this series, Qemu can work without BPF mmap
> (but with privilege). And this doesn't work after this series.
>
> > Alternative to mmap() is bpf_map_update_elem() syscall/method which
> > would require capabilities for Qemu.
>
> Yes, but that's a different "problem". I think we should keep Qemu
> work in that setup. (privilege + syscall updating).
>
> > With this flag, kernel 5.4 + libbpf can't load eBPF object.
> > So, compatibility would require 2 different eBPF objects or some kind
> > of hack, also it would require additional capability for Qemu.
> > I don't think that we need checks for disabling eBPF RSS. It wouldn't
> > brake anything on the old kernel and after an update, it should work
> > ok.
>
> Even on old kernel + old libbpf without bpf mmap support?
>
> Thanks
>
> >
> > >
> > > >      return true;
> > > >  }
> > > >
> > > >  static bool ebpf_rss_set_toepliz_key(struct EBPFRSSContext *ctx,
> > > >                                       uint8_t *toeplitz_key)
> > > >  {
> > > > -    uint32_t map_key = 0;
> > > > -
> > > >      /* prepare toeplitz key */
> > > >      uint8_t toe[VIRTIO_NET_RSS_MAX_KEY_SIZE] = {};
> > > >
> > > > @@ -123,10 +183,7 @@ static bool ebpf_rss_set_toepliz_key(struct EBPFRSSContext *ctx,
> > > >      memcpy(toe, toeplitz_key, VIRTIO_NET_RSS_MAX_KEY_SIZE);
> > > >      *(uint32_t *)toe = ntohl(*(uint32_t *)toe);
> > > >
> > > > -    if (bpf_map_update_elem(ctx->map_toeplitz_key, &map_key, toe,
> > > > -                            0) < 0) {
> > > > -        return false;
> > > > -    }
> > > > +    memcpy(ctx->mmap_toeplitz_key, toe, VIRTIO_NET_RSS_MAX_KEY_SIZE);
> > > >      return true;
> > > >  }
> > > >
> > > > @@ -160,6 +217,20 @@ void ebpf_rss_unload(struct EBPFRSSContext *ctx)
> > > >          return;
> > > >      }
> > > >
> > > > -    rss_bpf__destroy(ctx->obj);
> > > > +    ebpf_rss_munmap(ctx);
> > > > +
> > > > +    if (ctx->obj) {
> > > > +        rss_bpf__destroy(ctx->obj);
> > > > +    } else {
> > > > +        close(ctx->program_fd);
> > > > +        close(ctx->map_configuration);
> > > > +        close(ctx->map_toeplitz_key);
> > > > +        close(ctx->map_indirections_table);
> > > > +    }
> > > > +
> > > >      ctx->obj = NULL;
> > > > +    ctx->program_fd = -1;
> > > > +    ctx->map_configuration = -1;
> > > > +    ctx->map_toeplitz_key = -1;
> > > > +    ctx->map_indirections_table = -1;
> > > >  }
> > > > diff --git a/ebpf/ebpf_rss.h b/ebpf/ebpf_rss.h
> > > > index bf3f2572c7c..ab08a7266d0 100644
> > > > --- a/ebpf/ebpf_rss.h
> > > > +++ b/ebpf/ebpf_rss.h
> > > > @@ -20,6 +20,11 @@ struct EBPFRSSContext {
> > > >      int map_configuration;
> > > >      int map_toeplitz_key;
> > > >      int map_indirections_table;
> > > > +
> > > > +    /* mapped eBPF maps for direct access to omit bpf_map_update_elem() */
> > > > +    void *mmap_configuration;
> > > > +    void *mmap_toeplitz_key;
> > > > +    void *mmap_indirections_table;
> > > >  };
> > > >
> > > >  struct EBPFRSSConfig {
> > > > --
> > > > 2.41.0
> > > >
> > >
> >
>
Jason Wang Aug. 16, 2023, 1:16 a.m. UTC | #5
On Mon, Aug 14, 2023 at 4:36 PM Andrew Melnichenko <andrew@daynix.com> wrote:
>
> Hi, all.
>
> I've researched an issue a bit. And what can we do?
> In the case of an "old" kernel 5.4, we need to load RSS eBPF without
> BPF_F_MAPPABLE
> and use bpf syscall to update the maps. This requires additional capabilities,
> and the libvirtd will never give any capabilities to Qemu.
> So, the only case for "fallback" is running Qemu manually with
> capabilities(or with root) on kernel 5.4.
>
> We can add hack/fallback to RSS ebpf loading routine with additional
> checks and modify for BPF_F_MAPPABLE.
> And we can add a fallback for mmap/syscall ebpf access.
>
> The problem is that we need kernel 5.5 with BPF_F_MAPPABLE in headers
> to compile Qemu with fallback,
> or move macro to the Qemu headers.
>
> It can be implemented something like this:
> RSS eBPF open/load:
>  * open the skeleton.
>  * load the skeleton as is - it would fail because of an unknown BPF_F_MAPPABLE.
>  * hack/modify map_flags for skeleton and try to reload.
> RSS eBPF map update(this is straightforward):
>  * check the mem pointer if null, use bpf syscall
>
> The advantage of hacks in Qemu is that we are aware of the eBPF context.
> I suggest creating different series of patches that would implement
> the hack/fallback,
> If we really want to support eBPF on old kernels.

So I think the simplest way is to disable eBPF RSS support on old
kernels? (e.g during the configure)

Thanks

>
> On Wed, Aug 9, 2023 at 5:21 AM Jason Wang <jasowang@redhat.com> wrote:
> >
> > On Wed, Aug 9, 2023 at 7:15 AM Andrew Melnichenko <andrew@daynix.com> wrote:
> > >
> > > Hi all,
> > >
> > > On Tue, Aug 8, 2023 at 5:39 AM Jason Wang <jasowang@redhat.com> wrote:
> > > >
> > > > On Thu, Aug 3, 2023 at 5:01 AM Andrew Melnychenko <andrew@daynix.com> wrote:
> > > > >
> > > > > Changed eBPF map updates through mmaped array.
> > > > > Mmaped arrays provide direct access to map data.
> > > > > It should omit using bpf_map_update_elem() call,
> > > > > which may require capabilities that are not present.
> > > > >
> > > > > Signed-off-by: Andrew Melnychenko <andrew@daynix.com>
> > > > > ---
> > > > >  ebpf/ebpf_rss.c | 117 ++++++++++++++++++++++++++++++++++++++----------
> > > > >  ebpf/ebpf_rss.h |   5 +++
> > > > >  2 files changed, 99 insertions(+), 23 deletions(-)
> > > > >
> > > > > diff --git a/ebpf/ebpf_rss.c b/ebpf/ebpf_rss.c
> > > > > index cee658c158b..247f5eee1b6 100644
> > > > > --- a/ebpf/ebpf_rss.c
> > > > > +++ b/ebpf/ebpf_rss.c
> > > > > @@ -27,19 +27,83 @@ void ebpf_rss_init(struct EBPFRSSContext *ctx)
> > > > >  {
> > > > >      if (ctx != NULL) {
> > > > >          ctx->obj = NULL;
> > > > > +        ctx->program_fd = -1;
> > > > > +        ctx->map_configuration = -1;
> > > > > +        ctx->map_toeplitz_key = -1;
> > > > > +        ctx->map_indirections_table = -1;
> > > > > +
> > > > > +        ctx->mmap_configuration = NULL;
> > > > > +        ctx->mmap_toeplitz_key = NULL;
> > > > > +        ctx->mmap_indirections_table = NULL;
> > > > >      }
> > > > >  }
> > > > >
> > > > >  bool ebpf_rss_is_loaded(struct EBPFRSSContext *ctx)
> > > > >  {
> > > > > -    return ctx != NULL && ctx->obj != NULL;
> > > > > +    return ctx != NULL && (ctx->obj != NULL || ctx->program_fd != -1);
> > > > > +}
> > > > > +
> > > > > +static bool ebpf_rss_mmap(struct EBPFRSSContext *ctx)
> > > > > +{
> > > > > +    if (!ebpf_rss_is_loaded(ctx)) {
> > > > > +        return false;
> > > > > +    }
> > > > > +
> > > > > +    ctx->mmap_configuration = mmap(NULL, qemu_real_host_page_size(),
> > > > > +                                   PROT_READ | PROT_WRITE, MAP_SHARED,
> > > > > +                                   ctx->map_configuration, 0);
> > > > > +    if (ctx->mmap_configuration == MAP_FAILED) {
> > > > > +        trace_ebpf_error("eBPF RSS", "can not mmap eBPF configuration array");
> > > > > +        return false;
> > > > > +    }
> > > > > +    ctx->mmap_toeplitz_key = mmap(NULL, qemu_real_host_page_size(),
> > > > > +                                   PROT_READ | PROT_WRITE, MAP_SHARED,
> > > > > +                                   ctx->map_toeplitz_key, 0);
> > > > > +    if (ctx->mmap_toeplitz_key == MAP_FAILED) {
> > > > > +        trace_ebpf_error("eBPF RSS", "can not mmap eBPF toeplitz key");
> > > > > +        goto toeplitz_fail;
> > > > > +    }
> > > > > +    ctx->mmap_indirections_table = mmap(NULL, qemu_real_host_page_size(),
> > > > > +                                   PROT_READ | PROT_WRITE, MAP_SHARED,
> > > > > +                                   ctx->map_indirections_table, 0);
> > > > > +    if (ctx->mmap_indirections_table == MAP_FAILED) {
> > > > > +        trace_ebpf_error("eBPF RSS", "can not mmap eBPF indirection table");
> > > > > +        goto indirection_fail;
> > > > > +    }
> > > > > +
> > > > > +    return true;
> > > > > +
> > > > > +indirection_fail:
> > > > > +    munmap(ctx->mmap_toeplitz_key, qemu_real_host_page_size());
> > > > > +toeplitz_fail:
> > > > > +    munmap(ctx->mmap_configuration, qemu_real_host_page_size());
> > > > > +
> > > > > +    ctx->mmap_configuration = NULL;
> > > > > +    ctx->mmap_toeplitz_key = NULL;
> > > > > +    ctx->mmap_indirections_table = NULL;
> > > > > +    return false;
> > > > > +}
> > > > > +
> > > > > +static void ebpf_rss_munmap(struct EBPFRSSContext *ctx)
> > > > > +{
> > > > > +    if (!ebpf_rss_is_loaded(ctx)) {
> > > > > +        return;
> > > > > +    }
> > > > > +
> > > > > +    munmap(ctx->mmap_indirections_table, qemu_real_host_page_size());
> > > > > +    munmap(ctx->mmap_toeplitz_key, qemu_real_host_page_size());
> > > > > +    munmap(ctx->mmap_configuration, qemu_real_host_page_size());
> > > > > +
> > > > > +    ctx->mmap_configuration = NULL;
> > > > > +    ctx->mmap_toeplitz_key = NULL;
> > > > > +    ctx->mmap_indirections_table = NULL;
> > > > >  }
> > > > >
> > > > >  bool ebpf_rss_load(struct EBPFRSSContext *ctx)
> > > > >  {
> > > > >      struct rss_bpf *rss_bpf_ctx;
> > > > >
> > > > > -    if (ctx == NULL) {
> > > > > +    if (ctx == NULL || ebpf_rss_is_loaded(ctx)) {
> > > > >          return false;
> > > > >      }
> > > > >
> > > > > @@ -66,10 +130,18 @@ bool ebpf_rss_load(struct EBPFRSSContext *ctx)
> > > > >      ctx->map_toeplitz_key = bpf_map__fd(
> > > > >              rss_bpf_ctx->maps.tap_rss_map_toeplitz_key);
> > > > >
> > > > > +    if (!ebpf_rss_mmap(ctx)) {
> > > > > +        goto error;
> > > > > +    }
> > > > > +
> > > > >      return true;
> > > > >  error:
> > > > >      rss_bpf__destroy(rss_bpf_ctx);
> > > > >      ctx->obj = NULL;
> > > > > +    ctx->program_fd = -1;
> > > > > +    ctx->map_configuration = -1;
> > > > > +    ctx->map_toeplitz_key = -1;
> > > > > +    ctx->map_indirections_table = -1;
> > > > >
> > > > >      return false;
> > > > >  }
> > > > > @@ -77,15 +149,11 @@ error:
> > > > >  static bool ebpf_rss_set_config(struct EBPFRSSContext *ctx,
> > > > >                                  struct EBPFRSSConfig *config)
> > > > >  {
> > > > > -    uint32_t map_key = 0;
> > > > > -
> > > > >      if (!ebpf_rss_is_loaded(ctx)) {
> > > > >          return false;
> > > > >      }
> > > > > -    if (bpf_map_update_elem(ctx->map_configuration,
> > > > > -                            &map_key, config, 0) < 0) {
> > > > > -        return false;
> > > > > -    }
> > > > > +
> > > > > +    memcpy(ctx->mmap_configuration, config, sizeof(*config));
> > > > >      return true;
> > > > >  }
> > > > >
> > > > > @@ -93,27 +161,19 @@ static bool ebpf_rss_set_indirections_table(struct EBPFRSSContext *ctx,
> > > > >                                              uint16_t *indirections_table,
> > > > >                                              size_t len)
> > > > >  {
> > > > > -    uint32_t i = 0;
> > > > > -
> > > > >      if (!ebpf_rss_is_loaded(ctx) || indirections_table == NULL ||
> > > > >         len > VIRTIO_NET_RSS_MAX_TABLE_LEN) {
> > > > >          return false;
> > > > >      }
> > > > >
> > > > > -    for (; i < len; ++i) {
> > > > > -        if (bpf_map_update_elem(ctx->map_indirections_table, &i,
> > > > > -                                indirections_table + i, 0) < 0) {
> > > > > -            return false;
> > > > > -        }
> > > > > -    }
> > > > > +    memcpy(ctx->mmap_indirections_table, indirections_table,
> > > > > +            sizeof(*indirections_table) * len);
> > > >
> > > > As discussed, should we stick the compatibility on the host without
> > > > bpf mmap support?
> > > >
> > > > If we don't, we need at least probe BPF mmap and disable ebpf rss? If
> > > > yes, we should track if the map is mmaped and switch between memcpy
> > > > and syscall.
> > > >
> > > > Thanks
> > >
> > > I've made some tests.
> > > I've checked eBPF program on kernels 5.4, 5.5, and 6.3 with libbpf
> > > 1.0.1, 1.1.0, and last 1.2.0.
> >
> > It looks to me we don't check the libbpf version now. Do we need to do
> > that (e.g the bpf mmap support for libbpf is not supported from the
> > start).
> >
> > > Overall, eBPF program requires explicit declaration of BPF_F_MAPPABLE map_flags.
> > > Without this flag, the program can be loaded on every tested
> > > kernel/libbpf configuration but Qemu can't mmap() the eBPF
> > > fds(obviously).
> >
> > Exactly, and that's why I'm asking whether or not we need to probe mmap here.
> >
> > This is because, without this series, Qemu can work without BPF mmap
> > (but with privilege). And this doesn't work after this series.
> >
> > > Alternative to mmap() is bpf_map_update_elem() syscall/method which
> > > would require capabilities for Qemu.
> >
> > Yes, but that's a different "problem". I think we should keep Qemu
> > work in that setup. (privilege + syscall updating).
> >
> > > With this flag, kernel 5.4 + libbpf can't load eBPF object.
> > > So, compatibility would require 2 different eBPF objects or some kind
> > > of hack, also it would require additional capability for Qemu.
> > > I don't think that we need checks for disabling eBPF RSS. It wouldn't
> > > brake anything on the old kernel and after an update, it should work
> > > ok.
> >
> > Even on old kernel + old libbpf without bpf mmap support?
> >
> > Thanks
> >
> > >
> > > >
> > > > >      return true;
> > > > >  }
> > > > >
> > > > >  static bool ebpf_rss_set_toepliz_key(struct EBPFRSSContext *ctx,
> > > > >                                       uint8_t *toeplitz_key)
> > > > >  {
> > > > > -    uint32_t map_key = 0;
> > > > > -
> > > > >      /* prepare toeplitz key */
> > > > >      uint8_t toe[VIRTIO_NET_RSS_MAX_KEY_SIZE] = {};
> > > > >
> > > > > @@ -123,10 +183,7 @@ static bool ebpf_rss_set_toepliz_key(struct EBPFRSSContext *ctx,
> > > > >      memcpy(toe, toeplitz_key, VIRTIO_NET_RSS_MAX_KEY_SIZE);
> > > > >      *(uint32_t *)toe = ntohl(*(uint32_t *)toe);
> > > > >
> > > > > -    if (bpf_map_update_elem(ctx->map_toeplitz_key, &map_key, toe,
> > > > > -                            0) < 0) {
> > > > > -        return false;
> > > > > -    }
> > > > > +    memcpy(ctx->mmap_toeplitz_key, toe, VIRTIO_NET_RSS_MAX_KEY_SIZE);
> > > > >      return true;
> > > > >  }
> > > > >
> > > > > @@ -160,6 +217,20 @@ void ebpf_rss_unload(struct EBPFRSSContext *ctx)
> > > > >          return;
> > > > >      }
> > > > >
> > > > > -    rss_bpf__destroy(ctx->obj);
> > > > > +    ebpf_rss_munmap(ctx);
> > > > > +
> > > > > +    if (ctx->obj) {
> > > > > +        rss_bpf__destroy(ctx->obj);
> > > > > +    } else {
> > > > > +        close(ctx->program_fd);
> > > > > +        close(ctx->map_configuration);
> > > > > +        close(ctx->map_toeplitz_key);
> > > > > +        close(ctx->map_indirections_table);
> > > > > +    }
> > > > > +
> > > > >      ctx->obj = NULL;
> > > > > +    ctx->program_fd = -1;
> > > > > +    ctx->map_configuration = -1;
> > > > > +    ctx->map_toeplitz_key = -1;
> > > > > +    ctx->map_indirections_table = -1;
> > > > >  }
> > > > > diff --git a/ebpf/ebpf_rss.h b/ebpf/ebpf_rss.h
> > > > > index bf3f2572c7c..ab08a7266d0 100644
> > > > > --- a/ebpf/ebpf_rss.h
> > > > > +++ b/ebpf/ebpf_rss.h
> > > > > @@ -20,6 +20,11 @@ struct EBPFRSSContext {
> > > > >      int map_configuration;
> > > > >      int map_toeplitz_key;
> > > > >      int map_indirections_table;
> > > > > +
> > > > > +    /* mapped eBPF maps for direct access to omit bpf_map_update_elem() */
> > > > > +    void *mmap_configuration;
> > > > > +    void *mmap_toeplitz_key;
> > > > > +    void *mmap_indirections_table;
> > > > >  };
> > > > >
> > > > >  struct EBPFRSSConfig {
> > > > > --
> > > > > 2.41.0
> > > > >
> > > >
> > >
> >
>
Andrew Melnichenko Aug. 18, 2023, 2:07 a.m. UTC | #6
Hi all,

On Wed, Aug 16, 2023 at 4:16 AM Jason Wang <jasowang@redhat.com> wrote:
>
> On Mon, Aug 14, 2023 at 4:36 PM Andrew Melnichenko <andrew@daynix.com> wrote:
> >
> > Hi, all.
> >
> > I've researched an issue a bit. And what can we do?
> > In the case of an "old" kernel 5.4, we need to load RSS eBPF without
> > BPF_F_MAPPABLE
> > and use bpf syscall to update the maps. This requires additional capabilities,
> > and the libvirtd will never give any capabilities to Qemu.
> > So, the only case for "fallback" is running Qemu manually with
> > capabilities(or with root) on kernel 5.4.
> >
> > We can add hack/fallback to RSS ebpf loading routine with additional
> > checks and modify for BPF_F_MAPPABLE.
> > And we can add a fallback for mmap/syscall ebpf access.
> >
> > The problem is that we need kernel 5.5 with BPF_F_MAPPABLE in headers
> > to compile Qemu with fallback,
> > or move macro to the Qemu headers.
> >
> > It can be implemented something like this:
> > RSS eBPF open/load:
> >  * open the skeleton.
> >  * load the skeleton as is - it would fail because of an unknown BPF_F_MAPPABLE.
> >  * hack/modify map_flags for skeleton and try to reload.
> > RSS eBPF map update(this is straightforward):
> >  * check the mem pointer if null, use bpf syscall
> >
> > The advantage of hacks in Qemu is that we are aware of the eBPF context.
> > I suggest creating different series of patches that would implement
> > the hack/fallback,
> > If we really want to support eBPF on old kernels.
>
> So I think the simplest way is to disable eBPF RSS support on old
> kernels? (e.g during the configure)
>
> Thanks

I think it's possible to check BPF_F_MAPPABLE flag during configuration.
The absence of this flag would indicate that the kernel probably is
old on the build machine.

It wouldn't solve the issue with a "new" environment and old
kernel(g.e. fallback kernel).
Or "old" environment and new kernel(g.e. self-build one).
Also, the environment on the build maintainer's machine and end-up
system may be different
(assuming that the build machine is always up to date).
On the other hand, there is already a fallback to "in-qemu" RSS if eBPF fails.

If it requires, we can add the check, I don't see that it solves much
without hack.
It will be required if we add mmap/syscall hack for element update.

>
> >
> > On Wed, Aug 9, 2023 at 5:21 AM Jason Wang <jasowang@redhat.com> wrote:
> > >
> > > On Wed, Aug 9, 2023 at 7:15 AM Andrew Melnichenko <andrew@daynix.com> wrote:
> > > >
> > > > Hi all,
> > > >
> > > > On Tue, Aug 8, 2023 at 5:39 AM Jason Wang <jasowang@redhat.com> wrote:
> > > > >
> > > > > On Thu, Aug 3, 2023 at 5:01 AM Andrew Melnychenko <andrew@daynix.com> wrote:
> > > > > >
> > > > > > Changed eBPF map updates through mmaped array.
> > > > > > Mmaped arrays provide direct access to map data.
> > > > > > It should omit using bpf_map_update_elem() call,
> > > > > > which may require capabilities that are not present.
> > > > > >
> > > > > > Signed-off-by: Andrew Melnychenko <andrew@daynix.com>
> > > > > > ---
> > > > > >  ebpf/ebpf_rss.c | 117 ++++++++++++++++++++++++++++++++++++++----------
> > > > > >  ebpf/ebpf_rss.h |   5 +++
> > > > > >  2 files changed, 99 insertions(+), 23 deletions(-)
> > > > > >
> > > > > > diff --git a/ebpf/ebpf_rss.c b/ebpf/ebpf_rss.c
> > > > > > index cee658c158b..247f5eee1b6 100644
> > > > > > --- a/ebpf/ebpf_rss.c
> > > > > > +++ b/ebpf/ebpf_rss.c
> > > > > > @@ -27,19 +27,83 @@ void ebpf_rss_init(struct EBPFRSSContext *ctx)
> > > > > >  {
> > > > > >      if (ctx != NULL) {
> > > > > >          ctx->obj = NULL;
> > > > > > +        ctx->program_fd = -1;
> > > > > > +        ctx->map_configuration = -1;
> > > > > > +        ctx->map_toeplitz_key = -1;
> > > > > > +        ctx->map_indirections_table = -1;
> > > > > > +
> > > > > > +        ctx->mmap_configuration = NULL;
> > > > > > +        ctx->mmap_toeplitz_key = NULL;
> > > > > > +        ctx->mmap_indirections_table = NULL;
> > > > > >      }
> > > > > >  }
> > > > > >
> > > > > >  bool ebpf_rss_is_loaded(struct EBPFRSSContext *ctx)
> > > > > >  {
> > > > > > -    return ctx != NULL && ctx->obj != NULL;
> > > > > > +    return ctx != NULL && (ctx->obj != NULL || ctx->program_fd != -1);
> > > > > > +}
> > > > > > +
> > > > > > +static bool ebpf_rss_mmap(struct EBPFRSSContext *ctx)
> > > > > > +{
> > > > > > +    if (!ebpf_rss_is_loaded(ctx)) {
> > > > > > +        return false;
> > > > > > +    }
> > > > > > +
> > > > > > +    ctx->mmap_configuration = mmap(NULL, qemu_real_host_page_size(),
> > > > > > +                                   PROT_READ | PROT_WRITE, MAP_SHARED,
> > > > > > +                                   ctx->map_configuration, 0);
> > > > > > +    if (ctx->mmap_configuration == MAP_FAILED) {
> > > > > > +        trace_ebpf_error("eBPF RSS", "can not mmap eBPF configuration array");
> > > > > > +        return false;
> > > > > > +    }
> > > > > > +    ctx->mmap_toeplitz_key = mmap(NULL, qemu_real_host_page_size(),
> > > > > > +                                   PROT_READ | PROT_WRITE, MAP_SHARED,
> > > > > > +                                   ctx->map_toeplitz_key, 0);
> > > > > > +    if (ctx->mmap_toeplitz_key == MAP_FAILED) {
> > > > > > +        trace_ebpf_error("eBPF RSS", "can not mmap eBPF toeplitz key");
> > > > > > +        goto toeplitz_fail;
> > > > > > +    }
> > > > > > +    ctx->mmap_indirections_table = mmap(NULL, qemu_real_host_page_size(),
> > > > > > +                                   PROT_READ | PROT_WRITE, MAP_SHARED,
> > > > > > +                                   ctx->map_indirections_table, 0);
> > > > > > +    if (ctx->mmap_indirections_table == MAP_FAILED) {
> > > > > > +        trace_ebpf_error("eBPF RSS", "can not mmap eBPF indirection table");
> > > > > > +        goto indirection_fail;
> > > > > > +    }
> > > > > > +
> > > > > > +    return true;
> > > > > > +
> > > > > > +indirection_fail:
> > > > > > +    munmap(ctx->mmap_toeplitz_key, qemu_real_host_page_size());
> > > > > > +toeplitz_fail:
> > > > > > +    munmap(ctx->mmap_configuration, qemu_real_host_page_size());
> > > > > > +
> > > > > > +    ctx->mmap_configuration = NULL;
> > > > > > +    ctx->mmap_toeplitz_key = NULL;
> > > > > > +    ctx->mmap_indirections_table = NULL;
> > > > > > +    return false;
> > > > > > +}
> > > > > > +
> > > > > > +static void ebpf_rss_munmap(struct EBPFRSSContext *ctx)
> > > > > > +{
> > > > > > +    if (!ebpf_rss_is_loaded(ctx)) {
> > > > > > +        return;
> > > > > > +    }
> > > > > > +
> > > > > > +    munmap(ctx->mmap_indirections_table, qemu_real_host_page_size());
> > > > > > +    munmap(ctx->mmap_toeplitz_key, qemu_real_host_page_size());
> > > > > > +    munmap(ctx->mmap_configuration, qemu_real_host_page_size());
> > > > > > +
> > > > > > +    ctx->mmap_configuration = NULL;
> > > > > > +    ctx->mmap_toeplitz_key = NULL;
> > > > > > +    ctx->mmap_indirections_table = NULL;
> > > > > >  }
> > > > > >
> > > > > >  bool ebpf_rss_load(struct EBPFRSSContext *ctx)
> > > > > >  {
> > > > > >      struct rss_bpf *rss_bpf_ctx;
> > > > > >
> > > > > > -    if (ctx == NULL) {
> > > > > > +    if (ctx == NULL || ebpf_rss_is_loaded(ctx)) {
> > > > > >          return false;
> > > > > >      }
> > > > > >
> > > > > > @@ -66,10 +130,18 @@ bool ebpf_rss_load(struct EBPFRSSContext *ctx)
> > > > > >      ctx->map_toeplitz_key = bpf_map__fd(
> > > > > >              rss_bpf_ctx->maps.tap_rss_map_toeplitz_key);
> > > > > >
> > > > > > +    if (!ebpf_rss_mmap(ctx)) {
> > > > > > +        goto error;
> > > > > > +    }
> > > > > > +
> > > > > >      return true;
> > > > > >  error:
> > > > > >      rss_bpf__destroy(rss_bpf_ctx);
> > > > > >      ctx->obj = NULL;
> > > > > > +    ctx->program_fd = -1;
> > > > > > +    ctx->map_configuration = -1;
> > > > > > +    ctx->map_toeplitz_key = -1;
> > > > > > +    ctx->map_indirections_table = -1;
> > > > > >
> > > > > >      return false;
> > > > > >  }
> > > > > > @@ -77,15 +149,11 @@ error:
> > > > > >  static bool ebpf_rss_set_config(struct EBPFRSSContext *ctx,
> > > > > >                                  struct EBPFRSSConfig *config)
> > > > > >  {
> > > > > > -    uint32_t map_key = 0;
> > > > > > -
> > > > > >      if (!ebpf_rss_is_loaded(ctx)) {
> > > > > >          return false;
> > > > > >      }
> > > > > > -    if (bpf_map_update_elem(ctx->map_configuration,
> > > > > > -                            &map_key, config, 0) < 0) {
> > > > > > -        return false;
> > > > > > -    }
> > > > > > +
> > > > > > +    memcpy(ctx->mmap_configuration, config, sizeof(*config));
> > > > > >      return true;
> > > > > >  }
> > > > > >
> > > > > > @@ -93,27 +161,19 @@ static bool ebpf_rss_set_indirections_table(struct EBPFRSSContext *ctx,
> > > > > >                                              uint16_t *indirections_table,
> > > > > >                                              size_t len)
> > > > > >  {
> > > > > > -    uint32_t i = 0;
> > > > > > -
> > > > > >      if (!ebpf_rss_is_loaded(ctx) || indirections_table == NULL ||
> > > > > >         len > VIRTIO_NET_RSS_MAX_TABLE_LEN) {
> > > > > >          return false;
> > > > > >      }
> > > > > >
> > > > > > -    for (; i < len; ++i) {
> > > > > > -        if (bpf_map_update_elem(ctx->map_indirections_table, &i,
> > > > > > -                                indirections_table + i, 0) < 0) {
> > > > > > -            return false;
> > > > > > -        }
> > > > > > -    }
> > > > > > +    memcpy(ctx->mmap_indirections_table, indirections_table,
> > > > > > +            sizeof(*indirections_table) * len);
> > > > >
> > > > > As discussed, should we stick the compatibility on the host without
> > > > > bpf mmap support?
> > > > >
> > > > > If we don't, we need at least probe BPF mmap and disable ebpf rss? If
> > > > > yes, we should track if the map is mmaped and switch between memcpy
> > > > > and syscall.
> > > > >
> > > > > Thanks
> > > >
> > > > I've made some tests.
> > > > I've checked eBPF program on kernels 5.4, 5.5, and 6.3 with libbpf
> > > > 1.0.1, 1.1.0, and last 1.2.0.
> > >
> > > It looks to me we don't check the libbpf version now. Do we need to do
> > > that (e.g the bpf mmap support for libbpf is not supported from the
> > > start).
> > >
> > > > Overall, eBPF program requires explicit declaration of BPF_F_MAPPABLE map_flags.
> > > > Without this flag, the program can be loaded on every tested
> > > > kernel/libbpf configuration but Qemu can't mmap() the eBPF
> > > > fds(obviously).
> > >
> > > Exactly, and that's why I'm asking whether or not we need to probe mmap here.
> > >
> > > This is because, without this series, Qemu can work without BPF mmap
> > > (but with privilege). And this doesn't work after this series.
> > >
> > > > Alternative to mmap() is bpf_map_update_elem() syscall/method which
> > > > would require capabilities for Qemu.
> > >
> > > Yes, but that's a different "problem". I think we should keep Qemu
> > > work in that setup. (privilege + syscall updating).
> > >
> > > > With this flag, kernel 5.4 + libbpf can't load eBPF object.
> > > > So, compatibility would require 2 different eBPF objects or some kind
> > > > of hack, also it would require additional capability for Qemu.
> > > > I don't think that we need checks for disabling eBPF RSS. It wouldn't
> > > > brake anything on the old kernel and after an update, it should work
> > > > ok.
> > >
> > > Even on old kernel + old libbpf without bpf mmap support?
> > >
> > > Thanks
> > >
> > > >
> > > > >
> > > > > >      return true;
> > > > > >  }
> > > > > >
> > > > > >  static bool ebpf_rss_set_toepliz_key(struct EBPFRSSContext *ctx,
> > > > > >                                       uint8_t *toeplitz_key)
> > > > > >  {
> > > > > > -    uint32_t map_key = 0;
> > > > > > -
> > > > > >      /* prepare toeplitz key */
> > > > > >      uint8_t toe[VIRTIO_NET_RSS_MAX_KEY_SIZE] = {};
> > > > > >
> > > > > > @@ -123,10 +183,7 @@ static bool ebpf_rss_set_toepliz_key(struct EBPFRSSContext *ctx,
> > > > > >      memcpy(toe, toeplitz_key, VIRTIO_NET_RSS_MAX_KEY_SIZE);
> > > > > >      *(uint32_t *)toe = ntohl(*(uint32_t *)toe);
> > > > > >
> > > > > > -    if (bpf_map_update_elem(ctx->map_toeplitz_key, &map_key, toe,
> > > > > > -                            0) < 0) {
> > > > > > -        return false;
> > > > > > -    }
> > > > > > +    memcpy(ctx->mmap_toeplitz_key, toe, VIRTIO_NET_RSS_MAX_KEY_SIZE);
> > > > > >      return true;
> > > > > >  }
> > > > > >
> > > > > > @@ -160,6 +217,20 @@ void ebpf_rss_unload(struct EBPFRSSContext *ctx)
> > > > > >          return;
> > > > > >      }
> > > > > >
> > > > > > -    rss_bpf__destroy(ctx->obj);
> > > > > > +    ebpf_rss_munmap(ctx);
> > > > > > +
> > > > > > +    if (ctx->obj) {
> > > > > > +        rss_bpf__destroy(ctx->obj);
> > > > > > +    } else {
> > > > > > +        close(ctx->program_fd);
> > > > > > +        close(ctx->map_configuration);
> > > > > > +        close(ctx->map_toeplitz_key);
> > > > > > +        close(ctx->map_indirections_table);
> > > > > > +    }
> > > > > > +
> > > > > >      ctx->obj = NULL;
> > > > > > +    ctx->program_fd = -1;
> > > > > > +    ctx->map_configuration = -1;
> > > > > > +    ctx->map_toeplitz_key = -1;
> > > > > > +    ctx->map_indirections_table = -1;
> > > > > >  }
> > > > > > diff --git a/ebpf/ebpf_rss.h b/ebpf/ebpf_rss.h
> > > > > > index bf3f2572c7c..ab08a7266d0 100644
> > > > > > --- a/ebpf/ebpf_rss.h
> > > > > > +++ b/ebpf/ebpf_rss.h
> > > > > > @@ -20,6 +20,11 @@ struct EBPFRSSContext {
> > > > > >      int map_configuration;
> > > > > >      int map_toeplitz_key;
> > > > > >      int map_indirections_table;
> > > > > > +
> > > > > > +    /* mapped eBPF maps for direct access to omit bpf_map_update_elem() */
> > > > > > +    void *mmap_configuration;
> > > > > > +    void *mmap_toeplitz_key;
> > > > > > +    void *mmap_indirections_table;
> > > > > >  };
> > > > > >
> > > > > >  struct EBPFRSSConfig {
> > > > > > --
> > > > > > 2.41.0
> > > > > >
> > > > >
> > > >
> > >
> >
>
Jason Wang Aug. 21, 2023, 3:57 a.m. UTC | #7
On Fri, Aug 18, 2023 at 10:08 AM Andrew Melnichenko <andrew@daynix.com> wrote:
>
> Hi all,
>
> On Wed, Aug 16, 2023 at 4:16 AM Jason Wang <jasowang@redhat.com> wrote:
> >
> > On Mon, Aug 14, 2023 at 4:36 PM Andrew Melnichenko <andrew@daynix.com> wrote:
> > >
> > > Hi, all.
> > >
> > > I've researched an issue a bit. And what can we do?
> > > In the case of an "old" kernel 5.4, we need to load RSS eBPF without
> > > BPF_F_MAPPABLE
> > > and use bpf syscall to update the maps. This requires additional capabilities,
> > > and the libvirtd will never give any capabilities to Qemu.
> > > So, the only case for "fallback" is running Qemu manually with
> > > capabilities(or with root) on kernel 5.4.
> > >
> > > We can add hack/fallback to RSS ebpf loading routine with additional
> > > checks and modify for BPF_F_MAPPABLE.
> > > And we can add a fallback for mmap/syscall ebpf access.
> > >
> > > The problem is that we need kernel 5.5 with BPF_F_MAPPABLE in headers
> > > to compile Qemu with fallback,
> > > or move macro to the Qemu headers.
> > >
> > > It can be implemented something like this:
> > > RSS eBPF open/load:
> > >  * open the skeleton.
> > >  * load the skeleton as is - it would fail because of an unknown BPF_F_MAPPABLE.
> > >  * hack/modify map_flags for skeleton and try to reload.
> > > RSS eBPF map update(this is straightforward):
> > >  * check the mem pointer if null, use bpf syscall
> > >
> > > The advantage of hacks in Qemu is that we are aware of the eBPF context.
> > > I suggest creating different series of patches that would implement
> > > the hack/fallback,
> > > If we really want to support eBPF on old kernels.
> >
> > So I think the simplest way is to disable eBPF RSS support on old
> > kernels? (e.g during the configure)
> >
> > Thanks
>
> I think it's possible to check BPF_F_MAPPABLE flag during configuration.
> The absence of this flag would indicate that the kernel probably is
> old on the build machine.
>
> It wouldn't solve the issue with a "new" environment and old
> kernel(g.e. fallback kernel).

eBPF RSS support will just be disabled, this seems to be fine.

> Or "old" environment and new kernel(g.e. self-build one).

I don't see what's the issue in this case.

> Also, the environment on the build maintainer's machine and end-up
> system may be different
> (assuming that the build machine is always up to date).

Yes, but this "issue" is not specific to RSS eBPF?

> On the other hand, there is already a fallback to "in-qemu" RSS if eBPF fails.

I see, but the question is what's the implication for the mgmt layer
e.g libvirt in this case?

Thanks

>
> If it requires, we can add the check, I don't see that it solves much
> without hack.
> It will be required if we add mmap/syscall hack for element update.
>
> >
> > >
> > > On Wed, Aug 9, 2023 at 5:21 AM Jason Wang <jasowang@redhat.com> wrote:
> > > >
> > > > On Wed, Aug 9, 2023 at 7:15 AM Andrew Melnichenko <andrew@daynix.com> wrote:
> > > > >
> > > > > Hi all,
> > > > >
> > > > > On Tue, Aug 8, 2023 at 5:39 AM Jason Wang <jasowang@redhat.com> wrote:
> > > > > >
> > > > > > On Thu, Aug 3, 2023 at 5:01 AM Andrew Melnychenko <andrew@daynix.com> wrote:
> > > > > > >
> > > > > > > Changed eBPF map updates through mmaped array.
> > > > > > > Mmaped arrays provide direct access to map data.
> > > > > > > It should omit using bpf_map_update_elem() call,
> > > > > > > which may require capabilities that are not present.
> > > > > > >
> > > > > > > Signed-off-by: Andrew Melnychenko <andrew@daynix.com>
> > > > > > > ---
> > > > > > >  ebpf/ebpf_rss.c | 117 ++++++++++++++++++++++++++++++++++++++----------
> > > > > > >  ebpf/ebpf_rss.h |   5 +++
> > > > > > >  2 files changed, 99 insertions(+), 23 deletions(-)
> > > > > > >
> > > > > > > diff --git a/ebpf/ebpf_rss.c b/ebpf/ebpf_rss.c
> > > > > > > index cee658c158b..247f5eee1b6 100644
> > > > > > > --- a/ebpf/ebpf_rss.c
> > > > > > > +++ b/ebpf/ebpf_rss.c
> > > > > > > @@ -27,19 +27,83 @@ void ebpf_rss_init(struct EBPFRSSContext *ctx)
> > > > > > >  {
> > > > > > >      if (ctx != NULL) {
> > > > > > >          ctx->obj = NULL;
> > > > > > > +        ctx->program_fd = -1;
> > > > > > > +        ctx->map_configuration = -1;
> > > > > > > +        ctx->map_toeplitz_key = -1;
> > > > > > > +        ctx->map_indirections_table = -1;
> > > > > > > +
> > > > > > > +        ctx->mmap_configuration = NULL;
> > > > > > > +        ctx->mmap_toeplitz_key = NULL;
> > > > > > > +        ctx->mmap_indirections_table = NULL;
> > > > > > >      }
> > > > > > >  }
> > > > > > >
> > > > > > >  bool ebpf_rss_is_loaded(struct EBPFRSSContext *ctx)
> > > > > > >  {
> > > > > > > -    return ctx != NULL && ctx->obj != NULL;
> > > > > > > +    return ctx != NULL && (ctx->obj != NULL || ctx->program_fd != -1);
> > > > > > > +}
> > > > > > > +
> > > > > > > +static bool ebpf_rss_mmap(struct EBPFRSSContext *ctx)
> > > > > > > +{
> > > > > > > +    if (!ebpf_rss_is_loaded(ctx)) {
> > > > > > > +        return false;
> > > > > > > +    }
> > > > > > > +
> > > > > > > +    ctx->mmap_configuration = mmap(NULL, qemu_real_host_page_size(),
> > > > > > > +                                   PROT_READ | PROT_WRITE, MAP_SHARED,
> > > > > > > +                                   ctx->map_configuration, 0);
> > > > > > > +    if (ctx->mmap_configuration == MAP_FAILED) {
> > > > > > > +        trace_ebpf_error("eBPF RSS", "can not mmap eBPF configuration array");
> > > > > > > +        return false;
> > > > > > > +    }
> > > > > > > +    ctx->mmap_toeplitz_key = mmap(NULL, qemu_real_host_page_size(),
> > > > > > > +                                   PROT_READ | PROT_WRITE, MAP_SHARED,
> > > > > > > +                                   ctx->map_toeplitz_key, 0);
> > > > > > > +    if (ctx->mmap_toeplitz_key == MAP_FAILED) {
> > > > > > > +        trace_ebpf_error("eBPF RSS", "can not mmap eBPF toeplitz key");
> > > > > > > +        goto toeplitz_fail;
> > > > > > > +    }
> > > > > > > +    ctx->mmap_indirections_table = mmap(NULL, qemu_real_host_page_size(),
> > > > > > > +                                   PROT_READ | PROT_WRITE, MAP_SHARED,
> > > > > > > +                                   ctx->map_indirections_table, 0);
> > > > > > > +    if (ctx->mmap_indirections_table == MAP_FAILED) {
> > > > > > > +        trace_ebpf_error("eBPF RSS", "can not mmap eBPF indirection table");
> > > > > > > +        goto indirection_fail;
> > > > > > > +    }
> > > > > > > +
> > > > > > > +    return true;
> > > > > > > +
> > > > > > > +indirection_fail:
> > > > > > > +    munmap(ctx->mmap_toeplitz_key, qemu_real_host_page_size());
> > > > > > > +toeplitz_fail:
> > > > > > > +    munmap(ctx->mmap_configuration, qemu_real_host_page_size());
> > > > > > > +
> > > > > > > +    ctx->mmap_configuration = NULL;
> > > > > > > +    ctx->mmap_toeplitz_key = NULL;
> > > > > > > +    ctx->mmap_indirections_table = NULL;
> > > > > > > +    return false;
> > > > > > > +}
> > > > > > > +
> > > > > > > +static void ebpf_rss_munmap(struct EBPFRSSContext *ctx)
> > > > > > > +{
> > > > > > > +    if (!ebpf_rss_is_loaded(ctx)) {
> > > > > > > +        return;
> > > > > > > +    }
> > > > > > > +
> > > > > > > +    munmap(ctx->mmap_indirections_table, qemu_real_host_page_size());
> > > > > > > +    munmap(ctx->mmap_toeplitz_key, qemu_real_host_page_size());
> > > > > > > +    munmap(ctx->mmap_configuration, qemu_real_host_page_size());
> > > > > > > +
> > > > > > > +    ctx->mmap_configuration = NULL;
> > > > > > > +    ctx->mmap_toeplitz_key = NULL;
> > > > > > > +    ctx->mmap_indirections_table = NULL;
> > > > > > >  }
> > > > > > >
> > > > > > >  bool ebpf_rss_load(struct EBPFRSSContext *ctx)
> > > > > > >  {
> > > > > > >      struct rss_bpf *rss_bpf_ctx;
> > > > > > >
> > > > > > > -    if (ctx == NULL) {
> > > > > > > +    if (ctx == NULL || ebpf_rss_is_loaded(ctx)) {
> > > > > > >          return false;
> > > > > > >      }
> > > > > > >
> > > > > > > @@ -66,10 +130,18 @@ bool ebpf_rss_load(struct EBPFRSSContext *ctx)
> > > > > > >      ctx->map_toeplitz_key = bpf_map__fd(
> > > > > > >              rss_bpf_ctx->maps.tap_rss_map_toeplitz_key);
> > > > > > >
> > > > > > > +    if (!ebpf_rss_mmap(ctx)) {
> > > > > > > +        goto error;
> > > > > > > +    }
> > > > > > > +
> > > > > > >      return true;
> > > > > > >  error:
> > > > > > >      rss_bpf__destroy(rss_bpf_ctx);
> > > > > > >      ctx->obj = NULL;
> > > > > > > +    ctx->program_fd = -1;
> > > > > > > +    ctx->map_configuration = -1;
> > > > > > > +    ctx->map_toeplitz_key = -1;
> > > > > > > +    ctx->map_indirections_table = -1;
> > > > > > >
> > > > > > >      return false;
> > > > > > >  }
> > > > > > > @@ -77,15 +149,11 @@ error:
> > > > > > >  static bool ebpf_rss_set_config(struct EBPFRSSContext *ctx,
> > > > > > >                                  struct EBPFRSSConfig *config)
> > > > > > >  {
> > > > > > > -    uint32_t map_key = 0;
> > > > > > > -
> > > > > > >      if (!ebpf_rss_is_loaded(ctx)) {
> > > > > > >          return false;
> > > > > > >      }
> > > > > > > -    if (bpf_map_update_elem(ctx->map_configuration,
> > > > > > > -                            &map_key, config, 0) < 0) {
> > > > > > > -        return false;
> > > > > > > -    }
> > > > > > > +
> > > > > > > +    memcpy(ctx->mmap_configuration, config, sizeof(*config));
> > > > > > >      return true;
> > > > > > >  }
> > > > > > >
> > > > > > > @@ -93,27 +161,19 @@ static bool ebpf_rss_set_indirections_table(struct EBPFRSSContext *ctx,
> > > > > > >                                              uint16_t *indirections_table,
> > > > > > >                                              size_t len)
> > > > > > >  {
> > > > > > > -    uint32_t i = 0;
> > > > > > > -
> > > > > > >      if (!ebpf_rss_is_loaded(ctx) || indirections_table == NULL ||
> > > > > > >         len > VIRTIO_NET_RSS_MAX_TABLE_LEN) {
> > > > > > >          return false;
> > > > > > >      }
> > > > > > >
> > > > > > > -    for (; i < len; ++i) {
> > > > > > > -        if (bpf_map_update_elem(ctx->map_indirections_table, &i,
> > > > > > > -                                indirections_table + i, 0) < 0) {
> > > > > > > -            return false;
> > > > > > > -        }
> > > > > > > -    }
> > > > > > > +    memcpy(ctx->mmap_indirections_table, indirections_table,
> > > > > > > +            sizeof(*indirections_table) * len);
> > > > > >
> > > > > > As discussed, should we stick the compatibility on the host without
> > > > > > bpf mmap support?
> > > > > >
> > > > > > If we don't, we need at least probe BPF mmap and disable ebpf rss? If
> > > > > > yes, we should track if the map is mmaped and switch between memcpy
> > > > > > and syscall.
> > > > > >
> > > > > > Thanks
> > > > >
> > > > > I've made some tests.
> > > > > I've checked eBPF program on kernels 5.4, 5.5, and 6.3 with libbpf
> > > > > 1.0.1, 1.1.0, and last 1.2.0.
> > > >
> > > > It looks to me we don't check the libbpf version now. Do we need to do
> > > > that (e.g the bpf mmap support for libbpf is not supported from the
> > > > start).
> > > >
> > > > > Overall, eBPF program requires explicit declaration of BPF_F_MAPPABLE map_flags.
> > > > > Without this flag, the program can be loaded on every tested
> > > > > kernel/libbpf configuration but Qemu can't mmap() the eBPF
> > > > > fds(obviously).
> > > >
> > > > Exactly, and that's why I'm asking whether or not we need to probe mmap here.
> > > >
> > > > This is because, without this series, Qemu can work without BPF mmap
> > > > (but with privilege). And this doesn't work after this series.
> > > >
> > > > > Alternative to mmap() is bpf_map_update_elem() syscall/method which
> > > > > would require capabilities for Qemu.
> > > >
> > > > Yes, but that's a different "problem". I think we should keep Qemu
> > > > work in that setup. (privilege + syscall updating).
> > > >
> > > > > With this flag, kernel 5.4 + libbpf can't load eBPF object.
> > > > > So, compatibility would require 2 different eBPF objects or some kind
> > > > > of hack, also it would require additional capability for Qemu.
> > > > > I don't think that we need checks for disabling eBPF RSS. It wouldn't
> > > > > brake anything on the old kernel and after an update, it should work
> > > > > ok.
> > > >
> > > > Even on old kernel + old libbpf without bpf mmap support?
> > > >
> > > > Thanks
> > > >
> > > > >
> > > > > >
> > > > > > >      return true;
> > > > > > >  }
> > > > > > >
> > > > > > >  static bool ebpf_rss_set_toepliz_key(struct EBPFRSSContext *ctx,
> > > > > > >                                       uint8_t *toeplitz_key)
> > > > > > >  {
> > > > > > > -    uint32_t map_key = 0;
> > > > > > > -
> > > > > > >      /* prepare toeplitz key */
> > > > > > >      uint8_t toe[VIRTIO_NET_RSS_MAX_KEY_SIZE] = {};
> > > > > > >
> > > > > > > @@ -123,10 +183,7 @@ static bool ebpf_rss_set_toepliz_key(struct EBPFRSSContext *ctx,
> > > > > > >      memcpy(toe, toeplitz_key, VIRTIO_NET_RSS_MAX_KEY_SIZE);
> > > > > > >      *(uint32_t *)toe = ntohl(*(uint32_t *)toe);
> > > > > > >
> > > > > > > -    if (bpf_map_update_elem(ctx->map_toeplitz_key, &map_key, toe,
> > > > > > > -                            0) < 0) {
> > > > > > > -        return false;
> > > > > > > -    }
> > > > > > > +    memcpy(ctx->mmap_toeplitz_key, toe, VIRTIO_NET_RSS_MAX_KEY_SIZE);
> > > > > > >      return true;
> > > > > > >  }
> > > > > > >
> > > > > > > @@ -160,6 +217,20 @@ void ebpf_rss_unload(struct EBPFRSSContext *ctx)
> > > > > > >          return;
> > > > > > >      }
> > > > > > >
> > > > > > > -    rss_bpf__destroy(ctx->obj);
> > > > > > > +    ebpf_rss_munmap(ctx);
> > > > > > > +
> > > > > > > +    if (ctx->obj) {
> > > > > > > +        rss_bpf__destroy(ctx->obj);
> > > > > > > +    } else {
> > > > > > > +        close(ctx->program_fd);
> > > > > > > +        close(ctx->map_configuration);
> > > > > > > +        close(ctx->map_toeplitz_key);
> > > > > > > +        close(ctx->map_indirections_table);
> > > > > > > +    }
> > > > > > > +
> > > > > > >      ctx->obj = NULL;
> > > > > > > +    ctx->program_fd = -1;
> > > > > > > +    ctx->map_configuration = -1;
> > > > > > > +    ctx->map_toeplitz_key = -1;
> > > > > > > +    ctx->map_indirections_table = -1;
> > > > > > >  }
> > > > > > > diff --git a/ebpf/ebpf_rss.h b/ebpf/ebpf_rss.h
> > > > > > > index bf3f2572c7c..ab08a7266d0 100644
> > > > > > > --- a/ebpf/ebpf_rss.h
> > > > > > > +++ b/ebpf/ebpf_rss.h
> > > > > > > @@ -20,6 +20,11 @@ struct EBPFRSSContext {
> > > > > > >      int map_configuration;
> > > > > > >      int map_toeplitz_key;
> > > > > > >      int map_indirections_table;
> > > > > > > +
> > > > > > > +    /* mapped eBPF maps for direct access to omit bpf_map_update_elem() */
> > > > > > > +    void *mmap_configuration;
> > > > > > > +    void *mmap_toeplitz_key;
> > > > > > > +    void *mmap_indirections_table;
> > > > > > >  };
> > > > > > >
> > > > > > >  struct EBPFRSSConfig {
> > > > > > > --
> > > > > > > 2.41.0
> > > > > > >
> > > > > >
> > > > >
> > > >
> > >
> >
>
diff mbox series

Patch

diff --git a/ebpf/ebpf_rss.c b/ebpf/ebpf_rss.c
index cee658c158b..247f5eee1b6 100644
--- a/ebpf/ebpf_rss.c
+++ b/ebpf/ebpf_rss.c
@@ -27,19 +27,83 @@  void ebpf_rss_init(struct EBPFRSSContext *ctx)
 {
     if (ctx != NULL) {
         ctx->obj = NULL;
+        ctx->program_fd = -1;
+        ctx->map_configuration = -1;
+        ctx->map_toeplitz_key = -1;
+        ctx->map_indirections_table = -1;
+
+        ctx->mmap_configuration = NULL;
+        ctx->mmap_toeplitz_key = NULL;
+        ctx->mmap_indirections_table = NULL;
     }
 }
 
 bool ebpf_rss_is_loaded(struct EBPFRSSContext *ctx)
 {
-    return ctx != NULL && ctx->obj != NULL;
+    return ctx != NULL && (ctx->obj != NULL || ctx->program_fd != -1);
+}
+
+static bool ebpf_rss_mmap(struct EBPFRSSContext *ctx)
+{
+    if (!ebpf_rss_is_loaded(ctx)) {
+        return false;
+    }
+
+    ctx->mmap_configuration = mmap(NULL, qemu_real_host_page_size(),
+                                   PROT_READ | PROT_WRITE, MAP_SHARED,
+                                   ctx->map_configuration, 0);
+    if (ctx->mmap_configuration == MAP_FAILED) {
+        trace_ebpf_error("eBPF RSS", "can not mmap eBPF configuration array");
+        return false;
+    }
+    ctx->mmap_toeplitz_key = mmap(NULL, qemu_real_host_page_size(),
+                                   PROT_READ | PROT_WRITE, MAP_SHARED,
+                                   ctx->map_toeplitz_key, 0);
+    if (ctx->mmap_toeplitz_key == MAP_FAILED) {
+        trace_ebpf_error("eBPF RSS", "can not mmap eBPF toeplitz key");
+        goto toeplitz_fail;
+    }
+    ctx->mmap_indirections_table = mmap(NULL, qemu_real_host_page_size(),
+                                   PROT_READ | PROT_WRITE, MAP_SHARED,
+                                   ctx->map_indirections_table, 0);
+    if (ctx->mmap_indirections_table == MAP_FAILED) {
+        trace_ebpf_error("eBPF RSS", "can not mmap eBPF indirection table");
+        goto indirection_fail;
+    }
+
+    return true;
+
+indirection_fail:
+    munmap(ctx->mmap_toeplitz_key, qemu_real_host_page_size());
+toeplitz_fail:
+    munmap(ctx->mmap_configuration, qemu_real_host_page_size());
+
+    ctx->mmap_configuration = NULL;
+    ctx->mmap_toeplitz_key = NULL;
+    ctx->mmap_indirections_table = NULL;
+    return false;
+}
+
+static void ebpf_rss_munmap(struct EBPFRSSContext *ctx)
+{
+    if (!ebpf_rss_is_loaded(ctx)) {
+        return;
+    }
+
+    munmap(ctx->mmap_indirections_table, qemu_real_host_page_size());
+    munmap(ctx->mmap_toeplitz_key, qemu_real_host_page_size());
+    munmap(ctx->mmap_configuration, qemu_real_host_page_size());
+
+    ctx->mmap_configuration = NULL;
+    ctx->mmap_toeplitz_key = NULL;
+    ctx->mmap_indirections_table = NULL;
 }
 
 bool ebpf_rss_load(struct EBPFRSSContext *ctx)
 {
     struct rss_bpf *rss_bpf_ctx;
 
-    if (ctx == NULL) {
+    if (ctx == NULL || ebpf_rss_is_loaded(ctx)) {
         return false;
     }
 
@@ -66,10 +130,18 @@  bool ebpf_rss_load(struct EBPFRSSContext *ctx)
     ctx->map_toeplitz_key = bpf_map__fd(
             rss_bpf_ctx->maps.tap_rss_map_toeplitz_key);
 
+    if (!ebpf_rss_mmap(ctx)) {
+        goto error;
+    }
+
     return true;
 error:
     rss_bpf__destroy(rss_bpf_ctx);
     ctx->obj = NULL;
+    ctx->program_fd = -1;
+    ctx->map_configuration = -1;
+    ctx->map_toeplitz_key = -1;
+    ctx->map_indirections_table = -1;
 
     return false;
 }
@@ -77,15 +149,11 @@  error:
 static bool ebpf_rss_set_config(struct EBPFRSSContext *ctx,
                                 struct EBPFRSSConfig *config)
 {
-    uint32_t map_key = 0;
-
     if (!ebpf_rss_is_loaded(ctx)) {
         return false;
     }
-    if (bpf_map_update_elem(ctx->map_configuration,
-                            &map_key, config, 0) < 0) {
-        return false;
-    }
+
+    memcpy(ctx->mmap_configuration, config, sizeof(*config));
     return true;
 }
 
@@ -93,27 +161,19 @@  static bool ebpf_rss_set_indirections_table(struct EBPFRSSContext *ctx,
                                             uint16_t *indirections_table,
                                             size_t len)
 {
-    uint32_t i = 0;
-
     if (!ebpf_rss_is_loaded(ctx) || indirections_table == NULL ||
        len > VIRTIO_NET_RSS_MAX_TABLE_LEN) {
         return false;
     }
 
-    for (; i < len; ++i) {
-        if (bpf_map_update_elem(ctx->map_indirections_table, &i,
-                                indirections_table + i, 0) < 0) {
-            return false;
-        }
-    }
+    memcpy(ctx->mmap_indirections_table, indirections_table,
+            sizeof(*indirections_table) * len);
     return true;
 }
 
 static bool ebpf_rss_set_toepliz_key(struct EBPFRSSContext *ctx,
                                      uint8_t *toeplitz_key)
 {
-    uint32_t map_key = 0;
-
     /* prepare toeplitz key */
     uint8_t toe[VIRTIO_NET_RSS_MAX_KEY_SIZE] = {};
 
@@ -123,10 +183,7 @@  static bool ebpf_rss_set_toepliz_key(struct EBPFRSSContext *ctx,
     memcpy(toe, toeplitz_key, VIRTIO_NET_RSS_MAX_KEY_SIZE);
     *(uint32_t *)toe = ntohl(*(uint32_t *)toe);
 
-    if (bpf_map_update_elem(ctx->map_toeplitz_key, &map_key, toe,
-                            0) < 0) {
-        return false;
-    }
+    memcpy(ctx->mmap_toeplitz_key, toe, VIRTIO_NET_RSS_MAX_KEY_SIZE);
     return true;
 }
 
@@ -160,6 +217,20 @@  void ebpf_rss_unload(struct EBPFRSSContext *ctx)
         return;
     }
 
-    rss_bpf__destroy(ctx->obj);
+    ebpf_rss_munmap(ctx);
+
+    if (ctx->obj) {
+        rss_bpf__destroy(ctx->obj);
+    } else {
+        close(ctx->program_fd);
+        close(ctx->map_configuration);
+        close(ctx->map_toeplitz_key);
+        close(ctx->map_indirections_table);
+    }
+
     ctx->obj = NULL;
+    ctx->program_fd = -1;
+    ctx->map_configuration = -1;
+    ctx->map_toeplitz_key = -1;
+    ctx->map_indirections_table = -1;
 }
diff --git a/ebpf/ebpf_rss.h b/ebpf/ebpf_rss.h
index bf3f2572c7c..ab08a7266d0 100644
--- a/ebpf/ebpf_rss.h
+++ b/ebpf/ebpf_rss.h
@@ -20,6 +20,11 @@  struct EBPFRSSContext {
     int map_configuration;
     int map_toeplitz_key;
     int map_indirections_table;
+
+    /* mapped eBPF maps for direct access to omit bpf_map_update_elem() */
+    void *mmap_configuration;
+    void *mmap_toeplitz_key;
+    void *mmap_indirections_table;
 };
 
 struct EBPFRSSConfig {