diff mbox series

[RFC,1/5] ebpf: Added eBPF initialization by fds and map update.

Message ID 20230330001522.120774-2-andrew@daynix.com
State New
Headers show
Series eBPF RSS through QMP support. | expand

Commit Message

Andrew Melnichenko March 30, 2023, 12:15 a.m. UTC
Changed eBPF map updates through mmaped array.
Mmaped arrays provide direct access to map data.
It should omit using bpf_map_update_elem() call,
which may require capabilities that are not present.

Signed-off-by: Andrew Melnychenko <andrew@daynix.com>
---
 ebpf/ebpf_rss-stub.c |   6 +++
 ebpf/ebpf_rss.c      | 120 ++++++++++++++++++++++++++++++++++---------
 ebpf/ebpf_rss.h      |  10 ++++
 3 files changed, 113 insertions(+), 23 deletions(-)

Comments

Jason Wang March 30, 2023, 6:53 a.m. UTC | #1
On Thu, Mar 30, 2023 at 8:33 AM Andrew Melnychenko <andrew@daynix.com> wrote:
>
> Changed eBPF map updates through mmaped array.
> Mmaped arrays provide direct access to map data.
> It should omit using bpf_map_update_elem() call,
> which may require capabilities that are not present.

This requires kernel support, so after this change, eBPF based RSS
doesn't work on old kernels that only support syscall based map
updating.

I think it's better to keep the syscall path and fail the fds passing
if the kernel doesn't support mmap().

Thanks

>
> Signed-off-by: Andrew Melnychenko <andrew@daynix.com>
> ---
>  ebpf/ebpf_rss-stub.c |   6 +++
>  ebpf/ebpf_rss.c      | 120 ++++++++++++++++++++++++++++++++++---------
>  ebpf/ebpf_rss.h      |  10 ++++
>  3 files changed, 113 insertions(+), 23 deletions(-)
>
> diff --git a/ebpf/ebpf_rss-stub.c b/ebpf/ebpf_rss-stub.c
> index e71e229190..8d7fae2ad9 100644
> --- a/ebpf/ebpf_rss-stub.c
> +++ b/ebpf/ebpf_rss-stub.c
> @@ -28,6 +28,12 @@ bool ebpf_rss_load(struct EBPFRSSContext *ctx)
>      return false;
>  }
>
> +bool ebpf_rss_load_fds(struct EBPFRSSContext *ctx, int program_fd,
> +                       int config_fd, int toeplitz_fd, int table_fd)
> +{
> +    return false;
> +}
> +
>  bool ebpf_rss_set_all(struct EBPFRSSContext *ctx, struct EBPFRSSConfig *config,
>                        uint16_t *indirections_table, uint8_t *toeplitz_key)
>  {
> diff --git a/ebpf/ebpf_rss.c b/ebpf/ebpf_rss.c
> index cee658c158..08015fecb1 100644
> --- a/ebpf/ebpf_rss.c
> +++ b/ebpf/ebpf_rss.c
> @@ -27,19 +27,68 @@ void ebpf_rss_init(struct EBPFRSSContext *ctx)
>  {
>      if (ctx != NULL) {
>          ctx->obj = NULL;
> +        ctx->program_fd = -1;
>      }
>  }
>
>  bool ebpf_rss_is_loaded(struct EBPFRSSContext *ctx)
>  {
> -    return ctx != NULL && ctx->obj != NULL;
> +    return ctx != NULL && (ctx->obj != NULL || ctx->program_fd != -1);
> +}
> +
> +static bool ebpf_rss_mmap(struct EBPFRSSContext *ctx)
> +{
> +    if (!ebpf_rss_is_loaded(ctx)) {
> +        return false;
> +    }
> +
> +    ctx->mmap_configuration = mmap(NULL, qemu_real_host_page_size(),
> +                                   PROT_READ | PROT_WRITE, MAP_SHARED,
> +                                   ctx->map_configuration, 0);
> +    if (ctx->mmap_configuration == MAP_FAILED) {
> +        trace_ebpf_error("eBPF RSS", "can not mmap eBPF configuration array");
> +        return false;
> +    }
> +    ctx->mmap_toeplitz_key = mmap(NULL, qemu_real_host_page_size(),
> +                                   PROT_READ | PROT_WRITE, MAP_SHARED,
> +                                   ctx->map_toeplitz_key, 0);
> +    if (ctx->mmap_toeplitz_key == MAP_FAILED) {
> +        trace_ebpf_error("eBPF RSS", "can not mmap eBPF toeplitz key");
> +        goto toeplitz_fail;
> +    }
> +    ctx->mmap_indirections_table = mmap(NULL, qemu_real_host_page_size(),
> +                                   PROT_READ | PROT_WRITE, MAP_SHARED,
> +                                   ctx->map_indirections_table, 0);
> +    if (ctx->mmap_indirections_table == MAP_FAILED) {
> +        trace_ebpf_error("eBPF RSS", "can not mmap eBPF indirection table");
> +        goto indirection_fail;
> +    }
> +
> +    return true;
> +
> +indirection_fail:
> +    munmap(ctx->mmap_toeplitz_key, qemu_real_host_page_size());
> +toeplitz_fail:
> +    munmap(ctx->mmap_configuration, qemu_real_host_page_size());
> +    return false;
> +}
> +
> +static void ebpf_rss_munmap(struct EBPFRSSContext *ctx)
> +{
> +    if (!ebpf_rss_is_loaded(ctx)) {
> +        return;
> +    }
> +
> +    munmap(ctx->mmap_indirections_table, qemu_real_host_page_size());
> +    munmap(ctx->mmap_toeplitz_key, qemu_real_host_page_size());
> +    munmap(ctx->mmap_configuration, qemu_real_host_page_size());
>  }
>
>  bool ebpf_rss_load(struct EBPFRSSContext *ctx)
>  {
>      struct rss_bpf *rss_bpf_ctx;
>
> -    if (ctx == NULL) {
> +    if (ctx == NULL || ebpf_rss_is_loaded(ctx)) {
>          return false;
>      }
>
> @@ -66,26 +115,51 @@ bool ebpf_rss_load(struct EBPFRSSContext *ctx)
>      ctx->map_toeplitz_key = bpf_map__fd(
>              rss_bpf_ctx->maps.tap_rss_map_toeplitz_key);
>
> +    if (!ebpf_rss_mmap(ctx)) {
> +        goto error;
> +    }
> +
>      return true;
>  error:
>      rss_bpf__destroy(rss_bpf_ctx);
>      ctx->obj = NULL;
> +    ctx->program_fd = -1;
>
>      return false;
>  }
>
> -static bool ebpf_rss_set_config(struct EBPFRSSContext *ctx,
> -                                struct EBPFRSSConfig *config)
> +bool ebpf_rss_load_fds(struct EBPFRSSContext *ctx, int program_fd,
> +                       int config_fd, int toeplitz_fd, int table_fd)
>  {
> -    uint32_t map_key = 0;
> +    if (ctx == NULL || ebpf_rss_is_loaded(ctx)) {
> +        return false;
> +    }
>
> -    if (!ebpf_rss_is_loaded(ctx)) {
> +    if (program_fd < 0 || config_fd < 0 || toeplitz_fd < 0 || table_fd < 0) {
>          return false;
>      }
> -    if (bpf_map_update_elem(ctx->map_configuration,
> -                            &map_key, config, 0) < 0) {
> +
> +    ctx->program_fd = program_fd;
> +    ctx->map_configuration = config_fd;
> +    ctx->map_toeplitz_key = toeplitz_fd;
> +    ctx->map_indirections_table = table_fd;
> +
> +    if (!ebpf_rss_mmap(ctx)) {
> +        ctx->program_fd = -1;
>          return false;
>      }
> +
> +    return true;
> +}
> +
> +static bool ebpf_rss_set_config(struct EBPFRSSContext *ctx,
> +                                struct EBPFRSSConfig *config)
> +{
> +    if (!ebpf_rss_is_loaded(ctx)) {
> +        return false;
> +    }
> +
> +    memcpy(ctx->mmap_configuration, config, sizeof(*config));
>      return true;
>  }
>
> @@ -93,27 +167,19 @@ static bool ebpf_rss_set_indirections_table(struct EBPFRSSContext *ctx,
>                                              uint16_t *indirections_table,
>                                              size_t len)
>  {
> -    uint32_t i = 0;
> -
>      if (!ebpf_rss_is_loaded(ctx) || indirections_table == NULL ||
>         len > VIRTIO_NET_RSS_MAX_TABLE_LEN) {
>          return false;
>      }
>
> -    for (; i < len; ++i) {
> -        if (bpf_map_update_elem(ctx->map_indirections_table, &i,
> -                                indirections_table + i, 0) < 0) {
> -            return false;
> -        }
> -    }
> +    memcpy(ctx->mmap_indirections_table, indirections_table,
> +            sizeof(*indirections_table) * len);
>      return true;
>  }
>
>  static bool ebpf_rss_set_toepliz_key(struct EBPFRSSContext *ctx,
>                                       uint8_t *toeplitz_key)
>  {
> -    uint32_t map_key = 0;
> -
>      /* prepare toeplitz key */
>      uint8_t toe[VIRTIO_NET_RSS_MAX_KEY_SIZE] = {};
>
> @@ -123,10 +189,7 @@ static bool ebpf_rss_set_toepliz_key(struct EBPFRSSContext *ctx,
>      memcpy(toe, toeplitz_key, VIRTIO_NET_RSS_MAX_KEY_SIZE);
>      *(uint32_t *)toe = ntohl(*(uint32_t *)toe);
>
> -    if (bpf_map_update_elem(ctx->map_toeplitz_key, &map_key, toe,
> -                            0) < 0) {
> -        return false;
> -    }
> +    memcpy(ctx->mmap_toeplitz_key, toe, VIRTIO_NET_RSS_MAX_KEY_SIZE);
>      return true;
>  }
>
> @@ -160,6 +223,17 @@ void ebpf_rss_unload(struct EBPFRSSContext *ctx)
>          return;
>      }
>
> -    rss_bpf__destroy(ctx->obj);
> +    ebpf_rss_munmap(ctx);
> +
> +    if (ctx->obj) {
> +        rss_bpf__destroy(ctx->obj);
> +    } else {
> +        close(ctx->program_fd);
> +        close(ctx->map_configuration);
> +        close(ctx->map_toeplitz_key);
> +        close(ctx->map_indirections_table);
> +    }
> +
>      ctx->obj = NULL;
> +    ctx->program_fd = -1;
>  }
> diff --git a/ebpf/ebpf_rss.h b/ebpf/ebpf_rss.h
> index bf3f2572c7..239242b0d2 100644
> --- a/ebpf/ebpf_rss.h
> +++ b/ebpf/ebpf_rss.h
> @@ -14,12 +14,19 @@
>  #ifndef QEMU_EBPF_RSS_H
>  #define QEMU_EBPF_RSS_H
>
> +#define EBPF_RSS_MAX_FDS 4
> +
>  struct EBPFRSSContext {
>      void *obj;
>      int program_fd;
>      int map_configuration;
>      int map_toeplitz_key;
>      int map_indirections_table;
> +
> +    /* mapped eBPF maps for direct access to omit bpf_map_update_elem() */
> +    void *mmap_configuration;
> +    void *mmap_toeplitz_key;
> +    void *mmap_indirections_table;
>  };
>
>  struct EBPFRSSConfig {
> @@ -36,6 +43,9 @@ bool ebpf_rss_is_loaded(struct EBPFRSSContext *ctx);
>
>  bool ebpf_rss_load(struct EBPFRSSContext *ctx);
>
> +bool ebpf_rss_load_fds(struct EBPFRSSContext *ctx, int program_fd,
> +                       int config_fd, int toeplitz_fd, int table_fd);
> +
>  bool ebpf_rss_set_all(struct EBPFRSSContext *ctx, struct EBPFRSSConfig *config,
>                        uint16_t *indirections_table, uint8_t *toeplitz_key);
>
> --
> 2.39.1
>
Jason Wang March 30, 2023, 6:56 a.m. UTC | #2
On Thu, Mar 30, 2023 at 2:53 PM Jason Wang <jasowang@redhat.com> wrote:
>
> On Thu, Mar 30, 2023 at 8:33 AM Andrew Melnychenko <andrew@daynix.com> wrote:
> >
> > Changed eBPF map updates through mmaped array.
> > Mmaped arrays provide direct access to map data.
> > It should omit using bpf_map_update_elem() call,
> > which may require capabilities that are not present.
>
> This requires kernel support, so after this change, eBPF based RSS
> doesn't work on old kernels that only support syscall based map
> updating.
>
> I think it's better to keep the syscall path and fail the fds passing
> if the kernel doesn't support mmap().
>
> Thanks
>
> >
> > Signed-off-by: Andrew Melnychenko <andrew@daynix.com>
> > ---
> >  ebpf/ebpf_rss-stub.c |   6 +++
> >  ebpf/ebpf_rss.c      | 120 ++++++++++++++++++++++++++++++++++---------
> >  ebpf/ebpf_rss.h      |  10 ++++
> >  3 files changed, 113 insertions(+), 23 deletions(-)
> >
> > diff --git a/ebpf/ebpf_rss-stub.c b/ebpf/ebpf_rss-stub.c
> > index e71e229190..8d7fae2ad9 100644
> > --- a/ebpf/ebpf_rss-stub.c
> > +++ b/ebpf/ebpf_rss-stub.c
> > @@ -28,6 +28,12 @@ bool ebpf_rss_load(struct EBPFRSSContext *ctx)
> >      return false;
> >  }
> >
> > +bool ebpf_rss_load_fds(struct EBPFRSSContext *ctx, int program_fd,
> > +                       int config_fd, int toeplitz_fd, int table_fd)
> > +{
> > +    return false;
> > +}
> > +
> >  bool ebpf_rss_set_all(struct EBPFRSSContext *ctx, struct EBPFRSSConfig *config,
> >                        uint16_t *indirections_table, uint8_t *toeplitz_key)
> >  {
> > diff --git a/ebpf/ebpf_rss.c b/ebpf/ebpf_rss.c
> > index cee658c158..08015fecb1 100644
> > --- a/ebpf/ebpf_rss.c
> > +++ b/ebpf/ebpf_rss.c
> > @@ -27,19 +27,68 @@ void ebpf_rss_init(struct EBPFRSSContext *ctx)
> >  {
> >      if (ctx != NULL) {
> >          ctx->obj = NULL;
> > +        ctx->program_fd = -1;
> >      }
> >  }
> >
> >  bool ebpf_rss_is_loaded(struct EBPFRSSContext *ctx)
> >  {
> > -    return ctx != NULL && ctx->obj != NULL;
> > +    return ctx != NULL && (ctx->obj != NULL || ctx->program_fd != -1);
> > +}
> > +
> > +static bool ebpf_rss_mmap(struct EBPFRSSContext *ctx)
> > +{
> > +    if (!ebpf_rss_is_loaded(ctx)) {
> > +        return false;
> > +    }
> > +
> > +    ctx->mmap_configuration = mmap(NULL, qemu_real_host_page_size(),
> > +                                   PROT_READ | PROT_WRITE, MAP_SHARED,
> > +                                   ctx->map_configuration, 0);
> > +    if (ctx->mmap_configuration == MAP_FAILED) {
> > +        trace_ebpf_error("eBPF RSS", "can not mmap eBPF configuration array");
> > +        return false;
> > +    }
> > +    ctx->mmap_toeplitz_key = mmap(NULL, qemu_real_host_page_size(),
> > +                                   PROT_READ | PROT_WRITE, MAP_SHARED,
> > +                                   ctx->map_toeplitz_key, 0);
> > +    if (ctx->mmap_toeplitz_key == MAP_FAILED) {
> > +        trace_ebpf_error("eBPF RSS", "can not mmap eBPF toeplitz key");
> > +        goto toeplitz_fail;
> > +    }
> > +    ctx->mmap_indirections_table = mmap(NULL, qemu_real_host_page_size(),
> > +                                   PROT_READ | PROT_WRITE, MAP_SHARED,
> > +                                   ctx->map_indirections_table, 0);
> > +    if (ctx->mmap_indirections_table == MAP_FAILED) {
> > +        trace_ebpf_error("eBPF RSS", "can not mmap eBPF indirection table");
> > +        goto indirection_fail;
> > +    }
> > +
> > +    return true;
> > +
> > +indirection_fail:
> > +    munmap(ctx->mmap_toeplitz_key, qemu_real_host_page_size());
> > +toeplitz_fail:
> > +    munmap(ctx->mmap_configuration, qemu_real_host_page_size());
> > +    return false;
> > +}
> > +
> > +static void ebpf_rss_munmap(struct EBPFRSSContext *ctx)
> > +{
> > +    if (!ebpf_rss_is_loaded(ctx)) {
> > +        return;
> > +    }
> > +
> > +    munmap(ctx->mmap_indirections_table, qemu_real_host_page_size());
> > +    munmap(ctx->mmap_toeplitz_key, qemu_real_host_page_size());
> > +    munmap(ctx->mmap_configuration, qemu_real_host_page_size());
> >  }
> >
> >  bool ebpf_rss_load(struct EBPFRSSContext *ctx)
> >  {
> >      struct rss_bpf *rss_bpf_ctx;
> >
> > -    if (ctx == NULL) {
> > +    if (ctx == NULL || ebpf_rss_is_loaded(ctx)) {
> >          return false;
> >      }
> >
> > @@ -66,26 +115,51 @@ bool ebpf_rss_load(struct EBPFRSSContext *ctx)
> >      ctx->map_toeplitz_key = bpf_map__fd(
> >              rss_bpf_ctx->maps.tap_rss_map_toeplitz_key);
> >
> > +    if (!ebpf_rss_mmap(ctx)) {
> > +        goto error;
> > +    }
> > +
> >      return true;
> >  error:
> >      rss_bpf__destroy(rss_bpf_ctx);
> >      ctx->obj = NULL;
> > +    ctx->program_fd = -1;
> >
> >      return false;
> >  }
> >
> > -static bool ebpf_rss_set_config(struct EBPFRSSContext *ctx,
> > -                                struct EBPFRSSConfig *config)
> > +bool ebpf_rss_load_fds(struct EBPFRSSContext *ctx, int program_fd,
> > +                       int config_fd, int toeplitz_fd, int table_fd)
> >  {
> > -    uint32_t map_key = 0;
> > +    if (ctx == NULL || ebpf_rss_is_loaded(ctx)) {
> > +        return false;
> > +    }
> >
> > -    if (!ebpf_rss_is_loaded(ctx)) {
> > +    if (program_fd < 0 || config_fd < 0 || toeplitz_fd < 0 || table_fd < 0) {
> >          return false;
> >      }
> > -    if (bpf_map_update_elem(ctx->map_configuration,
> > -                            &map_key, config, 0) < 0) {
> > +
> > +    ctx->program_fd = program_fd;
> > +    ctx->map_configuration = config_fd;
> > +    ctx->map_toeplitz_key = toeplitz_fd;
> > +    ctx->map_indirections_table = table_fd;
> > +
> > +    if (!ebpf_rss_mmap(ctx)) {
> > +        ctx->program_fd = -1;
> >          return false;
> >      }
> > +
> > +    return true;
> > +}
> > +
> > +static bool ebpf_rss_set_config(struct EBPFRSSContext *ctx,
> > +                                struct EBPFRSSConfig *config)
> > +{
> > +    if (!ebpf_rss_is_loaded(ctx)) {
> > +        return false;
> > +    }
> > +
> > +    memcpy(ctx->mmap_configuration, config, sizeof(*config));
> >      return true;
> >  }
> >
> > @@ -93,27 +167,19 @@ static bool ebpf_rss_set_indirections_table(struct EBPFRSSContext *ctx,
> >                                              uint16_t *indirections_table,
> >                                              size_t len)
> >  {
> > -    uint32_t i = 0;
> > -
> >      if (!ebpf_rss_is_loaded(ctx) || indirections_table == NULL ||
> >         len > VIRTIO_NET_RSS_MAX_TABLE_LEN) {
> >          return false;
> >      }
> >
> > -    for (; i < len; ++i) {
> > -        if (bpf_map_update_elem(ctx->map_indirections_table, &i,
> > -                                indirections_table + i, 0) < 0) {
> > -            return false;
> > -        }
> > -    }
> > +    memcpy(ctx->mmap_indirections_table, indirections_table,
> > +            sizeof(*indirections_table) * len);

Btw, I wonder what's the atomic requirement here.

Using memcpy may result in byte to byte updating that may result in
unexpected redirection of the flows.

Thanks

> >      return true;
> >  }
> >
> >  static bool ebpf_rss_set_toepliz_key(struct EBPFRSSContext *ctx,
> >                                       uint8_t *toeplitz_key)
> >  {
> > -    uint32_t map_key = 0;
> > -
> >      /* prepare toeplitz key */
> >      uint8_t toe[VIRTIO_NET_RSS_MAX_KEY_SIZE] = {};
> >
> > @@ -123,10 +189,7 @@ static bool ebpf_rss_set_toepliz_key(struct EBPFRSSContext *ctx,
> >      memcpy(toe, toeplitz_key, VIRTIO_NET_RSS_MAX_KEY_SIZE);
> >      *(uint32_t *)toe = ntohl(*(uint32_t *)toe);
> >
> > -    if (bpf_map_update_elem(ctx->map_toeplitz_key, &map_key, toe,
> > -                            0) < 0) {
> > -        return false;
> > -    }
> > +    memcpy(ctx->mmap_toeplitz_key, toe, VIRTIO_NET_RSS_MAX_KEY_SIZE);
> >      return true;
> >  }
> >
> > @@ -160,6 +223,17 @@ void ebpf_rss_unload(struct EBPFRSSContext *ctx)
> >          return;
> >      }
> >
> > -    rss_bpf__destroy(ctx->obj);
> > +    ebpf_rss_munmap(ctx);
> > +
> > +    if (ctx->obj) {
> > +        rss_bpf__destroy(ctx->obj);
> > +    } else {
> > +        close(ctx->program_fd);
> > +        close(ctx->map_configuration);
> > +        close(ctx->map_toeplitz_key);
> > +        close(ctx->map_indirections_table);
> > +    }
> > +
> >      ctx->obj = NULL;
> > +    ctx->program_fd = -1;
> >  }
> > diff --git a/ebpf/ebpf_rss.h b/ebpf/ebpf_rss.h
> > index bf3f2572c7..239242b0d2 100644
> > --- a/ebpf/ebpf_rss.h
> > +++ b/ebpf/ebpf_rss.h
> > @@ -14,12 +14,19 @@
> >  #ifndef QEMU_EBPF_RSS_H
> >  #define QEMU_EBPF_RSS_H
> >
> > +#define EBPF_RSS_MAX_FDS 4
> > +
> >  struct EBPFRSSContext {
> >      void *obj;
> >      int program_fd;
> >      int map_configuration;
> >      int map_toeplitz_key;
> >      int map_indirections_table;
> > +
> > +    /* mapped eBPF maps for direct access to omit bpf_map_update_elem() */
> > +    void *mmap_configuration;
> > +    void *mmap_toeplitz_key;
> > +    void *mmap_indirections_table;
> >  };
> >
> >  struct EBPFRSSConfig {
> > @@ -36,6 +43,9 @@ bool ebpf_rss_is_loaded(struct EBPFRSSContext *ctx);
> >
> >  bool ebpf_rss_load(struct EBPFRSSContext *ctx);
> >
> > +bool ebpf_rss_load_fds(struct EBPFRSSContext *ctx, int program_fd,
> > +                       int config_fd, int toeplitz_fd, int table_fd);
> > +
> >  bool ebpf_rss_set_all(struct EBPFRSSContext *ctx, struct EBPFRSSConfig *config,
> >                        uint16_t *indirections_table, uint8_t *toeplitz_key);
> >
> > --
> > 2.39.1
> >
Daniel P. Berrangé March 30, 2023, 7:10 a.m. UTC | #3
On Thu, Mar 30, 2023 at 02:53:16PM +0800, Jason Wang wrote:
> On Thu, Mar 30, 2023 at 8:33 AM Andrew Melnychenko <andrew@daynix.com> wrote:
> >
> > Changed eBPF map updates through mmaped array.
> > Mmaped arrays provide direct access to map data.
> > It should omit using bpf_map_update_elem() call,
> > which may require capabilities that are not present.
> 
> This requires kernel support, so after this change, eBPF based RSS
> doesn't work on old kernels that only support syscall based map
> updating.

What kernel version is the cut off ?


With regards,
Daniel
Andrew Melnichenko March 30, 2023, 11:13 a.m. UTC | #4
Hi all,

On Thu, Mar 30, 2023 at 10:10 AM Daniel P. Berrangé <berrange@redhat.com> wrote:
>
> On Thu, Mar 30, 2023 at 02:53:16PM +0800, Jason Wang wrote:
> > On Thu, Mar 30, 2023 at 8:33 AM Andrew Melnychenko <andrew@daynix.com> wrote:
> > >
> > > Changed eBPF map updates through mmaped array.
> > > Mmaped arrays provide direct access to map data.
> > > It should omit using bpf_map_update_elem() call,
> > > which may require capabilities that are not present.
> >
> > This requires kernel support, so after this change, eBPF based RSS
> > doesn't work on old kernels that only support syscall based map
> > updating.
>

It would not work either - bpf_map_update_elem() requires BPF capabilities.

> What kernel version is the cut off ?

If I'm not mistaken - something like kernel 5.5<.
But in any case - the RSS eBPF program is quite big and would require features
like jumpsback.
So, on old kernels, it would not load.

>
>
> With regards,
> Daniel
> --
> |: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
> |: https://libvirt.org         -o-            https://fstop138.berrange.com :|
> |: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|
>
diff mbox series

Patch

diff --git a/ebpf/ebpf_rss-stub.c b/ebpf/ebpf_rss-stub.c
index e71e229190..8d7fae2ad9 100644
--- a/ebpf/ebpf_rss-stub.c
+++ b/ebpf/ebpf_rss-stub.c
@@ -28,6 +28,12 @@  bool ebpf_rss_load(struct EBPFRSSContext *ctx)
     return false;
 }
 
+bool ebpf_rss_load_fds(struct EBPFRSSContext *ctx, int program_fd,
+                       int config_fd, int toeplitz_fd, int table_fd)
+{
+    return false;
+}
+
 bool ebpf_rss_set_all(struct EBPFRSSContext *ctx, struct EBPFRSSConfig *config,
                       uint16_t *indirections_table, uint8_t *toeplitz_key)
 {
diff --git a/ebpf/ebpf_rss.c b/ebpf/ebpf_rss.c
index cee658c158..08015fecb1 100644
--- a/ebpf/ebpf_rss.c
+++ b/ebpf/ebpf_rss.c
@@ -27,19 +27,68 @@  void ebpf_rss_init(struct EBPFRSSContext *ctx)
 {
     if (ctx != NULL) {
         ctx->obj = NULL;
+        ctx->program_fd = -1;
     }
 }
 
 bool ebpf_rss_is_loaded(struct EBPFRSSContext *ctx)
 {
-    return ctx != NULL && ctx->obj != NULL;
+    return ctx != NULL && (ctx->obj != NULL || ctx->program_fd != -1);
+}
+
+static bool ebpf_rss_mmap(struct EBPFRSSContext *ctx)
+{
+    if (!ebpf_rss_is_loaded(ctx)) {
+        return false;
+    }
+
+    ctx->mmap_configuration = mmap(NULL, qemu_real_host_page_size(),
+                                   PROT_READ | PROT_WRITE, MAP_SHARED,
+                                   ctx->map_configuration, 0);
+    if (ctx->mmap_configuration == MAP_FAILED) {
+        trace_ebpf_error("eBPF RSS", "can not mmap eBPF configuration array");
+        return false;
+    }
+    ctx->mmap_toeplitz_key = mmap(NULL, qemu_real_host_page_size(),
+                                   PROT_READ | PROT_WRITE, MAP_SHARED,
+                                   ctx->map_toeplitz_key, 0);
+    if (ctx->mmap_toeplitz_key == MAP_FAILED) {
+        trace_ebpf_error("eBPF RSS", "can not mmap eBPF toeplitz key");
+        goto toeplitz_fail;
+    }
+    ctx->mmap_indirections_table = mmap(NULL, qemu_real_host_page_size(),
+                                   PROT_READ | PROT_WRITE, MAP_SHARED,
+                                   ctx->map_indirections_table, 0);
+    if (ctx->mmap_indirections_table == MAP_FAILED) {
+        trace_ebpf_error("eBPF RSS", "can not mmap eBPF indirection table");
+        goto indirection_fail;
+    }
+
+    return true;
+
+indirection_fail:
+    munmap(ctx->mmap_toeplitz_key, qemu_real_host_page_size());
+toeplitz_fail:
+    munmap(ctx->mmap_configuration, qemu_real_host_page_size());
+    return false;
+}
+
+static void ebpf_rss_munmap(struct EBPFRSSContext *ctx)
+{
+    if (!ebpf_rss_is_loaded(ctx)) {
+        return;
+    }
+
+    munmap(ctx->mmap_indirections_table, qemu_real_host_page_size());
+    munmap(ctx->mmap_toeplitz_key, qemu_real_host_page_size());
+    munmap(ctx->mmap_configuration, qemu_real_host_page_size());
 }
 
 bool ebpf_rss_load(struct EBPFRSSContext *ctx)
 {
     struct rss_bpf *rss_bpf_ctx;
 
-    if (ctx == NULL) {
+    if (ctx == NULL || ebpf_rss_is_loaded(ctx)) {
         return false;
     }
 
@@ -66,26 +115,51 @@  bool ebpf_rss_load(struct EBPFRSSContext *ctx)
     ctx->map_toeplitz_key = bpf_map__fd(
             rss_bpf_ctx->maps.tap_rss_map_toeplitz_key);
 
+    if (!ebpf_rss_mmap(ctx)) {
+        goto error;
+    }
+
     return true;
 error:
     rss_bpf__destroy(rss_bpf_ctx);
     ctx->obj = NULL;
+    ctx->program_fd = -1;
 
     return false;
 }
 
-static bool ebpf_rss_set_config(struct EBPFRSSContext *ctx,
-                                struct EBPFRSSConfig *config)
+bool ebpf_rss_load_fds(struct EBPFRSSContext *ctx, int program_fd,
+                       int config_fd, int toeplitz_fd, int table_fd)
 {
-    uint32_t map_key = 0;
+    if (ctx == NULL || ebpf_rss_is_loaded(ctx)) {
+        return false;
+    }
 
-    if (!ebpf_rss_is_loaded(ctx)) {
+    if (program_fd < 0 || config_fd < 0 || toeplitz_fd < 0 || table_fd < 0) {
         return false;
     }
-    if (bpf_map_update_elem(ctx->map_configuration,
-                            &map_key, config, 0) < 0) {
+
+    ctx->program_fd = program_fd;
+    ctx->map_configuration = config_fd;
+    ctx->map_toeplitz_key = toeplitz_fd;
+    ctx->map_indirections_table = table_fd;
+
+    if (!ebpf_rss_mmap(ctx)) {
+        ctx->program_fd = -1;
         return false;
     }
+
+    return true;
+}
+
+static bool ebpf_rss_set_config(struct EBPFRSSContext *ctx,
+                                struct EBPFRSSConfig *config)
+{
+    if (!ebpf_rss_is_loaded(ctx)) {
+        return false;
+    }
+
+    memcpy(ctx->mmap_configuration, config, sizeof(*config));
     return true;
 }
 
@@ -93,27 +167,19 @@  static bool ebpf_rss_set_indirections_table(struct EBPFRSSContext *ctx,
                                             uint16_t *indirections_table,
                                             size_t len)
 {
-    uint32_t i = 0;
-
     if (!ebpf_rss_is_loaded(ctx) || indirections_table == NULL ||
        len > VIRTIO_NET_RSS_MAX_TABLE_LEN) {
         return false;
     }
 
-    for (; i < len; ++i) {
-        if (bpf_map_update_elem(ctx->map_indirections_table, &i,
-                                indirections_table + i, 0) < 0) {
-            return false;
-        }
-    }
+    memcpy(ctx->mmap_indirections_table, indirections_table,
+            sizeof(*indirections_table) * len);
     return true;
 }
 
 static bool ebpf_rss_set_toepliz_key(struct EBPFRSSContext *ctx,
                                      uint8_t *toeplitz_key)
 {
-    uint32_t map_key = 0;
-
     /* prepare toeplitz key */
     uint8_t toe[VIRTIO_NET_RSS_MAX_KEY_SIZE] = {};
 
@@ -123,10 +189,7 @@  static bool ebpf_rss_set_toepliz_key(struct EBPFRSSContext *ctx,
     memcpy(toe, toeplitz_key, VIRTIO_NET_RSS_MAX_KEY_SIZE);
     *(uint32_t *)toe = ntohl(*(uint32_t *)toe);
 
-    if (bpf_map_update_elem(ctx->map_toeplitz_key, &map_key, toe,
-                            0) < 0) {
-        return false;
-    }
+    memcpy(ctx->mmap_toeplitz_key, toe, VIRTIO_NET_RSS_MAX_KEY_SIZE);
     return true;
 }
 
@@ -160,6 +223,17 @@  void ebpf_rss_unload(struct EBPFRSSContext *ctx)
         return;
     }
 
-    rss_bpf__destroy(ctx->obj);
+    ebpf_rss_munmap(ctx);
+
+    if (ctx->obj) {
+        rss_bpf__destroy(ctx->obj);
+    } else {
+        close(ctx->program_fd);
+        close(ctx->map_configuration);
+        close(ctx->map_toeplitz_key);
+        close(ctx->map_indirections_table);
+    }
+
     ctx->obj = NULL;
+    ctx->program_fd = -1;
 }
diff --git a/ebpf/ebpf_rss.h b/ebpf/ebpf_rss.h
index bf3f2572c7..239242b0d2 100644
--- a/ebpf/ebpf_rss.h
+++ b/ebpf/ebpf_rss.h
@@ -14,12 +14,19 @@ 
 #ifndef QEMU_EBPF_RSS_H
 #define QEMU_EBPF_RSS_H
 
+#define EBPF_RSS_MAX_FDS 4
+
 struct EBPFRSSContext {
     void *obj;
     int program_fd;
     int map_configuration;
     int map_toeplitz_key;
     int map_indirections_table;
+
+    /* mapped eBPF maps for direct access to omit bpf_map_update_elem() */
+    void *mmap_configuration;
+    void *mmap_toeplitz_key;
+    void *mmap_indirections_table;
 };
 
 struct EBPFRSSConfig {
@@ -36,6 +43,9 @@  bool ebpf_rss_is_loaded(struct EBPFRSSContext *ctx);
 
 bool ebpf_rss_load(struct EBPFRSSContext *ctx);
 
+bool ebpf_rss_load_fds(struct EBPFRSSContext *ctx, int program_fd,
+                       int config_fd, int toeplitz_fd, int table_fd);
+
 bool ebpf_rss_set_all(struct EBPFRSSContext *ctx, struct EBPFRSSConfig *config,
                       uint16_t *indirections_table, uint8_t *toeplitz_key);