diff mbox series

[4/5] ebpf_rss_helper: Added helper for eBPF RSS.

Message ID 20210713153758.323614-5-andrew@daynix.com
State New
Headers show
Series ebpf: Added ebpf helper for libvirtd. | expand

Commit Message

Andrew Melnichenko July 13, 2021, 3:37 p.m. UTC
Helper program. Loads the eBPF RSS program and maps and passes them through a unix socket.
Libvirt may launch this helper and pass the eBPF fds to qemu virtio-net.
Also, the libbpf dependency is now exclusive to Linux.
Libbpf is used for eBPF RSS steering, which is supported only by Linux TAP.
There is no reason yet to build the eBPF loader and helper for non-Linux systems,
even if libbpf is present.

Signed-off-by: Andrew Melnychenko <andrew@daynix.com>
---
 ebpf/qemu-ebpf-rss-helper.c | 130 ++++++++++++++++++++++++++++++++++++
 meson.build                 |  37 ++++++----
 2 files changed, 154 insertions(+), 13 deletions(-)
 create mode 100644 ebpf/qemu-ebpf-rss-helper.c

Comments

Jason Wang Aug. 20, 2021, 3:40 a.m. UTC | #1
在 2021/7/13 下午11:37, Andrew Melnychenko 写道:
> Helper program. Loads eBPF RSS program and maps and passes them through unix socket.
> Libvirt may launch this helper and pass eBPF fds to qemu virtio-net.


I wonder if this can be done as helper for TAP/bridge.

E.g. it's qemu that launches the helper with set-uid.

Then libvirt won't even need to care about that?


> Also, libbpf dependency now exclusively for Linux.
> Libbpf is used for eBPF RSS steering, which is supported only by Linux TAP.
> There is no reason yet to build eBPF loader and helper for non Linux systems,
> even if libbpf is present.
>
> Signed-off-by: Andrew Melnychenko <andrew@daynix.com>
> ---
>   ebpf/qemu-ebpf-rss-helper.c | 130 ++++++++++++++++++++++++++++++++++++
>   meson.build                 |  37 ++++++----
>   2 files changed, 154 insertions(+), 13 deletions(-)
>   create mode 100644 ebpf/qemu-ebpf-rss-helper.c
>
> diff --git a/ebpf/qemu-ebpf-rss-helper.c b/ebpf/qemu-ebpf-rss-helper.c
> new file mode 100644
> index 0000000000..fe68758f57
> --- /dev/null
> +++ b/ebpf/qemu-ebpf-rss-helper.c
> @@ -0,0 +1,130 @@
> +/*
> + * eBPF RSS Helper
> + *
> + * Developed by Daynix Computing LTD (http://www.daynix.com)
> + *
> + * Authors:
> + *  Andrew Melnychenko <andrew@daynix.com>
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2.  See
> + * the COPYING file in the top-level directory.
> + *
> + * Description: This is helper program for libvirtd.
> + *              It loads eBPF RSS program and passes fds through unix socket.
> + *              Built by meson, target - 'qemu-ebpf-rss-helper'.
> + */
> +
> +#include <stdio.h>
> +#include <stdint.h>
> +#include <stdlib.h>
> +#include <stdbool.h>
> +#include <getopt.h>
> +#include <memory.h>
> +#include <errno.h>
> +#include <sys/socket.h>
> +
> +#include "ebpf_rss.h"
> +
> +#include "qemu-helper-stamp.h"
> +
> +void QEMU_HELPER_STAMP(void) {}
> +
> +static int send_fds(int socket, int *fds, int n)
> +{
> +    struct msghdr msg = {};
> +    struct cmsghdr *cmsg = NULL;
> +    char buf[CMSG_SPACE(n * sizeof(int))];
> +    char dummy_buffer = 0;
> +    struct iovec io = { .iov_base = &dummy_buffer,
> +                        .iov_len = sizeof(dummy_buffer) };
> +
> +    memset(buf, 0, sizeof(buf));
> +
> +    msg.msg_iov = &io;
> +    msg.msg_iovlen = 1;
> +    msg.msg_control = buf;
> +    msg.msg_controllen = sizeof(buf);
> +
> +    cmsg = CMSG_FIRSTHDR(&msg);
> +    cmsg->cmsg_level = SOL_SOCKET;
> +    cmsg->cmsg_type = SCM_RIGHTS;
> +    cmsg->cmsg_len = CMSG_LEN(n * sizeof(int));
> +
> +    memcpy(CMSG_DATA(cmsg), fds, n * sizeof(int));
> +
> +    return sendmsg(socket, &msg, 0);
> +}
> +
> +static void print_help_and_exit(const char *prog, int exitcode)
> +{
> +    fprintf(stderr, "%s - load eBPF RSS program for qemu and pass eBPF fds"
> +            " through unix socket.\n", prog);
> +    fprintf(stderr, "\t--fd <num>, -f <num> - unix socket file descriptor"
> +            " used to pass eBPF fds.\n");
> +    fprintf(stderr, "\t--help, -h - this help.\n");
> +    exit(exitcode);
> +}
> +
> +int main(int argc, char **argv)
> +{
> +    char *fd_string = NULL;
> +    int unix_fd = 0;
> +    struct EBPFRSSContext ctx = {};
> +    int fds[EBPF_RSS_MAX_FDS] = {};
> +    int ret = -1;
> +
> +    for (;;) {
> +        int c;
> +        static struct option long_options[] = {
> +                {"help",  no_argument, 0, 'h'},
> +                {"fd",  required_argument, 0, 'f'},
> +                {0, 0, 0, 0}
> +        };
> +        c = getopt_long(argc, argv, "hf:",
> +                long_options, NULL);
> +
> +        if (c == -1) {
> +            break;
> +        }
> +
> +        switch (c) {
> +        case 'f':
> +            fd_string = optarg;
> +            break;
> +        case 'h':
> +        default:
> +            print_help_and_exit(argv[0],
> +                    c == 'h' ? EXIT_SUCCESS : EXIT_FAILURE);
> +        }
> +    }
> +
> +    if (!fd_string) {
> +        fprintf(stderr, "Unix file descriptor not present.\n");
> +        print_help_and_exit(argv[0], EXIT_FAILURE);
> +    }
> +
> +    unix_fd = atoi(fd_string);
> +
> +    if (!unix_fd) {
> +        fprintf(stderr, "Unix file descriptor is invalid.\n");
> +        return EXIT_FAILURE;
> +    }
> +
> +    ebpf_rss_init(&ctx);
> +    if (!ebpf_rss_load(&ctx)) {
> +        fprintf(stderr, "Can't load ebpf.\n");
> +        return EXIT_FAILURE;
> +    }
> +    fds[0] = ctx.program_fd;
> +    fds[1] = ctx.map_configuration;
> +
> +    ret = send_fds(unix_fd, fds, EBPF_RSS_MAX_FDS);
> +    if (ret < 0) {
> +        fprintf(stderr, "Issue while sending fds: %s.\n", strerror(errno));
> +    }
> +
> +    ebpf_rss_unload(&ctx);
> +
> +    return ret < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
> +}
> +
> diff --git a/meson.build b/meson.build
> index 257e51d91b..913aa1fee5 100644
> --- a/meson.build
> +++ b/meson.build
> @@ -1033,19 +1033,22 @@ if not get_option('fuse_lseek').disabled()
>   endif
>   
>   # libbpf
> -libbpf = dependency('libbpf', required: get_option('bpf'), method: 'pkg-config')
> -if libbpf.found() and not cc.links('''
> -   #include <bpf/libbpf.h>
> -   int main(void)
> -   {
> -     bpf_object__destroy_skeleton(NULL);
> -     return 0;
> -   }''', dependencies: libbpf)
> -  libbpf = not_found
> -  if get_option('bpf').enabled()
> -    error('libbpf skeleton test failed')
> -  else
> -    warning('libbpf skeleton test failed, disabling')
> +libbpf = not_found
> +if targetos == 'linux'
> +  libbpf = dependency('libbpf', required: get_option('bpf'), method: 'pkg-config')
> +  if libbpf.found() and not cc.links('''
> +    #include <bpf/libbpf.h>
> +    int main(void)
> +    {
> +      bpf_object__destroy_skeleton(NULL);


Do we need to test whether the bpf can do mmap() here?

Thanks


> +      return 0;
> +    }''', dependencies: libbpf)
> +    libbpf = not_found
> +    if get_option('bpf').enabled()
> +      error('libbpf skeleton test failed')
> +    else
> +      warning('libbpf skeleton test failed, disabling')
> +    endif
>     endif
>   endif
>   
> @@ -2423,6 +2426,14 @@ if have_tools
>                  dependencies: [authz, crypto, io, qom, qemuutil,
>                                 libcap_ng, mpathpersist],
>                  install: true)
> +
> +    if libbpf.found()
> +        executable('qemu-ebpf-rss-helper', files(
> +                   'ebpf/qemu-ebpf-rss-helper.c', 'ebpf/ebpf_rss.c'),
> +                   dependencies: [qemuutil, libbpf, glib],
> +                   install: true,
> +                   install_dir: get_option('libexecdir'))
> +    endif
>     endif
>   
>     if 'CONFIG_IVSHMEM' in config_host
Andrew Melnichenko Aug. 25, 2021, 6:24 p.m. UTC | #2
Hi,

> I wonder if this can be done as helper for TAP/bridge.

Well, it already does that: libvirt may create the TAP device and pass it on
the command line or via the getfd QMP command.

> E.g it's the qemu to launch those helper with set-uid.
>
> Then libvirt won't even need to care about that?

Yeah, we may consider this approach in the future as a fallback.

> Do we need to test whether the bpf can do mmap() here?

I'm not sure that it's required.

On Fri, Aug 20, 2021 at 6:41 AM Jason Wang <jasowang@redhat.com> wrote:

>
> 在 2021/7/13 下午11:37, Andrew Melnychenko 写道:
> > Helper program. Loads eBPF RSS program and maps and passes them through
> unix socket.
> > Libvirt may launch this helper and pass eBPF fds to qemu virtio-net.
>
>
> I wonder if this can be done as helper for TAP/bridge.
>
> E.g it's the qemu to launch those helper with set-uid.
>
> Then libvirt won't even need to care about that?
>
>
> > Also, libbpf dependency now exclusively for Linux.
> > Libbpf is used for eBPF RSS steering, which is supported only by Linux
> TAP.
> > There is no reason yet to build eBPF loader and helper for non Linux
> systems,
> > even if libbpf is present.
> >
> > Signed-off-by: Andrew Melnychenko <andrew@daynix.com>
> > ---
> >   ebpf/qemu-ebpf-rss-helper.c | 130 ++++++++++++++++++++++++++++++++++++
> >   meson.build                 |  37 ++++++----
> >   2 files changed, 154 insertions(+), 13 deletions(-)
> >   create mode 100644 ebpf/qemu-ebpf-rss-helper.c
> >
> > diff --git a/ebpf/qemu-ebpf-rss-helper.c b/ebpf/qemu-ebpf-rss-helper.c
> > new file mode 100644
> > index 0000000000..fe68758f57
> > --- /dev/null
> > +++ b/ebpf/qemu-ebpf-rss-helper.c
> > @@ -0,0 +1,130 @@
> > +/*
> > + * eBPF RSS Helper
> > + *
> > + * Developed by Daynix Computing LTD (http://www.daynix.com)
> > + *
> > + * Authors:
> > + *  Andrew Melnychenko <andrew@daynix.com>
> > + *
> > + * This work is licensed under the terms of the GNU GPL, version 2.  See
> > + * the COPYING file in the top-level directory.
> > + *
> > + * Description: This is helper program for libvirtd.
> > + *              It loads eBPF RSS program and passes fds through unix
> socket.
> > + *              Built by meson, target - 'qemu-ebpf-rss-helper'.
> > + */
> > +
> > +#include <stdio.h>
> > +#include <stdint.h>
> > +#include <stdlib.h>
> > +#include <stdbool.h>
> > +#include <getopt.h>
> > +#include <memory.h>
> > +#include <errno.h>
> > +#include <sys/socket.h>
> > +
> > +#include "ebpf_rss.h"
> > +
> > +#include "qemu-helper-stamp.h"
> > +
> > +void QEMU_HELPER_STAMP(void) {}
> > +
> > +static int send_fds(int socket, int *fds, int n)
> > +{
> > +    struct msghdr msg = {};
> > +    struct cmsghdr *cmsg = NULL;
> > +    char buf[CMSG_SPACE(n * sizeof(int))];
> > +    char dummy_buffer = 0;
> > +    struct iovec io = { .iov_base = &dummy_buffer,
> > +                        .iov_len = sizeof(dummy_buffer) };
> > +
> > +    memset(buf, 0, sizeof(buf));
> > +
> > +    msg.msg_iov = &io;
> > +    msg.msg_iovlen = 1;
> > +    msg.msg_control = buf;
> > +    msg.msg_controllen = sizeof(buf);
> > +
> > +    cmsg = CMSG_FIRSTHDR(&msg);
> > +    cmsg->cmsg_level = SOL_SOCKET;
> > +    cmsg->cmsg_type = SCM_RIGHTS;
> > +    cmsg->cmsg_len = CMSG_LEN(n * sizeof(int));
> > +
> > +    memcpy(CMSG_DATA(cmsg), fds, n * sizeof(int));
> > +
> > +    return sendmsg(socket, &msg, 0);
> > +}
> > +
> > +static void print_help_and_exit(const char *prog, int exitcode)
> > +{
> > +    fprintf(stderr, "%s - load eBPF RSS program for qemu and pass eBPF
> fds"
> > +            " through unix socket.\n", prog);
> > +    fprintf(stderr, "\t--fd <num>, -f <num> - unix socket file
> descriptor"
> > +            " used to pass eBPF fds.\n");
> > +    fprintf(stderr, "\t--help, -h - this help.\n");
> > +    exit(exitcode);
> > +}
> > +
> > +int main(int argc, char **argv)
> > +{
> > +    char *fd_string = NULL;
> > +    int unix_fd = 0;
> > +    struct EBPFRSSContext ctx = {};
> > +    int fds[EBPF_RSS_MAX_FDS] = {};
> > +    int ret = -1;
> > +
> > +    for (;;) {
> > +        int c;
> > +        static struct option long_options[] = {
> > +                {"help",  no_argument, 0, 'h'},
> > +                {"fd",  required_argument, 0, 'f'},
> > +                {0, 0, 0, 0}
> > +        };
> > +        c = getopt_long(argc, argv, "hf:",
> > +                long_options, NULL);
> > +
> > +        if (c == -1) {
> > +            break;
> > +        }
> > +
> > +        switch (c) {
> > +        case 'f':
> > +            fd_string = optarg;
> > +            break;
> > +        case 'h':
> > +        default:
> > +            print_help_and_exit(argv[0],
> > +                    c == 'h' ? EXIT_SUCCESS : EXIT_FAILURE);
> > +        }
> > +    }
> > +
> > +    if (!fd_string) {
> > +        fprintf(stderr, "Unix file descriptor not present.\n");
> > +        print_help_and_exit(argv[0], EXIT_FAILURE);
> > +    }
> > +
> > +    unix_fd = atoi(fd_string);
> > +
> > +    if (!unix_fd) {
> > +        fprintf(stderr, "Unix file descriptor is invalid.\n");
> > +        return EXIT_FAILURE;
> > +    }
> > +
> > +    ebpf_rss_init(&ctx);
> > +    if (!ebpf_rss_load(&ctx)) {
> > +        fprintf(stderr, "Can't load ebpf.\n");
> > +        return EXIT_FAILURE;
> > +    }
> > +    fds[0] = ctx.program_fd;
> > +    fds[1] = ctx.map_configuration;
> > +
> > +    ret = send_fds(unix_fd, fds, EBPF_RSS_MAX_FDS);
> > +    if (ret < 0) {
> > +        fprintf(stderr, "Issue while sending fds: %s.\n",
> strerror(errno));
> > +    }
> > +
> > +    ebpf_rss_unload(&ctx);
> > +
> > +    return ret < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
> > +}
> > +
> > diff --git a/meson.build b/meson.build
> > index 257e51d91b..913aa1fee5 100644
> > --- a/meson.build
> > +++ b/meson.build
> > @@ -1033,19 +1033,22 @@ if not get_option('fuse_lseek').disabled()
> >   endif
> >
> >   # libbpf
> > -libbpf = dependency('libbpf', required: get_option('bpf'), method:
> 'pkg-config')
> > -if libbpf.found() and not cc.links('''
> > -   #include <bpf/libbpf.h>
> > -   int main(void)
> > -   {
> > -     bpf_object__destroy_skeleton(NULL);
> > -     return 0;
> > -   }''', dependencies: libbpf)
> > -  libbpf = not_found
> > -  if get_option('bpf').enabled()
> > -    error('libbpf skeleton test failed')
> > -  else
> > -    warning('libbpf skeleton test failed, disabling')
> > +libbpf = not_found
> > +if targetos == 'linux'
> > +  libbpf = dependency('libbpf', required: get_option('bpf'), method:
> 'pkg-config')
> > +  if libbpf.found() and not cc.links('''
> > +    #include <bpf/libbpf.h>
> > +    int main(void)
> > +    {
> > +      bpf_object__destroy_skeleton(NULL);
>
>
> Do we need to test whether the bpf can do mmap() here?
>
> Thanks
>
>
> > +      return 0;
> > +    }''', dependencies: libbpf)
> > +    libbpf = not_found
> > +    if get_option('bpf').enabled()
> > +      error('libbpf skeleton test failed')
> > +    else
> > +      warning('libbpf skeleton test failed, disabling')
> > +    endif
> >     endif
> >   endif
> >
> > @@ -2423,6 +2426,14 @@ if have_tools
> >                  dependencies: [authz, crypto, io, qom, qemuutil,
> >                                 libcap_ng, mpathpersist],
> >                  install: true)
> > +
> > +    if libbpf.found()
> > +        executable('qemu-ebpf-rss-helper', files(
> > +                   'ebpf/qemu-ebpf-rss-helper.c', 'ebpf/ebpf_rss.c'),
> > +                   dependencies: [qemuutil, libbpf, glib],
> > +                   install: true,
> > +                   install_dir: get_option('libexecdir'))
> > +    endif
> >     endif
> >
> >     if 'CONFIG_IVSHMEM' in config_host
>
>
Yuri Benditovich Aug. 30, 2021, 5:07 p.m. UTC | #3
On Fri, Aug 20, 2021 at 6:41 AM Jason Wang <jasowang@redhat.com> wrote:
>
>
> 在 2021/7/13 下午11:37, Andrew Melnychenko 写道:
> > Helper program. Loads eBPF RSS program and maps and passes them through unix socket.
> > Libvirt may launch this helper and pass eBPF fds to qemu virtio-net.
>
>
> I wonder if this can be done as helper for TAP/bridge.
>
> E.g it's the qemu to launch those helper with set-uid.
>
> Then libvirt won't even need to care about that?
>

There are pros and cons to such a solution with set-uid.
From my point of view, one of the cons is that set-uid is effective
only at install time, so the coexistence of different qemu builds (and
different helpers for each one) is kind of problematic.
With the current solution this does not present any problem: the
developer can have several different builds, each of which automatically
has its own helper, and there is no conflict among these builds or
between these builds and the installed qemu package. Changing the
'emulator' in the libvirt profile automatically brings the proper
helper into play.

>
> > Also, libbpf dependency now exclusively for Linux.
> > Libbpf is used for eBPF RSS steering, which is supported only by Linux TAP.
> > There is no reason yet to build eBPF loader and helper for non Linux systems,
> > even if libbpf is present.
> >
> > Signed-off-by: Andrew Melnychenko <andrew@daynix.com>
> > ---
> >   ebpf/qemu-ebpf-rss-helper.c | 130 ++++++++++++++++++++++++++++++++++++
> >   meson.build                 |  37 ++++++----
> >   2 files changed, 154 insertions(+), 13 deletions(-)
> >   create mode 100644 ebpf/qemu-ebpf-rss-helper.c
> >
> > diff --git a/ebpf/qemu-ebpf-rss-helper.c b/ebpf/qemu-ebpf-rss-helper.c
> > new file mode 100644
> > index 0000000000..fe68758f57
> > --- /dev/null
> > +++ b/ebpf/qemu-ebpf-rss-helper.c
> > @@ -0,0 +1,130 @@
> > +/*
> > + * eBPF RSS Helper
> > + *
> > + * Developed by Daynix Computing LTD (http://www.daynix.com)
> > + *
> > + * Authors:
> > + *  Andrew Melnychenko <andrew@daynix.com>
> > + *
> > + * This work is licensed under the terms of the GNU GPL, version 2.  See
> > + * the COPYING file in the top-level directory.
> > + *
> > + * Description: This is helper program for libvirtd.
> > + *              It loads eBPF RSS program and passes fds through unix socket.
> > + *              Built by meson, target - 'qemu-ebpf-rss-helper'.
> > + */
> > +
> > +#include <stdio.h>
> > +#include <stdint.h>
> > +#include <stdlib.h>
> > +#include <stdbool.h>
> > +#include <getopt.h>
> > +#include <memory.h>
> > +#include <errno.h>
> > +#include <sys/socket.h>
> > +
> > +#include "ebpf_rss.h"
> > +
> > +#include "qemu-helper-stamp.h"
> > +
> > +void QEMU_HELPER_STAMP(void) {}
> > +
> > +static int send_fds(int socket, int *fds, int n)
> > +{
> > +    struct msghdr msg = {};
> > +    struct cmsghdr *cmsg = NULL;
> > +    char buf[CMSG_SPACE(n * sizeof(int))];
> > +    char dummy_buffer = 0;
> > +    struct iovec io = { .iov_base = &dummy_buffer,
> > +                        .iov_len = sizeof(dummy_buffer) };
> > +
> > +    memset(buf, 0, sizeof(buf));
> > +
> > +    msg.msg_iov = &io;
> > +    msg.msg_iovlen = 1;
> > +    msg.msg_control = buf;
> > +    msg.msg_controllen = sizeof(buf);
> > +
> > +    cmsg = CMSG_FIRSTHDR(&msg);
> > +    cmsg->cmsg_level = SOL_SOCKET;
> > +    cmsg->cmsg_type = SCM_RIGHTS;
> > +    cmsg->cmsg_len = CMSG_LEN(n * sizeof(int));
> > +
> > +    memcpy(CMSG_DATA(cmsg), fds, n * sizeof(int));
> > +
> > +    return sendmsg(socket, &msg, 0);
> > +}
> > +
> > +static void print_help_and_exit(const char *prog, int exitcode)
> > +{
> > +    fprintf(stderr, "%s - load eBPF RSS program for qemu and pass eBPF fds"
> > +            " through unix socket.\n", prog);
> > +    fprintf(stderr, "\t--fd <num>, -f <num> - unix socket file descriptor"
> > +            " used to pass eBPF fds.\n");
> > +    fprintf(stderr, "\t--help, -h - this help.\n");
> > +    exit(exitcode);
> > +}
> > +
> > +int main(int argc, char **argv)
> > +{
> > +    char *fd_string = NULL;
> > +    int unix_fd = 0;
> > +    struct EBPFRSSContext ctx = {};
> > +    int fds[EBPF_RSS_MAX_FDS] = {};
> > +    int ret = -1;
> > +
> > +    for (;;) {
> > +        int c;
> > +        static struct option long_options[] = {
> > +                {"help",  no_argument, 0, 'h'},
> > +                {"fd",  required_argument, 0, 'f'},
> > +                {0, 0, 0, 0}
> > +        };
> > +        c = getopt_long(argc, argv, "hf:",
> > +                long_options, NULL);
> > +
> > +        if (c == -1) {
> > +            break;
> > +        }
> > +
> > +        switch (c) {
> > +        case 'f':
> > +            fd_string = optarg;
> > +            break;
> > +        case 'h':
> > +        default:
> > +            print_help_and_exit(argv[0],
> > +                    c == 'h' ? EXIT_SUCCESS : EXIT_FAILURE);
> > +        }
> > +    }
> > +
> > +    if (!fd_string) {
> > +        fprintf(stderr, "Unix file descriptor not present.\n");
> > +        print_help_and_exit(argv[0], EXIT_FAILURE);
> > +    }
> > +
> > +    unix_fd = atoi(fd_string);
> > +
> > +    if (!unix_fd) {
> > +        fprintf(stderr, "Unix file descriptor is invalid.\n");
> > +        return EXIT_FAILURE;
> > +    }
> > +
> > +    ebpf_rss_init(&ctx);
> > +    if (!ebpf_rss_load(&ctx)) {
> > +        fprintf(stderr, "Can't load ebpf.\n");
> > +        return EXIT_FAILURE;
> > +    }
> > +    fds[0] = ctx.program_fd;
> > +    fds[1] = ctx.map_configuration;
> > +
> > +    ret = send_fds(unix_fd, fds, EBPF_RSS_MAX_FDS);
> > +    if (ret < 0) {
> > +        fprintf(stderr, "Issue while sending fds: %s.\n", strerror(errno));
> > +    }
> > +
> > +    ebpf_rss_unload(&ctx);
> > +
> > +    return ret < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
> > +}
> > +
> > diff --git a/meson.build b/meson.build
> > index 257e51d91b..913aa1fee5 100644
> > --- a/meson.build
> > +++ b/meson.build
> > @@ -1033,19 +1033,22 @@ if not get_option('fuse_lseek').disabled()
> >   endif
> >
> >   # libbpf
> > -libbpf = dependency('libbpf', required: get_option('bpf'), method: 'pkg-config')
> > -if libbpf.found() and not cc.links('''
> > -   #include <bpf/libbpf.h>
> > -   int main(void)
> > -   {
> > -     bpf_object__destroy_skeleton(NULL);
> > -     return 0;
> > -   }''', dependencies: libbpf)
> > -  libbpf = not_found
> > -  if get_option('bpf').enabled()
> > -    error('libbpf skeleton test failed')
> > -  else
> > -    warning('libbpf skeleton test failed, disabling')
> > +libbpf = not_found
> > +if targetos == 'linux'
> > +  libbpf = dependency('libbpf', required: get_option('bpf'), method: 'pkg-config')
> > +  if libbpf.found() and not cc.links('''
> > +    #include <bpf/libbpf.h>
> > +    int main(void)
> > +    {
> > +      bpf_object__destroy_skeleton(NULL);
>
>
> Do we need to test whether the bpf can do mmap() here?
>
> Thanks
>
>
> > +      return 0;
> > +    }''', dependencies: libbpf)
> > +    libbpf = not_found
> > +    if get_option('bpf').enabled()
> > +      error('libbpf skeleton test failed')
> > +    else
> > +      warning('libbpf skeleton test failed, disabling')
> > +    endif
> >     endif
> >   endif
> >
> > @@ -2423,6 +2426,14 @@ if have_tools
> >                  dependencies: [authz, crypto, io, qom, qemuutil,
> >                                 libcap_ng, mpathpersist],
> >                  install: true)
> > +
> > +    if libbpf.found()
> > +        executable('qemu-ebpf-rss-helper', files(
> > +                   'ebpf/qemu-ebpf-rss-helper.c', 'ebpf/ebpf_rss.c'),
> > +                   dependencies: [qemuutil, libbpf, glib],
> > +                   install: true,
> > +                   install_dir: get_option('libexecdir'))
> > +    endif
> >     endif
> >
> >     if 'CONFIG_IVSHMEM' in config_host
>
Jason Wang Sept. 1, 2021, 6:37 a.m. UTC | #4
在 2021/8/26 上午2:24, Andrew Melnichenko 写道:
> Hi,
>
>     I wonder if this can be done as helper for TAP/bridge.
>
> Well, it does already, libvirt may create TAP device and pass it in 
> command line or using getfd qmp command.
>
>     E.g it's the qemu to launch those helper with set-uid.
>
>     Then libvirt won't even need to care about that?
>
> Yea, we may think about this routine in the future as a fallback.
>
>     Do we need to test whether the bpf can do mmap() here?
>
> I'm not sure that it's required.


I think it's for backward compatibility.

E.g. the current code works without mmap(), and users will be surprised that it
won't work after upgrading their qemu.

Thanks



>
> On Fri, Aug 20, 2021 at 6:41 AM Jason Wang <jasowang@redhat.com 
> <mailto:jasowang@redhat.com>> wrote:
>
>
>     在 2021/7/13 下午11:37, Andrew Melnychenko 写道:
>     > Helper program. Loads eBPF RSS program and maps and passes them
>     through unix socket.
>     > Libvirt may launch this helper and pass eBPF fds to qemu virtio-net.
>
>
>     I wonder if this can be done as helper for TAP/bridge.
>
>     E.g it's the qemu to launch those helper with set-uid.
>
>     Then libvirt won't even need to care about that?
>
>
>     > Also, libbpf dependency now exclusively for Linux.
>     > Libbpf is used for eBPF RSS steering, which is supported only by
>     Linux TAP.
>     > There is no reason yet to build eBPF loader and helper for non
>     Linux systems,
>     > even if libbpf is present.
>     >
>     > Signed-off-by: Andrew Melnychenko <andrew@daynix.com
>     <mailto:andrew@daynix.com>>
>     > ---
>     >   ebpf/qemu-ebpf-rss-helper.c | 130
>     ++++++++++++++++++++++++++++++++++++
>     >   meson.build                 |  37 ++++++----
>     >   2 files changed, 154 insertions(+), 13 deletions(-)
>     >   create mode 100644 ebpf/qemu-ebpf-rss-helper.c
>     >
>     > diff --git a/ebpf/qemu-ebpf-rss-helper.c
>     b/ebpf/qemu-ebpf-rss-helper.c
>     > new file mode 100644
>     > index 0000000000..fe68758f57
>     > --- /dev/null
>     > +++ b/ebpf/qemu-ebpf-rss-helper.c
>     > @@ -0,0 +1,130 @@
>     > +/*
>     > + * eBPF RSS Helper
>     > + *
>     > + * Developed by Daynix Computing LTD (http://www.daynix.com
>     <http://www.daynix.com>)
>     > + *
>     > + * Authors:
>     > + *  Andrew Melnychenko <andrew@daynix.com
>     <mailto:andrew@daynix.com>>
>     > + *
>     > + * This work is licensed under the terms of the GNU GPL,
>     version 2.  See
>     > + * the COPYING file in the top-level directory.
>     > + *
>     > + * Description: This is helper program for libvirtd.
>     > + *              It loads eBPF RSS program and passes fds
>     through unix socket.
>     > + *              Built by meson, target - 'qemu-ebpf-rss-helper'.
>     > + */
>     > +
>     > +#include <stdio.h>
>     > +#include <stdint.h>
>     > +#include <stdlib.h>
>     > +#include <stdbool.h>
>     > +#include <getopt.h>
>     > +#include <memory.h>
>     > +#include <errno.h>
>     > +#include <sys/socket.h>
>     > +
>     > +#include "ebpf_rss.h"
>     > +
>     > +#include "qemu-helper-stamp.h"
>     > +
>     > +void QEMU_HELPER_STAMP(void) {}
>     > +
>     > +static int send_fds(int socket, int *fds, int n)
>     > +{
>     > +    struct msghdr msg = {};
>     > +    struct cmsghdr *cmsg = NULL;
>     > +    char buf[CMSG_SPACE(n * sizeof(int))];
>     > +    char dummy_buffer = 0;
>     > +    struct iovec io = { .iov_base = &dummy_buffer,
>     > +                        .iov_len = sizeof(dummy_buffer) };
>     > +
>     > +    memset(buf, 0, sizeof(buf));
>     > +
>     > +    msg.msg_iov = &io;
>     > +    msg.msg_iovlen = 1;
>     > +    msg.msg_control = buf;
>     > +    msg.msg_controllen = sizeof(buf);
>     > +
>     > +    cmsg = CMSG_FIRSTHDR(&msg);
>     > +    cmsg->cmsg_level = SOL_SOCKET;
>     > +    cmsg->cmsg_type = SCM_RIGHTS;
>     > +    cmsg->cmsg_len = CMSG_LEN(n * sizeof(int));
>     > +
>     > +    memcpy(CMSG_DATA(cmsg), fds, n * sizeof(int));
>     > +
>     > +    return sendmsg(socket, &msg, 0);
>     > +}
>     > +
>     > +static void print_help_and_exit(const char *prog, int exitcode)
>     > +{
>     > +    fprintf(stderr, "%s - load eBPF RSS program for qemu and
>     pass eBPF fds"
>     > +            " through unix socket.\n", prog);
>     > +    fprintf(stderr, "\t--fd <num>, -f <num> - unix socket file
>     descriptor"
>     > +            " used to pass eBPF fds.\n");
>     > +    fprintf(stderr, "\t--help, -h - this help.\n");
>     > +    exit(exitcode);
>     > +}
>     > +
>     > +int main(int argc, char **argv)
>     > +{
>     > +    char *fd_string = NULL;
>     > +    int unix_fd = 0;
>     > +    struct EBPFRSSContext ctx = {};
>     > +    int fds[EBPF_RSS_MAX_FDS] = {};
>     > +    int ret = -1;
>     > +
>     > +    for (;;) {
>     > +        int c;
>     > +        static struct option long_options[] = {
>     > +                {"help",  no_argument, 0, 'h'},
>     > +                {"fd",  required_argument, 0, 'f'},
>     > +                {0, 0, 0, 0}
>     > +        };
>     > +        c = getopt_long(argc, argv, "hf:",
>     > +                long_options, NULL);
>     > +
>     > +        if (c == -1) {
>     > +            break;
>     > +        }
>     > +
>     > +        switch (c) {
>     > +        case 'f':
>     > +            fd_string = optarg;
>     > +            break;
>     > +        case 'h':
>     > +        default:
>     > +            print_help_and_exit(argv[0],
>     > +                    c == 'h' ? EXIT_SUCCESS : EXIT_FAILURE);
>     > +        }
>     > +    }
>     > +
>     > +    if (!fd_string) {
>     > +        fprintf(stderr, "Unix file descriptor not present.\n");
>     > +        print_help_and_exit(argv[0], EXIT_FAILURE);
>     > +    }
>     > +
>     > +    unix_fd = atoi(fd_string);
>     > +
>     > +    if (!unix_fd) {
>     > +        fprintf(stderr, "Unix file descriptor is invalid.\n");
>     > +        return EXIT_FAILURE;
>     > +    }
>     > +
>     > +    ebpf_rss_init(&ctx);
>     > +    if (!ebpf_rss_load(&ctx)) {
>     > +        fprintf(stderr, "Can't load ebpf.\n");
>     > +        return EXIT_FAILURE;
>     > +    }
>     > +    fds[0] = ctx.program_fd;
>     > +    fds[1] = ctx.map_configuration;
>     > +
>     > +    ret = send_fds(unix_fd, fds, EBPF_RSS_MAX_FDS);
>     > +    if (ret < 0) {
>     > +        fprintf(stderr, "Issue while sending fds: %s.\n",
>     strerror(errno));
>     > +    }
>     > +
>     > +    ebpf_rss_unload(&ctx);
>     > +
>     > +    return ret < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
>     > +}
>     > +
>     > diff --git a/meson.build b/meson.build
>     > index 257e51d91b..913aa1fee5 100644
>     > --- a/meson.build
>     > +++ b/meson.build
>     > @@ -1033,19 +1033,22 @@ if not get_option('fuse_lseek').disabled()
>     >   endif
>     >
>     >   # libbpf
>     > -libbpf = dependency('libbpf', required: get_option('bpf'),
>     method: 'pkg-config')
>     > -if libbpf.found() and not cc.links('''
>     > -   #include <bpf/libbpf.h>
>     > -   int main(void)
>     > -   {
>     > -     bpf_object__destroy_skeleton(NULL);
>     > -     return 0;
>     > -   }''', dependencies: libbpf)
>     > -  libbpf = not_found
>     > -  if get_option('bpf').enabled()
>     > -    error('libbpf skeleton test failed')
>     > -  else
>     > -    warning('libbpf skeleton test failed, disabling')
>     > +libbpf = not_found
>     > +if targetos == 'linux'
>     > +  libbpf = dependency('libbpf', required: get_option('bpf'),
>     method: 'pkg-config')
>     > +  if libbpf.found() and not cc.links('''
>     > +    #include <bpf/libbpf.h>
>     > +    int main(void)
>     > +    {
>     > +      bpf_object__destroy_skeleton(NULL);
>
>
>     Do we need to test whether the bpf can do mmap() here?
>
>     Thanks
>
>
>     > +      return 0;
>     > +    }''', dependencies: libbpf)
>     > +    libbpf = not_found
>     > +    if get_option('bpf').enabled()
>     > +      error('libbpf skeleton test failed')
>     > +    else
>     > +      warning('libbpf skeleton test failed, disabling')
>     > +    endif
>     >     endif
>     >   endif
>     >
>     > @@ -2423,6 +2426,14 @@ if have_tools
>     >                  dependencies: [authz, crypto, io, qom, qemuutil,
>     >                                 libcap_ng, mpathpersist],
>     >                  install: true)
>     > +
>     > +    if libbpf.found()
>     > +        executable('qemu-ebpf-rss-helper', files(
>     > +                   'ebpf/qemu-ebpf-rss-helper.c',
>     'ebpf/ebpf_rss.c'),
>     > +                   dependencies: [qemuutil, libbpf, glib],
>     > +                   install: true,
>     > +                   install_dir: get_option('libexecdir'))
>     > +    endif
>     >     endif
>     >
>     >     if 'CONFIG_IVSHMEM' in config_host
>
Jason Wang Sept. 1, 2021, 6:42 a.m. UTC | #5
在 2021/8/31 上午1:07, Yuri Benditovich 写道:
> On Fri, Aug 20, 2021 at 6:41 AM Jason Wang <jasowang@redhat.com> wrote:
>>
>> 在 2021/7/13 下午11:37, Andrew Melnychenko 写道:
>>> Helper program. Loads eBPF RSS program and maps and passes them through unix socket.
>>> Libvirt may launch this helper and pass eBPF fds to qemu virtio-net.
>>
>> I wonder if this can be done as helper for TAP/bridge.
>>
>> E.g it's the qemu to launch those helper with set-uid.
>>
>> Then libvirt won't even need to care about that?
>>
> There are pros and cons for such a solution with set-uid.
>  From my point of view one of the cons is that set-uid is efficient
> only at install time so the coexistence of different qemu builds (and
> different helpers for each one) is kind of problematic.
> With the current solution this does not present any problem: the
> developer can have several different builds, each one automatically
> has its own helper and there is no conflict between these builds and
> between these builds and installed qemu package. Changing the
> 'emulator' in the libvirt profile automatically brings the proper
> helper to work.


I'm not sure I get you here. We can still have a default/sample helper to
make sure it works for different builds.

If we can avoid the involvement of libvirt, that would be better.

Thanks


>
>>> Also, libbpf dependency now exclusively for Linux.
>>> Libbpf is used for eBPF RSS steering, which is supported only by Linux TAP.
>>> There is no reason yet to build eBPF loader and helper for non Linux systems,
>>> even if libbpf is present.
>>>
>>> Signed-off-by: Andrew Melnychenko <andrew@daynix.com>
>>> ---
>>>    ebpf/qemu-ebpf-rss-helper.c | 130 ++++++++++++++++++++++++++++++++++++
>>>    meson.build                 |  37 ++++++----
>>>    2 files changed, 154 insertions(+), 13 deletions(-)
>>>    create mode 100644 ebpf/qemu-ebpf-rss-helper.c
>>>
>>> diff --git a/ebpf/qemu-ebpf-rss-helper.c b/ebpf/qemu-ebpf-rss-helper.c
>>> new file mode 100644
>>> index 0000000000..fe68758f57
>>> --- /dev/null
>>> +++ b/ebpf/qemu-ebpf-rss-helper.c
>>> @@ -0,0 +1,130 @@
>>> +/*
>>> + * eBPF RSS Helper
>>> + *
>>> + * Developed by Daynix Computing LTD (http://www.daynix.com)
>>> + *
>>> + * Authors:
>>> + *  Andrew Melnychenko <andrew@daynix.com>
>>> + *
>>> + * This work is licensed under the terms of the GNU GPL, version 2.  See
>>> + * the COPYING file in the top-level directory.
>>> + *
>>> + * Description: This is helper program for libvirtd.
>>> + *              It loads eBPF RSS program and passes fds through unix socket.
>>> + *              Built by meson, target - 'qemu-ebpf-rss-helper'.
>>> + */
>>> +
>>> +#include <stdio.h>
>>> +#include <stdint.h>
>>> +#include <stdlib.h>
>>> +#include <stdbool.h>
>>> +#include <getopt.h>
>>> +#include <memory.h>
>>> +#include <errno.h>
>>> +#include <sys/socket.h>
>>> +
>>> +#include "ebpf_rss.h"
>>> +
>>> +#include "qemu-helper-stamp.h"
>>> +
>>> +void QEMU_HELPER_STAMP(void) {}
>>> +
>>> +static int send_fds(int socket, int *fds, int n)
>>> +{
>>> +    struct msghdr msg = {};
>>> +    struct cmsghdr *cmsg = NULL;
>>> +    char buf[CMSG_SPACE(n * sizeof(int))];
>>> +    char dummy_buffer = 0;
>>> +    struct iovec io = { .iov_base = &dummy_buffer,
>>> +                        .iov_len = sizeof(dummy_buffer) };
>>> +
>>> +    memset(buf, 0, sizeof(buf));
>>> +
>>> +    msg.msg_iov = &io;
>>> +    msg.msg_iovlen = 1;
>>> +    msg.msg_control = buf;
>>> +    msg.msg_controllen = sizeof(buf);
>>> +
>>> +    cmsg = CMSG_FIRSTHDR(&msg);
>>> +    cmsg->cmsg_level = SOL_SOCKET;
>>> +    cmsg->cmsg_type = SCM_RIGHTS;
>>> +    cmsg->cmsg_len = CMSG_LEN(n * sizeof(int));
>>> +
>>> +    memcpy(CMSG_DATA(cmsg), fds, n * sizeof(int));
>>> +
>>> +    return sendmsg(socket, &msg, 0);
>>> +}
>>> +
>>> +static void print_help_and_exit(const char *prog, int exitcode)
>>> +{
>>> +    fprintf(stderr, "%s - load eBPF RSS program for qemu and pass eBPF fds"
>>> +            " through unix socket.\n", prog);
>>> +    fprintf(stderr, "\t--fd <num>, -f <num> - unix socket file descriptor"
>>> +            " used to pass eBPF fds.\n");
>>> +    fprintf(stderr, "\t--help, -h - this help.\n");
>>> +    exit(exitcode);
>>> +}
>>> +
>>> +int main(int argc, char **argv)
>>> +{
>>> +    char *fd_string = NULL;
>>> +    int unix_fd = 0;
>>> +    struct EBPFRSSContext ctx = {};
>>> +    int fds[EBPF_RSS_MAX_FDS] = {};
>>> +    int ret = -1;
>>> +
>>> +    for (;;) {
>>> +        int c;
>>> +        static struct option long_options[] = {
>>> +                {"help",  no_argument, 0, 'h'},
>>> +                {"fd",  required_argument, 0, 'f'},
>>> +                {0, 0, 0, 0}
>>> +        };
>>> +        c = getopt_long(argc, argv, "hf:",
>>> +                long_options, NULL);
>>> +
>>> +        if (c == -1) {
>>> +            break;
>>> +        }
>>> +
>>> +        switch (c) {
>>> +        case 'f':
>>> +            fd_string = optarg;
>>> +            break;
>>> +        case 'h':
>>> +        default:
>>> +            print_help_and_exit(argv[0],
>>> +                    c == 'h' ? EXIT_SUCCESS : EXIT_FAILURE);
>>> +        }
>>> +    }
>>> +
>>> +    if (!fd_string) {
>>> +        fprintf(stderr, "Unix file descriptor not present.\n");
>>> +        print_help_and_exit(argv[0], EXIT_FAILURE);
>>> +    }
>>> +
>>> +    unix_fd = atoi(fd_string);
>>> +
>>> +    if (!unix_fd) {
>>> +        fprintf(stderr, "Unix file descriptor is invalid.\n");
>>> +        return EXIT_FAILURE;
>>> +    }
>>> +
>>> +    ebpf_rss_init(&ctx);
>>> +    if (!ebpf_rss_load(&ctx)) {
>>> +        fprintf(stderr, "Can't load ebpf.\n");
>>> +        return EXIT_FAILURE;
>>> +    }
>>> +    fds[0] = ctx.program_fd;
>>> +    fds[1] = ctx.map_configuration;
>>> +
>>> +    ret = send_fds(unix_fd, fds, EBPF_RSS_MAX_FDS);
>>> +    if (ret < 0) {
>>> +        fprintf(stderr, "Issue while sending fds: %s.\n", strerror(errno));
>>> +    }
>>> +
>>> +    ebpf_rss_unload(&ctx);
>>> +
>>> +    return ret < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
>>> +}
>>> +
>>> diff --git a/meson.build b/meson.build
>>> index 257e51d91b..913aa1fee5 100644
>>> --- a/meson.build
>>> +++ b/meson.build
>>> @@ -1033,19 +1033,22 @@ if not get_option('fuse_lseek').disabled()
>>>    endif
>>>
>>>    # libbpf
>>> -libbpf = dependency('libbpf', required: get_option('bpf'), method: 'pkg-config')
>>> -if libbpf.found() and not cc.links('''
>>> -   #include <bpf/libbpf.h>
>>> -   int main(void)
>>> -   {
>>> -     bpf_object__destroy_skeleton(NULL);
>>> -     return 0;
>>> -   }''', dependencies: libbpf)
>>> -  libbpf = not_found
>>> -  if get_option('bpf').enabled()
>>> -    error('libbpf skeleton test failed')
>>> -  else
>>> -    warning('libbpf skeleton test failed, disabling')
>>> +libbpf = not_found
>>> +if targetos == 'linux'
>>> +  libbpf = dependency('libbpf', required: get_option('bpf'), method: 'pkg-config')
>>> +  if libbpf.found() and not cc.links('''
>>> +    #include <bpf/libbpf.h>
>>> +    int main(void)
>>> +    {
>>> +      bpf_object__destroy_skeleton(NULL);
>>
>> Do we need to test whether the bpf can do mmap() here?
>>
>> Thanks
>>
>>
>>> +      return 0;
>>> +    }''', dependencies: libbpf)
>>> +    libbpf = not_found
>>> +    if get_option('bpf').enabled()
>>> +      error('libbpf skeleton test failed')
>>> +    else
>>> +      warning('libbpf skeleton test failed, disabling')
>>> +    endif
>>>      endif
>>>    endif
>>>
>>> @@ -2423,6 +2426,14 @@ if have_tools
>>>                   dependencies: [authz, crypto, io, qom, qemuutil,
>>>                                  libcap_ng, mpathpersist],
>>>                   install: true)
>>> +
>>> +    if libbpf.found()
>>> +        executable('qemu-ebpf-rss-helper', files(
>>> +                   'ebpf/qemu-ebpf-rss-helper.c', 'ebpf/ebpf_rss.c'),
>>> +                   dependencies: [qemuutil, libbpf, glib],
>>> +                   install: true,
>>> +                   install_dir: get_option('libexecdir'))
>>> +    endif
>>>      endif
>>>
>>>      if 'CONFIG_IVSHMEM' in config_host
Andrew Melnichenko Sept. 6, 2021, 3:50 p.m. UTC | #6
Hi,

> I think it's for back-compatibility.
>
> E.g current codes works without mmap(), and user will surprise that it
> wont' work after upgrading their qemu.
>
Well, the current code would require additional capabilities with
"kernel.unprivileged_bpf_disabled=1", which is possible on Red Hat
systems.
Technically, we could have an mmap test which shows that mmap for
BPF_MAP_TYPE_ARRAY works, but for the target system we will only know that
at runtime.
If I'm not mistaken, mmap for BPF_MAP_TYPE_ARRAY was added before kernel
5.4, and our bpf program requires kernel 5.8+.
So there is no reason to add a bpf() map update as a fallback for mmap().

On Wed, Sep 1, 2021 at 9:42 AM Jason Wang <jasowang@redhat.com> wrote:

>
> 在 2021/8/31 上午1:07, Yuri Benditovich 写道:
> > On Fri, Aug 20, 2021 at 6:41 AM Jason Wang <jasowang@redhat.com> wrote:
> >>
> >> 在 2021/7/13 下午11:37, Andrew Melnychenko 写道:
> >>> Helper program. Loads eBPF RSS program and maps and passes them
> through unix socket.
> >>> Libvirt may launch this helper and pass eBPF fds to qemu virtio-net.
> >>
> >> I wonder if this can be done as helper for TAP/bridge.
> >>
> >> E.g it's the qemu to launch those helper with set-uid.
> >>
> >> Then libvirt won't even need to care about that?
> >>
> > There are pros and cons for such a solution with set-uid.
> >  From my point of view one of the cons is that set-uid is efficient
> > only at install time so the coexistence of different qemu builds (and
> > different helpers for each one) is kind of problematic.
> > With the current solution this does not present any problem: the
> > developer can have several different builds, each one automatically
> > has its own helper and there is no conflict between these builds and
> > between these builds and installed qemu package. Changing the
> > 'emulator' in the libvirt profile automatically brings the proper
> > helper to work.
>
>
> I'm not sure I get you here. We can still have default/sample helper to
> make sure it works for different builds.
>
> If we can avoid the involvement of libvirt, that would be better.
>
> Thanks
>
>
> >
> >>> Also, libbpf dependency now exclusively for Linux.
> >>> Libbpf is used for eBPF RSS steering, which is supported only by Linux
> TAP.
> >>> There is no reason yet to build eBPF loader and helper for non Linux
> systems,
> >>> even if libbpf is present.
> >>>
> >>> Signed-off-by: Andrew Melnychenko <andrew@daynix.com>
> >>> ---
> >>>    ebpf/qemu-ebpf-rss-helper.c | 130
> ++++++++++++++++++++++++++++++++++++
> >>>    meson.build                 |  37 ++++++----
> >>>    2 files changed, 154 insertions(+), 13 deletions(-)
> >>>    create mode 100644 ebpf/qemu-ebpf-rss-helper.c
> >>>
> >>> diff --git a/ebpf/qemu-ebpf-rss-helper.c b/ebpf/qemu-ebpf-rss-helper.c
> >>> new file mode 100644
> >>> index 0000000000..fe68758f57
> >>> --- /dev/null
> >>> +++ b/ebpf/qemu-ebpf-rss-helper.c
> >>> @@ -0,0 +1,130 @@
> >>> +/*
> >>> + * eBPF RSS Helper
> >>> + *
> >>> + * Developed by Daynix Computing LTD (http://www.daynix.com)
> >>> + *
> >>> + * Authors:
> >>> + *  Andrew Melnychenko <andrew@daynix.com>
> >>> + *
> >>> + * This work is licensed under the terms of the GNU GPL, version 2.
> See
> >>> + * the COPYING file in the top-level directory.
> >>> + *
> >>> + * Description: This is helper program for libvirtd.
> >>> + *              It loads eBPF RSS program and passes fds through unix
> socket.
> >>> + *              Built by meson, target - 'qemu-ebpf-rss-helper'.
> >>> + */
> >>> +
> >>> +#include <stdio.h>
> >>> +#include <stdint.h>
> >>> +#include <stdlib.h>
> >>> +#include <stdbool.h>
> >>> +#include <getopt.h>
> >>> +#include <memory.h>
> >>> +#include <errno.h>
> >>> +#include <sys/socket.h>
> >>> +
> >>> +#include "ebpf_rss.h"
> >>> +
> >>> +#include "qemu-helper-stamp.h"
> >>> +
> >>> +void QEMU_HELPER_STAMP(void) {}
> >>> +
> >>> +static int send_fds(int socket, int *fds, int n)
> >>> +{
> >>> +    struct msghdr msg = {};
> >>> +    struct cmsghdr *cmsg = NULL;
> >>> +    char buf[CMSG_SPACE(n * sizeof(int))];
> >>> +    char dummy_buffer = 0;
> >>> +    struct iovec io = { .iov_base = &dummy_buffer,
> >>> +                        .iov_len = sizeof(dummy_buffer) };
> >>> +
> >>> +    memset(buf, 0, sizeof(buf));
> >>> +
> >>> +    msg.msg_iov = &io;
> >>> +    msg.msg_iovlen = 1;
> >>> +    msg.msg_control = buf;
> >>> +    msg.msg_controllen = sizeof(buf);
> >>> +
> >>> +    cmsg = CMSG_FIRSTHDR(&msg);
> >>> +    cmsg->cmsg_level = SOL_SOCKET;
> >>> +    cmsg->cmsg_type = SCM_RIGHTS;
> >>> +    cmsg->cmsg_len = CMSG_LEN(n * sizeof(int));
> >>> +
> >>> +    memcpy(CMSG_DATA(cmsg), fds, n * sizeof(int));
> >>> +
> >>> +    return sendmsg(socket, &msg, 0);
> >>> +}
> >>> +
> >>> +static void print_help_and_exit(const char *prog, int exitcode)
> >>> +{
> >>> +    fprintf(stderr, "%s - load eBPF RSS program for qemu and pass
> eBPF fds"
> >>> +            " through unix socket.\n", prog);
> >>> +    fprintf(stderr, "\t--fd <num>, -f <num> - unix socket file
> descriptor"
> >>> +            " used to pass eBPF fds.\n");
> >>> +    fprintf(stderr, "\t--help, -h - this help.\n");
> >>> +    exit(exitcode);
> >>> +}
> >>> +
> >>> +int main(int argc, char **argv)
> >>> +{
> >>> +    char *fd_string = NULL;
> >>> +    int unix_fd = 0;
> >>> +    struct EBPFRSSContext ctx = {};
> >>> +    int fds[EBPF_RSS_MAX_FDS] = {};
> >>> +    int ret = -1;
> >>> +
> >>> +    for (;;) {
> >>> +        int c;
> >>> +        static struct option long_options[] = {
> >>> +                {"help",  no_argument, 0, 'h'},
> >>> +                {"fd",  required_argument, 0, 'f'},
> >>> +                {0, 0, 0, 0}
> >>> +        };
> >>> +        c = getopt_long(argc, argv, "hf:",
> >>> +                long_options, NULL);
> >>> +
> >>> +        if (c == -1) {
> >>> +            break;
> >>> +        }
> >>> +
> >>> +        switch (c) {
> >>> +        case 'f':
> >>> +            fd_string = optarg;
> >>> +            break;
> >>> +        case 'h':
> >>> +        default:
> >>> +            print_help_and_exit(argv[0],
> >>> +                    c == 'h' ? EXIT_SUCCESS : EXIT_FAILURE);
> >>> +        }
> >>> +    }
> >>> +
> >>> +    if (!fd_string) {
> >>> +        fprintf(stderr, "Unix file descriptor not present.\n");
> >>> +        print_help_and_exit(argv[0], EXIT_FAILURE);
> >>> +    }
> >>> +
> >>> +    unix_fd = atoi(fd_string);
> >>> +
> >>> +    if (!unix_fd) {
> >>> +        fprintf(stderr, "Unix file descriptor is invalid.\n");
> >>> +        return EXIT_FAILURE;
> >>> +    }
> >>> +
> >>> +    ebpf_rss_init(&ctx);
> >>> +    if (!ebpf_rss_load(&ctx)) {
> >>> +        fprintf(stderr, "Can't load ebpf.\n");
> >>> +        return EXIT_FAILURE;
> >>> +    }
> >>> +    fds[0] = ctx.program_fd;
> >>> +    fds[1] = ctx.map_configuration;
> >>> +
> >>> +    ret = send_fds(unix_fd, fds, EBPF_RSS_MAX_FDS);
> >>> +    if (ret < 0) {
> >>> +        fprintf(stderr, "Issue while sending fds: %s.\n",
> strerror(errno));
> >>> +    }
> >>> +
> >>> +    ebpf_rss_unload(&ctx);
> >>> +
> >>> +    return ret < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
> >>> +}
> >>> +
> >>> diff --git a/meson.build b/meson.build
> >>> index 257e51d91b..913aa1fee5 100644
> >>> --- a/meson.build
> >>> +++ b/meson.build
> >>> @@ -1033,19 +1033,22 @@ if not get_option('fuse_lseek').disabled()
> >>>    endif
> >>>
> >>>    # libbpf
> >>> -libbpf = dependency('libbpf', required: get_option('bpf'), method:
> 'pkg-config')
> >>> -if libbpf.found() and not cc.links('''
> >>> -   #include <bpf/libbpf.h>
> >>> -   int main(void)
> >>> -   {
> >>> -     bpf_object__destroy_skeleton(NULL);
> >>> -     return 0;
> >>> -   }''', dependencies: libbpf)
> >>> -  libbpf = not_found
> >>> -  if get_option('bpf').enabled()
> >>> -    error('libbpf skeleton test failed')
> >>> -  else
> >>> -    warning('libbpf skeleton test failed, disabling')
> >>> +libbpf = not_found
> >>> +if targetos == 'linux'
> >>> +  libbpf = dependency('libbpf', required: get_option('bpf'), method:
> 'pkg-config')
> >>> +  if libbpf.found() and not cc.links('''
> >>> +    #include <bpf/libbpf.h>
> >>> +    int main(void)
> >>> +    {
> >>> +      bpf_object__destroy_skeleton(NULL);
> >>
> >> Do we need to test whether the bpf can do mmap() here?
> >>
> >> Thanks
> >>
> >>
> >>> +      return 0;
> >>> +    }''', dependencies: libbpf)
> >>> +    libbpf = not_found
> >>> +    if get_option('bpf').enabled()
> >>> +      error('libbpf skeleton test failed')
> >>> +    else
> >>> +      warning('libbpf skeleton test failed, disabling')
> >>> +    endif
> >>>      endif
> >>>    endif
> >>>
> >>> @@ -2423,6 +2426,14 @@ if have_tools
> >>>                   dependencies: [authz, crypto, io, qom, qemuutil,
> >>>                                  libcap_ng, mpathpersist],
> >>>                   install: true)
> >>> +
> >>> +    if libbpf.found()
> >>> +        executable('qemu-ebpf-rss-helper', files(
> >>> +                   'ebpf/qemu-ebpf-rss-helper.c', 'ebpf/ebpf_rss.c'),
> >>> +                   dependencies: [qemuutil, libbpf, glib],
> >>> +                   install: true,
> >>> +                   install_dir: get_option('libexecdir'))
> >>> +    endif
> >>>      endif
> >>>
> >>>      if 'CONFIG_IVSHMEM' in config_host
>
>
Jason Wang Sept. 7, 2021, 3:22 a.m. UTC | #7
On Mon, Sep 6, 2021 at 11:50 PM Andrew Melnichenko <andrew@daynix.com> wrote:
>
> Hi,
>>
>> I think it's for back-compatibility.
>>
>> E.g current codes works without mmap(), and user will surprise that it
>> wont' work after upgrading their qemu.
>
> Well, the current code would require additional capabilities with "kernel.unprivileged_bpf_disabled=1", which may be possible on RedHat systems.
> Technically we may have mmap test which will show that mmap for BPF_MAP_TYPE_ARRAY works, but on the target system, we will know it only in runtime.
> If I'm not mistaken, mmap for BPF_MAP_TYPE_ARRAY was added before kernel 5.4 and our bpf program requires kernel 5.8+.

Ok, if this is the case, please explain this in the commit log.

Btw, any reason that 5.8 is required for our bpf program?

Thanks

> So, there are no reasons to add bpf() update map as a fallback for mmap().
>
> On Wed, Sep 1, 2021 at 9:42 AM Jason Wang <jasowang@redhat.com> wrote:
>>
>>
>> 在 2021/8/31 上午1:07, Yuri Benditovich 写道:
>> > On Fri, Aug 20, 2021 at 6:41 AM Jason Wang <jasowang@redhat.com> wrote:
>> >>
>> >> 在 2021/7/13 下午11:37, Andrew Melnychenko 写道:
>> >>> Helper program. Loads eBPF RSS program and maps and passes them through unix socket.
>> >>> Libvirt may launch this helper and pass eBPF fds to qemu virtio-net.
>> >>
>> >> I wonder if this can be done as helper for TAP/bridge.
>> >>
>> >> E.g it's the qemu to launch those helper with set-uid.
>> >>
>> >> Then libvirt won't even need to care about that?
>> >>
>> > There are pros and cons for such a solution with set-uid.
>> >  From my point of view one of the cons is that set-uid is efficient
>> > only at install time so the coexistence of different qemu builds (and
>> > different helpers for each one) is kind of problematic.
>> > With the current solution this does not present any problem: the
>> > developer can have several different builds, each one automatically
>> > has its own helper and there is no conflict between these builds and
>> > between these builds and installed qemu package. Changing the
>> > 'emulator' in the libvirt profile automatically brings the proper
>> > helper to work.
>>
>>
>> I'm not sure I get you here. We can still have default/sample helper to
>> make sure it works for different builds.
>>
>> If we can avoid the involvement of libvirt, that would be better.
>>
>> Thanks
>>
>>
>> >
>> >>> Also, libbpf dependency now exclusively for Linux.
>> >>> Libbpf is used for eBPF RSS steering, which is supported only by Linux TAP.
>> >>> There is no reason yet to build eBPF loader and helper for non Linux systems,
>> >>> even if libbpf is present.
>> >>>
>> >>> Signed-off-by: Andrew Melnychenko <andrew@daynix.com>
>> >>> ---
>> >>>    ebpf/qemu-ebpf-rss-helper.c | 130 ++++++++++++++++++++++++++++++++++++
>> >>>    meson.build                 |  37 ++++++----
>> >>>    2 files changed, 154 insertions(+), 13 deletions(-)
>> >>>    create mode 100644 ebpf/qemu-ebpf-rss-helper.c
>> >>>
>> >>> diff --git a/ebpf/qemu-ebpf-rss-helper.c b/ebpf/qemu-ebpf-rss-helper.c
>> >>> new file mode 100644
>> >>> index 0000000000..fe68758f57
>> >>> --- /dev/null
>> >>> +++ b/ebpf/qemu-ebpf-rss-helper.c
>> >>> @@ -0,0 +1,130 @@
>> >>> +/*
>> >>> + * eBPF RSS Helper
>> >>> + *
>> >>> + * Developed by Daynix Computing LTD (http://www.daynix.com)
>> >>> + *
>> >>> + * Authors:
>> >>> + *  Andrew Melnychenko <andrew@daynix.com>
>> >>> + *
>> >>> + * This work is licensed under the terms of the GNU GPL, version 2.  See
>> >>> + * the COPYING file in the top-level directory.
>> >>> + *
>> >>> + * Description: This is helper program for libvirtd.
>> >>> + *              It loads eBPF RSS program and passes fds through unix socket.
>> >>> + *              Built by meson, target - 'qemu-ebpf-rss-helper'.
>> >>> + */
>> >>> +
>> >>> +#include <stdio.h>
>> >>> +#include <stdint.h>
>> >>> +#include <stdlib.h>
>> >>> +#include <stdbool.h>
>> >>> +#include <getopt.h>
>> >>> +#include <memory.h>
>> >>> +#include <errno.h>
>> >>> +#include <sys/socket.h>
>> >>> +
>> >>> +#include "ebpf_rss.h"
>> >>> +
>> >>> +#include "qemu-helper-stamp.h"
>> >>> +
>> >>> +void QEMU_HELPER_STAMP(void) {}
>> >>> +
>> >>> +static int send_fds(int socket, int *fds, int n)
>> >>> +{
>> >>> +    struct msghdr msg = {};
>> >>> +    struct cmsghdr *cmsg = NULL;
>> >>> +    char buf[CMSG_SPACE(n * sizeof(int))];
>> >>> +    char dummy_buffer = 0;
>> >>> +    struct iovec io = { .iov_base = &dummy_buffer,
>> >>> +                        .iov_len = sizeof(dummy_buffer) };
>> >>> +
>> >>> +    memset(buf, 0, sizeof(buf));
>> >>> +
>> >>> +    msg.msg_iov = &io;
>> >>> +    msg.msg_iovlen = 1;
>> >>> +    msg.msg_control = buf;
>> >>> +    msg.msg_controllen = sizeof(buf);
>> >>> +
>> >>> +    cmsg = CMSG_FIRSTHDR(&msg);
>> >>> +    cmsg->cmsg_level = SOL_SOCKET;
>> >>> +    cmsg->cmsg_type = SCM_RIGHTS;
>> >>> +    cmsg->cmsg_len = CMSG_LEN(n * sizeof(int));
>> >>> +
>> >>> +    memcpy(CMSG_DATA(cmsg), fds, n * sizeof(int));
>> >>> +
>> >>> +    return sendmsg(socket, &msg, 0);
>> >>> +}
>> >>> +
>> >>> +static void print_help_and_exit(const char *prog, int exitcode)
>> >>> +{
>> >>> +    fprintf(stderr, "%s - load eBPF RSS program for qemu and pass eBPF fds"
>> >>> +            " through unix socket.\n", prog);
>> >>> +    fprintf(stderr, "\t--fd <num>, -f <num> - unix socket file descriptor"
>> >>> +            " used to pass eBPF fds.\n");
>> >>> +    fprintf(stderr, "\t--help, -h - this help.\n");
>> >>> +    exit(exitcode);
>> >>> +}
>> >>> +
>> >>> +int main(int argc, char **argv)
>> >>> +{
>> >>> +    char *fd_string = NULL;
>> >>> +    int unix_fd = 0;
>> >>> +    struct EBPFRSSContext ctx = {};
>> >>> +    int fds[EBPF_RSS_MAX_FDS] = {};
>> >>> +    int ret = -1;
>> >>> +
>> >>> +    for (;;) {
>> >>> +        int c;
>> >>> +        static struct option long_options[] = {
>> >>> +                {"help",  no_argument, 0, 'h'},
>> >>> +                {"fd",  required_argument, 0, 'f'},
>> >>> +                {0, 0, 0, 0}
>> >>> +        };
>> >>> +        c = getopt_long(argc, argv, "hf:",
>> >>> +                long_options, NULL);
>> >>> +
>> >>> +        if (c == -1) {
>> >>> +            break;
>> >>> +        }
>> >>> +
>> >>> +        switch (c) {
>> >>> +        case 'f':
>> >>> +            fd_string = optarg;
>> >>> +            break;
>> >>> +        case 'h':
>> >>> +        default:
>> >>> +            print_help_and_exit(argv[0],
>> >>> +                    c == 'h' ? EXIT_SUCCESS : EXIT_FAILURE);
>> >>> +        }
>> >>> +    }
>> >>> +
>> >>> +    if (!fd_string) {
>> >>> +        fprintf(stderr, "Unix file descriptor not present.\n");
>> >>> +        print_help_and_exit(argv[0], EXIT_FAILURE);
>> >>> +    }
>> >>> +
>> >>> +    unix_fd = atoi(fd_string);
>> >>> +
>> >>> +    if (!unix_fd) {
>> >>> +        fprintf(stderr, "Unix file descriptor is invalid.\n");
>> >>> +        return EXIT_FAILURE;
>> >>> +    }
>> >>> +
>> >>> +    ebpf_rss_init(&ctx);
>> >>> +    if (!ebpf_rss_load(&ctx)) {
>> >>> +        fprintf(stderr, "Can't load ebpf.\n");
>> >>> +        return EXIT_FAILURE;
>> >>> +    }
>> >>> +    fds[0] = ctx.program_fd;
>> >>> +    fds[1] = ctx.map_configuration;
>> >>> +
>> >>> +    ret = send_fds(unix_fd, fds, EBPF_RSS_MAX_FDS);
>> >>> +    if (ret < 0) {
>> >>> +        fprintf(stderr, "Issue while sending fds: %s.\n", strerror(errno));
>> >>> +    }
>> >>> +
>> >>> +    ebpf_rss_unload(&ctx);
>> >>> +
>> >>> +    return ret < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
>> >>> +}
>> >>> +
>> >>> diff --git a/meson.build b/meson.build
>> >>> index 257e51d91b..913aa1fee5 100644
>> >>> --- a/meson.build
>> >>> +++ b/meson.build
>> >>> @@ -1033,19 +1033,22 @@ if not get_option('fuse_lseek').disabled()
>> >>>    endif
>> >>>
>> >>>    # libbpf
>> >>> -libbpf = dependency('libbpf', required: get_option('bpf'), method: 'pkg-config')
>> >>> -if libbpf.found() and not cc.links('''
>> >>> -   #include <bpf/libbpf.h>
>> >>> -   int main(void)
>> >>> -   {
>> >>> -     bpf_object__destroy_skeleton(NULL);
>> >>> -     return 0;
>> >>> -   }''', dependencies: libbpf)
>> >>> -  libbpf = not_found
>> >>> -  if get_option('bpf').enabled()
>> >>> -    error('libbpf skeleton test failed')
>> >>> -  else
>> >>> -    warning('libbpf skeleton test failed, disabling')
>> >>> +libbpf = not_found
>> >>> +if targetos == 'linux'
>> >>> +  libbpf = dependency('libbpf', required: get_option('bpf'), method: 'pkg-config')
>> >>> +  if libbpf.found() and not cc.links('''
>> >>> +    #include <bpf/libbpf.h>
>> >>> +    int main(void)
>> >>> +    {
>> >>> +      bpf_object__destroy_skeleton(NULL);
>> >>
>> >> Do we need to test whether the bpf can do mmap() here?
>> >>
>> >> Thanks
>> >>
>> >>
>> >>> +      return 0;
>> >>> +    }''', dependencies: libbpf)
>> >>> +    libbpf = not_found
>> >>> +    if get_option('bpf').enabled()
>> >>> +      error('libbpf skeleton test failed')
>> >>> +    else
>> >>> +      warning('libbpf skeleton test failed, disabling')
>> >>> +    endif
>> >>>      endif
>> >>>    endif
>> >>>
>> >>> @@ -2423,6 +2426,14 @@ if have_tools
>> >>>                   dependencies: [authz, crypto, io, qom, qemuutil,
>> >>>                                  libcap_ng, mpathpersist],
>> >>>                   install: true)
>> >>> +
>> >>> +    if libbpf.found()
>> >>> +        executable('qemu-ebpf-rss-helper', files(
>> >>> +                   'ebpf/qemu-ebpf-rss-helper.c', 'ebpf/ebpf_rss.c'),
>> >>> +                   dependencies: [qemuutil, libbpf, glib],
>> >>> +                   install: true,
>> >>> +                   install_dir: get_option('libexecdir'))
>> >>> +    endif
>> >>>      endif
>> >>>
>> >>>      if 'CONFIG_IVSHMEM' in config_host
>>
Yuri Benditovich Sept. 7, 2021, 10:40 a.m. UTC | #8
On Wed, Sep 1, 2021 at 9:42 AM Jason Wang <jasowang@redhat.com> wrote:
>
>
> 在 2021/8/31 上午1:07, Yuri Benditovich 写道:
> > On Fri, Aug 20, 2021 at 6:41 AM Jason Wang <jasowang@redhat.com> wrote:
> >>
> >> 在 2021/7/13 下午11:37, Andrew Melnychenko 写道:
> >>> Helper program. Loads eBPF RSS program and maps and passes them through unix socket.
> >>> Libvirt may launch this helper and pass eBPF fds to qemu virtio-net.
> >>
> >> I wonder if this can be done as helper for TAP/bridge.
> >>
> >> E.g it's the qemu to launch those helper with set-uid.
> >>
> >> Then libvirt won't even need to care about that?
> >>
> > There are pros and cons for such a solution with set-uid.
> >  From my point of view one of the cons is that set-uid is efficient
> > only at install time so the coexistence of different qemu builds (and
> > different helpers for each one) is kind of problematic.
> > With the current solution this does not present any problem: the
> > developer can have several different builds, each one automatically
> > has its own helper and there is no conflict between these builds and
> > between these builds and installed qemu package. Changing the
> > 'emulator' in the libvirt profile automatically brings the proper
> > helper to work.
>
>
> I'm not sure I get you here. We can still have default/sample helper to
> make sure it works for different builds.
>
> If we can avoid the involvement of libvirt, that would be better.

Hi Jason,

Indeed, I did not get the idea. Could you please explain it in more
detail (as detailed as possible, to avoid future misunderstanding),
especially how exactly we can use set-uid and what the 'default' helper is?
We would also prefer to do everything from qemu, but we do not see how
we can do that.

Our main points (what should be addressed):
- qemu should be able to load ebpf and use the maps when it runs from
libvirt (without special caps) and standalone (with caps)
- it is possible that there are different qemu builds on the machine,
one of them might be installed; their ebpf programs might be different,
and so might the interface between qemu and ebpf (exact content of maps
and number of maps)
- qemu configures the RSS dynamically according to the commands
provided by the guest

Thanks in advance
Yuri

>
> Thanks
>
>
> >
> >>> Also, libbpf dependency now exclusively for Linux.
> >>> Libbpf is used for eBPF RSS steering, which is supported only by Linux TAP.
> >>> There is no reason yet to build eBPF loader and helper for non Linux systems,
> >>> even if libbpf is present.
> >>>
> >>> Signed-off-by: Andrew Melnychenko <andrew@daynix.com>
> >>> ---
> >>>    ebpf/qemu-ebpf-rss-helper.c | 130 ++++++++++++++++++++++++++++++++++++
> >>>    meson.build                 |  37 ++++++----
> >>>    2 files changed, 154 insertions(+), 13 deletions(-)
> >>>    create mode 100644 ebpf/qemu-ebpf-rss-helper.c
> >>>
> >>> diff --git a/ebpf/qemu-ebpf-rss-helper.c b/ebpf/qemu-ebpf-rss-helper.c
> >>> new file mode 100644
> >>> index 0000000000..fe68758f57
> >>> --- /dev/null
> >>> +++ b/ebpf/qemu-ebpf-rss-helper.c
> >>> @@ -0,0 +1,130 @@
> >>> +/*
> >>> + * eBPF RSS Helper
> >>> + *
> >>> + * Developed by Daynix Computing LTD (http://www.daynix.com)
> >>> + *
> >>> + * Authors:
> >>> + *  Andrew Melnychenko <andrew@daynix.com>
> >>> + *
> >>> + * This work is licensed under the terms of the GNU GPL, version 2.  See
> >>> + * the COPYING file in the top-level directory.
> >>> + *
> >>> + * Description: This is helper program for libvirtd.
> >>> + *              It loads eBPF RSS program and passes fds through unix socket.
> >>> + *              Built by meson, target - 'qemu-ebpf-rss-helper'.
> >>> + */
> >>> +
> >>> +#include <stdio.h>
> >>> +#include <stdint.h>
> >>> +#include <stdlib.h>
> >>> +#include <stdbool.h>
> >>> +#include <getopt.h>
> >>> +#include <memory.h>
> >>> +#include <errno.h>
> >>> +#include <sys/socket.h>
> >>> +
> >>> +#include "ebpf_rss.h"
> >>> +
> >>> +#include "qemu-helper-stamp.h"
> >>> +
> >>> +void QEMU_HELPER_STAMP(void) {}
> >>> +
> >>> +static int send_fds(int socket, int *fds, int n)
> >>> +{
> >>> +    struct msghdr msg = {};
> >>> +    struct cmsghdr *cmsg = NULL;
> >>> +    char buf[CMSG_SPACE(n * sizeof(int))];
> >>> +    char dummy_buffer = 0;
> >>> +    struct iovec io = { .iov_base = &dummy_buffer,
> >>> +                        .iov_len = sizeof(dummy_buffer) };
> >>> +
> >>> +    memset(buf, 0, sizeof(buf));
> >>> +
> >>> +    msg.msg_iov = &io;
> >>> +    msg.msg_iovlen = 1;
> >>> +    msg.msg_control = buf;
> >>> +    msg.msg_controllen = sizeof(buf);
> >>> +
> >>> +    cmsg = CMSG_FIRSTHDR(&msg);
> >>> +    cmsg->cmsg_level = SOL_SOCKET;
> >>> +    cmsg->cmsg_type = SCM_RIGHTS;
> >>> +    cmsg->cmsg_len = CMSG_LEN(n * sizeof(int));
> >>> +
> >>> +    memcpy(CMSG_DATA(cmsg), fds, n * sizeof(int));
> >>> +
> >>> +    return sendmsg(socket, &msg, 0);
> >>> +}
> >>> +
> >>> +static void print_help_and_exit(const char *prog, int exitcode)
> >>> +{
> >>> +    fprintf(stderr, "%s - load eBPF RSS program for qemu and pass eBPF fds"
> >>> +            " through unix socket.\n", prog);
> >>> +    fprintf(stderr, "\t--fd <num>, -f <num> - unix socket file descriptor"
> >>> +            " used to pass eBPF fds.\n");
> >>> +    fprintf(stderr, "\t--help, -h - this help.\n");
> >>> +    exit(exitcode);
> >>> +}
> >>> +
> >>> +int main(int argc, char **argv)
> >>> +{
> >>> +    char *fd_string = NULL;
> >>> +    int unix_fd = 0;
> >>> +    struct EBPFRSSContext ctx = {};
> >>> +    int fds[EBPF_RSS_MAX_FDS] = {};
> >>> +    int ret = -1;
> >>> +
> >>> +    for (;;) {
> >>> +        int c;
> >>> +        static struct option long_options[] = {
> >>> +                {"help",  no_argument, 0, 'h'},
> >>> +                {"fd",  required_argument, 0, 'f'},
> >>> +                {0, 0, 0, 0}
> >>> +        };
> >>> +        c = getopt_long(argc, argv, "hf:",
> >>> +                long_options, NULL);
> >>> +
> >>> +        if (c == -1) {
> >>> +            break;
> >>> +        }
> >>> +
> >>> +        switch (c) {
> >>> +        case 'f':
> >>> +            fd_string = optarg;
> >>> +            break;
> >>> +        case 'h':
> >>> +        default:
> >>> +            print_help_and_exit(argv[0],
> >>> +                    c == 'h' ? EXIT_SUCCESS : EXIT_FAILURE);
> >>> +        }
> >>> +    }
> >>> +
> >>> +    if (!fd_string) {
> >>> +        fprintf(stderr, "Unix file descriptor not present.\n");
> >>> +        print_help_and_exit(argv[0], EXIT_FAILURE);
> >>> +    }
> >>> +
> >>> +    unix_fd = atoi(fd_string);
> >>> +
> >>> +    if (!unix_fd) {
> >>> +        fprintf(stderr, "Unix file descriptor is invalid.\n");
> >>> +        return EXIT_FAILURE;
> >>> +    }
> >>> +
> >>> +    ebpf_rss_init(&ctx);
> >>> +    if (!ebpf_rss_load(&ctx)) {
> >>> +        fprintf(stderr, "Can't load ebpf.\n");
> >>> +        return EXIT_FAILURE;
> >>> +    }
> >>> +    fds[0] = ctx.program_fd;
> >>> +    fds[1] = ctx.map_configuration;
> >>> +
> >>> +    ret = send_fds(unix_fd, fds, EBPF_RSS_MAX_FDS);
> >>> +    if (ret < 0) {
> >>> +        fprintf(stderr, "Issue while sending fds: %s.\n", strerror(errno));
> >>> +    }
> >>> +
> >>> +    ebpf_rss_unload(&ctx);
> >>> +
> >>> +    return ret < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
> >>> +}
> >>> +
> >>> diff --git a/meson.build b/meson.build
> >>> index 257e51d91b..913aa1fee5 100644
> >>> --- a/meson.build
> >>> +++ b/meson.build
> >>> @@ -1033,19 +1033,22 @@ if not get_option('fuse_lseek').disabled()
> >>>    endif
> >>>
> >>>    # libbpf
> >>> -libbpf = dependency('libbpf', required: get_option('bpf'), method: 'pkg-config')
> >>> -if libbpf.found() and not cc.links('''
> >>> -   #include <bpf/libbpf.h>
> >>> -   int main(void)
> >>> -   {
> >>> -     bpf_object__destroy_skeleton(NULL);
> >>> -     return 0;
> >>> -   }''', dependencies: libbpf)
> >>> -  libbpf = not_found
> >>> -  if get_option('bpf').enabled()
> >>> -    error('libbpf skeleton test failed')
> >>> -  else
> >>> -    warning('libbpf skeleton test failed, disabling')
> >>> +libbpf = not_found
> >>> +if targetos == 'linux'
> >>> +  libbpf = dependency('libbpf', required: get_option('bpf'), method: 'pkg-config')
> >>> +  if libbpf.found() and not cc.links('''
> >>> +    #include <bpf/libbpf.h>
> >>> +    int main(void)
> >>> +    {
> >>> +      bpf_object__destroy_skeleton(NULL);
> >>
> >> Do we need to test whether the bpf can do mmap() here?
> >>
> >> Thanks
> >>
> >>
> >>> +      return 0;
> >>> +    }''', dependencies: libbpf)
> >>> +    libbpf = not_found
> >>> +    if get_option('bpf').enabled()
> >>> +      error('libbpf skeleton test failed')
> >>> +    else
> >>> +      warning('libbpf skeleton test failed, disabling')
> >>> +    endif
> >>>      endif
> >>>    endif
> >>>
> >>> @@ -2423,6 +2426,14 @@ if have_tools
> >>>                   dependencies: [authz, crypto, io, qom, qemuutil,
> >>>                                  libcap_ng, mpathpersist],
> >>>                   install: true)
> >>> +
> >>> +    if libbpf.found()
> >>> +        executable('qemu-ebpf-rss-helper', files(
> >>> +                   'ebpf/qemu-ebpf-rss-helper.c', 'ebpf/ebpf_rss.c'),
> >>> +                   dependencies: [qemuutil, libbpf, glib],
> >>> +                   install: true,
> >>> +                   install_dir: get_option('libexecdir'))
> >>> +    endif
> >>>      endif
> >>>
> >>>      if 'CONFIG_IVSHMEM' in config_host
>
Jason Wang Sept. 8, 2021, 3:45 a.m. UTC | #9
On Tue, Sep 7, 2021 at 6:40 PM Yuri Benditovich
<yuri.benditovich@daynix.com> wrote:
>
> On Wed, Sep 1, 2021 at 9:42 AM Jason Wang <jasowang@redhat.com> wrote:
> >
> >
> > 在 2021/8/31 上午1:07, Yuri Benditovich 写道:
> > > On Fri, Aug 20, 2021 at 6:41 AM Jason Wang <jasowang@redhat.com> wrote:
> > >>
> > >> 在 2021/7/13 下午11:37, Andrew Melnychenko 写道:
> > >>> Helper program. Loads eBPF RSS program and maps and passes them through unix socket.
> > >>> Libvirt may launch this helper and pass eBPF fds to qemu virtio-net.
> > >>
> > >> I wonder if this can be done as helper for TAP/bridge.
> > >>
> > >> E.g it's the qemu to launch those helper with set-uid.
> > >>
> > >> Then libvirt won't even need to care about that?
> > >>
> > > There are pros and cons for such a solution with set-uid.
> > >  From my point of view one of the cons is that set-uid is efficient
> > > only at install time so the coexistence of different qemu builds (and
> > > different helpers for each one) is kind of problematic.
> > > With the current solution this does not present any problem: the
> > > developer can have several different builds, each one automatically
> > > has its own helper and there is no conflict between these builds and
> > > between these builds and installed qemu package. Changing the
> > > 'emulator' in the libvirt profile automatically brings the proper
> > > helper to work.
> >
> >
> > I'm not sure I get you here. We can still have default/sample helper to
> > make sure it works for different builds.
> >
> > If we can avoid the involvement of libvirt, that would be better.
>
> Hi Jason,
>
> Indeed I did not get the idea, can you please explain it in more
> details (as detailed as possible to avoid future misunderstanding),
> especially how exactly we can use the set-uid and what is the 'default' helper.
> We also would prefer to do everything from qemu but we do not see how
> we can do that.


Something like:

1) -netdev tap,rss_helper=/path/to/name
2) having a sample/default helper implemented in Qemu
3) we can introduce a special path such as "default"; then, if
-netdev tap,rss_helper="default" is specified, qemu will use the
sample helper

So we have:
1) set set-uid for the helper
2) libvirt may just choose to launch the default helper

>
> Our main points (what should be addressed):
> - qemu should be able to load ebpf and use the maps when it runs from
> libvirt (without special caps) and standalone (with caps)

This is solved by leaving the privileged operations to the helper with set-uid.

> - it is possible that there are different qemu builds on the machine,
> one of them might be installed, their ebpf's might be different and
> the interface between qemu and ebpf (exact content of maps and number
> of maps)

We can use different helpers in this way.

> - qemu configures the RSS dynamically according to the commands
> provided by the guest

Considering that we decided to use mmap()-based maps, this is not an issue.

Or am I missing something?

Thanks

>
> Thanks in advance
> Yuri
>
> >
> > Thanks
> >
> >
> > >
> > >>> Also, libbpf dependency now exclusively for Linux.
> > >>> Libbpf is used for eBPF RSS steering, which is supported only by Linux TAP.
> > >>> There is no reason yet to build eBPF loader and helper for non Linux systems,
> > >>> even if libbpf is present.
> > >>>
> > >>> Signed-off-by: Andrew Melnychenko <andrew@daynix.com>
> > >>> ---
> > >>>    ebpf/qemu-ebpf-rss-helper.c | 130 ++++++++++++++++++++++++++++++++++++
> > >>>    meson.build                 |  37 ++++++----
> > >>>    2 files changed, 154 insertions(+), 13 deletions(-)
> > >>>    create mode 100644 ebpf/qemu-ebpf-rss-helper.c
> > >>>
> > >>> diff --git a/ebpf/qemu-ebpf-rss-helper.c b/ebpf/qemu-ebpf-rss-helper.c
> > >>> new file mode 100644
> > >>> index 0000000000..fe68758f57
> > >>> --- /dev/null
> > >>> +++ b/ebpf/qemu-ebpf-rss-helper.c
> > >>> @@ -0,0 +1,130 @@
> > >>> +/*
> > >>> + * eBPF RSS Helper
> > >>> + *
> > >>> + * Developed by Daynix Computing LTD (http://www.daynix.com)
> > >>> + *
> > >>> + * Authors:
> > >>> + *  Andrew Melnychenko <andrew@daynix.com>
> > >>> + *
> > >>> + * This work is licensed under the terms of the GNU GPL, version 2.  See
> > >>> + * the COPYING file in the top-level directory.
> > >>> + *
> > >>> + * Description: This is helper program for libvirtd.
> > >>> + *              It loads eBPF RSS program and passes fds through unix socket.
> > >>> + *              Built by meson, target - 'qemu-ebpf-rss-helper'.
> > >>> + */
> > >>> +
> > >>> +#include <stdio.h>
> > >>> +#include <stdint.h>
> > >>> +#include <stdlib.h>
> > >>> +#include <stdbool.h>
> > >>> +#include <getopt.h>
> > >>> +#include <memory.h>
> > >>> +#include <errno.h>
> > >>> +#include <sys/socket.h>
> > >>> +
> > >>> +#include "ebpf_rss.h"
> > >>> +
> > >>> +#include "qemu-helper-stamp.h"
> > >>> +
> > >>> +void QEMU_HELPER_STAMP(void) {}
> > >>> +
> > >>> +static int send_fds(int socket, int *fds, int n)
> > >>> +{
> > >>> +    struct msghdr msg = {};
> > >>> +    struct cmsghdr *cmsg = NULL;
> > >>> +    char buf[CMSG_SPACE(n * sizeof(int))];
> > >>> +    char dummy_buffer = 0;
> > >>> +    struct iovec io = { .iov_base = &dummy_buffer,
> > >>> +                        .iov_len = sizeof(dummy_buffer) };
> > >>> +
> > >>> +    memset(buf, 0, sizeof(buf));
> > >>> +
> > >>> +    msg.msg_iov = &io;
> > >>> +    msg.msg_iovlen = 1;
> > >>> +    msg.msg_control = buf;
> > >>> +    msg.msg_controllen = sizeof(buf);
> > >>> +
> > >>> +    cmsg = CMSG_FIRSTHDR(&msg);
> > >>> +    cmsg->cmsg_level = SOL_SOCKET;
> > >>> +    cmsg->cmsg_type = SCM_RIGHTS;
> > >>> +    cmsg->cmsg_len = CMSG_LEN(n * sizeof(int));
> > >>> +
> > >>> +    memcpy(CMSG_DATA(cmsg), fds, n * sizeof(int));
> > >>> +
> > >>> +    return sendmsg(socket, &msg, 0);
> > >>> +}
> > >>> +
> > >>> +static void print_help_and_exit(const char *prog, int exitcode)
> > >>> +{
> > >>> +    fprintf(stderr, "%s - load eBPF RSS program for qemu and pass eBPF fds"
> > >>> +            " through unix socket.\n", prog);
> > >>> +    fprintf(stderr, "\t--fd <num>, -f <num> - unix socket file descriptor"
> > >>> +            " used to pass eBPF fds.\n");
> > >>> +    fprintf(stderr, "\t--help, -h - this help.\n");
> > >>> +    exit(exitcode);
> > >>> +}
> > >>> +
> > >>> +int main(int argc, char **argv)
> > >>> +{
> > >>> +    char *fd_string = NULL;
> > >>> +    int unix_fd = 0;
> > >>> +    struct EBPFRSSContext ctx = {};
> > >>> +    int fds[EBPF_RSS_MAX_FDS] = {};
> > >>> +    int ret = -1;
> > >>> +
> > >>> +    for (;;) {
> > >>> +        int c;
> > >>> +        static struct option long_options[] = {
> > >>> +                {"help",  no_argument, 0, 'h'},
> > >>> +                {"fd",  required_argument, 0, 'f'},
> > >>> +                {0, 0, 0, 0}
> > >>> +        };
> > >>> +        c = getopt_long(argc, argv, "hf:",
> > >>> +                long_options, NULL);
> > >>> +
> > >>> +        if (c == -1) {
> > >>> +            break;
> > >>> +        }
> > >>> +
> > >>> +        switch (c) {
> > >>> +        case 'f':
> > >>> +            fd_string = optarg;
> > >>> +            break;
> > >>> +        case 'h':
> > >>> +        default:
> > >>> +            print_help_and_exit(argv[0],
> > >>> +                    c == 'h' ? EXIT_SUCCESS : EXIT_FAILURE);
> > >>> +        }
> > >>> +    }
> > >>> +
> > >>> +    if (!fd_string) {
> > >>> +        fprintf(stderr, "Unix file descriptor not present.\n");
> > >>> +        print_help_and_exit(argv[0], EXIT_FAILURE);
> > >>> +    }
> > >>> +
> > >>> +    unix_fd = atoi(fd_string);
> > >>> +
> > >>> +    if (!unix_fd) {
> > >>> +        fprintf(stderr, "Unix file descriptor is invalid.\n");
> > >>> +        return EXIT_FAILURE;
> > >>> +    }
> > >>> +
> > >>> +    ebpf_rss_init(&ctx);
> > >>> +    if (!ebpf_rss_load(&ctx)) {
> > >>> +        fprintf(stderr, "Can't load ebpf.\n");
> > >>> +        return EXIT_FAILURE;
> > >>> +    }
> > >>> +    fds[0] = ctx.program_fd;
> > >>> +    fds[1] = ctx.map_configuration;
> > >>> +
> > >>> +    ret = send_fds(unix_fd, fds, EBPF_RSS_MAX_FDS);
> > >>> +    if (ret < 0) {
> > >>> +        fprintf(stderr, "Issue while sending fds: %s.\n", strerror(errno));
> > >>> +    }
> > >>> +
> > >>> +    ebpf_rss_unload(&ctx);
> > >>> +
> > >>> +    return ret < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
> > >>> +}
> > >>> +
> > >>> diff --git a/meson.build b/meson.build
> > >>> index 257e51d91b..913aa1fee5 100644
> > >>> --- a/meson.build
> > >>> +++ b/meson.build
> > >>> @@ -1033,19 +1033,22 @@ if not get_option('fuse_lseek').disabled()
> > >>>    endif
> > >>>
> > >>>    # libbpf
> > >>> -libbpf = dependency('libbpf', required: get_option('bpf'), method: 'pkg-config')
> > >>> -if libbpf.found() and not cc.links('''
> > >>> -   #include <bpf/libbpf.h>
> > >>> -   int main(void)
> > >>> -   {
> > >>> -     bpf_object__destroy_skeleton(NULL);
> > >>> -     return 0;
> > >>> -   }''', dependencies: libbpf)
> > >>> -  libbpf = not_found
> > >>> -  if get_option('bpf').enabled()
> > >>> -    error('libbpf skeleton test failed')
> > >>> -  else
> > >>> -    warning('libbpf skeleton test failed, disabling')
> > >>> +libbpf = not_found
> > >>> +if targetos == 'linux'
> > >>> +  libbpf = dependency('libbpf', required: get_option('bpf'), method: 'pkg-config')
> > >>> +  if libbpf.found() and not cc.links('''
> > >>> +    #include <bpf/libbpf.h>
> > >>> +    int main(void)
> > >>> +    {
> > >>> +      bpf_object__destroy_skeleton(NULL);
> > >>
> > >> Do we need to test whether the bpf can do mmap() here?
> > >>
> > >> Thanks
> > >>
> > >>
> > >>> +      return 0;
> > >>> +    }''', dependencies: libbpf)
> > >>> +    libbpf = not_found
> > >>> +    if get_option('bpf').enabled()
> > >>> +      error('libbpf skeleton test failed')
> > >>> +    else
> > >>> +      warning('libbpf skeleton test failed, disabling')
> > >>> +    endif
> > >>>      endif
> > >>>    endif
> > >>>
> > >>> @@ -2423,6 +2426,14 @@ if have_tools
> > >>>                   dependencies: [authz, crypto, io, qom, qemuutil,
> > >>>                                  libcap_ng, mpathpersist],
> > >>>                   install: true)
> > >>> +
> > >>> +    if libbpf.found()
> > >>> +        executable('qemu-ebpf-rss-helper', files(
> > >>> +                   'ebpf/qemu-ebpf-rss-helper.c', 'ebpf/ebpf_rss.c'),
> > >>> +                   dependencies: [qemuutil, libbpf, glib],
> > >>> +                   install: true,
> > >>> +                   install_dir: get_option('libexecdir'))
> > >>> +    endif
> > >>>      endif
> > >>>
> > >>>      if 'CONFIG_IVSHMEM' in config_host
> >
>
Yuri Benditovich Sept. 9, 2021, midnight UTC | #10
On Wed, Sep 8, 2021 at 6:45 AM Jason Wang <jasowang@redhat.com> wrote:
>
> On Tue, Sep 7, 2021 at 6:40 PM Yuri Benditovich
> <yuri.benditovich@daynix.com> wrote:
> >
> > On Wed, Sep 1, 2021 at 9:42 AM Jason Wang <jasowang@redhat.com> wrote:
> > >
> > >
> > > 在 2021/8/31 上午1:07, Yuri Benditovich 写道:
> > > > On Fri, Aug 20, 2021 at 6:41 AM Jason Wang <jasowang@redhat.com> wrote:
> > > >>
> > > >> 在 2021/7/13 下午11:37, Andrew Melnychenko 写道:
> > > >>> Helper program. Loads eBPF RSS program and maps and passes them through unix socket.
> > > >>> Libvirt may launch this helper and pass eBPF fds to qemu virtio-net.
> > > >>
> > > >> I wonder if this can be done as helper for TAP/bridge.
> > > >>
> > > >> E.g it's the qemu to launch those helper with set-uid.
> > > >>
> > > >> Then libvirt won't even need to care about that?
> > > >>
> > > > There are pros and cons for such a solution with set-uid.
> > > >  From my point of view one of the cons is that set-uid is efficient
> > > > only at install time so the coexistence of different qemu builds (and
> > > > different helpers for each one) is kind of problematic.
> > > > With the current solution this does not present any problem: the
> > > > developer can have several different builds, each one automatically
> > > > has its own helper and there is no conflict between these builds and
> > > > between these builds and installed qemu package. Changing the
> > > > 'emulator' in the libvirt profile automatically brings the proper
> > > > helper to work.
> > >
> > >
> > > I'm not sure I get you here. We can still have default/sample helper to
> > > make sure it works for different builds.
> > >
> > > If we can avoid the involvement of libvirt, that would be better.
> >
> > Hi Jason,
> >
> > Indeed I did not get the idea, can you please explain it in more
> > details (as detailed as possible to avoid future misunderstanding),
> > especially how exactly we can use the set-uid and what is the 'default' helper.
> > We also would prefer to do everything from qemu but we do not see how
> > we can do that.
>
>
Some more questions to understand the idea better:
> Something like:
>
> 1) -netdev tap,rss_helper=/path/to/name

So, on each edit of 'emulator' in the xml, should the helper path
be set manually or be the default?

> 2) having a sample/default helper implemented in Qemu

Does it mean the default helper is code inside qemu (without
running an additional executable, like it does today), or is it qemu
itself with a dedicated command line?
As far as I remember, Daniel had strong objections to ever running qemu
with capabilities.

> 3) we can introduce something special path like "default", then if
> -netdev tap,rss_helper="default" is specified, qemu will use the
> sample helper

Probably this is not so important, but the rss helper and rss in
general have no relation to netdev; they are much more related to
virtio-net.

>
> So we have:
> 1) set set-uid for the helper
Who sets set-uid on the helper binary, and when? Only the installer or
libvirt can do that, correct?

> 2) libvirt may just choose to launch the default helper
All this discussion is to avoid launching the helper from libvirt, correct?

>
> >
> > Our main points (what should be addressed):
> > - qemu should be able to load ebpf and use the maps when it runs from
> > libvirt (without special caps) and standalone (with caps)
>
> This is solved by leaving the privileged operations to the helper with set-uid.
>
> > - it is possible that there are different qemu builds on the machine,
> > one of them might be installed, their ebpf's might be different and
> > the interface between qemu and ebpf (exact content of maps and number
> > of maps)
>
> We can use different helpers in this way.
>
> > - qemu configures the RSS dynamically according to the commands
> > provided by the guest
>
> Consider we decided to use mmap() based maps, this is not an issue.
>
> Or am I missing something?
>
> Thanks
>
> >
> > Thanks in advance
> > Yuri
> >
> > >
> > > Thanks
> > >
> > >
> > > >
> > > >>> Also, libbpf dependency now exclusively for Linux.
> > > >>> Libbpf is used for eBPF RSS steering, which is supported only by Linux TAP.
> > > >>> There is no reason yet to build eBPF loader and helper for non Linux systems,
> > > >>> even if libbpf is present.
> > > >>>
> > > >>> Signed-off-by: Andrew Melnychenko <andrew@daynix.com>
> > > >>> ---
> > > >>>    ebpf/qemu-ebpf-rss-helper.c | 130 ++++++++++++++++++++++++++++++++++++
> > > >>>    meson.build                 |  37 ++++++----
> > > >>>    2 files changed, 154 insertions(+), 13 deletions(-)
> > > >>>    create mode 100644 ebpf/qemu-ebpf-rss-helper.c
> > > >>>
> > > >>> diff --git a/ebpf/qemu-ebpf-rss-helper.c b/ebpf/qemu-ebpf-rss-helper.c
> > > >>> new file mode 100644
> > > >>> index 0000000000..fe68758f57
> > > >>> --- /dev/null
> > > >>> +++ b/ebpf/qemu-ebpf-rss-helper.c
> > > >>> @@ -0,0 +1,130 @@
> > > >>> +/*
> > > >>> + * eBPF RSS Helper
> > > >>> + *
> > > >>> + * Developed by Daynix Computing LTD (http://www.daynix.com)
> > > >>> + *
> > > >>> + * Authors:
> > > >>> + *  Andrew Melnychenko <andrew@daynix.com>
> > > >>> + *
> > > >>> + * This work is licensed under the terms of the GNU GPL, version 2.  See
> > > >>> + * the COPYING file in the top-level directory.
> > > >>> + *
> > > >>> + * Description: This is helper program for libvirtd.
> > > >>> + *              It loads eBPF RSS program and passes fds through unix socket.
> > > >>> + *              Built by meson, target - 'qemu-ebpf-rss-helper'.
> > > >>> + */
> > > >>> +
> > > >>> +#include <stdio.h>
> > > >>> +#include <stdint.h>
> > > >>> +#include <stdlib.h>
> > > >>> +#include <stdbool.h>
> > > >>> +#include <getopt.h>
> > > >>> +#include <memory.h>
> > > >>> +#include <errno.h>
> > > >>> +#include <sys/socket.h>
> > > >>> +
> > > >>> +#include "ebpf_rss.h"
> > > >>> +
> > > >>> +#include "qemu-helper-stamp.h"
> > > >>> +
> > > >>> +void QEMU_HELPER_STAMP(void) {}
> > > >>> +
> > > >>> +static int send_fds(int socket, int *fds, int n)
> > > >>> +{
> > > >>> +    struct msghdr msg = {};
> > > >>> +    struct cmsghdr *cmsg = NULL;
> > > >>> +    char buf[CMSG_SPACE(n * sizeof(int))];
> > > >>> +    char dummy_buffer = 0;
> > > >>> +    struct iovec io = { .iov_base = &dummy_buffer,
> > > >>> +                        .iov_len = sizeof(dummy_buffer) };
> > > >>> +
> > > >>> +    memset(buf, 0, sizeof(buf));
> > > >>> +
> > > >>> +    msg.msg_iov = &io;
> > > >>> +    msg.msg_iovlen = 1;
> > > >>> +    msg.msg_control = buf;
> > > >>> +    msg.msg_controllen = sizeof(buf);
> > > >>> +
> > > >>> +    cmsg = CMSG_FIRSTHDR(&msg);
> > > >>> +    cmsg->cmsg_level = SOL_SOCKET;
> > > >>> +    cmsg->cmsg_type = SCM_RIGHTS;
> > > >>> +    cmsg->cmsg_len = CMSG_LEN(n * sizeof(int));
> > > >>> +
> > > >>> +    memcpy(CMSG_DATA(cmsg), fds, n * sizeof(int));
> > > >>> +
> > > >>> +    return sendmsg(socket, &msg, 0);
> > > >>> +}
> > > >>> +
> > > >>> +static void print_help_and_exit(const char *prog, int exitcode)
> > > >>> +{
> > > >>> +    fprintf(stderr, "%s - load eBPF RSS program for qemu and pass eBPF fds"
> > > >>> +            " through unix socket.\n", prog);
> > > >>> +    fprintf(stderr, "\t--fd <num>, -f <num> - unix socket file descriptor"
> > > >>> +            " used to pass eBPF fds.\n");
> > > >>> +    fprintf(stderr, "\t--help, -h - this help.\n");
> > > >>> +    exit(exitcode);
> > > >>> +}
> > > >>> +
> > > >>> +int main(int argc, char **argv)
> > > >>> +{
> > > >>> +    char *fd_string = NULL;
> > > >>> +    int unix_fd = 0;
> > > >>> +    struct EBPFRSSContext ctx = {};
> > > >>> +    int fds[EBPF_RSS_MAX_FDS] = {};
> > > >>> +    int ret = -1;
> > > >>> +
> > > >>> +    for (;;) {
> > > >>> +        int c;
> > > >>> +        static struct option long_options[] = {
> > > >>> +                {"help",  no_argument, 0, 'h'},
> > > >>> +                {"fd",  required_argument, 0, 'f'},
> > > >>> +                {0, 0, 0, 0}
> > > >>> +        };
> > > >>> +        c = getopt_long(argc, argv, "hf:",
> > > >>> +                long_options, NULL);
> > > >>> +
> > > >>> +        if (c == -1) {
> > > >>> +            break;
> > > >>> +        }
> > > >>> +
> > > >>> +        switch (c) {
> > > >>> +        case 'f':
> > > >>> +            fd_string = optarg;
> > > >>> +            break;
> > > >>> +        case 'h':
> > > >>> +        default:
> > > >>> +            print_help_and_exit(argv[0],
> > > >>> +                    c == 'h' ? EXIT_SUCCESS : EXIT_FAILURE);
> > > >>> +        }
> > > >>> +    }
> > > >>> +
> > > >>> +    if (!fd_string) {
> > > >>> +        fprintf(stderr, "Unix file descriptor not present.\n");
> > > >>> +        print_help_and_exit(argv[0], EXIT_FAILURE);
> > > >>> +    }
> > > >>> +
> > > >>> +    unix_fd = atoi(fd_string);
> > > >>> +
> > > >>> +    if (!unix_fd) {
> > > >>> +        fprintf(stderr, "Unix file descriptor is invalid.\n");
> > > >>> +        return EXIT_FAILURE;
> > > >>> +    }
> > > >>> +
> > > >>> +    ebpf_rss_init(&ctx);
> > > >>> +    if (!ebpf_rss_load(&ctx)) {
> > > >>> +        fprintf(stderr, "Can't load ebpf.\n");
> > > >>> +        return EXIT_FAILURE;
> > > >>> +    }
> > > >>> +    fds[0] = ctx.program_fd;
> > > >>> +    fds[1] = ctx.map_configuration;
> > > >>> +
> > > >>> +    ret = send_fds(unix_fd, fds, EBPF_RSS_MAX_FDS);
> > > >>> +    if (ret < 0) {
> > > >>> +        fprintf(stderr, "Issue while sending fds: %s.\n", strerror(errno));
> > > >>> +    }
> > > >>> +
> > > >>> +    ebpf_rss_unload(&ctx);
> > > >>> +
> > > >>> +    return ret < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
> > > >>> +}
> > > >>> +
> > > >>> diff --git a/meson.build b/meson.build
> > > >>> index 257e51d91b..913aa1fee5 100644
> > > >>> --- a/meson.build
> > > >>> +++ b/meson.build
> > > >>> @@ -1033,19 +1033,22 @@ if not get_option('fuse_lseek').disabled()
> > > >>>    endif
> > > >>>
> > > >>>    # libbpf
> > > >>> -libbpf = dependency('libbpf', required: get_option('bpf'), method: 'pkg-config')
> > > >>> -if libbpf.found() and not cc.links('''
> > > >>> -   #include <bpf/libbpf.h>
> > > >>> -   int main(void)
> > > >>> -   {
> > > >>> -     bpf_object__destroy_skeleton(NULL);
> > > >>> -     return 0;
> > > >>> -   }''', dependencies: libbpf)
> > > >>> -  libbpf = not_found
> > > >>> -  if get_option('bpf').enabled()
> > > >>> -    error('libbpf skeleton test failed')
> > > >>> -  else
> > > >>> -    warning('libbpf skeleton test failed, disabling')
> > > >>> +libbpf = not_found
> > > >>> +if targetos == 'linux'
> > > >>> +  libbpf = dependency('libbpf', required: get_option('bpf'), method: 'pkg-config')
> > > >>> +  if libbpf.found() and not cc.links('''
> > > >>> +    #include <bpf/libbpf.h>
> > > >>> +    int main(void)
> > > >>> +    {
> > > >>> +      bpf_object__destroy_skeleton(NULL);
> > > >>
> > > >> Do we need to test whether the bpf can do mmap() here?
> > > >>
> > > >> Thanks
> > > >>
> > > >>
> > > >>> +      return 0;
> > > >>> +    }''', dependencies: libbpf)
> > > >>> +    libbpf = not_found
> > > >>> +    if get_option('bpf').enabled()
> > > >>> +      error('libbpf skeleton test failed')
> > > >>> +    else
> > > >>> +      warning('libbpf skeleton test failed, disabling')
> > > >>> +    endif
> > > >>>      endif
> > > >>>    endif
> > > >>>
> > > >>> @@ -2423,6 +2426,14 @@ if have_tools
> > > >>>                   dependencies: [authz, crypto, io, qom, qemuutil,
> > > >>>                                  libcap_ng, mpathpersist],
> > > >>>                   install: true)
> > > >>> +
> > > >>> +    if libbpf.found()
> > > >>> +        executable('qemu-ebpf-rss-helper', files(
> > > >>> +                   'ebpf/qemu-ebpf-rss-helper.c', 'ebpf/ebpf_rss.c'),
> > > >>> +                   dependencies: [qemuutil, libbpf, glib],
> > > >>> +                   install: true,
> > > >>> +                   install_dir: get_option('libexecdir'))
> > > >>> +    endif
> > > >>>      endif
> > > >>>
> > > >>>      if 'CONFIG_IVSHMEM' in config_host
> > >
> >
>
Jason Wang Sept. 9, 2021, 1:16 a.m. UTC | #11
On Thu, Sep 9, 2021 at 8:00 AM Yuri Benditovich
<yuri.benditovich@daynix.com> wrote:
>
> On Wed, Sep 8, 2021 at 6:45 AM Jason Wang <jasowang@redhat.com> wrote:
> >
> > On Tue, Sep 7, 2021 at 6:40 PM Yuri Benditovich
> > <yuri.benditovich@daynix.com> wrote:
> > >
> > > On Wed, Sep 1, 2021 at 9:42 AM Jason Wang <jasowang@redhat.com> wrote:
> > > >
> > > >
> > > > 在 2021/8/31 上午1:07, Yuri Benditovich 写道:
> > > > > On Fri, Aug 20, 2021 at 6:41 AM Jason Wang <jasowang@redhat.com> wrote:
> > > > >>
> > > > >> 在 2021/7/13 下午11:37, Andrew Melnychenko 写道:
> > > > >>> Helper program. Loads eBPF RSS program and maps and passes them through unix socket.
> > > > >>> Libvirt may launch this helper and pass eBPF fds to qemu virtio-net.
> > > > >>
> > > > >> I wonder if this can be done as helper for TAP/bridge.
> > > > >>
> > > > >> E.g it's the qemu to launch those helper with set-uid.
> > > > >>
> > > > >> Then libvirt won't even need to care about that?
> > > > >>
> > > > > There are pros and cons for such a solution with set-uid.
> > > > >  From my point of view one of the cons is that set-uid is efficient
> > > > > only at install time so the coexistence of different qemu builds (and
> > > > > different helpers for each one) is kind of problematic.
> > > > > With the current solution this does not present any problem: the
> > > > > developer can have several different builds, each one automatically
> > > > > has its own helper and there is no conflict between these builds and
> > > > > between these builds and installed qemu package. Changing the
> > > > > 'emulator' in the libvirt profile automatically brings the proper
> > > > > helper to work.
> > > >
> > > >
> > > > I'm not sure I get you here. We can still have default/sample helper to
> > > > make sure it works for different builds.
> > > >
> > > > If we can avoid the involvement of libvirt, that would be better.
> > >
> > > Hi Jason,
> > >
> > > Indeed I did not get the idea, can you please explain it in more
> > > details (as detailed as possible to avoid future misunderstanding),
> > > especially how exactly we can use the set-uid and what is the 'default' helper.
> > > We also would prefer to do everything from qemu but we do not see how
> > > we can do that.
> >
> >
> Some more questions to understand the idea better:
> > Something like:
> >
> > 1) -netdev tap,rss_helper=/path/to/name
>
> So, on each editing of 'emulator' in the xml  the helper path should
> be set manually or be default?

It could be done manually, or we can have a default path.

>
> > 2) having a sample/default helper implemented in Qemu
>
> Does it mean the default helper is the code in the qemu (without
> running additional executable, like it does today)

Yes.

> or this is qemu
> itself with dedicated command line?
> As far as I remember Daniel had strong objections of ever running qemu
> with capabilities

Qemu won't run with capabilities, but the helper will.

>
> > 3) we can introduce something special path like "default", then if
> > -netdev tap,rss_helper="default" is specified, qemu will use the
> > sample helper
>
> Probably this is not so important but the rss helper and rss in
> general has no relation to netdev, much more they are related to
> virtio-net

So I think the reason for this is that we currently only support
eBPF/RSS for tap.

>
> >
> > So we have:
> > 1) set set-uid for the helper
> Who and when does set-uid to the helper binary? Only installer or
> libvirt can do that, correct?

Yes, it could be done by the installer, or other system provisioning tools.

>
> > 2) libvirt may just choose to launch the default helper
> All this discussion is to avoid launching the helper from libvirt, correct?

Sorry, it's a typo. I meant: libvirt launches qemu, and then qemu
launches the helper.

Thanks

>
> >
> > >
> > > Our main points (what should be addressed):
> > > - qemu should be able to load ebpf and use the maps when it runs from
> > > libvirt (without special caps) and standalone (with caps)
> >
> > This is solved by leaving the privileged operations to the helper with set-uid.
> >
> > > - it is possible that there are different qemu builds on the machine,
> > > one of them might be installed, their ebpf's might be different and
> > > the interface between qemu and ebpf (exact content of maps and number
> > > of maps)
> >
> > We can use different helpers in this way.
> >
> > > - qemu configures the RSS dynamically according to the commands
> > > provided by the guest
> >
> > Consider we decided to use mmap() based maps, this is not an issue.
> >
> > Or am I missing something?
> >
> > Thanks
> >
> > >
> > > Thanks in advance
> > > Yuri
> > >
> > > >
> > > > Thanks
> > > >
> > > >
> > > > >
> > > > >>> Also, libbpf dependency now exclusively for Linux.
> > > > >>> Libbpf is used for eBPF RSS steering, which is supported only by Linux TAP.
> > > > >>> There is no reason yet to build eBPF loader and helper for non Linux systems,
> > > > >>> even if libbpf is present.
> > > > >>>
> > > > >>> Signed-off-by: Andrew Melnychenko <andrew@daynix.com>
> > > > >>> ---
> > > > >>>    ebpf/qemu-ebpf-rss-helper.c | 130 ++++++++++++++++++++++++++++++++++++
> > > > >>>    meson.build                 |  37 ++++++----
> > > > >>>    2 files changed, 154 insertions(+), 13 deletions(-)
> > > > >>>    create mode 100644 ebpf/qemu-ebpf-rss-helper.c
> > > > >>>
> > > > >>> diff --git a/ebpf/qemu-ebpf-rss-helper.c b/ebpf/qemu-ebpf-rss-helper.c
> > > > >>> new file mode 100644
> > > > >>> index 0000000000..fe68758f57
> > > > >>> --- /dev/null
> > > > >>> +++ b/ebpf/qemu-ebpf-rss-helper.c
> > > > >>> @@ -0,0 +1,130 @@
> > > > >>> +/*
> > > > >>> + * eBPF RSS Helper
> > > > >>> + *
> > > > >>> + * Developed by Daynix Computing LTD (http://www.daynix.com)
> > > > >>> + *
> > > > >>> + * Authors:
> > > > >>> + *  Andrew Melnychenko <andrew@daynix.com>
> > > > >>> + *
> > > > >>> + * This work is licensed under the terms of the GNU GPL, version 2.  See
> > > > >>> + * the COPYING file in the top-level directory.
> > > > >>> + *
> > > > >>> + * Description: This is helper program for libvirtd.
> > > > >>> + *              It loads eBPF RSS program and passes fds through unix socket.
> > > > >>> + *              Built by meson, target - 'qemu-ebpf-rss-helper'.
> > > > >>> + */
> > > > >>> +
> > > > >>> +#include <stdio.h>
> > > > >>> +#include <stdint.h>
> > > > >>> +#include <stdlib.h>
> > > > >>> +#include <stdbool.h>
> > > > >>> +#include <getopt.h>
> > > > >>> +#include <memory.h>
> > > > >>> +#include <errno.h>
> > > > >>> +#include <sys/socket.h>
> > > > >>> +
> > > > >>> +#include "ebpf_rss.h"
> > > > >>> +
> > > > >>> +#include "qemu-helper-stamp.h"
> > > > >>> +
> > > > >>> +void QEMU_HELPER_STAMP(void) {}
> > > > >>> +
> > > > >>> +static int send_fds(int socket, int *fds, int n)
> > > > >>> +{
> > > > >>> +    struct msghdr msg = {};
> > > > >>> +    struct cmsghdr *cmsg = NULL;
> > > > >>> +    char buf[CMSG_SPACE(n * sizeof(int))];
> > > > >>> +    char dummy_buffer = 0;
> > > > >>> +    struct iovec io = { .iov_base = &dummy_buffer,
> > > > >>> +                        .iov_len = sizeof(dummy_buffer) };
> > > > >>> +
> > > > >>> +    memset(buf, 0, sizeof(buf));
> > > > >>> +
> > > > >>> +    msg.msg_iov = &io;
> > > > >>> +    msg.msg_iovlen = 1;
> > > > >>> +    msg.msg_control = buf;
> > > > >>> +    msg.msg_controllen = sizeof(buf);
> > > > >>> +
> > > > >>> +    cmsg = CMSG_FIRSTHDR(&msg);
> > > > >>> +    cmsg->cmsg_level = SOL_SOCKET;
> > > > >>> +    cmsg->cmsg_type = SCM_RIGHTS;
> > > > >>> +    cmsg->cmsg_len = CMSG_LEN(n * sizeof(int));
> > > > >>> +
> > > > >>> +    memcpy(CMSG_DATA(cmsg), fds, n * sizeof(int));
> > > > >>> +
> > > > >>> +    return sendmsg(socket, &msg, 0);
> > > > >>> +}
> > > > >>> +
> > > > >>> +static void print_help_and_exit(const char *prog, int exitcode)
> > > > >>> +{
> > > > >>> +    fprintf(stderr, "%s - load eBPF RSS program for qemu and pass eBPF fds"
> > > > >>> +            " through unix socket.\n", prog);
> > > > >>> +    fprintf(stderr, "\t--fd <num>, -f <num> - unix socket file descriptor"
> > > > >>> +            " used to pass eBPF fds.\n");
> > > > >>> +    fprintf(stderr, "\t--help, -h - this help.\n");
> > > > >>> +    exit(exitcode);
> > > > >>> +}
> > > > >>> +
> > > > >>> +int main(int argc, char **argv)
> > > > >>> +{
> > > > >>> +    char *fd_string = NULL;
> > > > >>> +    int unix_fd = 0;
> > > > >>> +    struct EBPFRSSContext ctx = {};
> > > > >>> +    int fds[EBPF_RSS_MAX_FDS] = {};
> > > > >>> +    int ret = -1;
> > > > >>> +
> > > > >>> +    for (;;) {
> > > > >>> +        int c;
> > > > >>> +        static struct option long_options[] = {
> > > > >>> +                {"help",  no_argument, 0, 'h'},
> > > > >>> +                {"fd",  required_argument, 0, 'f'},
> > > > >>> +                {0, 0, 0, 0}
> > > > >>> +        };
> > > > >>> +        c = getopt_long(argc, argv, "hf:",
> > > > >>> +                long_options, NULL);
> > > > >>> +
> > > > >>> +        if (c == -1) {
> > > > >>> +            break;
> > > > >>> +        }
> > > > >>> +
> > > > >>> +        switch (c) {
> > > > >>> +        case 'f':
> > > > >>> +            fd_string = optarg;
> > > > >>> +            break;
> > > > >>> +        case 'h':
> > > > >>> +        default:
> > > > >>> +            print_help_and_exit(argv[0],
> > > > >>> +                    c == 'h' ? EXIT_SUCCESS : EXIT_FAILURE);
> > > > >>> +        }
> > > > >>> +    }
> > > > >>> +
> > > > >>> +    if (!fd_string) {
> > > > >>> +        fprintf(stderr, "Unix file descriptor not present.\n");
> > > > >>> +        print_help_and_exit(argv[0], EXIT_FAILURE);
> > > > >>> +    }
> > > > >>> +
> > > > >>> +    unix_fd = atoi(fd_string);
> > > > >>> +
> > > > >>> +    if (!unix_fd) {
> > > > >>> +        fprintf(stderr, "Unix file descriptor is invalid.\n");
> > > > >>> +        return EXIT_FAILURE;
> > > > >>> +    }
> > > > >>> +
> > > > >>> +    ebpf_rss_init(&ctx);
> > > > >>> +    if (!ebpf_rss_load(&ctx)) {
> > > > >>> +        fprintf(stderr, "Can't load ebpf.\n");
> > > > >>> +        return EXIT_FAILURE;
> > > > >>> +    }
> > > > >>> +    fds[0] = ctx.program_fd;
> > > > >>> +    fds[1] = ctx.map_configuration;
> > > > >>> +
> > > > >>> +    ret = send_fds(unix_fd, fds, EBPF_RSS_MAX_FDS);
> > > > >>> +    if (ret < 0) {
> > > > >>> +        fprintf(stderr, "Issue while sending fds: %s.\n", strerror(errno));
> > > > >>> +    }
> > > > >>> +
> > > > >>> +    ebpf_rss_unload(&ctx);
> > > > >>> +
> > > > >>> +    return ret < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
> > > > >>> +}
> > > > >>> +
> > > > >>> diff --git a/meson.build b/meson.build
> > > > >>> index 257e51d91b..913aa1fee5 100644
> > > > >>> --- a/meson.build
> > > > >>> +++ b/meson.build
> > > > >>> @@ -1033,19 +1033,22 @@ if not get_option('fuse_lseek').disabled()
> > > > >>>    endif
> > > > >>>
> > > > >>>    # libbpf
> > > > >>> -libbpf = dependency('libbpf', required: get_option('bpf'), method: 'pkg-config')
> > > > >>> -if libbpf.found() and not cc.links('''
> > > > >>> -   #include <bpf/libbpf.h>
> > > > >>> -   int main(void)
> > > > >>> -   {
> > > > >>> -     bpf_object__destroy_skeleton(NULL);
> > > > >>> -     return 0;
> > > > >>> -   }''', dependencies: libbpf)
> > > > >>> -  libbpf = not_found
> > > > >>> -  if get_option('bpf').enabled()
> > > > >>> -    error('libbpf skeleton test failed')
> > > > >>> -  else
> > > > >>> -    warning('libbpf skeleton test failed, disabling')
> > > > >>> +libbpf = not_found
> > > > >>> +if targetos == 'linux'
> > > > >>> +  libbpf = dependency('libbpf', required: get_option('bpf'), method: 'pkg-config')
> > > > >>> +  if libbpf.found() and not cc.links('''
> > > > >>> +    #include <bpf/libbpf.h>
> > > > >>> +    int main(void)
> > > > >>> +    {
> > > > >>> +      bpf_object__destroy_skeleton(NULL);
> > > > >>
> > > > >> Do we need to test whether the bpf can do mmap() here?
> > > > >>
> > > > >> Thanks
> > > > >>
> > > > >>
> > > > >>> +      return 0;
> > > > >>> +    }''', dependencies: libbpf)
> > > > >>> +    libbpf = not_found
> > > > >>> +    if get_option('bpf').enabled()
> > > > >>> +      error('libbpf skeleton test failed')
> > > > >>> +    else
> > > > >>> +      warning('libbpf skeleton test failed, disabling')
> > > > >>> +    endif
> > > > >>>      endif
> > > > >>>    endif
> > > > >>>
> > > > >>> @@ -2423,6 +2426,14 @@ if have_tools
> > > > >>>                   dependencies: [authz, crypto, io, qom, qemuutil,
> > > > >>>                                  libcap_ng, mpathpersist],
> > > > >>>                   install: true)
> > > > >>> +
> > > > >>> +    if libbpf.found()
> > > > >>> +        executable('qemu-ebpf-rss-helper', files(
> > > > >>> +                   'ebpf/qemu-ebpf-rss-helper.c', 'ebpf/ebpf_rss.c'),
> > > > >>> +                   dependencies: [qemuutil, libbpf, glib],
> > > > >>> +                   install: true,
> > > > >>> +                   install_dir: get_option('libexecdir'))
> > > > >>> +    endif
> > > > >>>      endif
> > > > >>>
> > > > >>>      if 'CONFIG_IVSHMEM' in config_host
> > > >
> > >
> >
>
Yuri Benditovich Sept. 9, 2021, 11:43 p.m. UTC | #12
On Thu, Sep 9, 2021 at 4:16 AM Jason Wang <jasowang@redhat.com> wrote:
>
> On Thu, Sep 9, 2021 at 8:00 AM Yuri Benditovich
> <yuri.benditovich@daynix.com> wrote:
> >
> > On Wed, Sep 8, 2021 at 6:45 AM Jason Wang <jasowang@redhat.com> wrote:
> > >
> > > On Tue, Sep 7, 2021 at 6:40 PM Yuri Benditovich
> > > <yuri.benditovich@daynix.com> wrote:
> > > >
> > > > On Wed, Sep 1, 2021 at 9:42 AM Jason Wang <jasowang@redhat.com> wrote:
> > > > >
> > > > >
> > > > > 在 2021/8/31 上午1:07, Yuri Benditovich 写道:
> > > > > > On Fri, Aug 20, 2021 at 6:41 AM Jason Wang <jasowang@redhat.com> wrote:
> > > > > >>
> > > > > >> 在 2021/7/13 下午11:37, Andrew Melnychenko 写道:
> > > > > >>> Helper program. Loads eBPF RSS program and maps and passes them through unix socket.
> > > > > >>> Libvirt may launch this helper and pass eBPF fds to qemu virtio-net.
> > > > > >>
> > > > > >> I wonder if this can be done as helper for TAP/bridge.
> > > > > >>
> > > > > >> E.g it's the qemu to launch those helper with set-uid.
> > > > > >>
> > > > > >> Then libvirt won't even need to care about that?
> > > > > >>
> > > > > > There are pros and cons for such a solution with set-uid.
> > > > > >  From my point of view one of the cons is that set-uid is efficient
> > > > > > only at install time so the coexistence of different qemu builds (and
> > > > > > different helpers for each one) is kind of problematic.
> > > > > > With the current solution this does not present any problem: the
> > > > > > developer can have several different builds, each one automatically
> > > > > > has its own helper and there is no conflict between these builds and
> > > > > > between these builds and installed qemu package. Changing the
> > > > > > 'emulator' in the libvirt profile automatically brings the proper
> > > > > > helper to work.
> > > > >
> > > > >
> > > > > I'm not sure I get you here. We can still have default/sample helper to
> > > > > make sure it works for different builds.
> > > > >
> > > > > If we can avoid the involvement of libvirt, that would be better.
> > > >
> > > > Hi Jason,
> > > >
> > > > Indeed I did not get the idea, can you please explain it in more
> > > > details (as detailed as possible to avoid future misunderstanding),
> > > > especially how exactly we can use the set-uid and what is the 'default' helper.
> > > > We also would prefer to do everything from qemu but we do not see how
> > > > we can do that.
> > >
> > >
> > Some more questions to understand the idea better:
> > > Something like:
> > >
> > > 1) -netdev tap,rss_helper=/path/to/name
> >
> > So, on each editing of 'emulator' in the xml  the helper path should
> > be set manually or be default?
>
> It could done manually, or we can have a default path.
>
> >
> > > 2) having a sample/default helper implemented in Qemu
> >
> > Does it mean the default helper is the code in the qemu (without
> > running additional executable, like it does today)
>
> Yes.
If the "default helper" is just a keyword and it is like what we have
today (i.e. part of qemu), it can't work under libvirt and should never
be used by libvirt.
>
>  or this is qemu
> > itself with dedicated command line?
> > As far as I remember Daniel had strong objections of ever running qemu
> > with capabilities
>
> Qemu won't run with capabilities but the helper.
So under libvirt the helper is always a separate executable and not
the "default helper".

>
> >
> > > 3) we can introduce something special path like "default", then if
> > > -netdev tap,rss_helper="default" is specified, qemu will use the
> > > sample helper
> >
> > Probably this is not so important but the rss helper and rss in
> > general has no relation to netdev, much more they are related to
> > virtio-net
>
> So I think the reason for this is that we currently only support
> eBPF/RSS for tap.

This is just because only tap supports respective ioctls.

>
> >
> > >
> > > So we have:
> > > 1) set set-uid for the helper
> > Who and when does set-uid to the helper binary? Only installer or
> > libvirt can do that, correct?
>
> Yes, it could be done the installer, or other system provision tools.

So this changes the rules of the game: currently libvirt runs helpers
that require privileges and qemu runs helpers that do not require any
privileges (like TPM).
If we follow your suggestion, we will create, only for RSS, a helper
that must be used with set-uid.
Who are the stakeholders we need to reach a consensus with?

>
> >
> > > 2) libvirt may just choose to launch the default helper
> > All this discussion is to avoid launching the helper from libvirt, correct?
>
> Sorry, it's a typo. I meant, libvirt launch qemu, and then qemu will
> launch the helper.
>
> Thanks
>
> >
> > >
> > > >
> > > > Our main points (what should be addressed):
> > > > - qemu should be able to load ebpf and use the maps when it runs from
> > > > libvirt (without special caps) and standalone (with caps)
> > >
> > > This is solved by leaving the privileged operations to the helper with set-uid.
> > >
> > > > - it is possible that there are different qemu builds on the machine,
> > > > one of them might be installed, their ebpf's might be different and
> > > > the interface between qemu and ebpf (exact content of maps and number
> > > > of maps)
> > >
> > > We can use different helpers in this way.
> > >
> > > > - qemu configures the RSS dynamically according to the commands
> > > > provided by the guest
> > >
> > > Consider we decided to use mmap() based maps, this is not an issue.
> > >
> > > Or am I missing something?
> > >
> > > Thanks
> > >
> > > >
> > > > Thanks in advance
> > > > Yuri
> > > >
> > > > >
> > > > > Thanks
> > > > >
> > > > >
> > > > > >
> > > > > >>> Also, libbpf dependency now exclusively for Linux.
> > > > > >>> Libbpf is used for eBPF RSS steering, which is supported only by Linux TAP.
> > > > > >>> There is no reason yet to build eBPF loader and helper for non Linux systems,
> > > > > >>> even if libbpf is present.
> > > > > >>>
> > > > > >>> Signed-off-by: Andrew Melnychenko <andrew@daynix.com>
> > > > > >>> ---
> > > > > >>>    ebpf/qemu-ebpf-rss-helper.c | 130 ++++++++++++++++++++++++++++++++++++
> > > > > >>>    meson.build                 |  37 ++++++----
> > > > > >>>    2 files changed, 154 insertions(+), 13 deletions(-)
> > > > > >>>    create mode 100644 ebpf/qemu-ebpf-rss-helper.c
> > > > > >>>
> > > > > >>> diff --git a/ebpf/qemu-ebpf-rss-helper.c b/ebpf/qemu-ebpf-rss-helper.c
> > > > > >>> new file mode 100644
> > > > > >>> index 0000000000..fe68758f57
> > > > > >>> --- /dev/null
> > > > > >>> +++ b/ebpf/qemu-ebpf-rss-helper.c
> > > > > >>> @@ -0,0 +1,130 @@
> > > > > >>> +/*
> > > > > >>> + * eBPF RSS Helper
> > > > > >>> + *
> > > > > >>> + * Developed by Daynix Computing LTD (http://www.daynix.com)
> > > > > >>> + *
> > > > > >>> + * Authors:
> > > > > >>> + *  Andrew Melnychenko <andrew@daynix.com>
> > > > > >>> + *
> > > > > >>> + * This work is licensed under the terms of the GNU GPL, version 2.  See
> > > > > >>> + * the COPYING file in the top-level directory.
> > > > > >>> + *
> > > > > >>> + * Description: This is helper program for libvirtd.
> > > > > >>> + *              It loads eBPF RSS program and passes fds through unix socket.
> > > > > >>> + *              Built by meson, target - 'qemu-ebpf-rss-helper'.
> > > > > >>> + */
> > > > > >>> +
> > > > > >>> +#include <stdio.h>
> > > > > >>> +#include <stdint.h>
> > > > > >>> +#include <stdlib.h>
> > > > > >>> +#include <stdbool.h>
> > > > > >>> +#include <getopt.h>
> > > > > >>> +#include <memory.h>
> > > > > >>> +#include <errno.h>
> > > > > >>> +#include <sys/socket.h>
> > > > > >>> +
> > > > > >>> +#include "ebpf_rss.h"
> > > > > >>> +
> > > > > >>> +#include "qemu-helper-stamp.h"
> > > > > >>> +
> > > > > >>> +void QEMU_HELPER_STAMP(void) {}
> > > > > >>> +
> > > > > >>> +static int send_fds(int socket, int *fds, int n)
> > > > > >>> +{
> > > > > >>> +    struct msghdr msg = {};
> > > > > >>> +    struct cmsghdr *cmsg = NULL;
> > > > > >>> +    char buf[CMSG_SPACE(n * sizeof(int))];
> > > > > >>> +    char dummy_buffer = 0;
> > > > > >>> +    struct iovec io = { .iov_base = &dummy_buffer,
> > > > > >>> +                        .iov_len = sizeof(dummy_buffer) };
> > > > > >>> +
> > > > > >>> +    memset(buf, 0, sizeof(buf));
> > > > > >>> +
> > > > > >>> +    msg.msg_iov = &io;
> > > > > >>> +    msg.msg_iovlen = 1;
> > > > > >>> +    msg.msg_control = buf;
> > > > > >>> +    msg.msg_controllen = sizeof(buf);
> > > > > >>> +
> > > > > >>> +    cmsg = CMSG_FIRSTHDR(&msg);
> > > > > >>> +    cmsg->cmsg_level = SOL_SOCKET;
> > > > > >>> +    cmsg->cmsg_type = SCM_RIGHTS;
> > > > > >>> +    cmsg->cmsg_len = CMSG_LEN(n * sizeof(int));
> > > > > >>> +
> > > > > >>> +    memcpy(CMSG_DATA(cmsg), fds, n * sizeof(int));
> > > > > >>> +
> > > > > >>> +    return sendmsg(socket, &msg, 0);
> > > > > >>> +}
> > > > > >>> +
> > > > > >>> +static void print_help_and_exit(const char *prog, int exitcode)
> > > > > >>> +{
> > > > > >>> +    fprintf(stderr, "%s - load eBPF RSS program for qemu and pass eBPF fds"
> > > > > >>> +            " through unix socket.\n", prog);
> > > > > >>> +    fprintf(stderr, "\t--fd <num>, -f <num> - unix socket file descriptor"
> > > > > >>> +            " used to pass eBPF fds.\n");
> > > > > >>> +    fprintf(stderr, "\t--help, -h - this help.\n");
> > > > > >>> +    exit(exitcode);
> > > > > >>> +}
> > > > > >>> +
> > > > > >>> +int main(int argc, char **argv)
> > > > > >>> +{
> > > > > >>> +    char *fd_string = NULL;
> > > > > >>> +    int unix_fd = 0;
> > > > > >>> +    struct EBPFRSSContext ctx = {};
> > > > > >>> +    int fds[EBPF_RSS_MAX_FDS] = {};
> > > > > >>> +    int ret = -1;
> > > > > >>> +
> > > > > >>> +    for (;;) {
> > > > > >>> +        int c;
> > > > > >>> +        static struct option long_options[] = {
> > > > > >>> +                {"help",  no_argument, 0, 'h'},
> > > > > >>> +                {"fd",  required_argument, 0, 'f'},
> > > > > >>> +                {0, 0, 0, 0}
> > > > > >>> +        };
> > > > > >>> +        c = getopt_long(argc, argv, "hf:",
> > > > > >>> +                long_options, NULL);
> > > > > >>> +
> > > > > >>> +        if (c == -1) {
> > > > > >>> +            break;
> > > > > >>> +        }
> > > > > >>> +
> > > > > >>> +        switch (c) {
> > > > > >>> +        case 'f':
> > > > > >>> +            fd_string = optarg;
> > > > > >>> +            break;
> > > > > >>> +        case 'h':
> > > > > >>> +        default:
> > > > > >>> +            print_help_and_exit(argv[0],
> > > > > >>> +                    c == 'h' ? EXIT_SUCCESS : EXIT_FAILURE);
> > > > > >>> +        }
> > > > > >>> +    }
> > > > > >>> +
> > > > > >>> +    if (!fd_string) {
> > > > > >>> +        fprintf(stderr, "Unix file descriptor not present.\n");
> > > > > >>> +        print_help_and_exit(argv[0], EXIT_FAILURE);
> > > > > >>> +    }
> > > > > >>> +
> > > > > >>> +    unix_fd = atoi(fd_string);
> > > > > >>> +
> > > > > >>> +    if (!unix_fd) {
> > > > > >>> +        fprintf(stderr, "Unix file descriptor is invalid.\n");
> > > > > >>> +        return EXIT_FAILURE;
> > > > > >>> +    }
> > > > > >>> +
> > > > > >>> +    ebpf_rss_init(&ctx);
> > > > > >>> +    if (!ebpf_rss_load(&ctx)) {
> > > > > >>> +        fprintf(stderr, "Can't load ebpf.\n");
> > > > > >>> +        return EXIT_FAILURE;
> > > > > >>> +    }
> > > > > >>> +    fds[0] = ctx.program_fd;
> > > > > >>> +    fds[1] = ctx.map_configuration;
> > > > > >>> +
> > > > > >>> +    ret = send_fds(unix_fd, fds, EBPF_RSS_MAX_FDS);
> > > > > >>> +    if (ret < 0) {
> > > > > >>> +        fprintf(stderr, "Issue while sending fds: %s.\n", strerror(errno));
> > > > > >>> +    }
> > > > > >>> +
> > > > > >>> +    ebpf_rss_unload(&ctx);
> > > > > >>> +
> > > > > >>> +    return ret < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
> > > > > >>> +}
> > > > > >>> +
> > > > > >>> diff --git a/meson.build b/meson.build
> > > > > >>> index 257e51d91b..913aa1fee5 100644
> > > > > >>> --- a/meson.build
> > > > > >>> +++ b/meson.build
> > > > > >>> @@ -1033,19 +1033,22 @@ if not get_option('fuse_lseek').disabled()
> > > > > >>>    endif
> > > > > >>>
> > > > > >>>    # libbpf
> > > > > >>> -libbpf = dependency('libbpf', required: get_option('bpf'), method: 'pkg-config')
> > > > > >>> -if libbpf.found() and not cc.links('''
> > > > > >>> -   #include <bpf/libbpf.h>
> > > > > >>> -   int main(void)
> > > > > >>> -   {
> > > > > >>> -     bpf_object__destroy_skeleton(NULL);
> > > > > >>> -     return 0;
> > > > > >>> -   }''', dependencies: libbpf)
> > > > > >>> -  libbpf = not_found
> > > > > >>> -  if get_option('bpf').enabled()
> > > > > >>> -    error('libbpf skeleton test failed')
> > > > > >>> -  else
> > > > > >>> -    warning('libbpf skeleton test failed, disabling')
> > > > > >>> +libbpf = not_found
> > > > > >>> +if targetos == 'linux'
> > > > > >>> +  libbpf = dependency('libbpf', required: get_option('bpf'), method: 'pkg-config')
> > > > > >>> +  if libbpf.found() and not cc.links('''
> > > > > >>> +    #include <bpf/libbpf.h>
> > > > > >>> +    int main(void)
> > > > > >>> +    {
> > > > > >>> +      bpf_object__destroy_skeleton(NULL);
> > > > > >>
> > > > > >> Do we need to test whether the bpf can do mmap() here?
> > > > > >>
> > > > > >> Thanks
> > > > > >>
> > > > > >>
> > > > > >>> +      return 0;
> > > > > >>> +    }''', dependencies: libbpf)
> > > > > >>> +    libbpf = not_found
> > > > > >>> +    if get_option('bpf').enabled()
> > > > > >>> +      error('libbpf skeleton test failed')
> > > > > >>> +    else
> > > > > >>> +      warning('libbpf skeleton test failed, disabling')
> > > > > >>> +    endif
> > > > > >>>      endif
> > > > > >>>    endif
> > > > > >>>
> > > > > >>> @@ -2423,6 +2426,14 @@ if have_tools
> > > > > >>>                   dependencies: [authz, crypto, io, qom, qemuutil,
> > > > > >>>                                  libcap_ng, mpathpersist],
> > > > > >>>                   install: true)
> > > > > >>> +
> > > > > >>> +    if libbpf.found()
> > > > > >>> +        executable('qemu-ebpf-rss-helper', files(
> > > > > >>> +                   'ebpf/qemu-ebpf-rss-helper.c', 'ebpf/ebpf_rss.c'),
> > > > > >>> +                   dependencies: [qemuutil, libbpf, glib],
> > > > > >>> +                   install: true,
> > > > > >>> +                   install_dir: get_option('libexecdir'))
> > > > > >>> +    endif
> > > > > >>>      endif
> > > > > >>>
> > > > > >>>      if 'CONFIG_IVSHMEM' in config_host
> > > > >
> > > >
> > >
> >
>
Jason Wang Sept. 10, 2021, 1:37 a.m. UTC | #13
On Fri, Sep 10, 2021 at 7:44 AM Yuri Benditovich
<yuri.benditovich@daynix.com> wrote:
>
> On Thu, Sep 9, 2021 at 4:16 AM Jason Wang <jasowang@redhat.com> wrote:
> >
> > On Thu, Sep 9, 2021 at 8:00 AM Yuri Benditovich
> > <yuri.benditovich@daynix.com> wrote:
> > >
> > > On Wed, Sep 8, 2021 at 6:45 AM Jason Wang <jasowang@redhat.com> wrote:
> > > >
> > > > On Tue, Sep 7, 2021 at 6:40 PM Yuri Benditovich
> > > > <yuri.benditovich@daynix.com> wrote:
> > > > >
> > > > > On Wed, Sep 1, 2021 at 9:42 AM Jason Wang <jasowang@redhat.com> wrote:
> > > > > >
> > > > > >
> > > > > > 在 2021/8/31 上午1:07, Yuri Benditovich 写道:
> > > > > > > On Fri, Aug 20, 2021 at 6:41 AM Jason Wang <jasowang@redhat.com> wrote:
> > > > > > >>
> > > > > > >> 在 2021/7/13 下午11:37, Andrew Melnychenko 写道:
> > > > > > >>> Helper program. Loads eBPF RSS program and maps and passes them through unix socket.
> > > > > > >>> Libvirt may launch this helper and pass eBPF fds to qemu virtio-net.
> > > > > > >>
> > > > > > >> I wonder if this can be done as helper for TAP/bridge.
> > > > > > >>
> > > > > > >> E.g it's the qemu to launch those helper with set-uid.
> > > > > > >>
> > > > > > >> Then libvirt won't even need to care about that?
> > > > > > >>
> > > > > > > There are pros and cons for such a solution with set-uid.
> > > > > > >  From my point of view one of the cons is that set-uid is efficient
> > > > > > > only at install time so the coexistence of different qemu builds (and
> > > > > > > different helpers for each one) is kind of problematic.
> > > > > > > With the current solution this does not present any problem: the
> > > > > > > developer can have several different builds, each one automatically
> > > > > > > has its own helper and there is no conflict between these builds and
> > > > > > > between these builds and installed qemu package. Changing the
> > > > > > > 'emulator' in the libvirt profile automatically brings the proper
> > > > > > > helper to work.
> > > > > >
> > > > > >
> > > > > > I'm not sure I get you here. We can still have default/sample helper to
> > > > > > make sure it works for different builds.
> > > > > >
> > > > > > If we can avoid the involvement of libvirt, that would be better.
> > > > >
> > > > > Hi Jason,
> > > > >
> > > > > Indeed I did not get the idea, can you please explain it in more
> > > > > details (as detailed as possible to avoid future misunderstanding),
> > > > > especially how exactly we can use the set-uid and what is the 'default' helper.
> > > > > We also would prefer to do everything from qemu but we do not see how
> > > > > we can do that.
> > > >
> > > >
> > > Some more questions to understand the idea better:
> > > > Something like:
> > > >
> > > > 1) -netdev tap,rss_helper=/path/to/name
> > >
> > > So, on each editing of 'emulator' in the xml  the helper path should
> > > be set manually or be default?
> >
> > It could done manually, or we can have a default path.
> >
> > >
> > > > 2) having a sample/default helper implemented in Qemu
> > >
> > > Does it mean the default helper is the code in the qemu (without
> > > running additional executable, like it does today)
> >
> > Yes.
> If the "default helper" is just a keyword and it is like what we have
> today (i.e. part of qemu)

Yes, it's something like "-netdev tap,script=no"

> it can't work under libvirt and should never
> be used by libvirt.

Any reason for this?

> >
> >  or this is qemu
> > > itself with dedicated command line?
> > > As far as I remember Daniel had strong objections of ever running qemu
> > > with capabilities
> >
> > Qemu won't run with capabilities but the helper.
> So under libvirt the helper is always separate executable and not
> "default helper"

That's fine, we can ship the helper as an independent package I think.

>
> >
> > >
> > > > 3) we can introduce something special path like "default", then if
> > > > -netdev tap,rss_helper="default" is specified, qemu will use the
> > > > sample helper
> > >
> > > Probably this is not so important but the rss helper and rss in
> > > general has no relation to netdev, much more they are related to
> > > virtio-net
> >
> > So I think the reason for this is that we currently only support
> > eBPF/RSS for tap.
>
> This is just because only tap supports respective ioctls.
>
> >
> > >
> > > >
> > > > So we have:
> > > > 1) set set-uid for the helper
> > > Who and when does set-uid to the helper binary? Only installer or
> > > libvirt can do that, correct?
> >
> > Yes, it could be done the installer, or other system provision tools.
>
> So this changes the rule of the game: currently libvirt runs helpers
> that require privileges and qemu runs helpers that do not require any
> privileges (like TPM).

I don't understand here, if the helper doesn't need any privilege, why
not do that in qemu itself?

> If we follow your suggestion - only for RSS we will create the helper
> that must be used with set-uid.
> Who are stakeholders we need to have a consensus with?

I think we need input from the libvirt folks. Daniel, any idea on this?

Thanks

>
> >
> > >
> > > > 2) libvirt may just choose to launch the default helper
> > > All this discussion is to avoid launching the helper from libvirt, correct?
> >
> > Sorry, it's a typo. I meant, libvirt launch qemu, and then qemu will
> > launch the helper.
> >
> > Thanks
> >
> > >
> > > >
> > > > >
> > > > > Our main points (what should be addressed):
> > > > > - qemu should be able to load ebpf and use the maps when it runs from
> > > > > libvirt (without special caps) and standalone (with caps)
> > > >
> > > > This is solved by leaving the privileged operations to the helper with set-uid.
> > > >
> > > > > - it is possible that there are different qemu builds on the machine,
> > > > > one of them might be installed, their ebpf's might be different and
> > > > > the interface between qemu and ebpf (exact content of maps and number
> > > > > of maps)
> > > >
> > > > We can use different helpers in this way.
> > > >
> > > > > - qemu configures the RSS dynamically according to the commands
> > > > > provided by the guest
> > > >
> > > > Consider we decided to use mmap() based maps, this is not an issue.
> > > >
> > > > Or am I missing something?
> > > >
> > > > Thanks
> > > >
> > > > >
> > > > > Thanks in advance
> > > > > Yuri
> > > > >
> > > > > >
> > > > > > Thanks
> > > > > >
> > > > > >
> > > > > > >
> > > > > > >>> Also, libbpf dependency now exclusively for Linux.
> > > > > > >>> Libbpf is used for eBPF RSS steering, which is supported only by Linux TAP.
> > > > > > >>> There is no reason yet to build eBPF loader and helper for non Linux systems,
> > > > > > >>> even if libbpf is present.
> > > > > > >>>
> > > > > > >>> Signed-off-by: Andrew Melnychenko <andrew@daynix.com>
> > > > > > >>> ---
> > > > > > >>>    ebpf/qemu-ebpf-rss-helper.c | 130 ++++++++++++++++++++++++++++++++++++
> > > > > > >>>    meson.build                 |  37 ++++++----
> > > > > > >>>    2 files changed, 154 insertions(+), 13 deletions(-)
> > > > > > >>>    create mode 100644 ebpf/qemu-ebpf-rss-helper.c
> > > > > > >>>
> > > > > > >>> diff --git a/ebpf/qemu-ebpf-rss-helper.c b/ebpf/qemu-ebpf-rss-helper.c
> > > > > > >>> new file mode 100644
> > > > > > >>> index 0000000000..fe68758f57
> > > > > > >>> --- /dev/null
> > > > > > >>> +++ b/ebpf/qemu-ebpf-rss-helper.c
> > > > > > >>> @@ -0,0 +1,130 @@
> > > > > > >>> +/*
> > > > > > >>> + * eBPF RSS Helper
> > > > > > >>> + *
> > > > > > >>> + * Developed by Daynix Computing LTD (http://www.daynix.com)
> > > > > > >>> + *
> > > > > > >>> + * Authors:
> > > > > > >>> + *  Andrew Melnychenko <andrew@daynix.com>
> > > > > > >>> + *
> > > > > > >>> + * This work is licensed under the terms of the GNU GPL, version 2.  See
> > > > > > >>> + * the COPYING file in the top-level directory.
> > > > > > >>> + *
> > > > > > >>> + * Description: This is helper program for libvirtd.
> > > > > > >>> + *              It loads eBPF RSS program and passes fds through unix socket.
> > > > > > >>> + *              Built by meson, target - 'qemu-ebpf-rss-helper'.
> > > > > > >>> + */
> > > > > > >>> +
> > > > > > >>> +#include <stdio.h>
> > > > > > >>> +#include <stdint.h>
> > > > > > >>> +#include <stdlib.h>
> > > > > > >>> +#include <stdbool.h>
> > > > > > >>> +#include <getopt.h>
> > > > > > >>> +#include <memory.h>
> > > > > > >>> +#include <errno.h>
> > > > > > >>> +#include <sys/socket.h>
> > > > > > >>> +
> > > > > > >>> +#include "ebpf_rss.h"
> > > > > > >>> +
> > > > > > >>> +#include "qemu-helper-stamp.h"
> > > > > > >>> +
> > > > > > >>> +void QEMU_HELPER_STAMP(void) {}
> > > > > > >>> +
> > > > > > >>> +static int send_fds(int socket, int *fds, int n)
> > > > > > >>> +{
> > > > > > >>> +    struct msghdr msg = {};
> > > > > > >>> +    struct cmsghdr *cmsg = NULL;
> > > > > > >>> +    char buf[CMSG_SPACE(n * sizeof(int))];
> > > > > > >>> +    char dummy_buffer = 0;
> > > > > > >>> +    struct iovec io = { .iov_base = &dummy_buffer,
> > > > > > >>> +                        .iov_len = sizeof(dummy_buffer) };
> > > > > > >>> +
> > > > > > >>> +    memset(buf, 0, sizeof(buf));
> > > > > > >>> +
> > > > > > >>> +    msg.msg_iov = &io;
> > > > > > >>> +    msg.msg_iovlen = 1;
> > > > > > >>> +    msg.msg_control = buf;
> > > > > > >>> +    msg.msg_controllen = sizeof(buf);
> > > > > > >>> +
> > > > > > >>> +    cmsg = CMSG_FIRSTHDR(&msg);
> > > > > > >>> +    cmsg->cmsg_level = SOL_SOCKET;
> > > > > > >>> +    cmsg->cmsg_type = SCM_RIGHTS;
> > > > > > >>> +    cmsg->cmsg_len = CMSG_LEN(n * sizeof(int));
> > > > > > >>> +
> > > > > > >>> +    memcpy(CMSG_DATA(cmsg), fds, n * sizeof(int));
> > > > > > >>> +
> > > > > > >>> +    return sendmsg(socket, &msg, 0);
> > > > > > >>> +}
> > > > > > >>> +
> > > > > > >>> +static void print_help_and_exit(const char *prog, int exitcode)
> > > > > > >>> +{
> > > > > > >>> +    fprintf(stderr, "%s - load eBPF RSS program for qemu and pass eBPF fds"
> > > > > > >>> +            " through unix socket.\n", prog);
> > > > > > >>> +    fprintf(stderr, "\t--fd <num>, -f <num> - unix socket file descriptor"
> > > > > > >>> +            " used to pass eBPF fds.\n");
> > > > > > >>> +    fprintf(stderr, "\t--help, -h - this help.\n");
> > > > > > >>> +    exit(exitcode);
> > > > > > >>> +}
> > > > > > >>> +
> > > > > > >>> +int main(int argc, char **argv)
> > > > > > >>> +{
> > > > > > >>> +    char *fd_string = NULL;
> > > > > > >>> +    int unix_fd = 0;
> > > > > > >>> +    struct EBPFRSSContext ctx = {};
> > > > > > >>> +    int fds[EBPF_RSS_MAX_FDS] = {};
> > > > > > >>> +    int ret = -1;
> > > > > > >>> +
> > > > > > >>> +    for (;;) {
> > > > > > >>> +        int c;
> > > > > > >>> +        static struct option long_options[] = {
> > > > > > >>> +                {"help",  no_argument, 0, 'h'},
> > > > > > >>> +                {"fd",  required_argument, 0, 'f'},
> > > > > > >>> +                {0, 0, 0, 0}
> > > > > > >>> +        };
> > > > > > >>> +        c = getopt_long(argc, argv, "hf:",
> > > > > > >>> +                long_options, NULL);
> > > > > > >>> +
> > > > > > >>> +        if (c == -1) {
> > > > > > >>> +            break;
> > > > > > >>> +        }
> > > > > > >>> +
> > > > > > >>> +        switch (c) {
> > > > > > >>> +        case 'f':
> > > > > > >>> +            fd_string = optarg;
> > > > > > >>> +            break;
> > > > > > >>> +        case 'h':
> > > > > > >>> +        default:
> > > > > > >>> +            print_help_and_exit(argv[0],
> > > > > > >>> +                    c == 'h' ? EXIT_SUCCESS : EXIT_FAILURE);
> > > > > > >>> +        }
> > > > > > >>> +    }
> > > > > > >>> +
> > > > > > >>> +    if (!fd_string) {
> > > > > > >>> +        fprintf(stderr, "Unix file descriptor not present.\n");
> > > > > > >>> +        print_help_and_exit(argv[0], EXIT_FAILURE);
> > > > > > >>> +    }
> > > > > > >>> +
> > > > > > >>> +    unix_fd = atoi(fd_string);
> > > > > > >>> +
> > > > > > >>> +    if (!unix_fd) {
> > > > > > >>> +        fprintf(stderr, "Unix file descriptor is invalid.\n");
> > > > > > >>> +        return EXIT_FAILURE;
> > > > > > >>> +    }
> > > > > > >>> +
> > > > > > >>> +    ebpf_rss_init(&ctx);
> > > > > > >>> +    if (!ebpf_rss_load(&ctx)) {
> > > > > > >>> +        fprintf(stderr, "Can't load ebpf.\n");
> > > > > > >>> +        return EXIT_FAILURE;
> > > > > > >>> +    }
> > > > > > >>> +    fds[0] = ctx.program_fd;
> > > > > > >>> +    fds[1] = ctx.map_configuration;
> > > > > > >>> +
> > > > > > >>> +    ret = send_fds(unix_fd, fds, EBPF_RSS_MAX_FDS);
> > > > > > >>> +    if (ret < 0) {
> > > > > > >>> +        fprintf(stderr, "Issue while sending fds: %s.\n", strerror(errno));
> > > > > > >>> +    }
> > > > > > >>> +
> > > > > > >>> +    ebpf_rss_unload(&ctx);
> > > > > > >>> +
> > > > > > >>> +    return ret < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
> > > > > > >>> +}
> > > > > > >>> +
> > > > > > >>> diff --git a/meson.build b/meson.build
> > > > > > >>> index 257e51d91b..913aa1fee5 100644
> > > > > > >>> --- a/meson.build
> > > > > > >>> +++ b/meson.build
> > > > > > >>> @@ -1033,19 +1033,22 @@ if not get_option('fuse_lseek').disabled()
> > > > > > >>>    endif
> > > > > > >>>
> > > > > > >>>    # libbpf
> > > > > > >>> -libbpf = dependency('libbpf', required: get_option('bpf'), method: 'pkg-config')
> > > > > > >>> -if libbpf.found() and not cc.links('''
> > > > > > >>> -   #include <bpf/libbpf.h>
> > > > > > >>> -   int main(void)
> > > > > > >>> -   {
> > > > > > >>> -     bpf_object__destroy_skeleton(NULL);
> > > > > > >>> -     return 0;
> > > > > > >>> -   }''', dependencies: libbpf)
> > > > > > >>> -  libbpf = not_found
> > > > > > >>> -  if get_option('bpf').enabled()
> > > > > > >>> -    error('libbpf skeleton test failed')
> > > > > > >>> -  else
> > > > > > >>> -    warning('libbpf skeleton test failed, disabling')
> > > > > > >>> +libbpf = not_found
> > > > > > >>> +if targetos == 'linux'
> > > > > > >>> +  libbpf = dependency('libbpf', required: get_option('bpf'), method: 'pkg-config')
> > > > > > >>> +  if libbpf.found() and not cc.links('''
> > > > > > >>> +    #include <bpf/libbpf.h>
> > > > > > >>> +    int main(void)
> > > > > > >>> +    {
> > > > > > >>> +      bpf_object__destroy_skeleton(NULL);
> > > > > > >>
> > > > > > >> Do we need to test whether the bpf can do mmap() here?
> > > > > > >>
> > > > > > >> Thanks
> > > > > > >>
> > > > > > >>
> > > > > > >>> +      return 0;
> > > > > > >>> +    }''', dependencies: libbpf)
> > > > > > >>> +    libbpf = not_found
> > > > > > >>> +    if get_option('bpf').enabled()
> > > > > > >>> +      error('libbpf skeleton test failed')
> > > > > > >>> +    else
> > > > > > >>> +      warning('libbpf skeleton test failed, disabling')
> > > > > > >>> +    endif
> > > > > > >>>      endif
> > > > > > >>>    endif
> > > > > > >>>
> > > > > > >>> @@ -2423,6 +2426,14 @@ if have_tools
> > > > > > >>>                   dependencies: [authz, crypto, io, qom, qemuutil,
> > > > > > >>>                                  libcap_ng, mpathpersist],
> > > > > > >>>                   install: true)
> > > > > > >>> +
> > > > > > >>> +    if libbpf.found()
> > > > > > >>> +        executable('qemu-ebpf-rss-helper', files(
> > > > > > >>> +                   'ebpf/qemu-ebpf-rss-helper.c', 'ebpf/ebpf_rss.c'),
> > > > > > >>> +                   dependencies: [qemuutil, libbpf, glib],
> > > > > > >>> +                   install: true,
> > > > > > >>> +                   install_dir: get_option('libexecdir'))
> > > > > > >>> +    endif
> > > > > > >>>      endif
> > > > > > >>>
> > > > > > >>>      if 'CONFIG_IVSHMEM' in config_host
> > > > > >
> > > > >
> > > >
> > >
> >
>
diff mbox series

Patch

diff --git a/ebpf/qemu-ebpf-rss-helper.c b/ebpf/qemu-ebpf-rss-helper.c
new file mode 100644
index 0000000000..fe68758f57
--- /dev/null
+++ b/ebpf/qemu-ebpf-rss-helper.c
@@ -0,0 +1,130 @@ 
+/*
+ * eBPF RSS Helper
+ *
+ * Developed by Daynix Computing LTD (http://www.daynix.com)
+ *
+ * Authors:
+ *  Andrew Melnychenko <andrew@daynix.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ * Description: This is a helper program for libvirtd.
+ *              It loads the eBPF RSS program and passes its fds through a
+ *              unix socket.
+ *              Built by meson, target - 'qemu-ebpf-rss-helper'.
+ */
+
+#include <errno.h>
+#include <getopt.h>
+#include <limits.h>
+#include <memory.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/socket.h>
+
+#include "ebpf_rss.h"
+
+#include "qemu-helper-stamp.h"
+
+void QEMU_HELPER_STAMP(void) {}
+
+/*
+ * Send @n file descriptors from @fds over the unix socket @socket as one
+ * SCM_RIGHTS control message.
+ *
+ * A single dummy data byte is sent together with the control message;
+ * unix(7) notes that ancillary data on a stream socket should be
+ * accompanied by at least one byte of real data.
+ *
+ * Returns the sendmsg() result (the number of data bytes sent, i.e. 1) on
+ * success.  Returns -1 with errno set on failure; errno is EINVAL when
+ * @fds is NULL or @n is outside 1..SEND_FDS_MAX.
+ */
+static int send_fds(int socket, int *fds, int n)
+{
+    /* unix(7): Linux limits one SCM_RIGHTS message to SCM_MAX_FD (253) fds. */
+    enum { SEND_FDS_MAX = 253 };
+    /*
+     * The union guarantees that the control buffer is aligned suitably
+     * for struct cmsghdr (the idiom shown in cmsg(3)); a fixed upper
+     * bound replaces the caller-sized variable-length array.
+     */
+    union {
+        char buf[CMSG_SPACE(SEND_FDS_MAX * sizeof(int))];
+        struct cmsghdr align;
+    } control;
+    struct msghdr msg = {};
+    struct cmsghdr *cmsg = NULL;
+    char dummy_buffer = 0;
+    struct iovec io = { .iov_base = &dummy_buffer,
+                        .iov_len = sizeof(dummy_buffer) };
+    size_t fds_len;
+    ssize_t sent;
+
+    if (!fds || n <= 0 || n > SEND_FDS_MAX) {
+        errno = EINVAL;
+        return -1;
+    }
+    fds_len = (size_t)n * sizeof(int);
+
+    memset(&control, 0, sizeof(control));
+
+    msg.msg_iov = &io;
+    msg.msg_iovlen = 1;
+    msg.msg_control = control.buf;
+    /* Only the part of the buffer that holds @n descriptors is used. */
+    msg.msg_controllen = CMSG_SPACE(fds_len);
+
+    cmsg = CMSG_FIRSTHDR(&msg);
+    cmsg->cmsg_level = SOL_SOCKET;
+    cmsg->cmsg_type = SCM_RIGHTS;
+    cmsg->cmsg_len = CMSG_LEN(fds_len);
+
+    memcpy(CMSG_DATA(cmsg), fds, fds_len);
+
+    /* A signal arriving before any data was sent is not a real failure. */
+    do {
+        sent = sendmsg(socket, &msg, 0);
+    } while (sent < 0 && errno == EINTR);
+
+    return (int)sent;
+}
+
+/*
+ * Write the usage text for @prog to stderr, then terminate the process
+ * with @exitcode.  This function does not return.
+ */
+static void print_help_and_exit(const char *prog, int exitcode)
+{
+    static const char fd_usage[] =
+        "\t--fd <num>, -f <num> - unix socket file descriptor"
+        " used to pass eBPF fds.\n";
+    static const char help_usage[] = "\t--help, -h - this help.\n";
+
+    /* Only the first line needs formatting; the rest is fixed text. */
+    fprintf(stderr, "%s - load eBPF RSS program for qemu and pass eBPF fds"
+            " through unix socket.\n", prog);
+    fputs(fd_usage, stderr);
+    fputs(help_usage, stderr);
+
+    exit(exitcode);
+}
+
+/*
+ * Parse @str as a decimal file descriptor number.
+ *
+ * Returns the descriptor (>= 0) on success.  Returns -1 when @str is
+ * empty, contains trailing characters, is negative or does not fit in
+ * an int.
+ */
+static int parse_unix_fd(const char *str)
+{
+    char *end = NULL;
+    long value;
+
+    errno = 0;
+    value = strtol(str, &end, 10);
+    if (errno != 0 || end == str || *end != '\0') {
+        return -1;
+    }
+    if (value < 0 || value > INT_MAX) {
+        return -1;
+    }
+
+    return (int)value;
+}
+
+/*
+ * Entry point of the helper.
+ *
+ * Options:
+ *   --fd <num>, -f <num>  descriptor of the unix socket used to pass
+ *                         the eBPF fds (required);
+ *   --help, -h            print usage and exit successfully.
+ *
+ * Loads the eBPF RSS program through ebpf_rss_load() and sends
+ * ctx.program_fd and ctx.map_configuration over the given socket.
+ *
+ * Returns EXIT_SUCCESS when the descriptors were sent, EXIT_FAILURE on
+ * a bad command line, a load failure or a send failure.
+ */
+int main(int argc, char **argv)
+{
+    char *fd_string = NULL;
+    int unix_fd = -1;
+    struct EBPFRSSContext ctx = {};
+    int fds[EBPF_RSS_MAX_FDS] = {};
+    int ret = -1;
+
+    for (;;) {
+        int c;
+        static const struct option long_options[] = {
+                {"help",  no_argument, 0, 'h'},
+                {"fd",  required_argument, 0, 'f'},
+                {0, 0, 0, 0}
+        };
+        c = getopt_long(argc, argv, "hf:",
+                long_options, NULL);
+
+        if (c == -1) {
+            break;
+        }
+
+        switch (c) {
+        case 'f':
+            fd_string = optarg;
+            break;
+        case 'h':
+        default:
+            /* Unknown options get the same usage text, but a failure code. */
+            print_help_and_exit(argv[0],
+                    c == 'h' ? EXIT_SUCCESS : EXIT_FAILURE);
+        }
+    }
+
+    if (!fd_string) {
+        fprintf(stderr, "Unix file descriptor not present.\n");
+        print_help_and_exit(argv[0], EXIT_FAILURE);
+    }
+
+    /*
+     * atoi() cannot tell "0" from a parse error and silently accepts
+     * trailing garbage and negative numbers, so the value is parsed
+     * strictly.  Descriptor 0 is a valid descriptor and is accepted.
+     */
+    unix_fd = parse_unix_fd(fd_string);
+    if (unix_fd < 0) {
+        fprintf(stderr, "Unix file descriptor is invalid.\n");
+        return EXIT_FAILURE;
+    }
+
+    ebpf_rss_init(&ctx);
+    if (!ebpf_rss_load(&ctx)) {
+        fprintf(stderr, "Can't load ebpf.\n");
+        return EXIT_FAILURE;
+    }
+    /*
+     * NOTE(review): only the first two entries are filled in.  If
+     * EBPF_RSS_MAX_FDS is larger than 2, the remaining zero-initialized
+     * entries would pass descriptor 0 to the peer - confirm against
+     * ebpf_rss.h.
+     */
+    fds[0] = ctx.program_fd;
+    fds[1] = ctx.map_configuration;
+
+    ret = send_fds(unix_fd, fds, EBPF_RSS_MAX_FDS);
+    if (ret < 0) {
+        fprintf(stderr, "Issue while sending fds: %s.\n", strerror(errno));
+    }
+
+    /* Unload regardless of the send result; the exit code reports it. */
+    ebpf_rss_unload(&ctx);
+
+    return ret < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
+}
+
diff --git a/meson.build b/meson.build
index 257e51d91b..913aa1fee5 100644
--- a/meson.build
+++ b/meson.build
@@ -1033,19 +1033,22 @@  if not get_option('fuse_lseek').disabled()
 endif
 
 # libbpf
-libbpf = dependency('libbpf', required: get_option('bpf'), method: 'pkg-config')
-if libbpf.found() and not cc.links('''
-   #include <bpf/libbpf.h>
-   int main(void)
-   {
-     bpf_object__destroy_skeleton(NULL);
-     return 0;
-   }''', dependencies: libbpf)
-  libbpf = not_found
-  if get_option('bpf').enabled()
-    error('libbpf skeleton test failed')
-  else
-    warning('libbpf skeleton test failed, disabling')
+# libbpf is looked up only when targeting Linux; on every other target it
+# stays not_found.
+libbpf = not_found
+if targetos == 'linux'
+  libbpf = dependency('libbpf', required: get_option('bpf'), method: 'pkg-config')
+  # Link a tiny program that calls bpf_object__destroy_skeleton(); if that
+  # fails, libbpf is treated as missing (a hard error when the 'bpf' option
+  # is enabled, a warning otherwise).
+  if libbpf.found() and not cc.links('''
+    #include <bpf/libbpf.h>
+    int main(void)
+    {
+      bpf_object__destroy_skeleton(NULL);
+      return 0;
+    }''', dependencies: libbpf)
+    libbpf = not_found
+    if get_option('bpf').enabled()
+      error('libbpf skeleton test failed')
+    else
+      warning('libbpf skeleton test failed, disabling')
+    endif
   endif
 endif
 
@@ -2423,6 +2426,14 @@  if have_tools
                dependencies: [authz, crypto, io, qom, qemuutil,
                               libcap_ng, mpathpersist],
                install: true)
+
+    # The eBPF RSS helper is built only when libbpf was found; it is
+    # installed into libexecdir.
+    if libbpf.found()
+        executable('qemu-ebpf-rss-helper', files(
+                   'ebpf/qemu-ebpf-rss-helper.c', 'ebpf/ebpf_rss.c'),
+                   dependencies: [qemuutil, libbpf, glib],
+                   install: true,
+                   install_dir: get_option('libexecdir'))
+    endif
   endif
 
   if 'CONFIG_IVSHMEM' in config_host