Message ID | 20190320173332.18105-4-alban@kinvolk.io |
---|---|
State | Changes Requested |
Delegated to: | BPF Maintainers |
Headers | show |
Series | [bpf-next,v1,1/7] tools: bpftool: fix infinite loop | expand |
2019-03-20 18:33 UTC+0100 ~ Alban Crequy <alban.crequy@gmail.com> > From: Alban Crequy <alban@kinvolk.io> > > The map exec commands allows to open an existing map and pass the file > descriptor to a child process. This enables applications to use an > existing BPF map even when they don't support bpffs. > > Example of usage: > # bpftool map exec pinned /sys/fs/bpf/foo fd 99 cmd -- readlink /proc/self/fd/99 > anon_inode:bpf-map > > Documentation and bash completion updated as well. > > Signed-off-by: Alban Crequy <alban@kinvolk.io> > --- > .../bpf/bpftool/Documentation/bpftool-map.rst | 11 ++++ > tools/bpf/bpftool/bash-completion/bpftool | 22 +++++++- > tools/bpf/bpftool/map.c | 53 +++++++++++++++++++ > 3 files changed, 85 insertions(+), 1 deletion(-) > > diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst > index b685641bfd74..dfd8352fa453 100644 > --- a/tools/bpf/bpftool/Documentation/bpftool-map.rst > +++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst > @@ -30,6 +30,7 @@ MAP COMMANDS > | **bpftool** **map getnext** *MAP* [**key** *DATA*] > | **bpftool** **map delete** *MAP* **key** *DATA* > | **bpftool** **map pin** *MAP* *FILE* > +| **bpftool** **map exec** *MAP* **fd** *FD* **cmd** -- *CMD* > | **bpftool** **map event_pipe** *MAP* [**cpu** *N* **index** *M*] > | **bpftool** **map peek** *MAP* > | **bpftool** **map push** *MAP* **value** *VALUE* > @@ -101,6 +102,10 @@ DESCRIPTION > contain a dot character ('.'), which is reserved for future > extensions of *bpffs*. > > + **bpftool map exec** *MAP* **fd** *FD* **cmd** -- *CMD* > + Load map and exec an external command, passing the file > + descriptor. > + Please use tabs for indent. Could you please add a line to explain that the process spawned get the the link to the map in the file descriptor passed as *FD*? I'm being pedantic, I know it's not that hard to understand with the examples, just trying to make things as clear as possible for users reading the page. I have nothing against the double dash in the examples or the commit log, but putting it in the description in the command itself makes it look like it is some kind of mandatory syntax. Would you consider removing the occurrences above? We don't really "exec" the map... Just thinking... Would another keyword like "pass" or "passfd" be more relevant? > **bpftool** **map event_pipe** *MAP* [**cpu** *N* **index** *M*] > Read events from a BPF_MAP_TYPE_PERF_EVENT_ARRAY map. > > @@ -254,6 +259,12 @@ would be lost as soon as bpftool exits). > key: 00 00 00 00 value: 22 02 00 00 > Found 1 element > > +**# bpftool map exec pinned /sys/fs/bpf/foo fd 99 cmd -- readlink /proc/self/fd/99** > + > +:: > + > + anon_inode:bpf-map > + > SEE ALSO > ======== > **bpf**\ (2), > diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool > index 9e37de8bb227..63cd53c4d3a7 100644 > --- a/tools/bpf/bpftool/bash-completion/bpftool > +++ b/tools/bpf/bpftool/bash-completion/bpftool > @@ -583,6 +583,26 @@ _bpftool() > fi > return 0 > ;; > + exec) > + case $prev in > + $command) > + COMPREPLY=( $( compgen -W "$MAP_TYPE" -- "$cur" ) ) > + return 0 > + ;; > + fd) > + return 0 > + ;; > + cmd) > + COMPREPLY=( $(compgen -c -- "$cur" ) ) > + return 0 > + ;; > + *) > + _bpftool_once_attr 'fd' > + _bpftool_once_attr 'cmd' > + return 0 > + ;; > + esac > + ;; > event_pipe) > case $prev in > $command) > @@ -610,7 +630,7 @@ _bpftool() > [[ $prev == $object ]] && \ > COMPREPLY=( $( compgen -W 'delete dump getnext help \ > lookup pin event_pipe show list update create \ > - peek push enqueue pop dequeue' -- \ > + peek push enqueue pop dequeue exec' -- \ > "$cur" ) ) > ;; > esac > diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c > index a576f2a019be..768497364cee 100644 > --- a/tools/bpf/bpftool/map.c > +++ b/tools/bpf/bpftool/map.c > @@ -883,6 +883,58 @@ static int do_update(int argc, char **argv) > return err; > } > > +static int do_exec(int argc, char **argv) > +{ > + struct bpf_map_info info = {}; > + __u32 len = sizeof(info); > + int fd, ret; > + __u32 outfd = 0; Reverse-Christmas tree. > + > + if (argc < 2) > + usage(); > + > + fd = map_parse_fd_and_info(&argc, &argv, &info, &len); > + if (fd < 0) > + return -1; > + > + while (argc) { > + if (is_prefix(*argv, "fd")) { > + if (parse_u32_arg(&argc, &argv, &outfd, > + "out fd")) > + return -1; > + } else if (is_prefix(*argv, "cmd")){ > + NEXT_ARG(); > + if (!REQ_ARGS(1)) > + return -1; > + > + break; > + } else { > + p_err("unknown arg %s", *argv); > + return -1; > + } > + } > + > + if (outfd == 0) { > + p_err("invalid fd"); > + return -1; > + } > + if (*argv == NULL) { > + p_err("empty command"); > + return -1; > + } > + > + Spurious blank line. > + ret = dup2(fd, (int)outfd); > + if (ret == -1) { > + p_err("dup2 failed: %s", strerror(errno)); > + return -1; > + } > + > + execvp(argv[0], argv); > + p_err("execvp failed: %s", strerror(errno)); > + return -1; > +} > + > static void print_key_value(struct bpf_map_info *info, void *key, > void *value) > { > @@ -1355,6 +1407,7 @@ static const struct cmd cmds[] = { > { "enqueue", do_update }, > { "pop", do_pop_dequeue }, > { "dequeue", do_pop_dequeue }, > + { "exec", do_exec }, > { 0 } > }; > > Please also add the new command in the interactive help (do_help()). By the way, I like this new command a lot!
On Wed, 20 Mar 2019 18:33:29 +0100, Alban Crequy wrote: > From: Alban Crequy <alban@kinvolk.io> > > The map exec commands allows to open an existing map and pass the file > descriptor to a child process. This enables applications to use an > existing BPF map even when they don't support bpffs. > > Example of usage: > # bpftool map exec pinned /sys/fs/bpf/foo fd 99 cmd -- readlink /proc/self/fd/99 > anon_inode:bpf-map Would you mind telling us a little more about the use for this feature? It seems fairly limited. If it's about probing objects (finding out if they are a map or a program) perhaps we can add a command just for that? (I guess bpftool -f isn't really the cleanest way of getting at that info.) > Documentation and bash completion updated as well. > > Signed-off-by: Alban Crequy <alban@kinvolk.io>
On Wed, Mar 20, 2019 at 10:23 PM Jakub Kicinski <jakub.kicinski@netronome.com> wrote: > > On Wed, 20 Mar 2019 18:33:29 +0100, Alban Crequy wrote: > > From: Alban Crequy <alban@kinvolk.io> > > > > The map exec commands allows to open an existing map and pass the file > > descriptor to a child process. This enables applications to use an > > existing BPF map even when they don't support bpffs. > > > > Example of usage: > > # bpftool map exec pinned /sys/fs/bpf/foo fd 99 cmd -- readlink /proc/self/fd/99 > > anon_inode:bpf-map > > Would you mind telling us a little more about the use for this feature? > It seems fairly limited. If it's about probing objects (finding out if > they are a map or a program) perhaps we can add a command just for that? I needed to know the name of the map too. I was preparing a demo based on python bcc tools (opensnoop) but with added feature that requires using a pinned map, created and maintained externally. At the moment, the python API for bcc does not support pinning or using external maps. Ideally, this should be added in the python API (some discussion on https://github.com/iovisor/bcc/issues/2223) but meanwhile, I use a workaround by executing bpftool from the python code. Arguably, my use case is a temporary hack until we have better support in python bcc. But other tools implements similar commands to pass file descriptors between processes: "ip netns exec" and "tc exec bpf". So I think it could be useful for other scripting use cases. In my demo, I used the two hacks: - if the pinned map fd is not given to the python script, re-execute itself with bpftool: os.execvp("bpftool", ["bpftool", "map", "exec", "pinned", pin_path, "fd", "90", "cmd", "--"] + sys.argv) - once we have the fd 90 (number specified above) of the pinned map in the python script, overwrite the empty fd created by bcc: os.dup2(90, 6) I call dup2() between the bpf map creation and the bpf program creation. To check which map fd to overwrite, I just call os.system("bpftool map show fd 6..."). Thanks a lot for the reviews. I'll need some time to address it (maybe a week or 2). > (I guess bpftool -f isn't really the cleanest way of getting at that > info.) > > > Documentation and bash completion updated as well. > > > > Signed-off-by: Alban Crequy <alban@kinvolk.io>
On Thu, 21 Mar 2019 10:57:32 +0100, Alban Crequy wrote: > On Wed, Mar 20, 2019 at 10:23 PM Jakub Kicinski > <jakub.kicinski@netronome.com> wrote: > > > > On Wed, 20 Mar 2019 18:33:29 +0100, Alban Crequy wrote: > > > From: Alban Crequy <alban@kinvolk.io> > > > > > > The map exec commands allows to open an existing map and pass the file > > > descriptor to a child process. This enables applications to use an > > > existing BPF map even when they don't support bpffs. > > > > > > Example of usage: > > > # bpftool map exec pinned /sys/fs/bpf/foo fd 99 cmd -- readlink /proc/self/fd/99 > > > anon_inode:bpf-map > > > > Would you mind telling us a little more about the use for this feature? > > It seems fairly limited. If it's about probing objects (finding out if > > they are a map or a program) perhaps we can add a command just for that? > > I needed to know the name of the map too. I was preparing a demo based > on python bcc tools (opensnoop) but with added feature that requires > using a pinned map, created and maintained externally. At the moment, > the python API for bcc does not support pinning or using external > maps. Ideally, this should be added in the python API (some discussion > on https://github.com/iovisor/bcc/issues/2223) but meanwhile, I use a > workaround by executing bpftool from the python code. > > Arguably, my use case is a temporary hack until we have better support > in python bcc. But other tools implements similar commands to pass > file descriptors between processes: "ip netns exec" and "tc exec bpf". > So I think it could be useful for other scripting use cases. The thing is the receiver of the FD has to be bpf-aware, because there isn't really much one can do with that file descriptor, in which case it's kind of strange that the receiver doesn't know how to open a pinned object.. > In my demo, I used the two hacks: > - if the pinned map fd is not given to the python script, re-execute > itself with bpftool: > os.execvp("bpftool", ["bpftool", "map", "exec", "pinned", pin_path, > "fd", "90", "cmd", "--"] + sys.argv) > - once we have the fd 90 (number specified above) of the pinned map in > the python script, overwrite the empty fd created by bcc: > os.dup2(90, 6) > I call dup2() between the bpf map creation and the bpf program > creation. To check which map fd to overwrite, I just call > os.system("bpftool map show fd 6..."). I see, thanks for the explanation. That does indeed seems like a hack. > Thanks a lot for the reviews. I'll need some time to address it (maybe > a week or 2). > > > (I guess bpftool -f isn't really the cleanest way of getting at that > > info.) > > > > > Documentation and bash completion updated as well. > > > > > > Signed-off-by: Alban Crequy <alban@kinvolk.io>
diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst index b685641bfd74..dfd8352fa453 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-map.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst @@ -30,6 +30,7 @@ MAP COMMANDS | **bpftool** **map getnext** *MAP* [**key** *DATA*] | **bpftool** **map delete** *MAP* **key** *DATA* | **bpftool** **map pin** *MAP* *FILE* +| **bpftool** **map exec** *MAP* **fd** *FD* **cmd** -- *CMD* | **bpftool** **map event_pipe** *MAP* [**cpu** *N* **index** *M*] | **bpftool** **map peek** *MAP* | **bpftool** **map push** *MAP* **value** *VALUE* @@ -101,6 +102,10 @@ DESCRIPTION contain a dot character ('.'), which is reserved for future extensions of *bpffs*. + **bpftool map exec** *MAP* **fd** *FD* **cmd** -- *CMD* + Load map and exec an external command, passing the file + descriptor. + **bpftool** **map event_pipe** *MAP* [**cpu** *N* **index** *M*] Read events from a BPF_MAP_TYPE_PERF_EVENT_ARRAY map. @@ -254,6 +259,12 @@ would be lost as soon as bpftool exits). key: 00 00 00 00 value: 22 02 00 00 Found 1 element +**# bpftool map exec pinned /sys/fs/bpf/foo fd 99 cmd -- readlink /proc/self/fd/99** + +:: + + anon_inode:bpf-map + SEE ALSO ======== **bpf**\ (2), diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index 9e37de8bb227..63cd53c4d3a7 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool @@ -583,6 +583,26 @@ _bpftool() fi return 0 ;; + exec) + case $prev in + $command) + COMPREPLY=( $( compgen -W "$MAP_TYPE" -- "$cur" ) ) + return 0 + ;; + fd) + return 0 + ;; + cmd) + COMPREPLY=( $(compgen -c -- "$cur" ) ) + return 0 + ;; + *) + _bpftool_once_attr 'fd' + _bpftool_once_attr 'cmd' + return 0 + ;; + esac + ;; event_pipe) case $prev in $command) @@ -610,7 +630,7 @@ _bpftool() [[ $prev == $object ]] && \ COMPREPLY=( $( compgen -W 'delete dump getnext help \ lookup pin event_pipe show list update create \ - peek push enqueue pop dequeue' -- \ + peek push enqueue pop dequeue exec' -- \ "$cur" ) ) ;; esac diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index a576f2a019be..768497364cee 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -883,6 +883,58 @@ static int do_update(int argc, char **argv) return err; } +static int do_exec(int argc, char **argv) +{ + struct bpf_map_info info = {}; + __u32 len = sizeof(info); + int fd, ret; + __u32 outfd = 0; + + if (argc < 2) + usage(); + + fd = map_parse_fd_and_info(&argc, &argv, &info, &len); + if (fd < 0) + return -1; + + while (argc) { + if (is_prefix(*argv, "fd")) { + if (parse_u32_arg(&argc, &argv, &outfd, + "out fd")) + return -1; + } else if (is_prefix(*argv, "cmd")){ + NEXT_ARG(); + if (!REQ_ARGS(1)) + return -1; + + break; + } else { + p_err("unknown arg %s", *argv); + return -1; + } + } + + if (outfd == 0) { + p_err("invalid fd"); + return -1; + } + if (*argv == NULL) { + p_err("empty command"); + return -1; + } + + + ret = dup2(fd, (int)outfd); + if (ret == -1) { + p_err("dup2 failed: %s", strerror(errno)); + return -1; + } + + execvp(argv[0], argv); + p_err("execvp failed: %s", strerror(errno)); + return -1; +} + static void print_key_value(struct bpf_map_info *info, void *key, void *value) { @@ -1355,6 +1407,7 @@ static const struct cmd cmds[] = { { "enqueue", do_update }, { "pop", do_pop_dequeue }, { "dequeue", do_pop_dequeue }, + { "exec", do_exec }, { 0 } };