Message ID | 159981835908.134722.4550898174324943652.stgit@toke.dk |
---|---|
State | Changes Requested |
Delegated to: | BPF Maintainers |
Headers | show |
Series | bpf: Support multi-attach for freplace programs | expand |
On Fri, Sep 11, 2020 at 3:01 AM Toke Høiland-Jørgensen <toke@redhat.com> wrote: > > From: Toke Høiland-Jørgensen <toke@redhat.com> > > This enables support for attaching freplace programs to multiple attach > points. It does this by amending UAPI for bpf_raw_tracepoint_open with a > target prog fd and btf ID pair that can be used to supply the new > attachment point. The target must be compatible with the target that was > supplied at program load time. > > The implementation reuses the checks that were factored out of > check_attach_btf_id() to ensure compatibility between the BTF types of the > old and new attachment. If these match, a new bpf_tracing_link will be > created for the new attach target, allowing multiple attachments to > co-exist simultaneously. > > The code could theoretically support multiple-attach of other types of > tracing programs as well, but since I don't have a use case for any of > those, the bpf_tracing_prog_attach() function will reject new targets for > anything other than PROG_TYPE_EXT programs. > > Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com> > --- It feels like using a semi-constructed bpf_tracing_link inside prog->aux->tgt_link is just an unnecessary complication, after reading this and previous patches. Seems more straightforward and simpler to store tgt_attach_type/tgt_prog_type (permanently) and tgt_prog/tgt_trampoline (until first attachment) in prog->aux and then properly create bpf_link on attach. > include/linux/bpf.h | 3 + > include/uapi/linux/bpf.h | 6 ++- > kernel/bpf/syscall.c | 96 +++++++++++++++++++++++++++++++++++++++------- > kernel/bpf/verifier.c | 9 ++++ > 4 files changed, 97 insertions(+), 17 deletions(-) > > diff --git a/include/linux/bpf.h b/include/linux/bpf.h > index 722c60f1c1fc..c6b856b2d296 100644 > --- a/include/linux/bpf.h > +++ b/include/linux/bpf.h > @@ -753,6 +753,9 @@ struct bpf_prog_aux { > struct hlist_node tramp_hlist; > /* BTF_KIND_FUNC_PROTO for valid attach_btf_id */ > const struct btf_type *attach_func_proto; > + /* target BPF prog types for trace programs */ > + enum bpf_prog_type tgt_prog_type; > + enum bpf_attach_type tgt_attach_type; > /* function name for valid attach_btf_id */ > const char *attach_func_name; > struct bpf_prog **func; > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h > index 90359cab501d..0885ab6ac8d9 100644 > --- a/include/uapi/linux/bpf.h > +++ b/include/uapi/linux/bpf.h > @@ -595,8 +595,10 @@ union bpf_attr { > } query; > > struct { /* anonymous struct used by BPF_RAW_TRACEPOINT_OPEN command */ > - __u64 name; > - __u32 prog_fd; > + __u64 name; > + __u32 prog_fd; > + __u32 tgt_prog_fd; > + __u32 tgt_btf_id; > } raw_tracepoint; rant: any chance of putting this into LINK_CREATE instead of extending very unfortunately named RAW_TRACEPOINT_OPEN? > > struct { /* anonymous struct for BPF_BTF_LOAD */ > diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c > index 2d238aa8962e..7b1da5f063eb 100644 > --- a/kernel/bpf/syscall.c > +++ b/kernel/bpf/syscall.c > @@ -4,6 +4,7 @@ > #include <linux/bpf.h> > #include <linux/bpf_trace.h> > #include <linux/bpf_lirc.h> > +#include <linux/bpf_verifier.h> > #include <linux/btf.h> > #include <linux/syscalls.h> > #include <linux/slab.h> > @@ -2582,10 +2583,16 @@ static struct bpf_tracing_link *bpf_tracing_link_create(struct bpf_prog *prog, > return link; > } > > -static int bpf_tracing_prog_attach(struct bpf_prog *prog) > +static int bpf_tracing_prog_attach(struct bpf_prog *prog, > + int tgt_prog_fd, > + u32 btf_id) > { > - struct bpf_tracing_link *link, *olink; > struct bpf_link_primer link_primer; > + struct bpf_prog *tgt_prog = NULL; > + struct bpf_tracing_link *link; > + struct btf_func_model fmodel; > + long addr; > + u64 key; > int err; > > switch (prog->type) { > @@ -2613,28 +2620,80 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog) > err = -EINVAL; > goto out_put_prog; > } > + if (tgt_prog_fd) { > + /* For now we only allow new targets for BPF_PROG_TYPE_EXT */ > + if (prog->type != BPF_PROG_TYPE_EXT || > + !btf_id) { > + err = -EINVAL; > + goto out_put_prog; > + } > > - link = READ_ONCE(prog->aux->tgt_link); > - if (!link) { > - err = -ENOENT; > + tgt_prog = bpf_prog_get(tgt_prog_fd); > + if (IS_ERR(tgt_prog)) { > + err = PTR_ERR(tgt_prog); > + tgt_prog = NULL; > + goto out_put_prog; > + } > + > + key = ((u64)tgt_prog->aux->id) << 32 | btf_id; > + } else if (btf_id) { > + err = -EINVAL; > goto out_put_prog; > } > - olink = cmpxchg(&prog->aux->tgt_link, link, NULL); > - if (olink != link) { > - err = -ENOENT; > - goto out_put_prog; > + > + link = READ_ONCE(prog->aux->tgt_link); > + if (link) { > + if (tgt_prog && link->trampoline->key != key) { I think we need to have a proper locking about this. Imagine two attaches racing, both read non-NULL tgt_link, one of them proceeds to cmpxchg, attach, detach, and free link. Then this one wakes up and tries to access freed memory here. We are coordinating multiple threads on this, it needs to be locked, at least for simplicity, given that performance is not critical here. > + link = NULL; > + } else { > + struct bpf_tracing_link *olink; > + > + olink = cmpxchg(&prog->aux->tgt_link, link, NULL); > + if (olink != link) { > + link = NULL; > + } else if (tgt_prog) { > + /* re-using link that already has ref on > + * tgt_prog, don't take another > + */ > + bpf_prog_put(tgt_prog); > + tgt_prog = NULL; > + } > + } > + } > + > + if (!link) { > + if (!tgt_prog) { > + err = -ENOENT; > + goto out_put_prog; > + } > + > + err = bpf_check_attach_target(NULL, prog, tgt_prog, btf_id, > + &fmodel, &addr, NULL, NULL); > + if (err) > + goto out_put_prog; > + > + link = bpf_tracing_link_create(prog, tgt_prog); > + if (IS_ERR(link)) { > + err = PTR_ERR(link); > + goto out_put_prog; > + } > + tgt_prog = NULL; > + > + err = bpf_trampoline_get(key, (void *)addr, &fmodel, &link->trampoline); > + if (err) > + goto out_put_link; see previous patch, let's avoid bpf_link_put before bpf_link_settle. bpf_link_cleanup() is for after priming, otherwise it's just a kfree. > } > > err = bpf_link_prime(&link->link, &link_primer); > if (err) { > kfree(link); > - goto out_put_prog; > + goto out_put_link; hm... did you try running this with KASAN? you are freeing link and then bpf_link_put() on it? > } > > err = bpf_trampoline_link_prog(prog, link->trampoline); > if (err) { > bpf_link_cleanup(&link_primer); > - goto out_put_prog; > + goto out_put_link; similarly, you've already bpf_link_cleanup()'d, no need to do extra bpf_link_put() > } > > /* at this point the link is no longer referenced from struct bpf_prog, > @@ -2643,8 +2702,12 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog) > link->link.prog = prog; > > return bpf_link_settle(&link_primer); > +out_put_link: > + bpf_link_put(&link->link); > out_put_prog: > bpf_prog_put(prog); > + if (tgt_prog) > + bpf_prog_put(tgt_prog); > return err; > } > [...]
Andrii Nakryiko <andrii.nakryiko@gmail.com> writes: > On Fri, Sep 11, 2020 at 3:01 AM Toke Høiland-Jørgensen <toke@redhat.com> wrote: >> >> From: Toke Høiland-Jørgensen <toke@redhat.com> >> >> This enables support for attaching freplace programs to multiple attach >> points. It does this by amending UAPI for bpf_raw_tracepoint_open with a >> target prog fd and btf ID pair that can be used to supply the new >> attachment point. The target must be compatible with the target that was >> supplied at program load time. >> >> The implementation reuses the checks that were factored out of >> check_attach_btf_id() to ensure compatibility between the BTF types of the >> old and new attachment. If these match, a new bpf_tracing_link will be >> created for the new attach target, allowing multiple attachments to >> co-exist simultaneously. >> >> The code could theoretically support multiple-attach of other types of >> tracing programs as well, but since I don't have a use case for any of >> those, the bpf_tracing_prog_attach() function will reject new targets for >> anything other than PROG_TYPE_EXT programs. >> >> Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com> >> --- > > It feels like using a semi-constructed bpf_tracing_link inside > prog->aux->tgt_link is just an unnecessary complication, after reading > this and previous patches. Seems more straightforward and simpler to > store tgt_attach_type/tgt_prog_type (permanently) and > tgt_prog/tgt_trampoline (until first attachment) in prog->aux and then > properly create bpf_link on attach. I updated v4 with your comments, but kept the link in prog->aux; the reason being that having a container for the two pointers makes it possible to atomically swap it out with xchg() as you suggested previously. Could you please take a look at v4? If you still think it's better to just keep two separate pointers (and add a lock) in prog->aux, I can change it to that. But I'd rather avoid the lock if possible... -Toke
On Mon, Sep 14, 2020 at 9:08 AM Toke Høiland-Jørgensen <toke@redhat.com> wrote: > > Andrii Nakryiko <andrii.nakryiko@gmail.com> writes: > > > On Fri, Sep 11, 2020 at 3:01 AM Toke Høiland-Jørgensen <toke@redhat.com> wrote: > >> > >> From: Toke Høiland-Jørgensen <toke@redhat.com> > >> > >> This enables support for attaching freplace programs to multiple attach > >> points. It does this by amending UAPI for bpf_raw_tracepoint_open with a > >> target prog fd and btf ID pair that can be used to supply the new > >> attachment point. The target must be compatible with the target that was > >> supplied at program load time. > >> > >> The implementation reuses the checks that were factored out of > >> check_attach_btf_id() to ensure compatibility between the BTF types of the > >> old and new attachment. If these match, a new bpf_tracing_link will be > >> created for the new attach target, allowing multiple attachments to > >> co-exist simultaneously. > >> > >> The code could theoretically support multiple-attach of other types of > >> tracing programs as well, but since I don't have a use case for any of > >> those, the bpf_tracing_prog_attach() function will reject new targets for > >> anything other than PROG_TYPE_EXT programs. > >> > >> Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com> > >> --- > > > > It feels like using a semi-constructed bpf_tracing_link inside > > prog->aux->tgt_link is just an unnecessary complication, after reading > > this and previous patches. Seems more straightforward and simpler to > > store tgt_attach_type/tgt_prog_type (permanently) and > > tgt_prog/tgt_trampoline (until first attachment) in prog->aux and then > > properly create bpf_link on attach. > > I updated v4 with your comments, but kept the link in prog->aux; the > reason being that having a container for the two pointers makes it > possible to atomically swap it out with xchg() as you suggested > previously. Could you please take a look at v4? If you still think it's > better to just keep two separate pointers (and add a lock) in prog->aux, > I can change it to that. But I'd rather avoid the lock if possible... I took a very quick look at this specific bit, planning to do another pass tomorrow. What's the problem with adding a mutex to bpf_prog_aux? In your case, now you introduced (unlikely, but still) extra state transition for tgt_link from non-NULL to NULL and then back to non-NULL? And why? Just to use atomic xchg, while using atomic operation is not an absolute necessity because it's not a performance-critical path at all. We are not optimizing for millions of freplace attachments a second, right? On the other hand, having a mutex there won't require restoration logic, it will be dead simple, obvious and straightforward. So yeah, I still think mutex is better there. BTW, check Stanislav's latest patch set. He's adding used_maps_mutex to bpf_prog_aux with no problems at all. It seems to me that we might want to generalize that used_maps_mutex to be just bpf_prog_aux's mutex ('prog_aux_mutex' or whatever we'd call it) and use it for such kinds of low-frequency bpf_prog metadata manipulations/checks. Thoughts? > > -Toke >
Andrii Nakryiko <andrii.nakryiko@gmail.com> writes: > On Mon, Sep 14, 2020 at 9:08 AM Toke Høiland-Jørgensen <toke@redhat.com> wrote: >> >> Andrii Nakryiko <andrii.nakryiko@gmail.com> writes: >> >> > On Fri, Sep 11, 2020 at 3:01 AM Toke Høiland-Jørgensen <toke@redhat.com> wrote: >> >> >> >> From: Toke Høiland-Jørgensen <toke@redhat.com> >> >> >> >> This enables support for attaching freplace programs to multiple attach >> >> points. It does this by amending UAPI for bpf_raw_tracepoint_open with a >> >> target prog fd and btf ID pair that can be used to supply the new >> >> attachment point. The target must be compatible with the target that was >> >> supplied at program load time. >> >> >> >> The implementation reuses the checks that were factored out of >> >> check_attach_btf_id() to ensure compatibility between the BTF types of the >> >> old and new attachment. If these match, a new bpf_tracing_link will be >> >> created for the new attach target, allowing multiple attachments to >> >> co-exist simultaneously. >> >> >> >> The code could theoretically support multiple-attach of other types of >> >> tracing programs as well, but since I don't have a use case for any of >> >> those, the bpf_tracing_prog_attach() function will reject new targets for >> >> anything other than PROG_TYPE_EXT programs. >> >> >> >> Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com> >> >> --- >> > >> > It feels like using a semi-constructed bpf_tracing_link inside >> > prog->aux->tgt_link is just an unnecessary complication, after reading >> > this and previous patches. Seems more straightforward and simpler to >> > store tgt_attach_type/tgt_prog_type (permanently) and >> > tgt_prog/tgt_trampoline (until first attachment) in prog->aux and then >> > properly create bpf_link on attach. >> >> I updated v4 with your comments, but kept the link in prog->aux; the >> reason being that having a container for the two pointers makes it >> possible to atomically swap it out with xchg() as you suggested >> previously. Could you please take a look at v4? If you still think it's >> better to just keep two separate pointers (and add a lock) in prog->aux, >> I can change it to that. But I'd rather avoid the lock if possible... > > I took a very quick look at this specific bit, planning to do another > pass tomorrow. > > What's the problem with adding a mutex to bpf_prog_aux? In your case, > now you introduced (unlikely, but still) extra state transition for > tgt_link from non-NULL to NULL and then back to non-NULL? And why? > Just to use atomic xchg, while using atomic operation is not an > absolute necessity because it's not a performance-critical path at > all. We are not optimizing for millions of freplace attachments a > second, right? On the other hand, having a mutex there won't require > restoration logic, it will be dead simple, obvious and > straightforward. So yeah, I still think mutex is better there. So I went ahead and implemented a mutex-based version of this. I'm not sure I agree with "dead simple", I'd say it's on par with the previous version; and that is only if I explicitly limit the scope of the mutex to *writing* of the tgt_* pointers (i.e., I haven't gone through and protected all the reads from within the verifier). The mutex version does have the benefit of still making it possible to retry a bpf_raw_tracepoint_open() if it fails, so I guess that is a benefit; I'll post it as v5 and you can have a look :) > BTW, check Stanislav's latest patch set. He's adding used_maps_mutex > to bpf_prog_aux with no problems at all. It seems to me that we might > want to generalize that used_maps_mutex to be just bpf_prog_aux's > mutex ('prog_aux_mutex' or whatever we'd call it) and use it for such > kinds of low-frequency bpf_prog metadata manipulations/checks. I'm not sure I like the idea of widening the scope of the mutex. Or at least I think that should be done as a follow-up patch that does a proper analysis of all the different fields it is supposed to protect and makes sure no deadlocks creep in. -Toke
diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 722c60f1c1fc..c6b856b2d296 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -753,6 +753,9 @@ struct bpf_prog_aux { struct hlist_node tramp_hlist; /* BTF_KIND_FUNC_PROTO for valid attach_btf_id */ const struct btf_type *attach_func_proto; + /* target BPF prog types for trace programs */ + enum bpf_prog_type tgt_prog_type; + enum bpf_attach_type tgt_attach_type; /* function name for valid attach_btf_id */ const char *attach_func_name; struct bpf_prog **func; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 90359cab501d..0885ab6ac8d9 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -595,8 +595,10 @@ union bpf_attr { } query; struct { /* anonymous struct used by BPF_RAW_TRACEPOINT_OPEN command */ - __u64 name; - __u32 prog_fd; + __u64 name; + __u32 prog_fd; + __u32 tgt_prog_fd; + __u32 tgt_btf_id; } raw_tracepoint; struct { /* anonymous struct for BPF_BTF_LOAD */ diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 2d238aa8962e..7b1da5f063eb 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -4,6 +4,7 @@ #include <linux/bpf.h> #include <linux/bpf_trace.h> #include <linux/bpf_lirc.h> +#include <linux/bpf_verifier.h> #include <linux/btf.h> #include <linux/syscalls.h> #include <linux/slab.h> @@ -2582,10 +2583,16 @@ static struct bpf_tracing_link *bpf_tracing_link_create(struct bpf_prog *prog, return link; } -static int bpf_tracing_prog_attach(struct bpf_prog *prog) +static int bpf_tracing_prog_attach(struct bpf_prog *prog, + int tgt_prog_fd, + u32 btf_id) { - struct bpf_tracing_link *link, *olink; struct bpf_link_primer link_primer; + struct bpf_prog *tgt_prog = NULL; + struct bpf_tracing_link *link; + struct btf_func_model fmodel; + long addr; + u64 key; int err; switch (prog->type) { @@ -2613,28 +2620,80 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog) err = -EINVAL; goto out_put_prog; } + if (tgt_prog_fd) { + /* For now we only allow new targets for BPF_PROG_TYPE_EXT */ + if (prog->type != BPF_PROG_TYPE_EXT || + !btf_id) { + err = -EINVAL; + goto out_put_prog; + } - link = READ_ONCE(prog->aux->tgt_link); - if (!link) { - err = -ENOENT; + tgt_prog = bpf_prog_get(tgt_prog_fd); + if (IS_ERR(tgt_prog)) { + err = PTR_ERR(tgt_prog); + tgt_prog = NULL; + goto out_put_prog; + } + + key = ((u64)tgt_prog->aux->id) << 32 | btf_id; + } else if (btf_id) { + err = -EINVAL; goto out_put_prog; } - olink = cmpxchg(&prog->aux->tgt_link, link, NULL); - if (olink != link) { - err = -ENOENT; - goto out_put_prog; + + link = READ_ONCE(prog->aux->tgt_link); + if (link) { + if (tgt_prog && link->trampoline->key != key) { + link = NULL; + } else { + struct bpf_tracing_link *olink; + + olink = cmpxchg(&prog->aux->tgt_link, link, NULL); + if (olink != link) { + link = NULL; + } else if (tgt_prog) { + /* re-using link that already has ref on + * tgt_prog, don't take another + */ + bpf_prog_put(tgt_prog); + tgt_prog = NULL; + } + } + } + + if (!link) { + if (!tgt_prog) { + err = -ENOENT; + goto out_put_prog; + } + + err = bpf_check_attach_target(NULL, prog, tgt_prog, btf_id, + &fmodel, &addr, NULL, NULL); + if (err) + goto out_put_prog; + + link = bpf_tracing_link_create(prog, tgt_prog); + if (IS_ERR(link)) { + err = PTR_ERR(link); + goto out_put_prog; + } + tgt_prog = NULL; + + err = bpf_trampoline_get(key, (void *)addr, &fmodel, &link->trampoline); + if (err) + goto out_put_link; } err = bpf_link_prime(&link->link, &link_primer); if (err) { kfree(link); - goto out_put_prog; + goto out_put_link; } err = bpf_trampoline_link_prog(prog, link->trampoline); if (err) { bpf_link_cleanup(&link_primer); - goto out_put_prog; + goto out_put_link; } /* at this point the link is no longer referenced from struct bpf_prog, @@ -2643,8 +2702,12 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog) link->link.prog = prog; return bpf_link_settle(&link_primer); +out_put_link: + bpf_link_put(&link->link); out_put_prog: bpf_prog_put(prog); + if (tgt_prog) + bpf_prog_put(tgt_prog); return err; } @@ -2722,7 +2785,7 @@ static const struct bpf_link_ops bpf_raw_tp_link_lops = { .fill_link_info = bpf_raw_tp_link_fill_link_info, }; -#define BPF_RAW_TRACEPOINT_OPEN_LAST_FIELD raw_tracepoint.prog_fd +#define BPF_RAW_TRACEPOINT_OPEN_LAST_FIELD raw_tracepoint.tgt_btf_id static int bpf_raw_tracepoint_open(const union bpf_attr *attr) { @@ -2746,8 +2809,9 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr) case BPF_PROG_TYPE_EXT: case BPF_PROG_TYPE_LSM: if (attr->raw_tracepoint.name) { - /* The attach point for this category of programs - * should be specified via btf_id during program load. + /* The attach point for this category of programs should + * be specified via btf_id during program load, or using + * tgt_btf_id. */ err = -EINVAL; goto out_put_prog; @@ -2757,7 +2821,9 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr) tp_name = prog->aux->attach_func_name; break; } - return bpf_tracing_prog_attach(prog); + return bpf_tracing_prog_attach(prog, + attr->raw_tracepoint.tgt_prog_fd, + attr->raw_tracepoint.tgt_btf_id); case BPF_PROG_TYPE_RAW_TRACEPOINT: case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE: if (strncpy_from_user(buf, diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index d6714c24c4e7..df01e71b118d 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -11205,6 +11205,12 @@ int bpf_check_attach_target(struct bpf_verifier_log *log, if (!btf_type_is_func_proto(t)) return -EINVAL; + if ((prog->aux->tgt_prog_type && + prog->aux->tgt_prog_type != tgt_prog->type) || + (prog->aux->tgt_attach_type && + prog->aux->tgt_attach_type != tgt_prog->expected_attach_type)) + return -EINVAL; + if (tgt_prog && conservative) t = NULL; @@ -11300,6 +11306,9 @@ static int check_attach_btf_id(struct bpf_verifier_env *env) return ret; if (tgt_prog) { + prog->aux->tgt_prog_type = tgt_prog->type; + prog->aux->tgt_attach_type = tgt_prog->expected_attach_type; + if (prog->type == BPF_PROG_TYPE_EXT) { env->ops = bpf_verifier_ops[tgt_prog->type]; prog->expected_attach_type =