Message ID | 20190408160432.151478-1-sdf@google.com |
---|---|
State | Changes Requested |
Delegated to: | BPF Maintainers |
Headers | show |
Series | [bpf-next,v2,1/3] bpf: support input __sk_buff context in BPF_PROG_TEST_RUN | expand |
On Mon, Apr 08, 2019 at 09:04:30AM -0700, Stanislav Fomichev wrote: > Add new set of arguments to bpf_attr for BPF_PROG_TEST_RUN: > * ctx_in/ctx_size_in - input context > * ctx_out/ctx_size_out - output context > > The intended use case is to pass some meta data to the test runs that > operate on skb (this has being brought up on recent LPC). > > For programs that use bpf_prog_test_run_skb, support __sk_buff input and > output. Initially, from input __sk_buff, copy _only_ cb and priority into > skb, all other non-zero fields are prohibited (with EINVAL). > If the user has set ctx_out/ctx_size_out, copy the potentially modified > __sk_buff back to the userspace. > > We require all fields of input __sk_buff except the ones we explicitly > support to be set to zero. The expectation is that in the future we might > add support for more fields and we want to fail explicitly if the user > runs the program on the kernel where we don't yet support them. > > The API is intentionally vague (i.e. we don't explicitly add __sk_buff > to bpf_attr, but ctx_in) to potentially let other test_run types use > this interface in the future (this can be xdp_md for xdp types for > example). > > v2: > * Addressed comments from Martin Lau > > Cc: Martin Lau <kafai@fb.com> > Signed-off-by: Stanislav Fomichev <sdf@google.com> > --- > include/uapi/linux/bpf.h | 7 ++ > kernel/bpf/syscall.c | 10 ++- > net/bpf/test_run.c | 135 ++++++++++++++++++++++++++++++++++++--- > 3 files changed, 143 insertions(+), 9 deletions(-) > > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h > index 837024512baf..8e96f99cebf8 100644 > --- a/include/uapi/linux/bpf.h > +++ b/include/uapi/linux/bpf.h > @@ -396,6 +396,13 @@ union bpf_attr { > __aligned_u64 data_out; > __u32 repeat; > __u32 duration; > + __u32 ctx_size_in; /* input: len of ctx_in */ > + __u32 ctx_size_out; /* input/output: len of ctx_out > + * returns ENOSPC if ctx_out > + * is too small. > + */ > + __aligned_u64 ctx_in; > + __aligned_u64 ctx_out; > } test; > > struct { /* anonymous struct used by BPF_*_GET_*_ID */ > diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c > index 1d65e56594db..5bb963e8f9b0 100644 > --- a/kernel/bpf/syscall.c > +++ b/kernel/bpf/syscall.c > @@ -1949,7 +1949,7 @@ static int bpf_prog_query(const union bpf_attr *attr, > return cgroup_bpf_prog_query(attr, uattr); > } > > -#define BPF_PROG_TEST_RUN_LAST_FIELD test.duration > +#define BPF_PROG_TEST_RUN_LAST_FIELD test.ctx_out > > static int bpf_prog_test_run(const union bpf_attr *attr, > union bpf_attr __user *uattr) > @@ -1962,6 +1962,14 @@ static int bpf_prog_test_run(const union bpf_attr *attr, > if (CHECK_ATTR(BPF_PROG_TEST_RUN)) > return -EINVAL; > > + if ((attr->test.ctx_size_in && !attr->test.ctx_in) || > + (!attr->test.ctx_size_in && attr->test.ctx_in)) > + return -EINVAL; > + > + if ((attr->test.ctx_size_out && !attr->test.ctx_out) || > + (!attr->test.ctx_size_out && attr->test.ctx_out)) > + return -EINVAL; > + > prog = bpf_prog_get(attr->test.prog_fd); > if (IS_ERR(prog)) > return PTR_ERR(prog); > diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c > index fab142b796ef..3c6e3855c886 100644 > --- a/net/bpf/test_run.c > +++ b/net/bpf/test_run.c > @@ -123,12 +123,118 @@ static void *bpf_test_init(const union bpf_attr *kattr, u32 size, > return data; > } > > +static void *bpf_ctx_init(const union bpf_attr *kattr, u32 max_size) > +{ > + void __user *data_in = u64_to_user_ptr(kattr->test.ctx_in); > + u32 size = kattr->test.ctx_size_in; > + void *data; > + > + if (!size) > + return NULL; A note for a later comment in bpf_prog_test_run_skb(). This function may return NULL. > + > + if (size > max_size) If should be fine if the kernel unsupported bytes are zeros (i.e. tailing zeros in data_in). Please refer to the existing bpf_check_uarg_tail_zero() usages or also CHECK_ATTR() in syscall.c. Something like this if bpf_check_uarg_tail_zero() is used, err = bpf_check_uarg_tail_zero(data_in, max_size, size)); if (err) return ERR_PTR(err); > + return ERR_PTR(-E2BIG); > + > + data = kzalloc(max_size, GFP_USER); > + if (!data) > + return ERR_PTR(-ENOMEM); > + > + if (copy_from_user(data, data_in, size)) { > + kfree(data); > + return ERR_PTR(-EFAULT); > + } > + return data; > +} > + > +static int bpf_ctx_finish(const union bpf_attr *kattr, > + union bpf_attr __user *uattr, const void *data, > + u32 size) > +{ > + void __user *data_out = u64_to_user_ptr(kattr->test.ctx_out); > + int err = -EFAULT; > + u32 copy_size = size; > + data is not checked for NULL here. > + if (!kattr->test.ctx_size_out) > + return 0; > + > + if (copy_size > kattr->test.ctx_size_out) { > + copy_size = kattr->test.ctx_size_out; > + err = -ENOSPC; > + } > + > + if (copy_to_user(data_out, data, copy_size)) > + goto out; > + if (copy_to_user(&uattr->test.ctx_size_out, &size, sizeof(size))) > + goto out; > + if (err != -ENOSPC) > + err = 0; > +out: > + return err; > +} [ ... ] > int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, > union bpf_attr __user *uattr) > { > bool is_l2 = false, is_direct_pkt_access = false; > u32 size = kattr->test.data_size_in; > u32 repeat = kattr->test.repeat; > + struct __sk_buff *ctx = NULL; > u32 retval, duration; > int hh_len = ETH_HLEN; > struct sk_buff *skb; > @@ -141,6 +247,12 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, > if (IS_ERR(data)) > return PTR_ERR(data); > > + ctx = bpf_ctx_init(kattr, sizeof(struct __sk_buff)); > + if (IS_ERR(ctx)) { > + kfree(data); > + return PTR_ERR(ctx); > + } > + > switch (prog->type) { > case BPF_PROG_TYPE_SCHED_CLS: > case BPF_PROG_TYPE_SCHED_ACT: > @@ -158,6 +270,7 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, > sk = kzalloc(sizeof(struct sock), GFP_USER); > if (!sk) { > kfree(data); > + kfree(ctx); > return -ENOMEM; > } > sock_net_set(sk, current->nsproxy->net_ns); > @@ -166,6 +279,7 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, > skb = build_skb(data, 0); > if (!skb) { > kfree(data); > + kfree(ctx); > kfree(sk); > return -ENOMEM; > } > @@ -180,32 +294,37 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, > __skb_push(skb, hh_len); > if (is_direct_pkt_access) > bpf_compute_data_pointers(skb); > + ret = convert___skb_to_skb(skb, ctx); > + if (ret) > + goto out; > ret = bpf_test_run(prog, skb, repeat, &retval, &duration); > - if (ret) { > - kfree_skb(skb); > - kfree(sk); > - return ret; > - } > + if (ret) > + goto out; > if (!is_l2) { > if (skb_headroom(skb) < hh_len) { > int nhead = HH_DATA_ALIGN(hh_len - skb_headroom(skb)); > > if (pskb_expand_head(skb, nhead, 0, GFP_USER)) { > - kfree_skb(skb); > - kfree(sk); > - return -ENOMEM; > + ret = -ENOMEM; > + goto out; > } > } > memset(__skb_push(skb, hh_len), 0, hh_len); > } > + convert_skb_to___skb(skb, ctx); > > size = skb->len; > /* bpf program can never convert linear skb to non-linear */ > if (WARN_ON_ONCE(skb_is_nonlinear(skb))) > size = skb_headlen(skb); > ret = bpf_test_finish(kattr, uattr, skb->data, size, retval, duration); > + if (!ret) > + ret = bpf_ctx_finish(kattr, uattr, ctx, ctx is not checked for NULL in here either. > + sizeof(struct __sk_buff)); > +out: > kfree_skb(skb); > kfree(sk); > + kfree(ctx); > return ret; > } > > -- > 2.21.0.392.gf8f6787159e-goog >
On 04/09, Martin Lau wrote: > On Mon, Apr 08, 2019 at 09:04:30AM -0700, Stanislav Fomichev wrote: > > Add new set of arguments to bpf_attr for BPF_PROG_TEST_RUN: > > * ctx_in/ctx_size_in - input context > > * ctx_out/ctx_size_out - output context > > > > The intended use case is to pass some meta data to the test runs that > > operate on skb (this has being brought up on recent LPC). > > > > For programs that use bpf_prog_test_run_skb, support __sk_buff input and > > output. Initially, from input __sk_buff, copy _only_ cb and priority into > > skb, all other non-zero fields are prohibited (with EINVAL). > > If the user has set ctx_out/ctx_size_out, copy the potentially modified > > __sk_buff back to the userspace. > > > > We require all fields of input __sk_buff except the ones we explicitly > > support to be set to zero. The expectation is that in the future we might > > add support for more fields and we want to fail explicitly if the user > > runs the program on the kernel where we don't yet support them. > > > > The API is intentionally vague (i.e. we don't explicitly add __sk_buff > > to bpf_attr, but ctx_in) to potentially let other test_run types use > > this interface in the future (this can be xdp_md for xdp types for > > example). > > > > v2: > > * Addressed comments from Martin Lau > > > > Cc: Martin Lau <kafai@fb.com> > > Signed-off-by: Stanislav Fomichev <sdf@google.com> > > --- > > include/uapi/linux/bpf.h | 7 ++ > > kernel/bpf/syscall.c | 10 ++- > > net/bpf/test_run.c | 135 ++++++++++++++++++++++++++++++++++++--- > > 3 files changed, 143 insertions(+), 9 deletions(-) > > > > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h > > index 837024512baf..8e96f99cebf8 100644 > > --- a/include/uapi/linux/bpf.h > > +++ b/include/uapi/linux/bpf.h > > @@ -396,6 +396,13 @@ union bpf_attr { > > __aligned_u64 data_out; > > __u32 repeat; > > __u32 duration; > > + __u32 ctx_size_in; /* input: len of ctx_in */ > > + __u32 ctx_size_out; /* input/output: len of ctx_out > > + * returns ENOSPC if ctx_out > > + * is too small. > > + */ > > + __aligned_u64 ctx_in; > > + __aligned_u64 ctx_out; > > } test; > > > > struct { /* anonymous struct used by BPF_*_GET_*_ID */ > > diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c > > index 1d65e56594db..5bb963e8f9b0 100644 > > --- a/kernel/bpf/syscall.c > > +++ b/kernel/bpf/syscall.c > > @@ -1949,7 +1949,7 @@ static int bpf_prog_query(const union bpf_attr *attr, > > return cgroup_bpf_prog_query(attr, uattr); > > } > > > > -#define BPF_PROG_TEST_RUN_LAST_FIELD test.duration > > +#define BPF_PROG_TEST_RUN_LAST_FIELD test.ctx_out > > > > static int bpf_prog_test_run(const union bpf_attr *attr, > > union bpf_attr __user *uattr) > > @@ -1962,6 +1962,14 @@ static int bpf_prog_test_run(const union bpf_attr *attr, > > if (CHECK_ATTR(BPF_PROG_TEST_RUN)) > > return -EINVAL; > > > > + if ((attr->test.ctx_size_in && !attr->test.ctx_in) || > > + (!attr->test.ctx_size_in && attr->test.ctx_in)) > > + return -EINVAL; > > + > > + if ((attr->test.ctx_size_out && !attr->test.ctx_out) || > > + (!attr->test.ctx_size_out && attr->test.ctx_out)) > > + return -EINVAL; > > + > > prog = bpf_prog_get(attr->test.prog_fd); > > if (IS_ERR(prog)) > > return PTR_ERR(prog); > > diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c > > index fab142b796ef..3c6e3855c886 100644 > > --- a/net/bpf/test_run.c > > +++ b/net/bpf/test_run.c > > @@ -123,12 +123,118 @@ static void *bpf_test_init(const union bpf_attr *kattr, u32 size, > > return data; > > } > > > > +static void *bpf_ctx_init(const union bpf_attr *kattr, u32 max_size) > > +{ > > + void __user *data_in = u64_to_user_ptr(kattr->test.ctx_in); > > + u32 size = kattr->test.ctx_size_in; > > + void *data; > > + > > + if (!size) > > + return NULL; > A note for a later comment in bpf_prog_test_run_skb(). > This function may return NULL. > > > + > > + if (size > max_size) > If should be fine if the kernel unsupported bytes are zeros > (i.e. tailing zeros in data_in). > Please refer to the existing bpf_check_uarg_tail_zero() usages > or also CHECK_ATTR() in syscall.c. Something like this if > bpf_check_uarg_tail_zero() is used, > > err = bpf_check_uarg_tail_zero(data_in, max_size, size)); > if (err) > return ERR_PTR(err); > Nice! Will use that instead. > > + return ERR_PTR(-E2BIG); > > + > > + data = kzalloc(max_size, GFP_USER); > > + if (!data) > > + return ERR_PTR(-ENOMEM); > > + > > + if (copy_from_user(data, data_in, size)) { > > + kfree(data); > > + return ERR_PTR(-EFAULT); > > + } > > + return data; > > +} > > + > > +static int bpf_ctx_finish(const union bpf_attr *kattr, > > + union bpf_attr __user *uattr, const void *data, > > + u32 size) > > +{ > > + void __user *data_out = u64_to_user_ptr(kattr->test.ctx_out); > > + int err = -EFAULT; > > + u32 copy_size = size; > > + > data is not checked for NULL here. I do check kattr->test.ctx_size_out on the next line. And I've added checks to the syscall.c that make sure that ctx_size_out and ctx_out are consistent (i.e. if ctx_size_out is 0, ctx_out is NULL). Let me instead check kattr->test.ctx_out, that should be more clear. > > + if (!kattr->test.ctx_size_out) > > + return 0; > > + > > + if (copy_size > kattr->test.ctx_size_out) { > > + copy_size = kattr->test.ctx_size_out; > > + err = -ENOSPC; > > + } > > + > > + if (copy_to_user(data_out, data, copy_size)) > > + goto out; > > + if (copy_to_user(&uattr->test.ctx_size_out, &size, sizeof(size))) > > + goto out; > > + if (err != -ENOSPC) > > + err = 0; > > +out: > > + return err; > > +} > > [ ... ] > > > int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, > > union bpf_attr __user *uattr) > > { > > bool is_l2 = false, is_direct_pkt_access = false; > > u32 size = kattr->test.data_size_in; > > u32 repeat = kattr->test.repeat; > > + struct __sk_buff *ctx = NULL; > > u32 retval, duration; > > int hh_len = ETH_HLEN; > > struct sk_buff *skb; > > @@ -141,6 +247,12 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, > > if (IS_ERR(data)) > > return PTR_ERR(data); > > > > + ctx = bpf_ctx_init(kattr, sizeof(struct __sk_buff)); > > + if (IS_ERR(ctx)) { > > + kfree(data); > > + return PTR_ERR(ctx); > > + } > > + > > switch (prog->type) { > > case BPF_PROG_TYPE_SCHED_CLS: > > case BPF_PROG_TYPE_SCHED_ACT: > > @@ -158,6 +270,7 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, > > sk = kzalloc(sizeof(struct sock), GFP_USER); > > if (!sk) { > > kfree(data); > > + kfree(ctx); > > return -ENOMEM; > > } > > sock_net_set(sk, current->nsproxy->net_ns); > > @@ -166,6 +279,7 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, > > skb = build_skb(data, 0); > > if (!skb) { > > kfree(data); > > + kfree(ctx); > > kfree(sk); > > return -ENOMEM; > > } > > @@ -180,32 +294,37 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, > > __skb_push(skb, hh_len); > > if (is_direct_pkt_access) > > bpf_compute_data_pointers(skb); > > + ret = convert___skb_to_skb(skb, ctx); > > + if (ret) > > + goto out; > > ret = bpf_test_run(prog, skb, repeat, &retval, &duration); > > - if (ret) { > > - kfree_skb(skb); > > - kfree(sk); > > - return ret; > > - } > > + if (ret) > > + goto out; > > if (!is_l2) { > > if (skb_headroom(skb) < hh_len) { > > int nhead = HH_DATA_ALIGN(hh_len - skb_headroom(skb)); > > > > if (pskb_expand_head(skb, nhead, 0, GFP_USER)) { > > - kfree_skb(skb); > > - kfree(sk); > > - return -ENOMEM; > > + ret = -ENOMEM; > > + goto out; > > } > > } > > memset(__skb_push(skb, hh_len), 0, hh_len); > > } > > + convert_skb_to___skb(skb, ctx); > > > > size = skb->len; > > /* bpf program can never convert linear skb to non-linear */ > > if (WARN_ON_ONCE(skb_is_nonlinear(skb))) > > size = skb_headlen(skb); > > ret = bpf_test_finish(kattr, uattr, skb->data, size, retval, duration); > > + if (!ret) > > + ret = bpf_ctx_finish(kattr, uattr, ctx, > ctx is not checked for NULL in here either. Good catch, I didn't think about the case where ctx_in is not set, but ctx_out is set. Let me handle that in the v3. Thank you for a review! > > > + sizeof(struct __sk_buff)); > > +out: > > kfree_skb(skb); > > kfree(sk); > > + kfree(ctx); > > return ret; > > } > > > > -- > > 2.21.0.392.gf8f6787159e-goog > >
On Tue, Apr 09, 2019 at 09:08:12AM -0700, Stanislav Fomichev wrote: > On 04/09, Martin Lau wrote: > > On Mon, Apr 08, 2019 at 09:04:30AM -0700, Stanislav Fomichev wrote: > > > Add new set of arguments to bpf_attr for BPF_PROG_TEST_RUN: > > > * ctx_in/ctx_size_in - input context > > > * ctx_out/ctx_size_out - output context > > > > > > The intended use case is to pass some meta data to the test runs that > > > operate on skb (this has being brought up on recent LPC). > > > > > > For programs that use bpf_prog_test_run_skb, support __sk_buff input and > > > output. Initially, from input __sk_buff, copy _only_ cb and priority into > > > skb, all other non-zero fields are prohibited (with EINVAL). > > > If the user has set ctx_out/ctx_size_out, copy the potentially modified > > > __sk_buff back to the userspace. > > > > > > We require all fields of input __sk_buff except the ones we explicitly > > > support to be set to zero. The expectation is that in the future we might > > > add support for more fields and we want to fail explicitly if the user > > > runs the program on the kernel where we don't yet support them. > > > > > > The API is intentionally vague (i.e. we don't explicitly add __sk_buff > > > to bpf_attr, but ctx_in) to potentially let other test_run types use > > > this interface in the future (this can be xdp_md for xdp types for > > > example). > > > > > > v2: > > > * Addressed comments from Martin Lau > > > > > > Cc: Martin Lau <kafai@fb.com> > > > Signed-off-by: Stanislav Fomichev <sdf@google.com> > > > --- > > > include/uapi/linux/bpf.h | 7 ++ > > > kernel/bpf/syscall.c | 10 ++- > > > net/bpf/test_run.c | 135 ++++++++++++++++++++++++++++++++++++--- > > > 3 files changed, 143 insertions(+), 9 deletions(-) > > > > > > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h > > > index 837024512baf..8e96f99cebf8 100644 > > > --- a/include/uapi/linux/bpf.h > > > +++ b/include/uapi/linux/bpf.h > > > @@ -396,6 +396,13 @@ union bpf_attr { > > > __aligned_u64 data_out; > > > __u32 repeat; > > > __u32 duration; > > > + __u32 ctx_size_in; /* input: len of ctx_in */ > > > + __u32 ctx_size_out; /* input/output: len of ctx_out > > > + * returns ENOSPC if ctx_out > > > + * is too small. > > > + */ > > > + __aligned_u64 ctx_in; > > > + __aligned_u64 ctx_out; > > > } test; > > > > > > struct { /* anonymous struct used by BPF_*_GET_*_ID */ > > > diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c > > > index 1d65e56594db..5bb963e8f9b0 100644 > > > --- a/kernel/bpf/syscall.c > > > +++ b/kernel/bpf/syscall.c > > > @@ -1949,7 +1949,7 @@ static int bpf_prog_query(const union bpf_attr *attr, > > > return cgroup_bpf_prog_query(attr, uattr); > > > } > > > > > > -#define BPF_PROG_TEST_RUN_LAST_FIELD test.duration > > > +#define BPF_PROG_TEST_RUN_LAST_FIELD test.ctx_out > > > > > > static int bpf_prog_test_run(const union bpf_attr *attr, > > > union bpf_attr __user *uattr) > > > @@ -1962,6 +1962,14 @@ static int bpf_prog_test_run(const union bpf_attr *attr, > > > if (CHECK_ATTR(BPF_PROG_TEST_RUN)) > > > return -EINVAL; > > > > > > + if ((attr->test.ctx_size_in && !attr->test.ctx_in) || > > > + (!attr->test.ctx_size_in && attr->test.ctx_in)) > > > + return -EINVAL; > > > + > > > + if ((attr->test.ctx_size_out && !attr->test.ctx_out) || > > > + (!attr->test.ctx_size_out && attr->test.ctx_out)) > > > + return -EINVAL; > > > + > > > prog = bpf_prog_get(attr->test.prog_fd); > > > if (IS_ERR(prog)) > > > return PTR_ERR(prog); > > > diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c > > > index fab142b796ef..3c6e3855c886 100644 > > > --- a/net/bpf/test_run.c > > > +++ b/net/bpf/test_run.c > > > @@ -123,12 +123,118 @@ static void *bpf_test_init(const union bpf_attr *kattr, u32 size, > > > return data; > > > } > > > > > > +static void *bpf_ctx_init(const union bpf_attr *kattr, u32 max_size) > > > +{ > > > + void __user *data_in = u64_to_user_ptr(kattr->test.ctx_in); > > > + u32 size = kattr->test.ctx_size_in; > > > + void *data; > > > + > > > + if (!size) > > > + return NULL; > > A note for a later comment in bpf_prog_test_run_skb(). > > This function may return NULL. > > > > > + > > > + if (size > max_size) > > If should be fine if the kernel unsupported bytes are zeros > > (i.e. tailing zeros in data_in). > > Please refer to the existing bpf_check_uarg_tail_zero() usages > > or also CHECK_ATTR() in syscall.c. Something like this if > > bpf_check_uarg_tail_zero() is used, > > > > err = bpf_check_uarg_tail_zero(data_in, max_size, size)); > > if (err) > > return ERR_PTR(err); > > > Nice! Will use that instead. > > > > + return ERR_PTR(-E2BIG); > > > + > > > + data = kzalloc(max_size, GFP_USER); > > > + if (!data) > > > + return ERR_PTR(-ENOMEM); > > > + > > > + if (copy_from_user(data, data_in, size)) { > > > + kfree(data); > > > + return ERR_PTR(-EFAULT); > > > + } > > > + return data; > > > +} > > > + > > > +static int bpf_ctx_finish(const union bpf_attr *kattr, > > > + union bpf_attr __user *uattr, const void *data, > > > + u32 size) > > > +{ > > > + void __user *data_out = u64_to_user_ptr(kattr->test.ctx_out); > > > + int err = -EFAULT; > > > + u32 copy_size = size; > > > + > > data is not checked for NULL here. > I do check kattr->test.ctx_size_out on the next line. And I've added > checks to the syscall.c that make sure that ctx_size_out and ctx_out > are consistent (i.e. if ctx_size_out is 0, ctx_out is NULL). > > Let me instead check kattr->test.ctx_out, that should be more clear. Note that I meant "data" instead of "data_out". The caller "bpf_prog_test_run_skb()" passed in "ctx" (input) as "data" here. I think !kattr->test.ctx_size_out and !data_out are equally good. Your call ;) Thanks! [ ... ] > > > int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, > > > union bpf_attr __user *uattr) > > > { > > > bool is_l2 = false, is_direct_pkt_access = false; > > > u32 size = kattr->test.data_size_in; > > > u32 repeat = kattr->test.repeat; > > > + struct __sk_buff *ctx = NULL; > > > u32 retval, duration; > > > int hh_len = ETH_HLEN; > > > struct sk_buff *skb; > > > @@ -141,6 +247,12 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, > > > if (IS_ERR(data)) > > > return PTR_ERR(data); > > > > > > + ctx = bpf_ctx_init(kattr, sizeof(struct __sk_buff)); > > > + if (IS_ERR(ctx)) { > > > + kfree(data); > > > + return PTR_ERR(ctx); > > > + } > > > + > > > switch (prog->type) { > > > case BPF_PROG_TYPE_SCHED_CLS: > > > case BPF_PROG_TYPE_SCHED_ACT: > > > @@ -158,6 +270,7 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, > > > sk = kzalloc(sizeof(struct sock), GFP_USER); > > > if (!sk) { > > > kfree(data); > > > + kfree(ctx); > > > return -ENOMEM; > > > } > > > sock_net_set(sk, current->nsproxy->net_ns); > > > @@ -166,6 +279,7 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, > > > skb = build_skb(data, 0); > > > if (!skb) { > > > kfree(data); > > > + kfree(ctx); > > > kfree(sk); > > > return -ENOMEM; > > > } > > > @@ -180,32 +294,37 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, > > > __skb_push(skb, hh_len); > > > if (is_direct_pkt_access) > > > bpf_compute_data_pointers(skb); > > > + ret = convert___skb_to_skb(skb, ctx); > > > + if (ret) > > > + goto out; > > > ret = bpf_test_run(prog, skb, repeat, &retval, &duration); > > > - if (ret) { > > > - kfree_skb(skb); > > > - kfree(sk); > > > - return ret; > > > - } > > > + if (ret) > > > + goto out; > > > if (!is_l2) { > > > if (skb_headroom(skb) < hh_len) { > > > int nhead = HH_DATA_ALIGN(hh_len - skb_headroom(skb)); > > > > > > if (pskb_expand_head(skb, nhead, 0, GFP_USER)) { > > > - kfree_skb(skb); > > > - kfree(sk); > > > - return -ENOMEM; > > > + ret = -ENOMEM; > > > + goto out; > > > } > > > } > > > memset(__skb_push(skb, hh_len), 0, hh_len); > > > } > > > + convert_skb_to___skb(skb, ctx); > > > > > > size = skb->len; > > > /* bpf program can never convert linear skb to non-linear */ > > > if (WARN_ON_ONCE(skb_is_nonlinear(skb))) > > > size = skb_headlen(skb); > > > ret = bpf_test_finish(kattr, uattr, skb->data, size, retval, duration); > > > + if (!ret) > > > + ret = bpf_ctx_finish(kattr, uattr, ctx, > > ctx is not checked for NULL in here either. > Good catch, I didn't think about the case where ctx_in is not set, > but ctx_out is set. Let me handle that in the v3. > > Thank you for a review! > > > > > + sizeof(struct __sk_buff)); > > > +out: > > > kfree_skb(skb); > > > kfree(sk); > > > + kfree(ctx); > > > return ret; > > > } > > > > > > -- > > > 2.21.0.392.gf8f6787159e-goog > > >
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 837024512baf..8e96f99cebf8 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -396,6 +396,13 @@ union bpf_attr { __aligned_u64 data_out; __u32 repeat; __u32 duration; + __u32 ctx_size_in; /* input: len of ctx_in */ + __u32 ctx_size_out; /* input/output: len of ctx_out + * returns ENOSPC if ctx_out + * is too small. + */ + __aligned_u64 ctx_in; + __aligned_u64 ctx_out; } test; struct { /* anonymous struct used by BPF_*_GET_*_ID */ diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 1d65e56594db..5bb963e8f9b0 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1949,7 +1949,7 @@ static int bpf_prog_query(const union bpf_attr *attr, return cgroup_bpf_prog_query(attr, uattr); } -#define BPF_PROG_TEST_RUN_LAST_FIELD test.duration +#define BPF_PROG_TEST_RUN_LAST_FIELD test.ctx_out static int bpf_prog_test_run(const union bpf_attr *attr, union bpf_attr __user *uattr) @@ -1962,6 +1962,14 @@ static int bpf_prog_test_run(const union bpf_attr *attr, if (CHECK_ATTR(BPF_PROG_TEST_RUN)) return -EINVAL; + if ((attr->test.ctx_size_in && !attr->test.ctx_in) || + (!attr->test.ctx_size_in && attr->test.ctx_in)) + return -EINVAL; + + if ((attr->test.ctx_size_out && !attr->test.ctx_out) || + (!attr->test.ctx_size_out && attr->test.ctx_out)) + return -EINVAL; + prog = bpf_prog_get(attr->test.prog_fd); if (IS_ERR(prog)) return PTR_ERR(prog); diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index fab142b796ef..3c6e3855c886 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -123,12 +123,118 @@ static void *bpf_test_init(const union bpf_attr *kattr, u32 size, return data; } +static void *bpf_ctx_init(const union bpf_attr *kattr, u32 max_size) +{ + void __user *data_in = u64_to_user_ptr(kattr->test.ctx_in); + u32 size = kattr->test.ctx_size_in; + void *data; + + if (!size) + return NULL; + + if (size > max_size) + return ERR_PTR(-E2BIG); + + data = kzalloc(max_size, GFP_USER); + if (!data) + return ERR_PTR(-ENOMEM); + + if (copy_from_user(data, data_in, size)) { + kfree(data); + return ERR_PTR(-EFAULT); + } + return data; +} + +static int bpf_ctx_finish(const union bpf_attr *kattr, + union bpf_attr __user *uattr, const void *data, + u32 size) +{ + void __user *data_out = u64_to_user_ptr(kattr->test.ctx_out); + int err = -EFAULT; + u32 copy_size = size; + + if (!kattr->test.ctx_size_out) + return 0; + + if (copy_size > kattr->test.ctx_size_out) { + copy_size = kattr->test.ctx_size_out; + err = -ENOSPC; + } + + if (copy_to_user(data_out, data, copy_size)) + goto out; + if (copy_to_user(&uattr->test.ctx_size_out, &size, sizeof(size))) + goto out; + if (err != -ENOSPC) + err = 0; +out: + return err; +} + +/** + * range_is_zero - test whether buffer is initialized + * @buf: buffer to check + * @from: check from this position + * @to: check up until (excluding) this position + * + * This function returns true if the there is a non-zero byte + * in the buf in the range [from,to). + */ +static inline bool range_is_zero(void *buf, size_t from, size_t to) +{ + return !memchr_inv((u8 *)buf + from, 0, to - from); +} + +static int convert___skb_to_skb(struct sk_buff *skb, struct __sk_buff *__skb) +{ + struct qdisc_skb_cb *cb = (struct qdisc_skb_cb *)skb->cb; + + if (!__skb) + return 0; + + /* make sure the fields we don't use are zeroed */ + if (!range_is_zero(__skb, 0, offsetof(struct __sk_buff, priority))) + return -EINVAL; + + /* priority is allowed */ + + if (!range_is_zero(__skb, offsetof(struct __sk_buff, priority) + + FIELD_SIZEOF(struct __sk_buff, priority), + offsetof(struct __sk_buff, cb))) + return -EINVAL; + + /* cb is allowed */ + + if (!range_is_zero(__skb, offsetof(struct __sk_buff, cb) + + FIELD_SIZEOF(struct __sk_buff, cb), + sizeof(struct __sk_buff))) + return -EINVAL; + + skb->priority = __skb->priority; + memcpy(&cb->data, __skb->cb, QDISC_CB_PRIV_LEN); + + return 0; +} + +static void convert_skb_to___skb(struct sk_buff *skb, struct __sk_buff *__skb) +{ + struct qdisc_skb_cb *cb = (struct qdisc_skb_cb *)skb->cb; + + if (!__skb) + return; + + __skb->priority = skb->priority; + memcpy(__skb->cb, &cb->data, QDISC_CB_PRIV_LEN); +} + int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr) { bool is_l2 = false, is_direct_pkt_access = false; u32 size = kattr->test.data_size_in; u32 repeat = kattr->test.repeat; + struct __sk_buff *ctx = NULL; u32 retval, duration; int hh_len = ETH_HLEN; struct sk_buff *skb; @@ -141,6 +247,12 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, if (IS_ERR(data)) return PTR_ERR(data); + ctx = bpf_ctx_init(kattr, sizeof(struct __sk_buff)); + if (IS_ERR(ctx)) { + kfree(data); + return PTR_ERR(ctx); + } + switch (prog->type) { case BPF_PROG_TYPE_SCHED_CLS: case BPF_PROG_TYPE_SCHED_ACT: @@ -158,6 +270,7 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, sk = kzalloc(sizeof(struct sock), GFP_USER); if (!sk) { kfree(data); + kfree(ctx); return -ENOMEM; } sock_net_set(sk, current->nsproxy->net_ns); @@ -166,6 +279,7 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, skb = build_skb(data, 0); if (!skb) { kfree(data); + kfree(ctx); kfree(sk); return -ENOMEM; } @@ -180,32 +294,37 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, __skb_push(skb, hh_len); if (is_direct_pkt_access) bpf_compute_data_pointers(skb); + ret = convert___skb_to_skb(skb, ctx); + if (ret) + goto out; ret = bpf_test_run(prog, skb, repeat, &retval, &duration); - if (ret) { - kfree_skb(skb); - kfree(sk); - return ret; - } + if (ret) + goto out; if (!is_l2) { if (skb_headroom(skb) < hh_len) { int nhead = HH_DATA_ALIGN(hh_len - skb_headroom(skb)); if (pskb_expand_head(skb, nhead, 0, GFP_USER)) { - kfree_skb(skb); - kfree(sk); - return -ENOMEM; + ret = -ENOMEM; + goto out; } } memset(__skb_push(skb, hh_len), 0, hh_len); } + convert_skb_to___skb(skb, ctx); size = skb->len; /* bpf program can never convert linear skb to non-linear */ if (WARN_ON_ONCE(skb_is_nonlinear(skb))) size = skb_headlen(skb); ret = bpf_test_finish(kattr, uattr, skb->data, size, retval, duration); + if (!ret) + ret = bpf_ctx_finish(kattr, uattr, ctx, + sizeof(struct __sk_buff)); +out: kfree_skb(skb); kfree(sk); + kfree(ctx); return ret; }
Add new set of arguments to bpf_attr for BPF_PROG_TEST_RUN: * ctx_in/ctx_size_in - input context * ctx_out/ctx_size_out - output context The intended use case is to pass some meta data to the test runs that operate on skb (this has being brought up on recent LPC). For programs that use bpf_prog_test_run_skb, support __sk_buff input and output. Initially, from input __sk_buff, copy _only_ cb and priority into skb, all other non-zero fields are prohibited (with EINVAL). If the user has set ctx_out/ctx_size_out, copy the potentially modified __sk_buff back to the userspace. We require all fields of input __sk_buff except the ones we explicitly support to be set to zero. The expectation is that in the future we might add support for more fields and we want to fail explicitly if the user runs the program on the kernel where we don't yet support them. The API is intentionally vague (i.e. we don't explicitly add __sk_buff to bpf_attr, but ctx_in) to potentially let other test_run types use this interface in the future (this can be xdp_md for xdp types for example). v2: * Addressed comments from Martin Lau Cc: Martin Lau <kafai@fb.com> Signed-off-by: Stanislav Fomichev <sdf@google.com> --- include/uapi/linux/bpf.h | 7 ++ kernel/bpf/syscall.c | 10 ++- net/bpf/test_run.c | 135 ++++++++++++++++++++++++++++++++++++--- 3 files changed, 143 insertions(+), 9 deletions(-)