Message ID | 20190603163852.2535150-2-jonathan.lemon@gmail.com |
---|---|
State | Changes Requested |
Delegated to: | BPF Maintainers |
Headers | show |
Series | Better handling of xskmap entries | expand |
On 06/03/2019 06:38 PM, Jonathan Lemon wrote: > Currently, the AF_XDP code uses a separate map in order to > determine if an xsk is bound to a queue. Instead of doing this, > have bpf_map_lookup_elem() return the queue_id, as a way of > indicating that there is a valid entry at the map index. > > Rearrange some xdp_sock members to eliminate structure holes. > > Signed-off-by: Jonathan Lemon <jonathan.lemon@gmail.com> > Acked-by: Song Liu <songliubraving@fb.com> > Acked-by: Björn Töpel <bjorn.topel@intel.com> > --- > include/net/xdp_sock.h | 6 +++--- > kernel/bpf/verifier.c | 6 +++++- > kernel/bpf/xskmap.c | 4 +++- > .../selftests/bpf/verifier/prevent_map_lookup.c | 15 --------------- > 4 files changed, 11 insertions(+), 20 deletions(-) > > diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h > index d074b6d60f8a..7d84b1da43d2 100644 > --- a/include/net/xdp_sock.h > +++ b/include/net/xdp_sock.h > @@ -57,12 +57,12 @@ struct xdp_sock { > struct net_device *dev; > struct xdp_umem *umem; > struct list_head flush_node; > - u16 queue_id; > - struct xsk_queue *tx ____cacheline_aligned_in_smp; > - struct list_head list; > + u32 queue_id; > bool zc; > /* Protects multiple processes in the control path */ > struct mutex mutex; > + struct xsk_queue *tx ____cacheline_aligned_in_smp; > + struct list_head list; > /* Mutual exclusion of NAPI TX thread and sendmsg error paths > * in the SKB destructor callback. > */ > diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c > index 2778417e6e0c..91c730f85e92 100644 > --- a/kernel/bpf/verifier.c > +++ b/kernel/bpf/verifier.c > @@ -2905,10 +2905,14 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, > * appear. 
> */ > case BPF_MAP_TYPE_CPUMAP: > - case BPF_MAP_TYPE_XSKMAP: > if (func_id != BPF_FUNC_redirect_map) > goto error; > break; > + case BPF_MAP_TYPE_XSKMAP: > + if (func_id != BPF_FUNC_redirect_map && > + func_id != BPF_FUNC_map_lookup_elem) > + goto error; > + break; > case BPF_MAP_TYPE_ARRAY_OF_MAPS: > case BPF_MAP_TYPE_HASH_OF_MAPS: > if (func_id != BPF_FUNC_map_lookup_elem) > diff --git a/kernel/bpf/xskmap.c b/kernel/bpf/xskmap.c > index 686d244e798d..249b22089014 100644 > --- a/kernel/bpf/xskmap.c > +++ b/kernel/bpf/xskmap.c > @@ -154,7 +154,9 @@ void __xsk_map_flush(struct bpf_map *map) > > static void *xsk_map_lookup_elem(struct bpf_map *map, void *key) > { > - return ERR_PTR(-EOPNOTSUPP); > + struct xdp_sock *xs = __xsk_map_lookup_elem(map, *(u32 *)key); > + > + return xs ? &xs->queue_id : NULL; > } How do you guarantee that BPf programs don't mess around with the map values e.g. overriding xs->queue_id from the lookup? This should be read-only map from BPF program side. > static int xsk_map_update_elem(struct bpf_map *map, void *key, void *value, > diff --git a/tools/testing/selftests/bpf/verifier/prevent_map_lookup.c b/tools/testing/selftests/bpf/verifier/prevent_map_lookup.c > index bbdba990fefb..da7a4b37cb98 100644 > --- a/tools/testing/selftests/bpf/verifier/prevent_map_lookup.c > +++ b/tools/testing/selftests/bpf/verifier/prevent_map_lookup.c > @@ -28,21 +28,6 @@ > .errstr = "cannot pass map_type 18 into func bpf_map_lookup_elem", > .prog_type = BPF_PROG_TYPE_SOCK_OPS, > }, > -{ > - "prevent map lookup in xskmap", > - .insns = { > - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), > - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), > - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), > - BPF_LD_MAP_FD(BPF_REG_1, 0), > - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), > - BPF_EXIT_INSN(), > - }, > - .fixup_map_xskmap = { 3 }, > - .result = REJECT, > - .errstr = "cannot pass map_type 17 into func bpf_map_lookup_elem", > - .prog_type = BPF_PROG_TYPE_XDP, > -}, > { 
> "prevent map lookup in stack trace", > .insns = { >
On 06/04/2019 04:54 PM, Daniel Borkmann wrote: > On 06/03/2019 06:38 PM, Jonathan Lemon wrote: >> Currently, the AF_XDP code uses a separate map in order to >> determine if an xsk is bound to a queue. Instead of doing this, >> have bpf_map_lookup_elem() return the queue_id, as a way of >> indicating that there is a valid entry at the map index. >> >> Rearrange some xdp_sock members to eliminate structure holes. >> >> Signed-off-by: Jonathan Lemon <jonathan.lemon@gmail.com> >> Acked-by: Song Liu <songliubraving@fb.com> >> Acked-by: Björn Töpel <bjorn.topel@intel.com> >> --- >> include/net/xdp_sock.h | 6 +++--- >> kernel/bpf/verifier.c | 6 +++++- >> kernel/bpf/xskmap.c | 4 +++- >> .../selftests/bpf/verifier/prevent_map_lookup.c | 15 --------------- >> 4 files changed, 11 insertions(+), 20 deletions(-) >> >> diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h >> index d074b6d60f8a..7d84b1da43d2 100644 >> --- a/include/net/xdp_sock.h >> +++ b/include/net/xdp_sock.h >> @@ -57,12 +57,12 @@ struct xdp_sock { >> struct net_device *dev; >> struct xdp_umem *umem; >> struct list_head flush_node; >> - u16 queue_id; >> - struct xsk_queue *tx ____cacheline_aligned_in_smp; >> - struct list_head list; >> + u32 queue_id; >> bool zc; >> /* Protects multiple processes in the control path */ >> struct mutex mutex; >> + struct xsk_queue *tx ____cacheline_aligned_in_smp; >> + struct list_head list; >> /* Mutual exclusion of NAPI TX thread and sendmsg error paths >> * in the SKB destructor callback. >> */ >> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c >> index 2778417e6e0c..91c730f85e92 100644 >> --- a/kernel/bpf/verifier.c >> +++ b/kernel/bpf/verifier.c >> @@ -2905,10 +2905,14 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, >> * appear. 
>> */ >> case BPF_MAP_TYPE_CPUMAP: >> - case BPF_MAP_TYPE_XSKMAP: >> if (func_id != BPF_FUNC_redirect_map) >> goto error; >> break; >> + case BPF_MAP_TYPE_XSKMAP: >> + if (func_id != BPF_FUNC_redirect_map && >> + func_id != BPF_FUNC_map_lookup_elem) >> + goto error; >> + break; >> case BPF_MAP_TYPE_ARRAY_OF_MAPS: >> case BPF_MAP_TYPE_HASH_OF_MAPS: >> if (func_id != BPF_FUNC_map_lookup_elem) >> diff --git a/kernel/bpf/xskmap.c b/kernel/bpf/xskmap.c >> index 686d244e798d..249b22089014 100644 >> --- a/kernel/bpf/xskmap.c >> +++ b/kernel/bpf/xskmap.c >> @@ -154,7 +154,9 @@ void __xsk_map_flush(struct bpf_map *map) >> >> static void *xsk_map_lookup_elem(struct bpf_map *map, void *key) >> { >> - return ERR_PTR(-EOPNOTSUPP); >> + struct xdp_sock *xs = __xsk_map_lookup_elem(map, *(u32 *)key); >> + >> + return xs ? &xs->queue_id : NULL; >> } > > How do you guarantee that BPf programs don't mess around with the map values > e.g. overriding xs->queue_id from the lookup? This should be read-only map > from BPF program side. (Or via per-cpu scratch var where you move xs->queue_id into and return from here.)
On Mon, 3 Jun 2019 09:38:51 -0700 Jonathan Lemon <jonathan.lemon@gmail.com> wrote: > Currently, the AF_XDP code uses a separate map in order to > determine if an xsk is bound to a queue. Instead of doing this, > have bpf_map_lookup_elem() return the queue_id, as a way of > indicating that there is a valid entry at the map index. Just a reminder, that once we choose a return value, here the queue_id, then it basically becomes UAPI, and we cannot change it. Can we somehow use BTF to allow us to extend this later? I was also going to point out that you cannot return a direct pointer to queue_id, as BPF-prog side can modify this... but Daniel already pointed this out.
On 4 Jun 2019, at 9:43, Jesper Dangaard Brouer wrote: > On Mon, 3 Jun 2019 09:38:51 -0700 > Jonathan Lemon <jonathan.lemon@gmail.com> wrote: > >> Currently, the AF_XDP code uses a separate map in order to >> determine if an xsk is bound to a queue. Instead of doing this, >> have bpf_map_lookup_elem() return the queue_id, as a way of >> indicating that there is a valid entry at the map index. > > Just a reminder, that once we choose a return value, there the > queue_id, then it basically becomes UAPI, and we cannot change it. Yes - Alexei initially wanted to return the sk_cookie instead, but that's 64 bits and opens up a whole other can of worms. > Can we somehow use BTF to allow us to extend this later? > > I was also going to point out that, you cannot return a direct pointer > to queue_id, as BPF-prog side can modify this... but Daniel already > pointed this out. So, I see three solutions here (for this and Toke's patchset also, which is encountering the same problem). 1) add a scratch register (Toke's approach) 2) add a PTR_TO_<type>, which has the access checked. This is the most flexible approach, but does seem a bit overkill at the moment. 3) add another helper function, say, bpf_map_elem_present() which just returns a boolean value indicating whether there is a valid map entry or not. I was starting to do 2), but wanted to get some more feedback first.
On Tue, Jun 04, 2019 at 10:25:23AM -0700, Jonathan Lemon wrote: > On 4 Jun 2019, at 9:43, Jesper Dangaard Brouer wrote: > > > On Mon, 3 Jun 2019 09:38:51 -0700 > > Jonathan Lemon <jonathan.lemon@gmail.com> wrote: > > > >> Currently, the AF_XDP code uses a separate map in order to > >> determine if an xsk is bound to a queue. Instead of doing this, > >> have bpf_map_lookup_elem() return the queue_id, as a way of > >> indicating that there is a valid entry at the map index. > > > > Just a reminder, that once we choose a return value, there the > > queue_id, then it basically becomes UAPI, and we cannot change it. > > Yes - Alexei initially wanted to return the sk_cookie instead, but > that's 64 bits and opens up a whole other can of worms. > > > > Can we somehow use BTF to allow us to extend this later? > > > > I was also going to point out that, you cannot return a direct pointer > > to queue_id, as BPF-prog side can modify this... but Daniel already > > pointed this out. > > So, I see three solutions here (for this and Toke's patchset also, > which is encountering the same problem). > > 1) add a scratch register (Toke's approach) > 2) add a PTR_TO_<type>, which has the access checked. This is the most > flexible approach, but does seem a bit overkill at the moment. I think it would be nice and more extensible to have PTR_TO_xxx. It could start with the existing PTR_TO_SOCKET or starting with a new PTR_TO_XDP_SOCK from the beginning is also fine. > 3) add another helper function, say, bpf_map_elem_present() which just > returns a boolean value indicating whether there is a valid map entry > or not. > > I was starting to do 2), but wanted to get some more feedback first. > -- > Jonathan
On 2019-06-04 19:25, Jonathan Lemon wrote: > On 4 Jun 2019, at 9:43, Jesper Dangaard Brouer wrote: > >> On Mon, 3 Jun 2019 09:38:51 -0700 >> Jonathan Lemon <jonathan.lemon@gmail.com> wrote: >> >>> Currently, the AF_XDP code uses a separate map in order to >>> determine if an xsk is bound to a queue. Instead of doing this, >>> have bpf_map_lookup_elem() return the queue_id, as a way of >>> indicating that there is a valid entry at the map index. >> >> Just a reminder, that once we choose a return value, there the >> queue_id, then it basically becomes UAPI, and we cannot change it. > > Yes - Alexei initially wanted to return the sk_cookie instead, but > that's 64 bits and opens up a whole other can of worms. > Hmm, what other info would be useful? ifindex? Or going the other way, with read-only and just returning boolean? > >> Can we somehow use BTF to allow us to extend this later? >> >> I was also going to point out that, you cannot return a direct pointer >> to queue_id, as BPF-prog side can modify this... but Daniel already >> pointed this out. > Ugh, good thing Daniel found this! Björn > So, I see three solutions here (for this and Toke's patchset also, > which is encountering the same problem). > > 1) add a scratch register (Toke's approach) > 2) add a PTR_TO_<type>, which has the access checked. This is the most > flexible approach, but does seem a bit overkill at the moment. > 3) add another helper function, say, bpf_map_elem_present() which just > returns a boolean value indicating whether there is a valid map entry > or not. > > I was starting to do 2), but wanted to get some more feedback first. >
Jonathan Lemon <jonathan.lemon@gmail.com> writes: > On 4 Jun 2019, at 9:43, Jesper Dangaard Brouer wrote: > >> On Mon, 3 Jun 2019 09:38:51 -0700 >> Jonathan Lemon <jonathan.lemon@gmail.com> wrote: >> >>> Currently, the AF_XDP code uses a separate map in order to >>> determine if an xsk is bound to a queue. Instead of doing this, >>> have bpf_map_lookup_elem() return the queue_id, as a way of >>> indicating that there is a valid entry at the map index. >> >> Just a reminder, that once we choose a return value, there the >> queue_id, then it basically becomes UAPI, and we cannot change it. > > Yes - Alexei initially wanted to return the sk_cookie instead, but > that's 64 bits and opens up a whole other can of worms. > > >> Can we somehow use BTF to allow us to extend this later? >> >> I was also going to point out that, you cannot return a direct pointer >> to queue_id, as BPF-prog side can modify this... but Daniel already >> pointed this out. > > So, I see three solutions here (for this and Toke's patchset also, > which is encountering the same problem). > > 1) add a scratch register (Toke's approach) > 2) add a PTR_TO_<type>, which has the access checked. This is the most > flexible approach, but does seem a bit overkill at the moment. > 3) add another helper function, say, bpf_map_elem_present() which just > returns a boolean value indicating whether there is a valid map entry > or not. > > I was starting to do 2), but wanted to get some more feedback first. I think I prefer 2) over 3); since we have a verifier that can actually enforce something like read-only behaviour, actually having access to the value will probably be useful to someone. I can obviously live with 1) as well, of course (since I already did that; though I just now realise that I forgot to make the scratch space per-CPU)... :) -Toke
On Tue, 4 Jun 2019 at 20:13, Martin Lau <kafai@fb.com> wrote: > > On Tue, Jun 04, 2019 at 10:25:23AM -0700, Jonathan Lemon wrote: > > On 4 Jun 2019, at 9:43, Jesper Dangaard Brouer wrote: > > > > > On Mon, 3 Jun 2019 09:38:51 -0700 > > > Jonathan Lemon <jonathan.lemon@gmail.com> wrote: > > > > > >> Currently, the AF_XDP code uses a separate map in order to > > >> determine if an xsk is bound to a queue. Instead of doing this, > > >> have bpf_map_lookup_elem() return the queue_id, as a way of > > >> indicating that there is a valid entry at the map index. > > > > > > Just a reminder, that once we choose a return value, there the > > > queue_id, then it basically becomes UAPI, and we cannot change it. > > > > Yes - Alexei initially wanted to return the sk_cookie instead, but > > that's 64 bits and opens up a whole other can of worms. > > > > > > > Can we somehow use BTF to allow us to extend this later? > > > > > > I was also going to point out that, you cannot return a direct pointer > > > to queue_id, as BPF-prog side can modify this... but Daniel already > > > pointed this out. > > > > So, I see three solutions here (for this and Toke's patchset also, > > which is encountering the same problem). > > > > 1) add a scratch register (Toke's approach) > > 2) add a PTR_TO_<type>, which has the access checked. This is the most > > flexible approach, but does seem a bit overkill at the moment. > I think it would be nice and more extensible to have PTR_TO_xxx. > It could start with the existing PTR_TO_SOCKET > > or starting with a new PTR_TO_XDP_SOCK from the beginning is also fine. > Doesn't the PTR_TO_SOCKET path involve taking a ref and mandating sk_release() from the fast path? :-( Björn > > 3) add another helper function, say, bpf_map_elem_present() which just > > returns a boolean value indicating whether there is a valid map entry > > or not. > > > > I was starting to do 2), but wanted to get some more feedback first. > > -- > > Jonathan
On 5 Jun 2019, at 1:45, Björn Töpel wrote: > On Tue, 4 Jun 2019 at 20:13, Martin Lau <kafai@fb.com> wrote: >> >> On Tue, Jun 04, 2019 at 10:25:23AM -0700, Jonathan Lemon wrote: >>> On 4 Jun 2019, at 9:43, Jesper Dangaard Brouer wrote: >>> >>>> On Mon, 3 Jun 2019 09:38:51 -0700 >>>> Jonathan Lemon <jonathan.lemon@gmail.com> wrote: >>>> >>>>> Currently, the AF_XDP code uses a separate map in order to >>>>> determine if an xsk is bound to a queue. Instead of doing this, >>>>> have bpf_map_lookup_elem() return the queue_id, as a way of >>>>> indicating that there is a valid entry at the map index. >>>> >>>> Just a reminder, that once we choose a return value, there the >>>> queue_id, then it basically becomes UAPI, and we cannot change it. >>> >>> Yes - Alexei initially wanted to return the sk_cookie instead, but >>> that's 64 bits and opens up a whole other can of worms. >>> >>> >>>> Can we somehow use BTF to allow us to extend this later? >>>> >>>> I was also going to point out that, you cannot return a direct pointer >>>> to queue_id, as BPF-prog side can modify this... but Daniel already >>>> pointed this out. >>> >>> So, I see three solutions here (for this and Toke's patchset also, >>> which is encountering the same problem). >>> >>> 1) add a scratch register (Toke's approach) >>> 2) add a PTR_TO_<type>, which has the access checked. This is the most >>> flexible approach, but does seem a bit overkill at the moment. >> I think it would be nice and more extensible to have PTR_TO_xxx. >> It could start with the existing PTR_TO_SOCKET >> >> or starting with a new PTR_TO_XDP_SOCK from the beginning is also fine. >> > > Doesn't the PTR_TO_SOCKET path involve taking a ref and mandating > sk_release() from the fast path? :-( AF_XDP sockets are created with SOCK_RCU_FREE, and used under rcu, so I don't think that they need to be refcounted. bpf_sk_release is a NOP in the RCU_FREE case.
diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h index d074b6d60f8a..7d84b1da43d2 100644 --- a/include/net/xdp_sock.h +++ b/include/net/xdp_sock.h @@ -57,12 +57,12 @@ struct xdp_sock { struct net_device *dev; struct xdp_umem *umem; struct list_head flush_node; - u16 queue_id; - struct xsk_queue *tx ____cacheline_aligned_in_smp; - struct list_head list; + u32 queue_id; bool zc; /* Protects multiple processes in the control path */ struct mutex mutex; + struct xsk_queue *tx ____cacheline_aligned_in_smp; + struct list_head list; /* Mutual exclusion of NAPI TX thread and sendmsg error paths * in the SKB destructor callback. */ diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 2778417e6e0c..91c730f85e92 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2905,10 +2905,14 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, * appear. */ case BPF_MAP_TYPE_CPUMAP: - case BPF_MAP_TYPE_XSKMAP: if (func_id != BPF_FUNC_redirect_map) goto error; break; + case BPF_MAP_TYPE_XSKMAP: + if (func_id != BPF_FUNC_redirect_map && + func_id != BPF_FUNC_map_lookup_elem) + goto error; + break; case BPF_MAP_TYPE_ARRAY_OF_MAPS: case BPF_MAP_TYPE_HASH_OF_MAPS: if (func_id != BPF_FUNC_map_lookup_elem) diff --git a/kernel/bpf/xskmap.c b/kernel/bpf/xskmap.c index 686d244e798d..249b22089014 100644 --- a/kernel/bpf/xskmap.c +++ b/kernel/bpf/xskmap.c @@ -154,7 +154,9 @@ void __xsk_map_flush(struct bpf_map *map) static void *xsk_map_lookup_elem(struct bpf_map *map, void *key) { - return ERR_PTR(-EOPNOTSUPP); + struct xdp_sock *xs = __xsk_map_lookup_elem(map, *(u32 *)key); + + return xs ? 
&xs->queue_id : NULL; } static int xsk_map_update_elem(struct bpf_map *map, void *key, void *value, diff --git a/tools/testing/selftests/bpf/verifier/prevent_map_lookup.c b/tools/testing/selftests/bpf/verifier/prevent_map_lookup.c index bbdba990fefb..da7a4b37cb98 100644 --- a/tools/testing/selftests/bpf/verifier/prevent_map_lookup.c +++ b/tools/testing/selftests/bpf/verifier/prevent_map_lookup.c @@ -28,21 +28,6 @@ .errstr = "cannot pass map_type 18 into func bpf_map_lookup_elem", .prog_type = BPF_PROG_TYPE_SOCK_OPS, }, -{ - "prevent map lookup in xskmap", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_EXIT_INSN(), - }, - .fixup_map_xskmap = { 3 }, - .result = REJECT, - .errstr = "cannot pass map_type 17 into func bpf_map_lookup_elem", - .prog_type = BPF_PROG_TYPE_XDP, -}, { "prevent map lookup in stack trace", .insns = {