Message ID | 279ae3717cb3d03c0ffeb511493c93c450a01e1a.1591108731.git.daniel@iogearbox.net |
---|---|
State | Accepted |
Delegated to: | BPF Maintainers |
Headers | show |
Series | Fix csum unnecessary on bpf_skb_adjust_room | expand |
On Tue, 2 Jun 2020 at 15:58, Daniel Borkmann <daniel@iogearbox.net> wrote: > > Add a bpf_csum_level() helper which BPF programs can use in combination > with bpf_skb_adjust_room() when they pass in BPF_F_ADJ_ROOM_NO_CSUM_RESET > flag to the latter to avoid falling back to CHECKSUM_NONE. > > The bpf_csum_level() allows to adjust CHECKSUM_UNNECESSARY skb->csum_levels > via BPF_CSUM_LEVEL_{INC,DEC} which calls __skb_{incr,decr}_checksum_unnecessary() > on the skb. The helper also allows a BPF_CSUM_LEVEL_RESET which sets the skb's > csum to CHECKSUM_NONE as well as a BPF_CSUM_LEVEL_QUERY to just return the > current level. Without this helper, there is no way to otherwise adjust the > skb->csum_level. I did not add an extra dummy flags as there is plenty of free > bitspace in level argument itself iff ever needed in future. > > Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> > --- > include/uapi/linux/bpf.h | 43 +++++++++++++++++++++++++++++++++- > net/core/filter.c | 38 ++++++++++++++++++++++++++++++ > tools/include/uapi/linux/bpf.h | 43 +++++++++++++++++++++++++++++++++- > 3 files changed, 122 insertions(+), 2 deletions(-) > > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h > index 3ba2bbbed80c..46622901cba7 100644 > --- a/include/uapi/linux/bpf.h > +++ b/include/uapi/linux/bpf.h > @@ -3220,6 +3220,38 @@ union bpf_attr { > * calculation. > * Return > * Requested value, or 0, if flags are not recognized. > + * > + * int bpf_csum_level(struct sk_buff *skb, u64 level) u64 flags? We can also stuff things into level I guess. > + * Description > + * Change the skbs checksum level by one layer up or down, or > + * reset it entirely to none in order to have the stack perform > + * checksum validation. The level is applicable to the following > + * protocols: TCP, UDP, GRE, SCTP, FCOE. 
For example, a decap of > + * | ETH | IP | UDP | GUE | IP | TCP | into | ETH | IP | TCP | > + * through **bpf_skb_adjust_room**\ () helper with passing in > + * **BPF_F_ADJ_ROOM_NO_CSUM_RESET** flag would require one call > + * to **bpf_csum_level**\ () with **BPF_CSUM_LEVEL_DEC** since > + * the UDP header is removed. Similarly, an encap of the latter > + * into the former could be accompanied by a helper call to > + * **bpf_csum_level**\ () with **BPF_CSUM_LEVEL_INC** if the > + * skb is still intended to be processed in higher layers of the > + * stack instead of just egressing at tc. > + * > + * There are three supported level settings at this time: > + * > + * * **BPF_CSUM_LEVEL_INC**: Increases skb->csum_level for skbs > + * with CHECKSUM_UNNECESSARY. > + * * **BPF_CSUM_LEVEL_DEC**: Decreases skb->csum_level for skbs > + * with CHECKSUM_UNNECESSARY. > + * * **BPF_CSUM_LEVEL_RESET**: Resets skb->csum_level to 0 and > + * sets CHECKSUM_NONE to force checksum validation by the stack. > + * * **BPF_CSUM_LEVEL_QUERY**: No-op, returns the current > + * skb->csum_level. > + * Return > + * 0 on success, or a negative error in case of failure. In the > + * case of **BPF_CSUM_LEVEL_QUERY**, the current skb->csum_level > + * is returned or the error code -EACCES in case the skb is not > + * subject to CHECKSUM_UNNECESSARY. > */ > #define __BPF_FUNC_MAPPER(FN) \ > FN(unspec), \ > @@ -3356,7 +3388,8 @@ union bpf_attr { > FN(ringbuf_reserve), \ > FN(ringbuf_submit), \ > FN(ringbuf_discard), \ > - FN(ringbuf_query), > + FN(ringbuf_query), \ > + FN(csum_level), > > /* integer value in 'imm' field of BPF_CALL instruction selects which helper > * function eBPF program intends to call > @@ -3433,6 +3466,14 @@ enum { > BPF_F_CURRENT_NETNS = (-1L), > }; > > +/* BPF_FUNC_csum_level level values. */ > +enum { > + BPF_CSUM_LEVEL_QUERY, > + BPF_CSUM_LEVEL_INC, > + BPF_CSUM_LEVEL_DEC, > + BPF_CSUM_LEVEL_RESET, > +}; > + > /* BPF_FUNC_skb_adjust_room flags. 
*/ > enum { > BPF_F_ADJ_ROOM_FIXED_GSO = (1ULL << 0), > diff --git a/net/core/filter.c b/net/core/filter.c > index 278dcc0af961..d01a244b5087 100644 > --- a/net/core/filter.c > +++ b/net/core/filter.c > @@ -2015,6 +2015,40 @@ static const struct bpf_func_proto bpf_csum_update_proto = { > .arg2_type = ARG_ANYTHING, > }; > > +BPF_CALL_2(bpf_csum_level, struct sk_buff *, skb, u64, level) > +{ > + /* The interface is to be used in combination with bpf_skb_adjust_room() > + * for encap/decap of packet headers when BPF_F_ADJ_ROOM_NO_CSUM_RESET > + * is passed as flags, for example. > + */ > + switch (level) { > + case BPF_CSUM_LEVEL_INC: > + __skb_incr_checksum_unnecessary(skb); > + break; > + case BPF_CSUM_LEVEL_DEC: > + __skb_decr_checksum_unnecessary(skb); > + break; > + case BPF_CSUM_LEVEL_RESET: > + __skb_reset_checksum_unnecessary(skb); > + break; > + case BPF_CSUM_LEVEL_QUERY: > + return skb->ip_summed == CHECKSUM_UNNECESSARY ? > + skb->csum_level : -EACCES; > + default: > + return -EINVAL; > + } > + > + return 0; > +} > + > +static const struct bpf_func_proto bpf_csum_level_proto = { > + .func = bpf_csum_level, > + .gpl_only = false, > + .ret_type = RET_INTEGER, > + .arg1_type = ARG_PTR_TO_CTX, > + .arg2_type = ARG_ANYTHING, > +}; > + > static inline int __bpf_rx_skb(struct net_device *dev, struct sk_buff *skb) > { > return dev_forward_skb(dev, skb); > @@ -6280,6 +6314,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) > return &bpf_csum_diff_proto; > case BPF_FUNC_csum_update: > return &bpf_csum_update_proto; > + case BPF_FUNC_csum_level: > + return &bpf_csum_level_proto; > case BPF_FUNC_l3_csum_replace: > return &bpf_l3_csum_replace_proto; > case BPF_FUNC_l4_csum_replace: > @@ -6613,6 +6649,8 @@ lwt_xmit_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) > return &bpf_skb_store_bytes_proto; > case BPF_FUNC_csum_update: > return &bpf_csum_update_proto; > + case BPF_FUNC_csum_level: > + return &bpf_csum_level_proto; > 
case BPF_FUNC_l3_csum_replace: > return &bpf_l3_csum_replace_proto; > case BPF_FUNC_l4_csum_replace: > diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h > index 3ba2bbbed80c..46622901cba7 100644 > --- a/tools/include/uapi/linux/bpf.h > +++ b/tools/include/uapi/linux/bpf.h > @@ -3220,6 +3220,38 @@ union bpf_attr { > * calculation. > * Return > * Requested value, or 0, if flags are not recognized. > + * > + * int bpf_csum_level(struct sk_buff *skb, u64 level) > + * Description > + * Change the skbs checksum level by one layer up or down, or > + * reset it entirely to none in order to have the stack perform > + * checksum validation. The level is applicable to the following > + * protocols: TCP, UDP, GRE, SCTP, FCOE. For example, a decap of > + * | ETH | IP | UDP | GUE | IP | TCP | into | ETH | IP | TCP | > + * through **bpf_skb_adjust_room**\ () helper with passing in > + * **BPF_F_ADJ_ROOM_NO_CSUM_RESET** flag would require one call > + * to **bpf_csum_level**\ () with **BPF_CSUM_LEVEL_DEC** since > + * the UDP header is removed. Similarly, an encap of the latter > + * into the former could be accompanied by a helper call to > + * **bpf_csum_level**\ () with **BPF_CSUM_LEVEL_INC** if the > + * skb is still intended to be processed in higher layers of the > + * stack instead of just egressing at tc. > + * > + * There are three supported level settings at this time: > + * > + * * **BPF_CSUM_LEVEL_INC**: Increases skb->csum_level for skbs > + * with CHECKSUM_UNNECESSARY. > + * * **BPF_CSUM_LEVEL_DEC**: Decreases skb->csum_level for skbs > + * with CHECKSUM_UNNECESSARY. > + * * **BPF_CSUM_LEVEL_RESET**: Resets skb->csum_level to 0 and > + * sets CHECKSUM_NONE to force checksum validation by the stack. > + * * **BPF_CSUM_LEVEL_QUERY**: No-op, returns the current > + * skb->csum_level. > + * Return > + * 0 on success, or a negative error in case of failure. 
In the > + * case of **BPF_CSUM_LEVEL_QUERY**, the current skb->csum_level > + * is returned or the error code -EACCES in case the skb is not > + * subject to CHECKSUM_UNNECESSARY. > */ > #define __BPF_FUNC_MAPPER(FN) \ > FN(unspec), \ > @@ -3356,7 +3388,8 @@ union bpf_attr { > FN(ringbuf_reserve), \ > FN(ringbuf_submit), \ > FN(ringbuf_discard), \ > - FN(ringbuf_query), > + FN(ringbuf_query), \ > + FN(csum_level), > > /* integer value in 'imm' field of BPF_CALL instruction selects which helper > * function eBPF program intends to call > @@ -3433,6 +3466,14 @@ enum { > BPF_F_CURRENT_NETNS = (-1L), > }; > > +/* BPF_FUNC_csum_level level values. */ > +enum { > + BPF_CSUM_LEVEL_QUERY, > + BPF_CSUM_LEVEL_INC, > + BPF_CSUM_LEVEL_DEC, > + BPF_CSUM_LEVEL_RESET, > +}; > + > /* BPF_FUNC_skb_adjust_room flags. */ > enum { > BPF_F_ADJ_ROOM_FIXED_GSO = (1ULL << 0), > -- > 2.21.0 > Acked-by: Lorenz Bauer <lmb@cloudflare.com>
On 6/2/20 5:19 PM, Lorenz Bauer wrote: > On Tue, 2 Jun 2020 at 15:58, Daniel Borkmann <daniel@iogearbox.net> wrote: >> >> Add a bpf_csum_level() helper which BPF programs can use in combination >> with bpf_skb_adjust_room() when they pass in BPF_F_ADJ_ROOM_NO_CSUM_RESET >> flag to the latter to avoid falling back to CHECKSUM_NONE. >> >> The bpf_csum_level() allows to adjust CHECKSUM_UNNECESSARY skb->csum_levels >> via BPF_CSUM_LEVEL_{INC,DEC} which calls __skb_{incr,decr}_checksum_unnecessary() >> on the skb. The helper also allows a BPF_CSUM_LEVEL_RESET which sets the skb's >> csum to CHECKSUM_NONE as well as a BPF_CSUM_LEVEL_QUERY to just return the >> current level. Without this helper, there is no way to otherwise adjust the >> skb->csum_level. I did not add an extra dummy flags as there is plenty of free >> bitspace in level argument itself iff ever needed in future. >> >> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> >> --- >> include/uapi/linux/bpf.h | 43 +++++++++++++++++++++++++++++++++- >> net/core/filter.c | 38 ++++++++++++++++++++++++++++++ >> tools/include/uapi/linux/bpf.h | 43 +++++++++++++++++++++++++++++++++- >> 3 files changed, 122 insertions(+), 2 deletions(-) >> >> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h >> index 3ba2bbbed80c..46622901cba7 100644 >> --- a/include/uapi/linux/bpf.h >> +++ b/include/uapi/linux/bpf.h >> @@ -3220,6 +3220,38 @@ union bpf_attr { >> * calculation. >> * Return >> * Requested value, or 0, if flags are not recognized. >> + * >> + * int bpf_csum_level(struct sk_buff *skb, u64 level) > > u64 flags? We can also stuff things into level I guess. Yeah, I did mention it in the commit log. There is plenty of bit space to extend with flags in there iff ever needed. Originally, helper was called bpf_csum_adjust() but then renamed into bpf_csum_level() to be more 'topic specific' (aka do one thing and do it well...) 
and avoid future API overloading, so if necessary, level can be used since I don't think the enum will be extended much further beyond what we have here anyway. [...] > > Acked-by: Lorenz Bauer <lmb@cloudflare.com> Thanks!
On Tue, 2 Jun 2020, Daniel Borkmann wrote: > On 6/2/20 5:19 PM, Lorenz Bauer wrote: > > On Tue, 2 Jun 2020 at 15:58, Daniel Borkmann <daniel@iogearbox.net> wrote: > >> > >> Add a bpf_csum_level() helper which BPF programs can use in combination > >> with bpf_skb_adjust_room() when they pass in BPF_F_ADJ_ROOM_NO_CSUM_RESET > >> flag to the latter to avoid falling back to CHECKSUM_NONE. > >> > >> The bpf_csum_level() allows to adjust CHECKSUM_UNNECESSARY skb->csum_levels > >> via BPF_CSUM_LEVEL_{INC,DEC} which calls > >> __skb_{incr,decr}_checksum_unnecessary() > >> on the skb. The helper also allows a BPF_CSUM_LEVEL_RESET which sets the > >> skb's > >> csum to CHECKSUM_NONE as well as a BPF_CSUM_LEVEL_QUERY to just return the > >> current level. Without this helper, there is no way to otherwise adjust the > >> skb->csum_level. I did not add an extra dummy flags as there is plenty of > >> free > >> bitspace in level argument itself iff ever needed in future. > >> > >> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> > >> --- > >> include/uapi/linux/bpf.h | 43 +++++++++++++++++++++++++++++++++- > >> net/core/filter.c | 38 ++++++++++++++++++++++++++++++ > >> tools/include/uapi/linux/bpf.h | 43 +++++++++++++++++++++++++++++++++- > >> 3 files changed, 122 insertions(+), 2 deletions(-) > >> > >> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h > >> index 3ba2bbbed80c..46622901cba7 100644 > >> --- a/include/uapi/linux/bpf.h > >> +++ b/include/uapi/linux/bpf.h > >> @@ -3220,6 +3220,38 @@ union bpf_attr { > >> * calculation. > >> * Return > >> * Requested value, or 0, if flags are not recognized. > >> + * > >> + * int bpf_csum_level(struct sk_buff *skb, u64 level) > > > > u64 flags? We can also stuff things into level I guess. > > Yeah, I did mention it in the commit log. There is plenty of bit space to > extend > with flags in there iff ever needed. 
Originally, helper was called > bpf_csum_adjust() > but then renamed into bpf_csum_level() to be more 'topic specific' (aka do one > thing > and do it well...) and avoid future api overloading, so if necessary level can > be > used since I don't think the enum will be extended much further from what we > have > here anyway. > > [...] > > > > Acked-by: Lorenz Bauer <lmb@cloudflare.com> > Looks great! The only thing that gave me pause was the -EACCES return value for the case where we query and the skb is not subject to CHECKSUM_UNNECESSARY; -ENOENT ("no such level") feels slightly closer to the situation to me, but either is a reasonable choice, I think. Reviewed-by: Alan Maguire <alan.maguire@oracle.com>
On 6/2/20 6:41 PM, Alan Maguire wrote: > On Tue, 2 Jun 2020, Daniel Borkmann wrote: >> On 6/2/20 5:19 PM, Lorenz Bauer wrote: >>> On Tue, 2 Jun 2020 at 15:58, Daniel Borkmann <daniel@iogearbox.net> wrote: >>>> >>>> Add a bpf_csum_level() helper which BPF programs can use in combination >>>> with bpf_skb_adjust_room() when they pass in BPF_F_ADJ_ROOM_NO_CSUM_RESET >>>> flag to the latter to avoid falling back to CHECKSUM_NONE. >>>> >>>> The bpf_csum_level() allows to adjust CHECKSUM_UNNECESSARY skb->csum_levels >>>> via BPF_CSUM_LEVEL_{INC,DEC} which calls >>>> __skb_{incr,decr}_checksum_unnecessary() >>>> on the skb. The helper also allows a BPF_CSUM_LEVEL_RESET which sets the >>>> skb's >>>> csum to CHECKSUM_NONE as well as a BPF_CSUM_LEVEL_QUERY to just return the >>>> current level. Without this helper, there is no way to otherwise adjust the >>>> skb->csum_level. I did not add an extra dummy flags as there is plenty of >>>> free >>>> bitspace in level argument itself iff ever needed in future. >>>> >>>> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> >>>> --- >>>> include/uapi/linux/bpf.h | 43 +++++++++++++++++++++++++++++++++- >>>> net/core/filter.c | 38 ++++++++++++++++++++++++++++++ >>>> tools/include/uapi/linux/bpf.h | 43 +++++++++++++++++++++++++++++++++- >>>> 3 files changed, 122 insertions(+), 2 deletions(-) >>>> >>>> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h >>>> index 3ba2bbbed80c..46622901cba7 100644 >>>> --- a/include/uapi/linux/bpf.h >>>> +++ b/include/uapi/linux/bpf.h >>>> @@ -3220,6 +3220,38 @@ union bpf_attr { >>>> * calculation. >>>> * Return >>>> * Requested value, or 0, if flags are not recognized. >>>> + * >>>> + * int bpf_csum_level(struct sk_buff *skb, u64 level) >>> >>> u64 flags? We can also stuff things into level I guess. >> >> Yeah, I did mention it in the commit log. There is plenty of bit space to >> extend >> with flags in there iff ever needed. 
Originally, helper was called >> bpf_csum_adjust() >> but then renamed into bpf_csum_level() to be more 'topic specific' (aka do one >> thing >> and do it well...) and avoid future api overloading, so if necessary level can >> be >> used since I don't think the enum will be extended much further from what we >> have >> here anyway. >> >> [...] >>> >>> Acked-by: Lorenz Bauer <lmb@cloudflare.com> > > Looks great! The only thing that gave me pause was > the -EACCES return value for the case where we query > and the skb is not subject to CHECKSUM_UNNECESSARY; > -ENOENT ("no such level") feels slightly closer to the > situation to me but either is a reasonable choice I think. My thinking was along the lines of 'error since we cannot access skb->csum_level for the given skb->ip_summed'. I don't feel strongly about which code it is either way, though; the important thing is that it is documented and distinguishable from other errors, so that the program has a way to make sense of the data returned by BPF_CSUM_LEVEL_QUERY. > Reviewed-by: Alan Maguire <alan.maguire@oracle.com> Thanks! Daniel
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 3ba2bbbed80c..46622901cba7 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3220,6 +3220,38 @@ union bpf_attr { * calculation. * Return * Requested value, or 0, if flags are not recognized. + * + * int bpf_csum_level(struct sk_buff *skb, u64 level) + * Description + * Change the skb's checksum level by one layer up or down, or + * reset it entirely to none in order to have the stack perform + * checksum validation. The level is applicable to the following + * protocols: TCP, UDP, GRE, SCTP, FCOE. For example, a decap of + * | ETH | IP | UDP | GUE | IP | TCP | into | ETH | IP | TCP | + * through **bpf_skb_adjust_room**\ () helper with passing in + * **BPF_F_ADJ_ROOM_NO_CSUM_RESET** flag would require one call + * to **bpf_csum_level**\ () with **BPF_CSUM_LEVEL_DEC** since + * the UDP header is removed. Similarly, an encap of the latter + * into the former could be accompanied by a helper call to + * **bpf_csum_level**\ () with **BPF_CSUM_LEVEL_INC** if the + * skb is still intended to be processed in higher layers of the + * stack instead of just egressing at tc. + * + * There are four supported level settings at this time: + * + * * **BPF_CSUM_LEVEL_INC**: Increases skb->csum_level for skbs + * with CHECKSUM_UNNECESSARY. + * * **BPF_CSUM_LEVEL_DEC**: Decreases skb->csum_level for skbs + * with CHECKSUM_UNNECESSARY. + * * **BPF_CSUM_LEVEL_RESET**: Resets skb->csum_level to 0 and + * sets CHECKSUM_NONE to force checksum validation by the stack. + * * **BPF_CSUM_LEVEL_QUERY**: No-op, returns the current + * skb->csum_level. + * Return + * 0 on success, or a negative error in case of failure. In the + * case of **BPF_CSUM_LEVEL_QUERY**, the current skb->csum_level + * is returned or the error code -EACCES in case the skb is not + * subject to CHECKSUM_UNNECESSARY.
*/ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3356,7 +3388,8 @@ union bpf_attr { FN(ringbuf_reserve), \ FN(ringbuf_submit), \ FN(ringbuf_discard), \ - FN(ringbuf_query), + FN(ringbuf_query), \ + FN(csum_level), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -3433,6 +3466,14 @@ enum { BPF_F_CURRENT_NETNS = (-1L), }; +/* BPF_FUNC_csum_level level values. */ +enum { + BPF_CSUM_LEVEL_QUERY, + BPF_CSUM_LEVEL_INC, + BPF_CSUM_LEVEL_DEC, + BPF_CSUM_LEVEL_RESET, +}; + /* BPF_FUNC_skb_adjust_room flags. */ enum { BPF_F_ADJ_ROOM_FIXED_GSO = (1ULL << 0), diff --git a/net/core/filter.c b/net/core/filter.c index 278dcc0af961..d01a244b5087 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2015,6 +2015,40 @@ static const struct bpf_func_proto bpf_csum_update_proto = { .arg2_type = ARG_ANYTHING, }; +BPF_CALL_2(bpf_csum_level, struct sk_buff *, skb, u64, level) +{ + /* The interface is to be used in combination with bpf_skb_adjust_room() + * for encap/decap of packet headers when BPF_F_ADJ_ROOM_NO_CSUM_RESET + * is passed as flags, for example. + */ + switch (level) { + case BPF_CSUM_LEVEL_INC: + __skb_incr_checksum_unnecessary(skb); + break; + case BPF_CSUM_LEVEL_DEC: + __skb_decr_checksum_unnecessary(skb); + break; + case BPF_CSUM_LEVEL_RESET: + __skb_reset_checksum_unnecessary(skb); + break; + case BPF_CSUM_LEVEL_QUERY: + return skb->ip_summed == CHECKSUM_UNNECESSARY ? 
+ skb->csum_level : -EACCES; + default: + return -EINVAL; + } + + return 0; +} + +static const struct bpf_func_proto bpf_csum_level_proto = { + .func = bpf_csum_level, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, +}; + static inline int __bpf_rx_skb(struct net_device *dev, struct sk_buff *skb) { return dev_forward_skb(dev, skb); @@ -6280,6 +6314,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_csum_diff_proto; case BPF_FUNC_csum_update: return &bpf_csum_update_proto; + case BPF_FUNC_csum_level: + return &bpf_csum_level_proto; case BPF_FUNC_l3_csum_replace: return &bpf_l3_csum_replace_proto; case BPF_FUNC_l4_csum_replace: @@ -6613,6 +6649,8 @@ lwt_xmit_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_skb_store_bytes_proto; case BPF_FUNC_csum_update: return &bpf_csum_update_proto; + case BPF_FUNC_csum_level: + return &bpf_csum_level_proto; case BPF_FUNC_l3_csum_replace: return &bpf_l3_csum_replace_proto; case BPF_FUNC_l4_csum_replace: diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 3ba2bbbed80c..46622901cba7 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -3220,6 +3220,38 @@ union bpf_attr { * calculation. * Return * Requested value, or 0, if flags are not recognized. + * + * int bpf_csum_level(struct sk_buff *skb, u64 level) + * Description + * Change the skbs checksum level by one layer up or down, or + * reset it entirely to none in order to have the stack perform + * checksum validation. The level is applicable to the following + * protocols: TCP, UDP, GRE, SCTP, FCOE. 
For example, a decap of + * | ETH | IP | UDP | GUE | IP | TCP | into | ETH | IP | TCP | + * through **bpf_skb_adjust_room**\ () helper with passing in + * **BPF_F_ADJ_ROOM_NO_CSUM_RESET** flag would require one call + * to **bpf_csum_level**\ () with **BPF_CSUM_LEVEL_DEC** since + * the UDP header is removed. Similarly, an encap of the latter + * into the former could be accompanied by a helper call to + * **bpf_csum_level**\ () with **BPF_CSUM_LEVEL_INC** if the + * skb is still intended to be processed in higher layers of the + * stack instead of just egressing at tc. + * + * There are four supported level settings at this time: + * + * * **BPF_CSUM_LEVEL_INC**: Increases skb->csum_level for skbs + * with CHECKSUM_UNNECESSARY. + * * **BPF_CSUM_LEVEL_DEC**: Decreases skb->csum_level for skbs + * with CHECKSUM_UNNECESSARY. + * * **BPF_CSUM_LEVEL_RESET**: Resets skb->csum_level to 0 and + * sets CHECKSUM_NONE to force checksum validation by the stack. + * * **BPF_CSUM_LEVEL_QUERY**: No-op, returns the current + * skb->csum_level. + * Return + * 0 on success, or a negative error in case of failure. In the + * case of **BPF_CSUM_LEVEL_QUERY**, the current skb->csum_level + * is returned or the error code -EACCES in case the skb is not + * subject to CHECKSUM_UNNECESSARY. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3356,7 +3388,8 @@ union bpf_attr { FN(ringbuf_reserve), \ FN(ringbuf_submit), \ FN(ringbuf_discard), \ - FN(ringbuf_query), + FN(ringbuf_query), \ + FN(csum_level), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -3433,6 +3466,14 @@ enum { BPF_F_CURRENT_NETNS = (-1L), }; +/* BPF_FUNC_csum_level level values. */ +enum { + BPF_CSUM_LEVEL_QUERY, + BPF_CSUM_LEVEL_INC, + BPF_CSUM_LEVEL_DEC, + BPF_CSUM_LEVEL_RESET, +}; + /* BPF_FUNC_skb_adjust_room flags. */ enum { BPF_F_ADJ_ROOM_FIXED_GSO = (1ULL << 0),
Add a bpf_csum_level() helper which BPF programs can use in combination with bpf_skb_adjust_room() when they pass the BPF_F_ADJ_ROOM_NO_CSUM_RESET flag to the latter in order to avoid falling back to CHECKSUM_NONE. The bpf_csum_level() helper allows adjusting the skb->csum_level of CHECKSUM_UNNECESSARY skbs via BPF_CSUM_LEVEL_{INC,DEC}, which call __skb_{incr,decr}_checksum_unnecessary() on the skb. The helper also supports BPF_CSUM_LEVEL_RESET, which sets the skb's csum to CHECKSUM_NONE, as well as BPF_CSUM_LEVEL_QUERY to just return the current level. Without this helper, there is no other way to adjust the skb->csum_level. I did not add an extra dummy flags argument as there is plenty of free bit space in the level argument itself if ever needed in the future. Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> --- include/uapi/linux/bpf.h | 43 +++++++++++++++++++++++++++++++++- net/core/filter.c | 38 ++++++++++++++++++++++++++++++ tools/include/uapi/linux/bpf.h | 43 +++++++++++++++++++++++++++++++++- 3 files changed, 122 insertions(+), 2 deletions(-)