Message ID | 1519863101-112083-2-git-send-email-u9012063@gmail.com |
---|---|
State | Superseded, archived |
Delegated to: | David Miller |
Headers | show |
Series | gre: add sequence number for collect md mode. | expand |
On 03/01/2018 01:11 AM, William Tu wrote: > Currently GRE sequence number can only be used in native > tunnel mode. This patch adds sequence number support for > gre collect metadata mode. RFC2890 defines GRE sequence > number to be specific to the traffic flow identified by the > key. However, this patch does not implement per-key seqno. > The sequence number is shared in the same tunnel device. > That is, different tunnel keys using the same collect_md > tunnel share single sequence number. > > Signed-off-by: William Tu <u9012063@gmail.com> > --- > include/uapi/linux/bpf.h | 1 + > net/core/filter.c | 4 +++- > net/ipv4/ip_gre.c | 7 +++++-- > net/ipv6/ip6_gre.c | 13 ++++++++----- > 4 files changed, 17 insertions(+), 8 deletions(-) > > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h > index db6bdc375126..2c6dd942953d 100644 > --- a/include/uapi/linux/bpf.h > +++ b/include/uapi/linux/bpf.h > @@ -800,6 +800,7 @@ enum bpf_func_id { > /* BPF_FUNC_skb_set_tunnel_key flags. */ > #define BPF_F_ZERO_CSUM_TX (1ULL << 1) > #define BPF_F_DONT_FRAGMENT (1ULL << 2) > +#define BPF_F_GRE_SEQ (1ULL << 3) > > /* BPF_FUNC_perf_event_output, BPF_FUNC_perf_event_read and > * BPF_FUNC_perf_event_read_value flags. > diff --git a/net/core/filter.c b/net/core/filter.c > index 0c121adbdbaa..010305e0791a 100644 > --- a/net/core/filter.c > +++ b/net/core/filter.c > @@ -2991,7 +2991,7 @@ BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb, > struct ip_tunnel_info *info; > > if (unlikely(flags & ~(BPF_F_TUNINFO_IPV6 | BPF_F_ZERO_CSUM_TX | > - BPF_F_DONT_FRAGMENT))) > + BPF_F_DONT_FRAGMENT | BPF_F_GRE_SEQ))) > return -EINVAL; > if (unlikely(size != sizeof(struct bpf_tunnel_key))) { > switch (size) { > @@ -3025,6 +3025,8 @@ BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb, > info->key.tun_flags |= TUNNEL_DONT_FRAGMENT; > if (flags & BPF_F_ZERO_CSUM_TX) > info->key.tun_flags &= ~TUNNEL_CSUM; > + if (flags & BPF_F_GRE_SEQ) > + info->key.tun_flags |= TUNNEL_SEQ; Ok, looks fine. My only minor request would be to rename BPF_F_GRE_SEQ into e.g. BPF_F_SEQ_NUMBER to at least not have something GRE specific in the name in case we could later on reuse it elsewhere as well, and the bpf_skb_set_tunnel_key() is unaware of the underlying encap anyway. > info->key.tun_id = cpu_to_be64(from->tunnel_id); > info->key.tos = from->tunnel_tos; > diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c > index 0fe1d69b5df4..95fd225f402e 100644 > --- a/net/ipv4/ip_gre.c > +++ b/net/ipv4/ip_gre.c > @@ -522,6 +522,7 @@ static struct rtable *prepare_fb_xmit(struct sk_buff *skb, > static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev, > __be16 proto) > { > + struct ip_tunnel *tunnel = netdev_priv(dev); > struct ip_tunnel_info *tun_info; > const struct ip_tunnel_key *key; > struct rtable *rt = NULL; > @@ -545,9 +546,11 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev, > if (gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM))) > goto err_free_rt; > > - flags = tun_info->key.tun_flags & (TUNNEL_CSUM | TUNNEL_KEY); > + flags = tun_info->key.tun_flags & > + (TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ); > gre_build_header(skb, tunnel_hlen, flags, proto, > - tunnel_id_to_key32(tun_info->key.tun_id), 0); > + tunnel_id_to_key32(tun_info->key.tun_id), > + (flags | TUNNEL_SEQ) ? htonl(tunnel->o_seqno++) : 0); > > df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0; > > diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c > index 4f150a394387..16c5dfcbd195 100644 > --- a/net/ipv6/ip6_gre.c > +++ b/net/ipv6/ip6_gre.c > @@ -695,9 +695,6 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb, > else > fl6->daddr = tunnel->parms.raddr; > > - if (tunnel->parms.o_flags & TUNNEL_SEQ) > - tunnel->o_seqno++; > - > /* Push GRE header. */ > protocol = (dev->type == ARPHRD_ETHER) ? htons(ETH_P_TEB) : proto; > > @@ -720,14 +717,20 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb, > fl6->flowi6_uid = sock_net_uid(dev_net(dev), NULL); > > dsfield = key->tos; > - flags = key->tun_flags & (TUNNEL_CSUM | TUNNEL_KEY); > + flags = key->tun_flags & > + (TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ); > tunnel->tun_hlen = gre_calc_hlen(flags); > > gre_build_header(skb, tunnel->tun_hlen, > flags, protocol, > - tunnel_id_to_key32(tun_info->key.tun_id), 0); > + tunnel_id_to_key32(tun_info->key.tun_id), > + (flags | TUNNEL_SEQ) ? htonl(tunnel->o_seqno++) > + : 0); > > } else { > + if (tunnel->parms.o_flags & TUNNEL_SEQ) > + tunnel->o_seqno++; > + > gre_build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags, > protocol, tunnel->parms.o_key, > htonl(tunnel->o_seqno)); >
On Thu, Mar 1, 2018 at 2:18 AM, Daniel Borkmann <daniel@iogearbox.net> wrote: > On 03/01/2018 01:11 AM, William Tu wrote: >> Currently GRE sequence number can only be used in native >> tunnel mode. This patch adds sequence number support for >> gre collect metadata mode. RFC2890 defines GRE sequence >> number to be specific to the traffic flow identified by the >> key. However, this patch does not implement per-key seqno. >> The sequence number is shared in the same tunnel device. >> That is, different tunnel keys using the same collect_md >> tunnel share single sequence number. >> >> Signed-off-by: William Tu <u9012063@gmail.com> >> --- >> include/uapi/linux/bpf.h | 1 + >> net/core/filter.c | 4 +++- >> net/ipv4/ip_gre.c | 7 +++++-- >> net/ipv6/ip6_gre.c | 13 ++++++++----- >> 4 files changed, 17 insertions(+), 8 deletions(-) >> >> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h >> index db6bdc375126..2c6dd942953d 100644 >> --- a/include/uapi/linux/bpf.h >> +++ b/include/uapi/linux/bpf.h >> @@ -800,6 +800,7 @@ enum bpf_func_id { >> /* BPF_FUNC_skb_set_tunnel_key flags. */ >> #define BPF_F_ZERO_CSUM_TX (1ULL << 1) >> #define BPF_F_DONT_FRAGMENT (1ULL << 2) >> +#define BPF_F_GRE_SEQ (1ULL << 3) >> >> /* BPF_FUNC_perf_event_output, BPF_FUNC_perf_event_read and >> * BPF_FUNC_perf_event_read_value flags. >> diff --git a/net/core/filter.c b/net/core/filter.c >> index 0c121adbdbaa..010305e0791a 100644 >> --- a/net/core/filter.c >> +++ b/net/core/filter.c >> @@ -2991,7 +2991,7 @@ BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb, >> struct ip_tunnel_info *info; >> >> if (unlikely(flags & ~(BPF_F_TUNINFO_IPV6 | BPF_F_ZERO_CSUM_TX | >> - BPF_F_DONT_FRAGMENT))) >> + BPF_F_DONT_FRAGMENT | BPF_F_GRE_SEQ))) >> return -EINVAL; >> if (unlikely(size != sizeof(struct bpf_tunnel_key))) { >> switch (size) { >> @@ -3025,6 +3025,8 @@ BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb, >> info->key.tun_flags |= TUNNEL_DONT_FRAGMENT; >> if (flags & BPF_F_ZERO_CSUM_TX) >> info->key.tun_flags &= ~TUNNEL_CSUM; >> + if (flags & BPF_F_GRE_SEQ) >> + info->key.tun_flags |= TUNNEL_SEQ; > > Ok, looks fine. My only minor request would be to rename BPF_F_GRE_SEQ > into e.g. BPF_F_SEQ_NUMBER to at least not have something GRE specific > in the name in case we could later on reuse it elsewhere as well, and > the bpf_skb_set_tunnel_key() is unaware of the underlying encap anyway. OK, make sense. Thanks! I will rename it in the next version. William
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index db6bdc375126..2c6dd942953d 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -800,6 +800,7 @@ enum bpf_func_id { /* BPF_FUNC_skb_set_tunnel_key flags. */ #define BPF_F_ZERO_CSUM_TX (1ULL << 1) #define BPF_F_DONT_FRAGMENT (1ULL << 2) +#define BPF_F_GRE_SEQ (1ULL << 3) /* BPF_FUNC_perf_event_output, BPF_FUNC_perf_event_read and * BPF_FUNC_perf_event_read_value flags. diff --git a/net/core/filter.c b/net/core/filter.c index 0c121adbdbaa..010305e0791a 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2991,7 +2991,7 @@ BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb, struct ip_tunnel_info *info; if (unlikely(flags & ~(BPF_F_TUNINFO_IPV6 | BPF_F_ZERO_CSUM_TX | - BPF_F_DONT_FRAGMENT))) + BPF_F_DONT_FRAGMENT | BPF_F_GRE_SEQ))) return -EINVAL; if (unlikely(size != sizeof(struct bpf_tunnel_key))) { switch (size) { @@ -3025,6 +3025,8 @@ BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb, info->key.tun_flags |= TUNNEL_DONT_FRAGMENT; if (flags & BPF_F_ZERO_CSUM_TX) info->key.tun_flags &= ~TUNNEL_CSUM; + if (flags & BPF_F_GRE_SEQ) + info->key.tun_flags |= TUNNEL_SEQ; info->key.tun_id = cpu_to_be64(from->tunnel_id); info->key.tos = from->tunnel_tos; diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 0fe1d69b5df4..95fd225f402e 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -522,6 +522,7 @@ static struct rtable *prepare_fb_xmit(struct sk_buff *skb, static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev, __be16 proto) { + struct ip_tunnel *tunnel = netdev_priv(dev); struct ip_tunnel_info *tun_info; const struct ip_tunnel_key *key; struct rtable *rt = NULL; @@ -545,9 +546,11 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev, if (gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM))) goto err_free_rt; - flags = tun_info->key.tun_flags & (TUNNEL_CSUM | TUNNEL_KEY); + flags = tun_info->key.tun_flags & + (TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ); gre_build_header(skb, tunnel_hlen, flags, proto, - tunnel_id_to_key32(tun_info->key.tun_id), 0); + tunnel_id_to_key32(tun_info->key.tun_id), + (flags | TUNNEL_SEQ) ? htonl(tunnel->o_seqno++) : 0); df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0; diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 4f150a394387..16c5dfcbd195 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -695,9 +695,6 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb, else fl6->daddr = tunnel->parms.raddr; - if (tunnel->parms.o_flags & TUNNEL_SEQ) - tunnel->o_seqno++; - /* Push GRE header. */ protocol = (dev->type == ARPHRD_ETHER) ? htons(ETH_P_TEB) : proto; @@ -720,14 +717,20 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb, fl6->flowi6_uid = sock_net_uid(dev_net(dev), NULL); dsfield = key->tos; - flags = key->tun_flags & (TUNNEL_CSUM | TUNNEL_KEY); + flags = key->tun_flags & + (TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ); tunnel->tun_hlen = gre_calc_hlen(flags); gre_build_header(skb, tunnel->tun_hlen, flags, protocol, - tunnel_id_to_key32(tun_info->key.tun_id), 0); + tunnel_id_to_key32(tun_info->key.tun_id), + (flags | TUNNEL_SEQ) ? htonl(tunnel->o_seqno++) + : 0); } else { + if (tunnel->parms.o_flags & TUNNEL_SEQ) + tunnel->o_seqno++; + gre_build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags, protocol, tunnel->parms.o_key, htonl(tunnel->o_seqno));
Currently GRE sequence number can only be used in native tunnel mode. This patch adds sequence number support for gre collect metadata mode. RFC2890 defines GRE sequence number to be specific to the traffic flow identified by the key. However, this patch does not implement per-key seqno. The sequence number is shared in the same tunnel device. That is, different tunnel keys using the same collect_md tunnel share single sequence number. Signed-off-by: William Tu <u9012063@gmail.com> --- include/uapi/linux/bpf.h | 1 + net/core/filter.c | 4 +++- net/ipv4/ip_gre.c | 7 +++++-- net/ipv6/ip6_gre.c | 13 ++++++++----- 4 files changed, 17 insertions(+), 8 deletions(-)