diff mbox

[V2,net-next,1/3] ipv6: add the IPV6_FL_F_REFLECT flag to IPV6_FL_A_GET

Message ID 1389785403-6401-1-git-send-email-florent.fourcot@enst-bretagne.fr
State Changes Requested, archived
Delegated to: David Miller
Headers show

Commit Message

Florent Fourcot Jan. 15, 2014, 11:30 a.m. UTC
With this option, the socket will reply with the flow label value read
on received packets.

The goal is to have a connection with the same flow label in both
direction of the communication.

Signed-off-by: Florent Fourcot <florent.fourcot@enst-bretagne.fr>
---
 include/linux/ipv6.h     |  1 +
 include/uapi/linux/in6.h |  1 +
 net/ipv6/ip6_flowlabel.c | 21 +++++++++++++++++++++
 net/ipv6/tcp_ipv6.c      | 10 ++++++++++
 4 files changed, 33 insertions(+)

Comments

Hannes Frederic Sowa Jan. 15, 2014, 10:47 p.m. UTC | #1
On Wed, Jan 15, 2014 at 12:30:01PM +0100, Florent Fourcot wrote:
> diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
> index ffd5fa8..f61bedc 100644
> --- a/net/ipv6/tcp_ipv6.c
> +++ b/net/ipv6/tcp_ipv6.c
> @@ -483,6 +483,8 @@ static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst,
>  				    &ireq->ir_v6_rmt_addr);
>  
>  		fl6->daddr = ireq->ir_v6_rmt_addr;
> +		if (np->repflow)
> +			fl6->flowlabel = np->flow_label;
>  		skb_set_queue_mapping(skb, queue_mapping);
>  		err = ip6_xmit(sk, skb, fl6, np->opt, np->tclass);
>  		err = net_xmit_eval(err);
> @@ -1000,6 +1002,8 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
>  	ireq = inet_rsk(req);
>  	ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
>  	ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
> +	if (np->repflow)
> +		np->flow_label = ip6_flowlabel(ipv6_hdr(skb));
>  	if (!want_cookie || tmp_opt.tstamp_ok)
>  		TCP_ECN_create_request(req, skb, sock_net(sk));
>  
> @@ -1138,6 +1142,8 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
>  		newnp->mcast_oif   = inet6_iif(skb);
>  		newnp->mcast_hops  = ipv6_hdr(skb)->hop_limit;
>  		newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
> +		if (np->repflow)
> +			newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));

Just asking:

Was there a specific reason you did not use np->flow_label here and just
mirroring the flowlabel from the first packet of the connection for the
whole connection?

I don't know if it makes a difference, but maybe it was done on purpose?

Otherwise looks good.

>  		/*
>  		 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
> @@ -1218,6 +1224,8 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
>  	newnp->mcast_oif  = inet6_iif(skb);
>  	newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
>  	newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
> +	if (np->repflow)
> +		newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
>  
>  	/* Clone native IPv6 options from listening socket (if any)
>  
> @@ -1429,6 +1437,8 @@ ipv6_pktoptions:
>  			np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
>  		if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
>  			np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
> +		if (np->repflow)
> +			np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
>  		if (ipv6_opt_accepted(sk, opt_skb)) {
>  			skb_set_owner_r(opt_skb, sk);
>  			opt_skb = xchg(&np->pktoptions, opt_skb);
> -- 
> 1.8.5.2

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Hannes Frederic Sowa Jan. 16, 2014, 12:35 p.m. UTC | #2
On Wed, Jan 15, 2014 at 11:47:26PM +0100, Hannes Frederic Sowa wrote:
> > @@ -1138,6 +1142,8 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
> >  		newnp->mcast_oif   = inet6_iif(skb);
> >  		newnp->mcast_hops  = ipv6_hdr(skb)->hop_limit;
> >  		newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
> > +		if (np->repflow)
> > +			newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
> 
> Just asking:
> 
> Was there a specific reason you did not use np->flow_label here and just
> mirroring the flowlabel from the first packet of the connection for the
> whole connection?
> 
> I don't know if it makes a difference, but maybe it was done on purpose?

I thought about it and am actually in favor of reusing the flowid from the syn
packet so userspace does report correct outgoing flowlabel even in case of
strange tcp peer changing it mid-stream.

Thanks,

  Hannes

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Florent Fourcot Jan. 16, 2014, 12:45 p.m. UTC | #3
>> Was there a specific reason you did not use np->flow_label here and just
>> mirroring the flowlabel from the first packet of the connection for the
>> whole connection?
>>
>> I don't know if it makes a difference, but maybe it was done on purpose?
> 
> I thought about it and am actually in favor of reusing the flowid from the syn
> packet so userspace does report correct outgoing flowlabel even in case of
> strange tcp peer changing it mid-stream.
> 

Actually, the idea was that the remote could changed the flow label
during the lifetime of a connection.
I do not have a strong opinion on that, but in a "reflect" mode, I
except that the last received value will be in used.

Second, in case of SYN cookie, is the SYN flow label stored somewhere?

Thanks,

Florent.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Hannes Frederic Sowa Jan. 16, 2014, 1:07 p.m. UTC | #4
On Thu, Jan 16, 2014 at 01:45:18PM +0100, Florent Fourcot wrote:
> 
> >> Was there a specific reason you did not use np->flow_label here and just
> >> mirroring the flowlabel from the first packet of the connection for the
> >> whole connection?
> >>
> >> I don't know if it makes a difference, but maybe it was done on purpose?
> > 
> > I thought about it and am actually in favor of reusing the flowid from the syn
> > packet so userspace does report correct outgoing flowlabel even in case of
> > strange tcp peer changing it mid-stream.
> > 
> 
> Actually, the idea was that the remote could changed the flow label
> during the lifetime of a connection.
> I do not have a strong opinion on that, but in a "reflect" mode, I
> except that the last received value will be in used.

Would it make sense to sync the flowlabel if it changes with the socket, so
user space can query the label really used? Otherwise I wouldn't even report
it.

> Second, in case of SYN cookie, is the SYN flow label stored somewhere?

Should then be synced as soon as the cookie is validated. You can test it by
setting tcp_syncookies to 2. It uses syncookies unconditionally then.

Greetings,

  Hannes

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Florent Fourcot Jan. 16, 2014, 2:34 p.m. UTC | #5
Le 16/01/2014 14:07, Hannes Frederic Sowa a écrit :
> Would it make sense to sync the flowlabel if it changes with the socket, so
> user space can query the label really used? Otherwise I wouldn't even report
> it.

I see at least one use case. In the same way that openssh sets the
tclass field after the SSH session initialization, a software could set
the label after the beginning of the L7 session (and ignoring QoS
priority for non-interactive packet at the beginning).


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 7e1ded0..1084304 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -191,6 +191,7 @@  struct ipv6_pinfo {
 	/* sockopt flags */
 	__u16			recverr:1,
 	                        sndflow:1,
+				repflow:1,
 				pmtudisc:3,
 				ipv6only:1,
 				srcprefs:3,	/* 001: prefer temporary address
diff --git a/include/uapi/linux/in6.h b/include/uapi/linux/in6.h
index f94f1d0..a4359b1 100644
--- a/include/uapi/linux/in6.h
+++ b/include/uapi/linux/in6.h
@@ -85,6 +85,7 @@  struct in6_flowlabel_req {
 
 #define IPV6_FL_F_CREATE	1
 #define IPV6_FL_F_EXCL		2
+#define IPV6_FL_F_REFLECT 	4
 
 #define IPV6_FL_S_NONE		0
 #define IPV6_FL_S_EXCL		1
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index e7fb710..ba23643 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -486,6 +486,11 @@  int ipv6_flowlabel_opt_get(struct sock *sk, struct in6_flowlabel_req *freq)
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct ipv6_fl_socklist *sfl;
 
+	if (np->repflow) {
+		freq->flr_label = np->flow_label;
+		return 0;
+	}
+
 	rcu_read_lock_bh();
 
 	for_each_sk_fl_rcu(np, sfl) {
@@ -527,6 +532,15 @@  int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen)
 
 	switch (freq.flr_action) {
 	case IPV6_FL_A_PUT:
+		if (freq.flr_flags & IPV6_FL_F_REFLECT) {
+			if (sk->sk_protocol != IPPROTO_TCP)
+				return -ENOPROTOOPT;
+			if (!np->repflow)
+				return -ESRCH;
+			np->flow_label = 0;
+			np->repflow = 0;
+			return 0;
+		}
 		spin_lock_bh(&ip6_sk_fl_lock);
 		for (sflp = &np->ipv6_fl_list;
 		     (sfl = rcu_dereference(*sflp))!=NULL;
@@ -567,6 +581,13 @@  int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen)
 		return -ESRCH;
 
 	case IPV6_FL_A_GET:
+		if (freq.flr_flags & IPV6_FL_F_REFLECT) {
+			if (sk->sk_protocol != IPPROTO_TCP)
+				return -ENOPROTOOPT;
+			np->repflow = 1;
+			return 0;
+		}
+
 		if (freq.flr_label & ~IPV6_FLOWLABEL_MASK)
 			return -EINVAL;
 
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index ffd5fa8..f61bedc 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -483,6 +483,8 @@  static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst,
 				    &ireq->ir_v6_rmt_addr);
 
 		fl6->daddr = ireq->ir_v6_rmt_addr;
+		if (np->repflow)
+			fl6->flowlabel = np->flow_label;
 		skb_set_queue_mapping(skb, queue_mapping);
 		err = ip6_xmit(sk, skb, fl6, np->opt, np->tclass);
 		err = net_xmit_eval(err);
@@ -1000,6 +1002,8 @@  static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 	ireq = inet_rsk(req);
 	ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
 	ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
+	if (np->repflow)
+		np->flow_label = ip6_flowlabel(ipv6_hdr(skb));
 	if (!want_cookie || tmp_opt.tstamp_ok)
 		TCP_ECN_create_request(req, skb, sock_net(sk));
 
@@ -1138,6 +1142,8 @@  static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 		newnp->mcast_oif   = inet6_iif(skb);
 		newnp->mcast_hops  = ipv6_hdr(skb)->hop_limit;
 		newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
+		if (np->repflow)
+			newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
 
 		/*
 		 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
@@ -1218,6 +1224,8 @@  static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 	newnp->mcast_oif  = inet6_iif(skb);
 	newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
 	newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
+	if (np->repflow)
+		newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
 
 	/* Clone native IPv6 options from listening socket (if any)
 
@@ -1429,6 +1437,8 @@  ipv6_pktoptions:
 			np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
 		if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
 			np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
+		if (np->repflow)
+			np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
 		if (ipv6_opt_accepted(sk, opt_skb)) {
 			skb_set_owner_r(opt_skb, sk);
 			opt_skb = xchg(&np->pktoptions, opt_skb);