diff mbox series

[net-next] net: openvswitch: Set OvS recirc_id from tc chain index

Message ID 1565179722-22488-1-git-send-email-paulb@mellanox.com
State Changes Requested
Delegated to: David Miller
Headers show
Series [net-next] net: openvswitch: Set OvS recirc_id from tc chain index | expand

Commit Message

Paul Blakey Aug. 7, 2019, 12:08 p.m. UTC
Offloaded OvS datapath rules are translated one to one to tc rules,
for example the following simplified OvS rule:

recirc_id(0),in_port(dev1),eth_type(0x0800),ct_state(-trk) actions:ct(),recirc(2)

Will be translated to the following tc rule:

$ tc filter add dev dev1 ingress \
	    prio 1 chain 0 proto ip \
		flower tcp ct_state -trk \
		action ct pipe \
		action goto chain 2

Received packets will first travel through tc, and if they aren't stolen
by it, like in the above rule, they will continue to OvS datapath.
Since we already did some actions (action ct in this case) which might
modify the packets, and updated action stats, we would like to continue
the processing with the correct recirc_id in OvS (here recirc_id(2))
where we left off.

To support this, introduce a new skb extension for tc, which
will be used for translating tc chain to ovs recirc_id to
handle these miss cases. Last tc chain index will be set
by tc goto chain action and read by OvS datapath.

Signed-off-by: Paul Blakey <paulb@mellanox.com>
Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
---
 include/linux/skbuff.h    | 13 +++++++++++++
 include/net/sch_generic.h |  5 ++++-
 net/core/skbuff.c         |  6 ++++++
 net/openvswitch/flow.c    |  9 +++++++++
 net/sched/Kconfig         | 13 +++++++++++++
 net/sched/act_api.c       |  1 +
 net/sched/cls_api.c       | 12 ++++++++++++
 7 files changed, 58 insertions(+), 1 deletion(-)

Comments

Marcelo Ricardo Leitner Aug. 7, 2019, 3 p.m. UTC | #1
On Wed, Aug 07, 2019 at 03:08:42PM +0300, Paul Blakey wrote:
> Offloaded OvS datapath rules are translated one to one to tc rules,
> for example the following simplified OvS rule:
> 
> recirc_id(0),in_port(dev1),eth_type(0x0800),ct_state(-trk) actions:ct(),recirc(2)
> 
> Will be translated to the following tc rule:
> 
> $ tc filter add dev dev1 ingress \
> 	    prio 1 chain 0 proto ip \
> 		flower tcp ct_state -trk \
> 		action ct pipe \
> 		action goto chain 2
> 
> Received packets will first travel though tc, and if they aren't stolen
> by it, like in the above rule, they will continue to OvS datapath.
> Since we already did some actions (action ct in this case) which might
> modify the packets, and updated action stats, we would like to continue
> the proccessing with the correct recirc_id in OvS (here recirc_id(2))
> where we left off.
> 
> To support this, introduce a new skb extension for tc, which
> will be used for translating tc chain to ovs recirc_id to
> handle these miss cases. Last tc chain index will be set
> by tc goto chain action and read by OvS datapath.
> 
> Signed-off-by: Paul Blakey <paulb@mellanox.com>
> Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
> Acked-by: Jiri Pirko <jiri@mellanox.com>

Reviewed-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>

> ---
>  include/linux/skbuff.h    | 13 +++++++++++++
>  include/net/sch_generic.h |  5 ++++-
>  net/core/skbuff.c         |  6 ++++++
>  net/openvswitch/flow.c    |  9 +++++++++
>  net/sched/Kconfig         | 13 +++++++++++++
>  net/sched/act_api.c       |  1 +
>  net/sched/cls_api.c       | 12 ++++++++++++
>  7 files changed, 58 insertions(+), 1 deletion(-)
> 
> diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
> index 3aef8d8..fb2a792 100644
> --- a/include/linux/skbuff.h
> +++ b/include/linux/skbuff.h
> @@ -279,6 +279,16 @@ struct nf_bridge_info {
>  };
>  #endif
>  
> +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
> +/* Chain in tc_skb_ext will be used to share the tc chain with
> + * ovs recirc_id. It will be set to the current chain by tc
> + * and read by ovs to recirc_id.
> + */
> +struct tc_skb_ext {
> +	__u32 chain;
> +};
> +#endif
> +
>  struct sk_buff_head {
>  	/* These two members must be first. */
>  	struct sk_buff	*next;
> @@ -4050,6 +4060,9 @@ enum skb_ext_id {
>  #ifdef CONFIG_XFRM
>  	SKB_EXT_SEC_PATH,
>  #endif
> +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
> +	TC_SKB_EXT,
> +#endif
>  	SKB_EXT_NUM, /* must be last */
>  };
>  
> diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
> index 6b6b012..871feea 100644
> --- a/include/net/sch_generic.h
> +++ b/include/net/sch_generic.h
> @@ -275,7 +275,10 @@ struct tcf_result {
>  			unsigned long	class;
>  			u32		classid;
>  		};
> -		const struct tcf_proto *goto_tp;
> +		struct {
> +			const struct tcf_proto *goto_tp;
> +			u32 goto_index;
> +		};
>  
>  		/* used in the skb_tc_reinsert function */
>  		struct {
> diff --git a/net/core/skbuff.c b/net/core/skbuff.c
> index ea8e8d3..2b40b5a 100644
> --- a/net/core/skbuff.c
> +++ b/net/core/skbuff.c
> @@ -4087,6 +4087,9 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
>  #ifdef CONFIG_XFRM
>  	[SKB_EXT_SEC_PATH] = SKB_EXT_CHUNKSIZEOF(struct sec_path),
>  #endif
> +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
> +	[TC_SKB_EXT] = SKB_EXT_CHUNKSIZEOF(struct tc_skb_ext),
> +#endif
>  };
>  
>  static __always_inline unsigned int skb_ext_total_length(void)
> @@ -4098,6 +4101,9 @@ static __always_inline unsigned int skb_ext_total_length(void)
>  #ifdef CONFIG_XFRM
>  		skb_ext_type_len[SKB_EXT_SEC_PATH] +
>  #endif
> +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
> +		skb_ext_type_len[TC_SKB_EXT] +
> +#endif
>  		0;
>  }
>  
> diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
> index bc89e16..0287ead 100644
> --- a/net/openvswitch/flow.c
> +++ b/net/openvswitch/flow.c
> @@ -816,6 +816,9 @@ static int key_extract_mac_proto(struct sk_buff *skb)
>  int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
>  			 struct sk_buff *skb, struct sw_flow_key *key)
>  {
> +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
> +	struct tc_skb_ext *tc_ext;
> +#endif
>  	int res, err;
>  
>  	/* Extract metadata from packet. */
> @@ -848,7 +851,13 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
>  	if (res < 0)
>  		return res;
>  	key->mac_proto = res;
> +
> +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
> +	tc_ext = skb_ext_find(skb, TC_SKB_EXT);
> +	key->recirc_id = tc_ext ? tc_ext->chain : 0;
> +#else
>  	key->recirc_id = 0;
> +#endif
>  
>  	err = key_extract(skb, key);
>  	if (!err)
> diff --git a/net/sched/Kconfig b/net/sched/Kconfig
> index afd2ba1..b3faafe 100644
> --- a/net/sched/Kconfig
> +++ b/net/sched/Kconfig
> @@ -963,6 +963,19 @@ config NET_IFE_SKBTCINDEX
>          tristate "Support to encoding decoding skb tcindex on IFE action"
>          depends on NET_ACT_IFE
>  
> +config NET_TC_SKB_EXT
> +	bool "TC recirculation support"
> +	depends on NET_CLS_ACT
> +	default y if NET_CLS_ACT
> +	select SKB_EXTENSIONS
> +
> +	help
> +	  Say Y here to allow tc chain misses to continue in OvS datapath in
> +	  the correct recirc_id, and hardware chain misses to continue in
> +	  the correct chain in tc software datapath.
> +
> +	  Say N here if you won't be using tc<->ovs offload or tc chains offload.
> +
>  endif # NET_SCHED
>  
>  config NET_SCH_FIFO
> diff --git a/net/sched/act_api.c b/net/sched/act_api.c
> index 3397122..c393604 100644
> --- a/net/sched/act_api.c
> +++ b/net/sched/act_api.c
> @@ -27,6 +27,7 @@ static void tcf_action_goto_chain_exec(const struct tc_action *a,
>  {
>  	const struct tcf_chain *chain = rcu_dereference_bh(a->goto_chain);
>  
> +	res->goto_index = chain->index;
>  	res->goto_tp = rcu_dereference_bh(chain->filter_chain);
>  }
>  
> diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
> index 3565d9a..b0b829a 100644
> --- a/net/sched/cls_api.c
> +++ b/net/sched/cls_api.c
> @@ -1660,6 +1660,18 @@ int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
>  			goto reset;
>  		} else if (unlikely(TC_ACT_EXT_CMP(err, TC_ACT_GOTO_CHAIN))) {
>  			first_tp = res->goto_tp;
> +
> +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
> +			{
> +				struct tc_skb_ext *ext;
> +
> +				ext = skb_ext_add(skb, TC_SKB_EXT);
> +				if (WARN_ON_ONCE(!ext))
> +					return TC_ACT_SHOT;
> +
> +				ext->chain = res->goto_index;
> +			}
> +#endif
>  			goto reset;
>  		}
>  #endif
> -- 
> 1.8.3.1
>
Paul Blakey Aug. 8, 2019, 6:39 a.m. UTC | #2
On 8/7/2019 6:00 PM, Marcelo Ricardo Leitner wrote:
> On Wed, Aug 07, 2019 at 03:08:42PM +0300, Paul Blakey wrote:
>> Offloaded OvS datapath rules are translated one to one to tc rules,
>> for example the following simplified OvS rule:
>>
>> recirc_id(0),in_port(dev1),eth_type(0x0800),ct_state(-trk) actions:ct(),recirc(2)
>>
>> Will be translated to the following tc rule:
>>
>> $ tc filter add dev dev1 ingress \
>> 	    prio 1 chain 0 proto ip \
>> 		flower tcp ct_state -trk \
>> 		action ct pipe \
>> 		action goto chain 2
>>
>> Received packets will first travel though tc, and if they aren't stolen
>> by it, like in the above rule, they will continue to OvS datapath.
>> Since we already did some actions (action ct in this case) which might
>> modify the packets, and updated action stats, we would like to continue
>> the proccessing with the correct recirc_id in OvS (here recirc_id(2))
>> where we left off.
>>
>> To support this, introduce a new skb extension for tc, which
>> will be used for translating tc chain to ovs recirc_id to
>> handle these miss cases. Last tc chain index will be set
>> by tc goto chain action and read by OvS datapath.
>>
>> Signed-off-by: Paul Blakey <paulb@mellanox.com>
>> Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
>> Acked-by: Jiri Pirko <jiri@mellanox.com>
> Reviewed-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>

Thanks!


>> ---
>>   include/linux/skbuff.h    | 13 +++++++++++++
>>   include/net/sch_generic.h |  5 ++++-
>>   net/core/skbuff.c         |  6 ++++++
>>   net/openvswitch/flow.c    |  9 +++++++++
>>   net/sched/Kconfig         | 13 +++++++++++++
>>   net/sched/act_api.c       |  1 +
>>   net/sched/cls_api.c       | 12 ++++++++++++
>>   7 files changed, 58 insertions(+), 1 deletion(-)
>>
>> diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
>> index 3aef8d8..fb2a792 100644
>> --- a/include/linux/skbuff.h
>> +++ b/include/linux/skbuff.h
>> @@ -279,6 +279,16 @@ struct nf_bridge_info {
>>   };
>>   #endif
>>   
>> +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
>> +/* Chain in tc_skb_ext will be used to share the tc chain with
>> + * ovs recirc_id. It will be set to the current chain by tc
>> + * and read by ovs to recirc_id.
>> + */
>> +struct tc_skb_ext {
>> +	__u32 chain;
>> +};
>> +#endif
>> +
>>   struct sk_buff_head {
>>   	/* These two members must be first. */
>>   	struct sk_buff	*next;
>> @@ -4050,6 +4060,9 @@ enum skb_ext_id {
>>   #ifdef CONFIG_XFRM
>>   	SKB_EXT_SEC_PATH,
>>   #endif
>> +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
>> +	TC_SKB_EXT,
>> +#endif
>>   	SKB_EXT_NUM, /* must be last */
>>   };
>>   
>> diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
>> index 6b6b012..871feea 100644
>> --- a/include/net/sch_generic.h
>> +++ b/include/net/sch_generic.h
>> @@ -275,7 +275,10 @@ struct tcf_result {
>>   			unsigned long	class;
>>   			u32		classid;
>>   		};
>> -		const struct tcf_proto *goto_tp;
>> +		struct {
>> +			const struct tcf_proto *goto_tp;
>> +			u32 goto_index;
>> +		};
>>   
>>   		/* used in the skb_tc_reinsert function */
>>   		struct {
>> diff --git a/net/core/skbuff.c b/net/core/skbuff.c
>> index ea8e8d3..2b40b5a 100644
>> --- a/net/core/skbuff.c
>> +++ b/net/core/skbuff.c
>> @@ -4087,6 +4087,9 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
>>   #ifdef CONFIG_XFRM
>>   	[SKB_EXT_SEC_PATH] = SKB_EXT_CHUNKSIZEOF(struct sec_path),
>>   #endif
>> +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
>> +	[TC_SKB_EXT] = SKB_EXT_CHUNKSIZEOF(struct tc_skb_ext),
>> +#endif
>>   };
>>   
>>   static __always_inline unsigned int skb_ext_total_length(void)
>> @@ -4098,6 +4101,9 @@ static __always_inline unsigned int skb_ext_total_length(void)
>>   #ifdef CONFIG_XFRM
>>   		skb_ext_type_len[SKB_EXT_SEC_PATH] +
>>   #endif
>> +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
>> +		skb_ext_type_len[TC_SKB_EXT] +
>> +#endif
>>   		0;
>>   }
>>   
>> diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
>> index bc89e16..0287ead 100644
>> --- a/net/openvswitch/flow.c
>> +++ b/net/openvswitch/flow.c
>> @@ -816,6 +816,9 @@ static int key_extract_mac_proto(struct sk_buff *skb)
>>   int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
>>   			 struct sk_buff *skb, struct sw_flow_key *key)
>>   {
>> +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
>> +	struct tc_skb_ext *tc_ext;
>> +#endif
>>   	int res, err;
>>   
>>   	/* Extract metadata from packet. */
>> @@ -848,7 +851,13 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
>>   	if (res < 0)
>>   		return res;
>>   	key->mac_proto = res;
>> +
>> +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
>> +	tc_ext = skb_ext_find(skb, TC_SKB_EXT);
>> +	key->recirc_id = tc_ext ? tc_ext->chain : 0;
>> +#else
>>   	key->recirc_id = 0;
>> +#endif
>>   
>>   	err = key_extract(skb, key);
>>   	if (!err)
>> diff --git a/net/sched/Kconfig b/net/sched/Kconfig
>> index afd2ba1..b3faafe 100644
>> --- a/net/sched/Kconfig
>> +++ b/net/sched/Kconfig
>> @@ -963,6 +963,19 @@ config NET_IFE_SKBTCINDEX
>>           tristate "Support to encoding decoding skb tcindex on IFE action"
>>           depends on NET_ACT_IFE
>>   
>> +config NET_TC_SKB_EXT
>> +	bool "TC recirculation support"
>> +	depends on NET_CLS_ACT
>> +	default y if NET_CLS_ACT
>> +	select SKB_EXTENSIONS
>> +
>> +	help
>> +	  Say Y here to allow tc chain misses to continue in OvS datapath in
>> +	  the correct recirc_id, and hardware chain misses to continue in
>> +	  the correct chain in tc software datapath.
>> +
>> +	  Say N here if you won't be using tc<->ovs offload or tc chains offload.
>> +
>>   endif # NET_SCHED
>>   
>>   config NET_SCH_FIFO
>> diff --git a/net/sched/act_api.c b/net/sched/act_api.c
>> index 3397122..c393604 100644
>> --- a/net/sched/act_api.c
>> +++ b/net/sched/act_api.c
>> @@ -27,6 +27,7 @@ static void tcf_action_goto_chain_exec(const struct tc_action *a,
>>   {
>>   	const struct tcf_chain *chain = rcu_dereference_bh(a->goto_chain);
>>   
>> +	res->goto_index = chain->index;
>>   	res->goto_tp = rcu_dereference_bh(chain->filter_chain);
>>   }
>>   
>> diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
>> index 3565d9a..b0b829a 100644
>> --- a/net/sched/cls_api.c
>> +++ b/net/sched/cls_api.c
>> @@ -1660,6 +1660,18 @@ int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
>>   			goto reset;
>>   		} else if (unlikely(TC_ACT_EXT_CMP(err, TC_ACT_GOTO_CHAIN))) {
>>   			first_tp = res->goto_tp;
>> +
>> +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
>> +			{
>> +				struct tc_skb_ext *ext;
>> +
>> +				ext = skb_ext_add(skb, TC_SKB_EXT);
>> +				if (WARN_ON_ONCE(!ext))
>> +					return TC_ACT_SHOT;
>> +
>> +				ext->chain = res->goto_index;
>> +			}
>> +#endif
>>   			goto reset;
>>   		}
>>   #endif
>> -- 
>> 1.8.3.1
>>
Pravin Shelar Aug. 8, 2019, 8:53 p.m. UTC | #3
On Wed, Aug 7, 2019 at 5:08 AM Paul Blakey <paulb@mellanox.com> wrote:
>
> Offloaded OvS datapath rules are translated one to one to tc rules,
> for example the following simplified OvS rule:
>
> recirc_id(0),in_port(dev1),eth_type(0x0800),ct_state(-trk) actions:ct(),recirc(2)
>
> Will be translated to the following tc rule:
>
> $ tc filter add dev dev1 ingress \
>             prio 1 chain 0 proto ip \
>                 flower tcp ct_state -trk \
>                 action ct pipe \
>                 action goto chain 2
>
> Received packets will first travel though tc, and if they aren't stolen
> by it, like in the above rule, they will continue to OvS datapath.
> Since we already did some actions (action ct in this case) which might
> modify the packets, and updated action stats, we would like to continue
> the proccessing with the correct recirc_id in OvS (here recirc_id(2))
> where we left off.
>
> To support this, introduce a new skb extension for tc, which
> will be used for translating tc chain to ovs recirc_id to
> handle these miss cases. Last tc chain index will be set
> by tc goto chain action and read by OvS datapath.
>
> Signed-off-by: Paul Blakey <paulb@mellanox.com>
> Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
> Acked-by: Jiri Pirko <jiri@mellanox.com>
> ---
>  include/linux/skbuff.h    | 13 +++++++++++++
>  include/net/sch_generic.h |  5 ++++-
>  net/core/skbuff.c         |  6 ++++++
>  net/openvswitch/flow.c    |  9 +++++++++
>  net/sched/Kconfig         | 13 +++++++++++++
>  net/sched/act_api.c       |  1 +
>  net/sched/cls_api.c       | 12 ++++++++++++
>  7 files changed, 58 insertions(+), 1 deletion(-)
>
> diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
> index 3aef8d8..fb2a792 100644
> --- a/include/linux/skbuff.h
> +++ b/include/linux/skbuff.h
> @@ -279,6 +279,16 @@ struct nf_bridge_info {
>  };
>  #endif
>
> +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
> +/* Chain in tc_skb_ext will be used to share the tc chain with
> + * ovs recirc_id. It will be set to the current chain by tc
> + * and read by ovs to recirc_id.
> + */
> +struct tc_skb_ext {
> +       __u32 chain;
> +};
> +#endif
> +
>  struct sk_buff_head {
>         /* These two members must be first. */
>         struct sk_buff  *next;
> @@ -4050,6 +4060,9 @@ enum skb_ext_id {
>  #ifdef CONFIG_XFRM
>         SKB_EXT_SEC_PATH,
>  #endif
> +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
> +       TC_SKB_EXT,
> +#endif
>         SKB_EXT_NUM, /* must be last */
>  };
>
> diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
> index 6b6b012..871feea 100644
> --- a/include/net/sch_generic.h
> +++ b/include/net/sch_generic.h
> @@ -275,7 +275,10 @@ struct tcf_result {
>                         unsigned long   class;
>                         u32             classid;
>                 };
> -               const struct tcf_proto *goto_tp;
> +               struct {
> +                       const struct tcf_proto *goto_tp;
> +                       u32 goto_index;
> +               };
>
>                 /* used in the skb_tc_reinsert function */
>                 struct {
> diff --git a/net/core/skbuff.c b/net/core/skbuff.c
> index ea8e8d3..2b40b5a 100644
> --- a/net/core/skbuff.c
> +++ b/net/core/skbuff.c
> @@ -4087,6 +4087,9 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
>  #ifdef CONFIG_XFRM
>         [SKB_EXT_SEC_PATH] = SKB_EXT_CHUNKSIZEOF(struct sec_path),
>  #endif
> +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
> +       [TC_SKB_EXT] = SKB_EXT_CHUNKSIZEOF(struct tc_skb_ext),
> +#endif
>  };
>
>  static __always_inline unsigned int skb_ext_total_length(void)
> @@ -4098,6 +4101,9 @@ static __always_inline unsigned int skb_ext_total_length(void)
>  #ifdef CONFIG_XFRM
>                 skb_ext_type_len[SKB_EXT_SEC_PATH] +
>  #endif
> +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
> +               skb_ext_type_len[TC_SKB_EXT] +
> +#endif
>                 0;
>  }
>
> diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
> index bc89e16..0287ead 100644
> --- a/net/openvswitch/flow.c
> +++ b/net/openvswitch/flow.c
> @@ -816,6 +816,9 @@ static int key_extract_mac_proto(struct sk_buff *skb)
>  int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
>                          struct sk_buff *skb, struct sw_flow_key *key)
>  {
> +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
> +       struct tc_skb_ext *tc_ext;
> +#endif
>         int res, err;
>
>         /* Extract metadata from packet. */
> @@ -848,7 +851,13 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
>         if (res < 0)
>                 return res;
>         key->mac_proto = res;
> +
> +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
> +       tc_ext = skb_ext_find(skb, TC_SKB_EXT);
> +       key->recirc_id = tc_ext ? tc_ext->chain : 0;
> +#else
>         key->recirc_id = 0;
> +#endif
>
In most cases the config would be turned on, so the ifdef is not that
useful. Can you add a static key to avoid searching the skb-ext in
non-offload cases?
Paul Blakey Aug. 11, 2019, 10:46 a.m. UTC | #4
On 8/8/2019 11:53 PM, Pravin Shelar wrote:
> On Wed, Aug 7, 2019 at 5:08 AM Paul Blakey <paulb@mellanox.com> wrote:
>> Offloaded OvS datapath rules are translated one to one to tc rules,
>> for example the following simplified OvS rule:
>>
>> recirc_id(0),in_port(dev1),eth_type(0x0800),ct_state(-trk) actions:ct(),recirc(2)
>>
>> Will be translated to the following tc rule:
>>
>> $ tc filter add dev dev1 ingress \
>>              prio 1 chain 0 proto ip \
>>                  flower tcp ct_state -trk \
>>                  action ct pipe \
>>                  action goto chain 2
>>
>> Received packets will first travel though tc, and if they aren't stolen
>> by it, like in the above rule, they will continue to OvS datapath.
>> Since we already did some actions (action ct in this case) which might
>> modify the packets, and updated action stats, we would like to continue
>> the proccessing with the correct recirc_id in OvS (here recirc_id(2))
>> where we left off.
>>
>> To support this, introduce a new skb extension for tc, which
>> will be used for translating tc chain to ovs recirc_id to
>> handle these miss cases. Last tc chain index will be set
>> by tc goto chain action and read by OvS datapath.
>>
>> Signed-off-by: Paul Blakey <paulb@mellanox.com>
>> Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
>> Acked-by: Jiri Pirko <jiri@mellanox.com>
>> ---
>>   include/linux/skbuff.h    | 13 +++++++++++++
>>   include/net/sch_generic.h |  5 ++++-
>>   net/core/skbuff.c         |  6 ++++++
>>   net/openvswitch/flow.c    |  9 +++++++++
>>   net/sched/Kconfig         | 13 +++++++++++++
>>   net/sched/act_api.c       |  1 +
>>   net/sched/cls_api.c       | 12 ++++++++++++
>>   7 files changed, 58 insertions(+), 1 deletion(-)
>>
>> diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
>> index 3aef8d8..fb2a792 100644
>> --- a/include/linux/skbuff.h
>> +++ b/include/linux/skbuff.h
>> @@ -279,6 +279,16 @@ struct nf_bridge_info {
>>   };
>>   #endif
>>
>> +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
>> +/* Chain in tc_skb_ext will be used to share the tc chain with
>> + * ovs recirc_id. It will be set to the current chain by tc
>> + * and read by ovs to recirc_id.
>> + */
>> +struct tc_skb_ext {
>> +       __u32 chain;
>> +};
>> +#endif
>> +
>>   struct sk_buff_head {
>>          /* These two members must be first. */
>>          struct sk_buff  *next;
>> @@ -4050,6 +4060,9 @@ enum skb_ext_id {
>>   #ifdef CONFIG_XFRM
>>          SKB_EXT_SEC_PATH,
>>   #endif
>> +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
>> +       TC_SKB_EXT,
>> +#endif
>>          SKB_EXT_NUM, /* must be last */
>>   };
>>
>> diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
>> index 6b6b012..871feea 100644
>> --- a/include/net/sch_generic.h
>> +++ b/include/net/sch_generic.h
>> @@ -275,7 +275,10 @@ struct tcf_result {
>>                          unsigned long   class;
>>                          u32             classid;
>>                  };
>> -               const struct tcf_proto *goto_tp;
>> +               struct {
>> +                       const struct tcf_proto *goto_tp;
>> +                       u32 goto_index;
>> +               };
>>
>>                  /* used in the skb_tc_reinsert function */
>>                  struct {
>> diff --git a/net/core/skbuff.c b/net/core/skbuff.c
>> index ea8e8d3..2b40b5a 100644
>> --- a/net/core/skbuff.c
>> +++ b/net/core/skbuff.c
>> @@ -4087,6 +4087,9 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
>>   #ifdef CONFIG_XFRM
>>          [SKB_EXT_SEC_PATH] = SKB_EXT_CHUNKSIZEOF(struct sec_path),
>>   #endif
>> +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
>> +       [TC_SKB_EXT] = SKB_EXT_CHUNKSIZEOF(struct tc_skb_ext),
>> +#endif
>>   };
>>
>>   static __always_inline unsigned int skb_ext_total_length(void)
>> @@ -4098,6 +4101,9 @@ static __always_inline unsigned int skb_ext_total_length(void)
>>   #ifdef CONFIG_XFRM
>>                  skb_ext_type_len[SKB_EXT_SEC_PATH] +
>>   #endif
>> +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
>> +               skb_ext_type_len[TC_SKB_EXT] +
>> +#endif
>>                  0;
>>   }
>>
>> diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
>> index bc89e16..0287ead 100644
>> --- a/net/openvswitch/flow.c
>> +++ b/net/openvswitch/flow.c
>> @@ -816,6 +816,9 @@ static int key_extract_mac_proto(struct sk_buff *skb)
>>   int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
>>                           struct sk_buff *skb, struct sw_flow_key *key)
>>   {
>> +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
>> +       struct tc_skb_ext *tc_ext;
>> +#endif
>>          int res, err;
>>
>>          /* Extract metadata from packet. */
>> @@ -848,7 +851,13 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
>>          if (res < 0)
>>                  return res;
>>          key->mac_proto = res;
>> +
>> +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
>> +       tc_ext = skb_ext_find(skb, TC_SKB_EXT);
>> +       key->recirc_id = tc_ext ? tc_ext->chain : 0;
>> +#else
>>          key->recirc_id = 0;
>> +#endif
>>
> Most of cases the config would be turned on, so the ifdef is not that
> useful. Can you add static key to avoid searching the skb-ext in non
> offload cases.

Hi,

What do you mean by a static key?
Paul Blakey Aug. 11, 2019, 10:48 a.m. UTC | #5
On 8/8/2019 11:53 PM, Pravin Shelar wrote:
> On Wed, Aug 7, 2019 at 5:08 AM Paul Blakey <paulb@mellanox.com> wrote:
>> Offloaded OvS datapath rules are translated one to one to tc rules,
>> for example the following simplified OvS rule:
>>
>> recirc_id(0),in_port(dev1),eth_type(0x0800),ct_state(-trk) actions:ct(),recirc(2)
>>
>> Will be translated to the following tc rule:
>>
>> $ tc filter add dev dev1 ingress \
>>              prio 1 chain 0 proto ip \
>>                  flower tcp ct_state -trk \
>>                  action ct pipe \
>>                  action goto chain 2
>>
>> Received packets will first travel though tc, and if they aren't stolen
>> by it, like in the above rule, they will continue to OvS datapath.
>> Since we already did some actions (action ct in this case) which might
>> modify the packets, and updated action stats, we would like to continue
>> the proccessing with the correct recirc_id in OvS (here recirc_id(2))
>> where we left off.
>>
>> To support this, introduce a new skb extension for tc, which
>> will be used for translating tc chain to ovs recirc_id to
>> handle these miss cases. Last tc chain index will be set
>> by tc goto chain action and read by OvS datapath.
>>
>> Signed-off-by: Paul Blakey <paulb@mellanox.com>
>> Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
>> Acked-by: Jiri Pirko <jiri@mellanox.com>
>> ---
>>   include/linux/skbuff.h    | 13 +++++++++++++
>>   include/net/sch_generic.h |  5 ++++-
>>   net/core/skbuff.c         |  6 ++++++
>>   net/openvswitch/flow.c    |  9 +++++++++
>>   net/sched/Kconfig         | 13 +++++++++++++
>>   net/sched/act_api.c       |  1 +
>>   net/sched/cls_api.c       | 12 ++++++++++++
>>   7 files changed, 58 insertions(+), 1 deletion(-)
>>
>> diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
>> index 3aef8d8..fb2a792 100644
>> --- a/include/linux/skbuff.h
>> +++ b/include/linux/skbuff.h
>> @@ -279,6 +279,16 @@ struct nf_bridge_info {
>>   };
>>   #endif
>>
>> +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
>> +/* Chain in tc_skb_ext will be used to share the tc chain with
>> + * ovs recirc_id. It will be set to the current chain by tc
>> + * and read by ovs to recirc_id.
>> + */
>> +struct tc_skb_ext {
>> +       __u32 chain;
>> +};
>> +#endif
>> +
>>   struct sk_buff_head {
>>          /* These two members must be first. */
>>          struct sk_buff  *next;
>> @@ -4050,6 +4060,9 @@ enum skb_ext_id {
>>   #ifdef CONFIG_XFRM
>>          SKB_EXT_SEC_PATH,
>>   #endif
>> +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
>> +       TC_SKB_EXT,
>> +#endif
>>          SKB_EXT_NUM, /* must be last */
>>   };
>>
>> diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
>> index 6b6b012..871feea 100644
>> --- a/include/net/sch_generic.h
>> +++ b/include/net/sch_generic.h
>> @@ -275,7 +275,10 @@ struct tcf_result {
>>                          unsigned long   class;
>>                          u32             classid;
>>                  };
>> -               const struct tcf_proto *goto_tp;
>> +               struct {
>> +                       const struct tcf_proto *goto_tp;
>> +                       u32 goto_index;
>> +               };
>>
>>                  /* used in the skb_tc_reinsert function */
>>                  struct {
>> diff --git a/net/core/skbuff.c b/net/core/skbuff.c
>> index ea8e8d3..2b40b5a 100644
>> --- a/net/core/skbuff.c
>> +++ b/net/core/skbuff.c
>> @@ -4087,6 +4087,9 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
>>   #ifdef CONFIG_XFRM
>>          [SKB_EXT_SEC_PATH] = SKB_EXT_CHUNKSIZEOF(struct sec_path),
>>   #endif
>> +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
>> +       [TC_SKB_EXT] = SKB_EXT_CHUNKSIZEOF(struct tc_skb_ext),
>> +#endif
>>   };
>>
>>   static __always_inline unsigned int skb_ext_total_length(void)
>> @@ -4098,6 +4101,9 @@ static __always_inline unsigned int skb_ext_total_length(void)
>>   #ifdef CONFIG_XFRM
>>                  skb_ext_type_len[SKB_EXT_SEC_PATH] +
>>   #endif
>> +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
>> +               skb_ext_type_len[TC_SKB_EXT] +
>> +#endif
>>                  0;
>>   }
>>
>> diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
>> index bc89e16..0287ead 100644
>> --- a/net/openvswitch/flow.c
>> +++ b/net/openvswitch/flow.c
>> @@ -816,6 +816,9 @@ static int key_extract_mac_proto(struct sk_buff *skb)
>>   int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
>>                           struct sk_buff *skb, struct sw_flow_key *key)
>>   {
>> +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
>> +       struct tc_skb_ext *tc_ext;
>> +#endif
>>          int res, err;
>>
>>          /* Extract metadata from packet. */
>> @@ -848,7 +851,13 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
>>          if (res < 0)
>>                  return res;
>>          key->mac_proto = res;
>> +
>> +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
>> +       tc_ext = skb_ext_find(skb, TC_SKB_EXT);
>> +       key->recirc_id = tc_ext ? tc_ext->chain : 0;
>> +#else
>>          key->recirc_id = 0;
>> +#endif
>>
> Most of cases the config would be turned on, so the ifdef is not that
> useful. Can you add static key to avoid searching the skb-ext in non
> offload cases.


Also, regarding the ifdefs: yes, I need them unless I always define the
enum, which other extensions don't do.
Pravin Shelar Aug. 12, 2019, 4:18 p.m. UTC | #6
On Sun, Aug 11, 2019 at 3:46 AM Paul Blakey <paulb@mellanox.com> wrote:
>
>
> On 8/8/2019 11:53 PM, Pravin Shelar wrote:
> > On Wed, Aug 7, 2019 at 5:08 AM Paul Blakey <paulb@mellanox.com> wrote:
> >> Offloaded OvS datapath rules are translated one to one to tc rules,
> >> for example the following simplified OvS rule:
> >>
> >> recirc_id(0),in_port(dev1),eth_type(0x0800),ct_state(-trk) actions:ct(),recirc(2)
> >>
> >> Will be translated to the following tc rule:
> >>
> >> $ tc filter add dev dev1 ingress \
> >>              prio 1 chain 0 proto ip \
> >>                  flower tcp ct_state -trk \
> >>                  action ct pipe \
> >>                  action goto chain 2
> >>
> >> Received packets will first travel though tc, and if they aren't stolen
> >> by it, like in the above rule, they will continue to OvS datapath.
> >> Since we already did some actions (action ct in this case) which might
> >> modify the packets, and updated action stats, we would like to continue
> >> the proccessing with the correct recirc_id in OvS (here recirc_id(2))
> >> where we left off.
> >>
> >> To support this, introduce a new skb extension for tc, which
> >> will be used for translating tc chain to ovs recirc_id to
> >> handle these miss cases. Last tc chain index will be set
> >> by tc goto chain action and read by OvS datapath.
> >>
> >> Signed-off-by: Paul Blakey <paulb@mellanox.com>
> >> Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
> >> Acked-by: Jiri Pirko <jiri@mellanox.com>
> >> ---
> >>   include/linux/skbuff.h    | 13 +++++++++++++
> >>   include/net/sch_generic.h |  5 ++++-
> >>   net/core/skbuff.c         |  6 ++++++
> >>   net/openvswitch/flow.c    |  9 +++++++++
> >>   net/sched/Kconfig         | 13 +++++++++++++
> >>   net/sched/act_api.c       |  1 +
> >>   net/sched/cls_api.c       | 12 ++++++++++++
> >>   7 files changed, 58 insertions(+), 1 deletion(-)
> >>
> >> diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
> >> index 3aef8d8..fb2a792 100644
> >> --- a/include/linux/skbuff.h
> >> +++ b/include/linux/skbuff.h
> >> @@ -279,6 +279,16 @@ struct nf_bridge_info {
> >>   };
> >>   #endif
> >>
> >> +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
> >> +/* Chain in tc_skb_ext will be used to share the tc chain with
> >> + * ovs recirc_id. It will be set to the current chain by tc
> >> + * and read by ovs to recirc_id.
> >> + */
> >> +struct tc_skb_ext {
> >> +       __u32 chain;
> >> +};
> >> +#endif
> >> +
> >>   struct sk_buff_head {
> >>          /* These two members must be first. */
> >>          struct sk_buff  *next;
> >> @@ -4050,6 +4060,9 @@ enum skb_ext_id {
> >>   #ifdef CONFIG_XFRM
> >>          SKB_EXT_SEC_PATH,
> >>   #endif
> >> +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
> >> +       TC_SKB_EXT,
> >> +#endif
> >>          SKB_EXT_NUM, /* must be last */
> >>   };
> >>
> >> diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
> >> index 6b6b012..871feea 100644
> >> --- a/include/net/sch_generic.h
> >> +++ b/include/net/sch_generic.h
> >> @@ -275,7 +275,10 @@ struct tcf_result {
> >>                          unsigned long   class;
> >>                          u32             classid;
> >>                  };
> >> -               const struct tcf_proto *goto_tp;
> >> +               struct {
> >> +                       const struct tcf_proto *goto_tp;
> >> +                       u32 goto_index;
> >> +               };
> >>
> >>                  /* used in the skb_tc_reinsert function */
> >>                  struct {
> >> diff --git a/net/core/skbuff.c b/net/core/skbuff.c
> >> index ea8e8d3..2b40b5a 100644
> >> --- a/net/core/skbuff.c
> >> +++ b/net/core/skbuff.c
> >> @@ -4087,6 +4087,9 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
> >>   #ifdef CONFIG_XFRM
> >>          [SKB_EXT_SEC_PATH] = SKB_EXT_CHUNKSIZEOF(struct sec_path),
> >>   #endif
> >> +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
> >> +       [TC_SKB_EXT] = SKB_EXT_CHUNKSIZEOF(struct tc_skb_ext),
> >> +#endif
> >>   };
> >>
> >>   static __always_inline unsigned int skb_ext_total_length(void)
> >> @@ -4098,6 +4101,9 @@ static __always_inline unsigned int skb_ext_total_length(void)
> >>   #ifdef CONFIG_XFRM
> >>                  skb_ext_type_len[SKB_EXT_SEC_PATH] +
> >>   #endif
> >> +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
> >> +               skb_ext_type_len[TC_SKB_EXT] +
> >> +#endif
> >>                  0;
> >>   }
> >>
> >> diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
> >> index bc89e16..0287ead 100644
> >> --- a/net/openvswitch/flow.c
> >> +++ b/net/openvswitch/flow.c
> >> @@ -816,6 +816,9 @@ static int key_extract_mac_proto(struct sk_buff *skb)
> >>   int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
> >>                           struct sk_buff *skb, struct sw_flow_key *key)
> >>   {
> >> +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
> >> +       struct tc_skb_ext *tc_ext;
> >> +#endif
> >>          int res, err;
> >>
> >>          /* Extract metadata from packet. */
> >> @@ -848,7 +851,13 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
> >>          if (res < 0)
> >>                  return res;
> >>          key->mac_proto = res;
> >> +
> >> +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
> >> +       tc_ext = skb_ext_find(skb, TC_SKB_EXT);
> >> +       key->recirc_id = tc_ext ? tc_ext->chain : 0;
> >> +#else
> >>          key->recirc_id = 0;
> >> +#endif
> >>
> > Most of cases the config would be turned on, so the ifdef is not that
> > useful. Can you add static key to avoid searching the skb-ext in non
> > offload cases.
>
> Hi,
>
> What do you mean by a static key?
>
https://www.kernel.org/doc/Documentation/static-keys.txt

The static key can be enabled when a flow is added to the tc filter.
Paul Blakey Aug. 13, 2019, 8:29 a.m. UTC | #7
On 8/12/2019 7:18 PM, Pravin Shelar wrote:
> On Sun, Aug 11, 2019 at 3:46 AM Paul Blakey <paulb@mellanox.com> wrote:
>>
>> On 8/8/2019 11:53 PM, Pravin Shelar wrote:
>>> On Wed, Aug 7, 2019 at 5:08 AM Paul Blakey <paulb@mellanox.com> wrote:
>>>> Offloaded OvS datapath rules are translated one to one to tc rules,
>>>> for example the following simplified OvS rule:
>>>>
>>>> recirc_id(0),in_port(dev1),eth_type(0x0800),ct_state(-trk) actions:ct(),recirc(2)
>>>>
>>>> Will be translated to the following tc rule:
>>>>
>>>> $ tc filter add dev dev1 ingress \
>>>>               prio 1 chain 0 proto ip \
>>>>                   flower tcp ct_state -trk \
>>>>                   action ct pipe \
>>>>                   action goto chain 2
>>>>
>>>> Received packets will first travel though tc, and if they aren't stolen
>>>> by it, like in the above rule, they will continue to OvS datapath.
>>>> Since we already did some actions (action ct in this case) which might
>>>> modify the packets, and updated action stats, we would like to continue
>>>> the proccessing with the correct recirc_id in OvS (here recirc_id(2))
>>>> where we left off.
>>>>
>>>> To support this, introduce a new skb extension for tc, which
>>>> will be used for translating tc chain to ovs recirc_id to
>>>> handle these miss cases. Last tc chain index will be set
>>>> by tc goto chain action and read by OvS datapath.
>>>>
>>>> Signed-off-by: Paul Blakey <paulb@mellanox.com>
>>>> Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
>>>> Acked-by: Jiri Pirko <jiri@mellanox.com>
>>>> ---
>>>>    include/linux/skbuff.h    | 13 +++++++++++++
>>>>    include/net/sch_generic.h |  5 ++++-
>>>>    net/core/skbuff.c         |  6 ++++++
>>>>    net/openvswitch/flow.c    |  9 +++++++++
>>>>    net/sched/Kconfig         | 13 +++++++++++++
>>>>    net/sched/act_api.c       |  1 +
>>>>    net/sched/cls_api.c       | 12 ++++++++++++
>>>>    7 files changed, 58 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
>>>> index 3aef8d8..fb2a792 100644
>>>> --- a/include/linux/skbuff.h
>>>> +++ b/include/linux/skbuff.h
>>>> @@ -279,6 +279,16 @@ struct nf_bridge_info {
>>>>    };
>>>>    #endif
>>>>
>>>> +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
>>>> +/* Chain in tc_skb_ext will be used to share the tc chain with
>>>> + * ovs recirc_id. It will be set to the current chain by tc
>>>> + * and read by ovs to recirc_id.
>>>> + */
>>>> +struct tc_skb_ext {
>>>> +       __u32 chain;
>>>> +};
>>>> +#endif
>>>> +
>>>>    struct sk_buff_head {
>>>>           /* These two members must be first. */
>>>>           struct sk_buff  *next;
>>>> @@ -4050,6 +4060,9 @@ enum skb_ext_id {
>>>>    #ifdef CONFIG_XFRM
>>>>           SKB_EXT_SEC_PATH,
>>>>    #endif
>>>> +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
>>>> +       TC_SKB_EXT,
>>>> +#endif
>>>>           SKB_EXT_NUM, /* must be last */
>>>>    };
>>>>
>>>> diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
>>>> index 6b6b012..871feea 100644
>>>> --- a/include/net/sch_generic.h
>>>> +++ b/include/net/sch_generic.h
>>>> @@ -275,7 +275,10 @@ struct tcf_result {
>>>>                           unsigned long   class;
>>>>                           u32             classid;
>>>>                   };
>>>> -               const struct tcf_proto *goto_tp;
>>>> +               struct {
>>>> +                       const struct tcf_proto *goto_tp;
>>>> +                       u32 goto_index;
>>>> +               };
>>>>
>>>>                   /* used in the skb_tc_reinsert function */
>>>>                   struct {
>>>> diff --git a/net/core/skbuff.c b/net/core/skbuff.c
>>>> index ea8e8d3..2b40b5a 100644
>>>> --- a/net/core/skbuff.c
>>>> +++ b/net/core/skbuff.c
>>>> @@ -4087,6 +4087,9 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
>>>>    #ifdef CONFIG_XFRM
>>>>           [SKB_EXT_SEC_PATH] = SKB_EXT_CHUNKSIZEOF(struct sec_path),
>>>>    #endif
>>>> +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
>>>> +       [TC_SKB_EXT] = SKB_EXT_CHUNKSIZEOF(struct tc_skb_ext),
>>>> +#endif
>>>>    };
>>>>
>>>>    static __always_inline unsigned int skb_ext_total_length(void)
>>>> @@ -4098,6 +4101,9 @@ static __always_inline unsigned int skb_ext_total_length(void)
>>>>    #ifdef CONFIG_XFRM
>>>>                   skb_ext_type_len[SKB_EXT_SEC_PATH] +
>>>>    #endif
>>>> +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
>>>> +               skb_ext_type_len[TC_SKB_EXT] +
>>>> +#endif
>>>>                   0;
>>>>    }
>>>>
>>>> diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
>>>> index bc89e16..0287ead 100644
>>>> --- a/net/openvswitch/flow.c
>>>> +++ b/net/openvswitch/flow.c
>>>> @@ -816,6 +816,9 @@ static int key_extract_mac_proto(struct sk_buff *skb)
>>>>    int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
>>>>                            struct sk_buff *skb, struct sw_flow_key *key)
>>>>    {
>>>> +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
>>>> +       struct tc_skb_ext *tc_ext;
>>>> +#endif
>>>>           int res, err;
>>>>
>>>>           /* Extract metadata from packet. */
>>>> @@ -848,7 +851,13 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
>>>>           if (res < 0)
>>>>                   return res;
>>>>           key->mac_proto = res;
>>>> +
>>>> +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
>>>> +       tc_ext = skb_ext_find(skb, TC_SKB_EXT);
>>>> +       key->recirc_id = tc_ext ? tc_ext->chain : 0;
>>>> +#else
>>>>           key->recirc_id = 0;
>>>> +#endif
>>>>
>>> Most of cases the config would be turned on, so the ifdef is not that
>>> useful. Can you add static key to avoid searching the skb-ext in non
>>> offload cases.
>> Hi,
>>
>> What do you mean by a static key?
>>
> https://www.kernel.org/doc/Documentation/static-keys.txt
>
> Static key can be enabled when a flow is added to the tc filter.

Hi and thanks for the feedback,

skb_ext_find() just checks a single bit in skb->active_extensions and,
if that bit is set, returns an offset. Do you think it will impact
performance much?


But regarding your suggestion, do you mean that the first tc goto action
instance (with the relevant ifdef, CONFIG_NET_TC_SKB_EXT) will enable
the OvS static key that guards this skb_ext_find()?

I guess that would mean calling it in tcf_action_set_ctrlact() if goto_chain != 0.

This will expose some OvS helper function (or static key) to
net/sched/act_api.c, right?


Thanks,

Paul.
Pravin Shelar Aug. 14, 2019, 3:53 p.m. UTC | #8
On Tue, Aug 13, 2019 at 1:29 AM Paul Blakey <paulb@mellanox.com> wrote:
>
>
> On 8/12/2019 7:18 PM, Pravin Shelar wrote:
> > On Sun, Aug 11, 2019 at 3:46 AM Paul Blakey <paulb@mellanox.com> wrote:
> >>
> >> On 8/8/2019 11:53 PM, Pravin Shelar wrote:
> >>> On Wed, Aug 7, 2019 at 5:08 AM Paul Blakey <paulb@mellanox.com> wrote:
> >>>> Offloaded OvS datapath rules are translated one to one to tc rules,
> >>>> for example the following simplified OvS rule:
> >>>>
> >>>> recirc_id(0),in_port(dev1),eth_type(0x0800),ct_state(-trk) actions:ct(),recirc(2)
> >>>>
> >>>> Will be translated to the following tc rule:
> >>>>
> >>>> $ tc filter add dev dev1 ingress \
> >>>>               prio 1 chain 0 proto ip \
> >>>>                   flower tcp ct_state -trk \
> >>>>                   action ct pipe \
> >>>>                   action goto chain 2
> >>>>
> >>>> Received packets will first travel though tc, and if they aren't stolen
> >>>> by it, like in the above rule, they will continue to OvS datapath.
> >>>> Since we already did some actions (action ct in this case) which might
> >>>> modify the packets, and updated action stats, we would like to continue
> >>>> the proccessing with the correct recirc_id in OvS (here recirc_id(2))
> >>>> where we left off.
> >>>>
> >>>> To support this, introduce a new skb extension for tc, which
> >>>> will be used for translating tc chain to ovs recirc_id to
> >>>> handle these miss cases. Last tc chain index will be set
> >>>> by tc goto chain action and read by OvS datapath.
> >>>>
> >>>> Signed-off-by: Paul Blakey <paulb@mellanox.com>
> >>>> Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
> >>>> Acked-by: Jiri Pirko <jiri@mellanox.com>
> >>>> ---
> >>>>    include/linux/skbuff.h    | 13 +++++++++++++
> >>>>    include/net/sch_generic.h |  5 ++++-
> >>>>    net/core/skbuff.c         |  6 ++++++
> >>>>    net/openvswitch/flow.c    |  9 +++++++++
> >>>>    net/sched/Kconfig         | 13 +++++++++++++
> >>>>    net/sched/act_api.c       |  1 +
> >>>>    net/sched/cls_api.c       | 12 ++++++++++++
> >>>>    7 files changed, 58 insertions(+), 1 deletion(-)
> >>>>
> >>>> diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
> >>>> index 3aef8d8..fb2a792 100644
> >>>> --- a/include/linux/skbuff.h
> >>>> +++ b/include/linux/skbuff.h
> >>>> @@ -279,6 +279,16 @@ struct nf_bridge_info {
> >>>>    };
> >>>>    #endif
> >>>>
> >>>> +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
> >>>> +/* Chain in tc_skb_ext will be used to share the tc chain with
> >>>> + * ovs recirc_id. It will be set to the current chain by tc
> >>>> + * and read by ovs to recirc_id.
> >>>> + */
> >>>> +struct tc_skb_ext {
> >>>> +       __u32 chain;
> >>>> +};
> >>>> +#endif
> >>>> +
> >>>>    struct sk_buff_head {
> >>>>           /* These two members must be first. */
> >>>>           struct sk_buff  *next;
> >>>> @@ -4050,6 +4060,9 @@ enum skb_ext_id {
> >>>>    #ifdef CONFIG_XFRM
> >>>>           SKB_EXT_SEC_PATH,
> >>>>    #endif
> >>>> +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
> >>>> +       TC_SKB_EXT,
> >>>> +#endif
> >>>>           SKB_EXT_NUM, /* must be last */
> >>>>    };
> >>>>
> >>>> diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
> >>>> index 6b6b012..871feea 100644
> >>>> --- a/include/net/sch_generic.h
> >>>> +++ b/include/net/sch_generic.h
> >>>> @@ -275,7 +275,10 @@ struct tcf_result {
> >>>>                           unsigned long   class;
> >>>>                           u32             classid;
> >>>>                   };
> >>>> -               const struct tcf_proto *goto_tp;
> >>>> +               struct {
> >>>> +                       const struct tcf_proto *goto_tp;
> >>>> +                       u32 goto_index;
> >>>> +               };
> >>>>
> >>>>                   /* used in the skb_tc_reinsert function */
> >>>>                   struct {
> >>>> diff --git a/net/core/skbuff.c b/net/core/skbuff.c
> >>>> index ea8e8d3..2b40b5a 100644
> >>>> --- a/net/core/skbuff.c
> >>>> +++ b/net/core/skbuff.c
> >>>> @@ -4087,6 +4087,9 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
> >>>>    #ifdef CONFIG_XFRM
> >>>>           [SKB_EXT_SEC_PATH] = SKB_EXT_CHUNKSIZEOF(struct sec_path),
> >>>>    #endif
> >>>> +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
> >>>> +       [TC_SKB_EXT] = SKB_EXT_CHUNKSIZEOF(struct tc_skb_ext),
> >>>> +#endif
> >>>>    };
> >>>>
> >>>>    static __always_inline unsigned int skb_ext_total_length(void)
> >>>> @@ -4098,6 +4101,9 @@ static __always_inline unsigned int skb_ext_total_length(void)
> >>>>    #ifdef CONFIG_XFRM
> >>>>                   skb_ext_type_len[SKB_EXT_SEC_PATH] +
> >>>>    #endif
> >>>> +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
> >>>> +               skb_ext_type_len[TC_SKB_EXT] +
> >>>> +#endif
> >>>>                   0;
> >>>>    }
> >>>>
> >>>> diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
> >>>> index bc89e16..0287ead 100644
> >>>> --- a/net/openvswitch/flow.c
> >>>> +++ b/net/openvswitch/flow.c
> >>>> @@ -816,6 +816,9 @@ static int key_extract_mac_proto(struct sk_buff *skb)
> >>>>    int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
> >>>>                            struct sk_buff *skb, struct sw_flow_key *key)
> >>>>    {
> >>>> +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
> >>>> +       struct tc_skb_ext *tc_ext;
> >>>> +#endif
> >>>>           int res, err;
> >>>>
> >>>>           /* Extract metadata from packet. */
> >>>> @@ -848,7 +851,13 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
> >>>>           if (res < 0)
> >>>>                   return res;
> >>>>           key->mac_proto = res;
> >>>> +
> >>>> +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
> >>>> +       tc_ext = skb_ext_find(skb, TC_SKB_EXT);
> >>>> +       key->recirc_id = tc_ext ? tc_ext->chain : 0;
> >>>> +#else
> >>>>           key->recirc_id = 0;
> >>>> +#endif
> >>>>
> >>> Most of cases the config would be turned on, so the ifdef is not that
> >>> useful. Can you add static key to avoid searching the skb-ext in non
> >>> offload cases.
> >> Hi,
> >>
> >> What do you mean by a static key?
> >>
> > https://www.kernel.org/doc/Documentation/static-keys.txt
> >
> > Static key can be enabled when a flow is added to the tc filter.
>
> Hi and thanks for the feedback,
>
> The skb_ext_find() just checks a single bit on the
> skb->active_extensions, and if so returns an offset. Do you think it
> will impact performance much?
>
I do not see much downside to adding a static key here.

>
> But to your suggestion, do you mean that the first tc goto action
> instance with the relevant ifdef (CONFIG_NET_TC_SKB_EXT) it will enable
> the OvS static key that guards this skb_ext_find()?
>
> I guess calling it in tcf_action_set_ctrlact() if goto_chain != 0.
>
> This will expose some OvS helper function (or static key) to
> net/sched/act_api.c right?

Right. The patch adds this dependency anyway, so this symbol
definition in OVS would make it explicit to the user.
diff mbox series

Patch

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 3aef8d8..fb2a792 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -279,6 +279,16 @@  struct nf_bridge_info {
 };
 #endif
 
+#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+/* Chain in tc_skb_ext will be used to share the tc chain with
+ * ovs recirc_id. It will be set to the current chain by tc
+ * and read by ovs to recirc_id.
+ */
+struct tc_skb_ext {
+	__u32 chain;
+};
+#endif
+
 struct sk_buff_head {
 	/* These two members must be first. */
 	struct sk_buff	*next;
@@ -4050,6 +4060,9 @@  enum skb_ext_id {
 #ifdef CONFIG_XFRM
 	SKB_EXT_SEC_PATH,
 #endif
+#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+	TC_SKB_EXT,
+#endif
 	SKB_EXT_NUM, /* must be last */
 };
 
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 6b6b012..871feea 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -275,7 +275,10 @@  struct tcf_result {
 			unsigned long	class;
 			u32		classid;
 		};
-		const struct tcf_proto *goto_tp;
+		struct {
+			const struct tcf_proto *goto_tp;
+			u32 goto_index;
+		};
 
 		/* used in the skb_tc_reinsert function */
 		struct {
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index ea8e8d3..2b40b5a 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4087,6 +4087,9 @@  int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
 #ifdef CONFIG_XFRM
 	[SKB_EXT_SEC_PATH] = SKB_EXT_CHUNKSIZEOF(struct sec_path),
 #endif
+#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+	[TC_SKB_EXT] = SKB_EXT_CHUNKSIZEOF(struct tc_skb_ext),
+#endif
 };
 
 static __always_inline unsigned int skb_ext_total_length(void)
@@ -4098,6 +4101,9 @@  static __always_inline unsigned int skb_ext_total_length(void)
 #ifdef CONFIG_XFRM
 		skb_ext_type_len[SKB_EXT_SEC_PATH] +
 #endif
+#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+		skb_ext_type_len[TC_SKB_EXT] +
+#endif
 		0;
 }
 
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index bc89e16..0287ead 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -816,6 +816,9 @@  static int key_extract_mac_proto(struct sk_buff *skb)
 int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
 			 struct sk_buff *skb, struct sw_flow_key *key)
 {
+#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+	struct tc_skb_ext *tc_ext;
+#endif
 	int res, err;
 
 	/* Extract metadata from packet. */
@@ -848,7 +851,13 @@  int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
 	if (res < 0)
 		return res;
 	key->mac_proto = res;
+
+#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+	tc_ext = skb_ext_find(skb, TC_SKB_EXT);
+	key->recirc_id = tc_ext ? tc_ext->chain : 0;
+#else
 	key->recirc_id = 0;
+#endif
 
 	err = key_extract(skb, key);
 	if (!err)
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index afd2ba1..b3faafe 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -963,6 +963,19 @@  config NET_IFE_SKBTCINDEX
         tristate "Support to encoding decoding skb tcindex on IFE action"
         depends on NET_ACT_IFE
 
+config NET_TC_SKB_EXT
+	bool "TC recirculation support"
+	depends on NET_CLS_ACT
+	default y if NET_CLS_ACT
+	select SKB_EXTENSIONS
+
+	help
+	  Say Y here to allow tc chain misses to continue in OvS datapath in
+	  the correct recirc_id, and hardware chain misses to continue in
+	  the correct chain in tc software datapath.
+
+	  Say N here if you won't be using tc<->ovs offload or tc chains offload.
+
 endif # NET_SCHED
 
 config NET_SCH_FIFO
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 3397122..c393604 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -27,6 +27,7 @@  static void tcf_action_goto_chain_exec(const struct tc_action *a,
 {
 	const struct tcf_chain *chain = rcu_dereference_bh(a->goto_chain);
 
+	res->goto_index = chain->index;
 	res->goto_tp = rcu_dereference_bh(chain->filter_chain);
 }
 
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 3565d9a..b0b829a 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -1660,6 +1660,18 @@  int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
 			goto reset;
 		} else if (unlikely(TC_ACT_EXT_CMP(err, TC_ACT_GOTO_CHAIN))) {
 			first_tp = res->goto_tp;
+
+#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+			{
+				struct tc_skb_ext *ext;
+
+				ext = skb_ext_add(skb, TC_SKB_EXT);
+				if (WARN_ON_ONCE(!ext))
+					return TC_ACT_SHOT;
+
+				ext->chain = res->goto_index;
+			}
+#endif
 			goto reset;
 		}
 #endif