diff mbox series

[net] net/sched: act_ct: fix miss set mru for ovs after defrag in act_ct

Message ID 1596019270-7437-1-git-send-email-wenxu@ucloud.cn
State Changes Requested
Delegated to: David Miller
Headers show
Series [net] net/sched: act_ct: fix miss set mru for ovs after defrag in act_ct | expand

Commit Message

wenxu July 29, 2020, 10:41 a.m. UTC
From: wenxu <wenxu@ucloud.cn>

When openvswitch conntrack offload with act_ct action. Fragment packets
defrag in the ingress tc act_ct action and miss the next chain. Then the
packet pass to the openvswitch datapath without the mru. The over
mtu packet will be dropped in output action in openvswitch for over mtu.

"kernel: net2: dropped over-mtu packet: 1528 > 1500"

This patch add mru in the tc_skb_ext for adefrag and miss next chain
situation. And also add mru in the qdisc_skb_cb. The act_ct set the mru
to the qdisc_skb_cb when the packet defrag. And When the chain miss,
The mru is set to tc_skb_ext which can be got by ovs datapath.

Fixes: b57dc7c13ea9 ("net/sched: Introduce action ct")
Signed-off-by: wenxu <wenxu@ucloud.cn>
---
 include/linux/skbuff.h    | 1 +
 include/net/sch_generic.h | 1 +
 net/openvswitch/flow.c    | 1 +
 net/sched/act_ct.c        | 8 ++++++--
 net/sched/cls_api.c       | 1 +
 5 files changed, 10 insertions(+), 2 deletions(-)

Comments

Cong Wang July 30, 2020, 6:03 a.m. UTC | #1
On Wed, Jul 29, 2020 at 3:41 AM <wenxu@ucloud.cn> wrote:
> diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
> index c510b03..45401d5 100644
> --- a/include/net/sch_generic.h
> +++ b/include/net/sch_generic.h
> @@ -384,6 +384,7 @@ struct qdisc_skb_cb {
>         };
>  #define QDISC_CB_PRIV_LEN 20
>         unsigned char           data[QDISC_CB_PRIV_LEN];
> +       u16                     mru;
>  };

Hmm, can you put it in the anonymous struct before 'data'?

We validate this cb size and data size like blow:

static inline void qdisc_cb_private_validate(const struct sk_buff *skb, int sz)
{
        struct qdisc_skb_cb *qcb;

        BUILD_BUG_ON(sizeof(skb->cb) < offsetof(struct qdisc_skb_cb,
data) + sz);
        BUILD_BUG_ON(sizeof(qcb->data) < sz);
}

It _kinda_ expects ->data at the end.

The rest of your patch looks good to me, so feel free to add:
Reviewed-by: Cong Wang <xiyou.wangcong@gmail.com>

Thanks.
wenxu July 30, 2020, 8:53 a.m. UTC | #2
On 7/30/2020 2:03 PM, Cong Wang wrote:
> On Wed, Jul 29, 2020 at 3:41 AM <wenxu@ucloud.cn> wrote:
>> diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
>> index c510b03..45401d5 100644
>> --- a/include/net/sch_generic.h
>> +++ b/include/net/sch_generic.h
>> @@ -384,6 +384,7 @@ struct qdisc_skb_cb {
>>         };
>>  #define QDISC_CB_PRIV_LEN 20
>>         unsigned char           data[QDISC_CB_PRIV_LEN];
>> +       u16                     mru;
>>  };
> Hmm, can you put it in the anonymous struct before 'data'?
>
> We validate this cb size and data size like blow:
>
> static inline void qdisc_cb_private_validate(const struct sk_buff *skb, int sz)
> {
>         struct qdisc_skb_cb *qcb;
>
>         BUILD_BUG_ON(sizeof(skb->cb) < offsetof(struct qdisc_skb_cb,
> data) + sz);
>         BUILD_BUG_ON(sizeof(qcb->data) < sz);
> }
>
> It _kinda_ expects ->data at the end.

It seems the data offsetof data should be align with szieof(u64)?

If I  modify it as following

@@ -383,6 +383,9 @@ struct qdisc_skb_cb {
                unsigned int            pkt_len;
                u16                     slave_dev_queue_mapping;
                u16                     tc_classid;
+               u16                     mru;
        };
 #define QDISC_CB_PRIV_LEN 20
        unsigned char           data[QDISC_CB_PRIV_LEN];

compile fail:

net/core/filter.c:7625:3: note: in expansion of macro ‘BUILD_BUG_ON’
   BUILD_BUG_ON((offsetof(struct sk_buff, cb) +

inn the file:  net/core/filter.c

case offsetof(struct __sk_buff, cb[0]) ...

             offsetofend(struct __sk_buff, cb[4]) - 1:
                BUILD_BUG_ON(sizeof_field(struct qdisc_skb_cb, data) < 20);
                BUILD_BUG_ON((offsetof(struct sk_buff, cb) +
                              offsetof(struct qdisc_skb_cb, data)) %
                             sizeof(__u64));


If I  modify it as following

@@ -383,6 +383,9 @@ struct qdisc_skb_cb {
                unsigned int            pkt_len;
                u16                     slave_dev_queue_mapping;
                u16                     tc_classid;
+               u16                     mru;
+               u16                     _pad1;
+               u32                     _pad2;
        };
 #define QDISC_CB_PRIV_LEN 20
        unsigned char           data[QDISC_CB_PRIV_LEN];


compile fail:

./include/linux/filter.h:633:2: note: in expansion of macro ‘BUILD_BUG_ON’
  BUILD_BUG_ON(sizeof(*cb) > sizeof_field(struct sk_buff, cb));


static inline void bpf_compute_data_pointers(struct sk_buff *skb)
{
        struct bpf_skb_data_end *cb = (struct bpf_skb_data_end *)skb->cb;

        BUILD_BUG_ON(sizeof(*cb) > sizeof_field(struct sk_buff, cb));
        cb->data_meta = skb->data - skb_metadata_len(skb);
        cb->data_end  = skb->data + skb_headlen(skb);
}


It seems no space for new value add before 'data'?


BR

wenxu
Cong Wang July 30, 2020, 9:50 p.m. UTC | #3
On Thu, Jul 30, 2020 at 1:53 AM wenxu <wenxu@ucloud.cn> wrote:
>
>
> On 7/30/2020 2:03 PM, Cong Wang wrote:
> > On Wed, Jul 29, 2020 at 3:41 AM <wenxu@ucloud.cn> wrote:
> >> diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
> >> index c510b03..45401d5 100644
> >> --- a/include/net/sch_generic.h
> >> +++ b/include/net/sch_generic.h
> >> @@ -384,6 +384,7 @@ struct qdisc_skb_cb {
> >>         };
> >>  #define QDISC_CB_PRIV_LEN 20
> >>         unsigned char           data[QDISC_CB_PRIV_LEN];
> >> +       u16                     mru;
> >>  };
> > Hmm, can you put it in the anonymous struct before 'data'?
> >
> > We validate this cb size and data size like blow:
> >
> > static inline void qdisc_cb_private_validate(const struct sk_buff *skb, int sz)
> > {
> >         struct qdisc_skb_cb *qcb;
> >
> >         BUILD_BUG_ON(sizeof(skb->cb) < offsetof(struct qdisc_skb_cb,
> > data) + sz);
> >         BUILD_BUG_ON(sizeof(qcb->data) < sz);
> > }
> >
> > It _kinda_ expects ->data at the end.
>
> It seems the data offsetof data should be align with szieof(u64)?
>
> If I  modify it as following
>
> @@ -383,6 +383,9 @@ struct qdisc_skb_cb {
>                 unsigned int            pkt_len;
>                 u16                     slave_dev_queue_mapping;
>                 u16                     tc_classid;
> +               u16                     mru;
>         };
>  #define QDISC_CB_PRIV_LEN 20
>         unsigned char           data[QDISC_CB_PRIV_LEN];
>
> compile fail:
>
> net/core/filter.c:7625:3: note: in expansion of macro ‘BUILD_BUG_ON’
>    BUILD_BUG_ON((offsetof(struct sk_buff, cb) +
>
> inn the file:  net/core/filter.c
>
> case offsetof(struct __sk_buff, cb[0]) ...
>
>              offsetofend(struct __sk_buff, cb[4]) - 1:
>                 BUILD_BUG_ON(sizeof_field(struct qdisc_skb_cb, data) < 20);
>                 BUILD_BUG_ON((offsetof(struct sk_buff, cb) +
>                               offsetof(struct qdisc_skb_cb, data)) %
>                              sizeof(__u64));
>
>
> If I  modify it as following
>
> @@ -383,6 +383,9 @@ struct qdisc_skb_cb {
>                 unsigned int            pkt_len;
>                 u16                     slave_dev_queue_mapping;
>                 u16                     tc_classid;
> +               u16                     mru;
> +               u16                     _pad1;
> +               u32                     _pad2;
>         };
>  #define QDISC_CB_PRIV_LEN 20
>         unsigned char           data[QDISC_CB_PRIV_LEN];
>
>
> compile fail:
>
> ./include/linux/filter.h:633:2: note: in expansion of macro ‘BUILD_BUG_ON’
>   BUILD_BUG_ON(sizeof(*cb) > sizeof_field(struct sk_buff, cb));
>
>
> static inline void bpf_compute_data_pointers(struct sk_buff *skb)
> {
>         struct bpf_skb_data_end *cb = (struct bpf_skb_data_end *)skb->cb;
>
>         BUILD_BUG_ON(sizeof(*cb) > sizeof_field(struct sk_buff, cb));
>         cb->data_meta = skb->data - skb_metadata_len(skb);
>         cb->data_end  = skb->data + skb_headlen(skb);
> }
>
>
> It seems no space for new value add before 'data'?

Hmm, I didn't know bpf has such restrictions on qdisc_skb_cb.
It seems impossible to add a new field before data, if you keep it
after data, can you adjust qdisc_cb_private_validate() too, like below?

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index c510b03b9751..68235489a5d4 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -463,7 +463,7 @@ static inline void qdisc_cb_private_validate(const
struct sk_buff *skb, int sz)
 {
        struct qdisc_skb_cb *qcb;

-       BUILD_BUG_ON(sizeof(skb->cb) < offsetof(struct qdisc_skb_cb,
data) + sz);
+       BUILD_BUG_ON(sizeof(skb->cb) < sizeof(*qcb));
        BUILD_BUG_ON(sizeof(qcb->data) < sz);
 }

Thanks.
diff mbox series

Patch

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 0c0377f..0d842d6 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -283,6 +283,7 @@  struct nf_bridge_info {
  */
 struct tc_skb_ext {
 	__u32 chain;
+	__u16 mru;
 };
 #endif
 
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index c510b03..45401d5 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -384,6 +384,7 @@  struct qdisc_skb_cb {
 	};
 #define QDISC_CB_PRIV_LEN 20
 	unsigned char		data[QDISC_CB_PRIV_LEN];
+	u16			mru;
 };
 
 typedef void tcf_chain_head_change_t(struct tcf_proto *tp_head, void *priv);
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 9d375e7..03942c3 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -890,6 +890,7 @@  int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
 	if (static_branch_unlikely(&tc_recirc_sharing_support)) {
 		tc_ext = skb_ext_find(skb, TC_SKB_EXT);
 		key->recirc_id = tc_ext ? tc_ext->chain : 0;
+		OVS_CB(skb)->mru = tc_ext ? tc_ext->mru : 0;
 	} else {
 		key->recirc_id = 0;
 	}
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index 5928efb..69445ab 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -706,8 +706,10 @@  static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb,
 		if (err && err != -EINPROGRESS)
 			goto out_free;
 
-		if (!err)
+		if (!err) {
 			*defrag = true;
+			cb.mru = IPCB(skb)->frag_max_size;
+		}
 	} else { /* NFPROTO_IPV6 */
 #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
 		enum ip6_defrag_users user = IP6_DEFRAG_CONNTRACK_IN + zone;
@@ -717,8 +719,10 @@  static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb,
 		if (err && err != -EINPROGRESS)
 			goto out_free;
 
-		if (!err)
+		if (!err) {
 			*defrag = true;
+			cb.mru = IP6CB(skb)->frag_max_size;
+		}
 #else
 		err = -EOPNOTSUPP;
 		goto out_free;
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 4619cb3..eb6acc5 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -1627,6 +1627,7 @@  int tcf_classify_ingress(struct sk_buff *skb,
 		if (WARN_ON_ONCE(!ext))
 			return TC_ACT_SHOT;
 		ext->chain = last_executed_chain;
+		ext->mru = qdisc_skb_cb(skb)->mru;
 	}
 
 	return ret;