From patchwork Sun Feb 16 10:01:21 2020
X-Patchwork-Id: 1238720
From: Paul Blakey <paulb@mellanox.com>
To: Paul Blakey, Saeed Mahameed, Oz Shlomo, Jakub Kicinski, Vlad Buslov, David Miller, netdev@vger.kernel.org, Jiri Pirko, Roi Dayan
Subject: [PATCH net-next v3 01/16] net: sched: Introduce ingress classification function
Date: Sun, 16 Feb 2020 12:01:21 +0200
Message-Id: <1581847296-19194-2-git-send-email-paulb@mellanox.com>
In-Reply-To: <1581847296-19194-1-git-send-email-paulb@mellanox.com>

TC multi chain configuration can cause offloaded tc chains to miss in hardware after jumping to some chain. In such cases the software should continue from the chain that missed in hardware, as the hardware may have manipulated the packet and updated some counters.

Currently a single tcf classification function serves both ingress and egress. However, multi chain miss processing (get tc skb extension on hw miss, set tc skb extension on tc miss) should happen only on ingress.

Refactor the code to use an ingress classification function, and move setting the tc skb extension from general classification to it, as a pre-step for supporting the hw miss scenario.
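As an illustrative aside (not code from this series): once the series is complete, a later ingress pass can recover where a previous (e.g. hardware) pass stopped via the tc skb extension. A minimal sketch, assuming CONFIG_NET_TC_SKB_EXT; skb_ext_find() and TC_SKB_EXT are the existing kernel APIs, while the helper itself is hypothetical:

#include <linux/skbuff.h>

/* Hypothetical helper: return the chain where classification should
 * resume, or 0 to start from chain 0. Error handling omitted.
 */
static u32 example_resume_chain(struct sk_buff *skb)
{
	struct tc_skb_ext *ext = skb_ext_find(skb, TC_SKB_EXT);

	return ext ? ext->chain : 0;
}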
Co-developed-by: Vlad Buslov
Signed-off-by: Vlad Buslov
Signed-off-by: Paul Blakey
Reviewed-by: Jiri Pirko
---
Changelog: v2->v3
    Split from v2 first patch
    Defined tcf_classify_ingress so we only affect ingress classification
    Move setting the skb extension to ingress classification

 include/net/pkt_cls.h | 10 +++++++++
 net/core/dev.c        |  3 ++-
 net/sched/cls_api.c   | 58 ++++++++++++++++++++++++++++++++++++++-------------
 3 files changed, 56 insertions(+), 15 deletions(-)

diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index a972244..3d51a2d 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -72,6 +72,8 @@ static inline struct Qdisc *tcf_block_q(struct tcf_block *block) int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res, bool compat_mode); +int tcf_classify_ingress(struct sk_buff *skb, const struct tcf_proto *tp, + struct tcf_result *res, bool compat_mode); #else static inline bool tcf_block_shared(struct tcf_block *block) @@ -133,6 +135,14 @@ static inline int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, { return TC_ACT_UNSPEC; } + +static inline int tcf_classify_ingress(struct sk_buff *skb, + const struct tcf_proto *tp, + struct tcf_result *res, bool compat_mode) +{ + return TC_ACT_UNSPEC; +} + #endif static inline unsigned long
diff --git a/net/core/dev.c b/net/core/dev.c index a6316b3..107af00 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4860,7 +4860,8 @@ static __latent_entropy void net_tx_action(struct softirq_action *h) skb->tc_at_ingress = 1; mini_qdisc_bstats_cpu_update(miniq, skb); - switch (tcf_classify(skb, miniq->filter_list, &cl_res, false)) { + switch (tcf_classify_ingress(skb, miniq->filter_list, &cl_res, + false)) { case TC_ACT_OK: case TC_ACT_RECLASSIFY: skb->tc_index = TC_H_MIN(cl_res.classid);
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index c2cdd0f..7652e0e 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -1559,8 +1559,11 @@ static int tcf_block_setup(struct tcf_block *block, * to this qdisc, (optionally) tests for protocol and asks * specific classifiers.
*/ -int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, - struct tcf_result *res, bool compat_mode) +static inline int __tcf_classify(struct sk_buff *skb, + const struct tcf_proto *tp, + struct tcf_result *res, + bool compat_mode, + u32 *last_executed_chain) { #ifdef CONFIG_NET_CLS_ACT const int max_reclassify_loop = 4; @@ -1582,21 +1585,11 @@ int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, #ifdef CONFIG_NET_CLS_ACT if (unlikely(err == TC_ACT_RECLASSIFY && !compat_mode)) { first_tp = orig_tp; + *last_executed_chain = first_tp->chain->index; goto reset; } else if (unlikely(TC_ACT_EXT_CMP(err, TC_ACT_GOTO_CHAIN))) { first_tp = res->goto_tp; - -#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) - { - struct tc_skb_ext *ext; - - ext = skb_ext_add(skb, TC_SKB_EXT); - if (WARN_ON_ONCE(!ext)) - return TC_ACT_SHOT; - - ext->chain = err & TC_ACT_EXT_VAL_MASK; - } -#endif + *last_executed_chain = err & TC_ACT_EXT_VAL_MASK; goto reset; } #endif @@ -1619,8 +1612,45 @@ int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, goto reclassify; #endif } + +int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, + struct tcf_result *res, bool compat_mode) +{ + u32 last_executed_chain = 0; + + return __tcf_classify(skb, tp, res, compat_mode, + &last_executed_chain); +} EXPORT_SYMBOL(tcf_classify); +int tcf_classify_ingress(struct sk_buff *skb, const struct tcf_proto *tp, + struct tcf_result *res, bool compat_mode) +{ +#if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT) + u32 last_executed_chain = 0; + + return __tcf_classify(skb, tp, res, compat_mode, + &last_executed_chain); +#else + u32 last_executed_chain = tp ? tp->chain->index : 0; + struct tc_skb_ext *ext; + int ret; + + ret = __tcf_classify(skb, tp, res, compat_mode, &last_executed_chain); + + /* If we missed on some chain */ + if (ret == TC_ACT_UNSPEC && last_executed_chain) { + ext = skb_ext_add(skb, TC_SKB_EXT); + if (WARN_ON_ONCE(!ext)) + return TC_ACT_SHOT; + ext->chain = last_executed_chain; + } + + return ret; +#endif +} +EXPORT_SYMBOL(tcf_classify_ingress); + struct tcf_chain_info { struct tcf_proto __rcu **pprev; struct tcf_proto __rcu *next;

From patchwork Sun Feb 16 10:01:22 2020
X-Patchwork-Id: 1238722
From: Paul Blakey <paulb@mellanox.com>
To: Paul Blakey, Saeed Mahameed, Oz Shlomo, Jakub Kicinski, Vlad Buslov, David Miller, netdev@vger.kernel.org, Jiri Pirko, Roi Dayan
Subject: [PATCH net-next v3 02/16] net: sched: Pass ingress block to tcf_classify_ingress
Date: Sun, 16 Feb 2020 12:01:22 +0200
Message-Id: <1581847296-19194-3-git-send-email-paulb@mellanox.com>
In-Reply-To: <1581847296-19194-1-git-send-email-paulb@mellanox.com>

On init of the ingress and clsact qdiscs, save the block on the ingress mini_Qdisc and pass it on to ingress classification, so it can be used for looking up a specified chain index.

Co-developed-by: Vlad Buslov
Signed-off-by: Vlad Buslov
Signed-off-by: Paul Blakey
Reviewed-by: Jiri Pirko
---
Changelog: v2->v3
    Split from v2 first patch
    Init mini qdisc pairs' block on ingress/clsact qdiscs init instead of head chain

 include/net/pkt_cls.h     |  7 +++++--
 include/net/sch_generic.h |  3 +++
 net/core/dev.c            |  4 ++--
 net/sched/cls_api.c       |  4 +++-
 net/sched/sch_generic.c   |  8 ++++++++
 net/sched/sch_ingress.c   | 11 ++++++++++-
 6 files changed, 31 insertions(+), 6 deletions(-)

diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 3d51a2d..3595ec5 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -72,8 +72,10 @@ static inline struct Qdisc *tcf_block_q(struct tcf_block *block) int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res, bool compat_mode); -int tcf_classify_ingress(struct sk_buff *skb, const struct tcf_proto *tp, - struct tcf_result *res, bool compat_mode); +int tcf_classify_ingress(struct sk_buff *skb, + const struct tcf_block *ingress_block, + const struct tcf_proto *tp, struct tcf_result *res, + bool compat_mode); #else static inline bool tcf_block_shared(struct tcf_block *block) @@ -137,6 +139,7 @@ static inline int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, } static inline int tcf_classify_ingress(struct sk_buff *skb, + const struct tcf_block *ingress_block, const struct tcf_proto *tp, struct tcf_result *res, bool compat_mode) {
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 1512087..bcdf98d 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -1269,6 +1269,7 @@ static inline void psched_ratecfg_getrate(struct tc_ratespec *res, */ struct mini_Qdisc { struct tcf_proto *filter_list; + struct tcf_block *block; struct gnet_stats_basic_cpu __percpu *cpu_bstats; struct gnet_stats_queue __percpu *cpu_qstats; struct rcu_head rcu; @@ -1295,6 +1296,8 @@ void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp, struct tcf_proto *tp_head); void mini_qdisc_pair_init(struct mini_Qdisc_pair *miniqp, struct Qdisc *qdisc, struct mini_Qdisc __rcu **p_miniq); +void mini_qdisc_pair_block_init(struct mini_Qdisc_pair *miniqp, + struct tcf_block *block); static inline int skb_tc_reinsert(struct sk_buff *skb, struct tcf_result *res) {
diff --git a/net/core/dev.c b/net/core/dev.c index 107af00..4866e61 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4860,8 +4860,8 @@ static __latent_entropy void net_tx_action(struct softirq_action *h) skb->tc_at_ingress = 1; mini_qdisc_bstats_cpu_update(miniq, skb); - switch (tcf_classify_ingress(skb, miniq->filter_list, &cl_res, - false)) { + switch
(tcf_classify_ingress(skb, miniq->block, miniq->filter_list, + &cl_res, false)) { case TC_ACT_OK: case TC_ACT_RECLASSIFY: skb->tc_index = TC_H_MIN(cl_res.classid);
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 7652e0e..0a29747 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -1623,7 +1623,9 @@ int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, } EXPORT_SYMBOL(tcf_classify); -int tcf_classify_ingress(struct sk_buff *skb, const struct tcf_proto *tp, +int tcf_classify_ingress(struct sk_buff *skb, + const struct tcf_block *ingress_block, + const struct tcf_proto *tp, struct tcf_result *res, bool compat_mode) { #if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 6c9595f..2efd5b6 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -1391,6 +1391,14 @@ void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp, } EXPORT_SYMBOL(mini_qdisc_pair_swap); +void mini_qdisc_pair_block_init(struct mini_Qdisc_pair *miniqp, + struct tcf_block *block) +{ + miniqp->miniq1.block = block; + miniqp->miniq2.block = block; +} +EXPORT_SYMBOL(mini_qdisc_pair_block_init); + void mini_qdisc_pair_init(struct mini_Qdisc_pair *miniqp, struct Qdisc *qdisc, struct mini_Qdisc __rcu **p_miniq) {
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index bf56aa5..8483812 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c @@ -78,6 +78,7 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt, { struct ingress_sched_data *q = qdisc_priv(sch); struct net_device *dev = qdisc_dev(sch); + int err; net_inc_ingress_queue(); @@ -87,7 +88,13 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt, q->block_info.chain_head_change = clsact_chain_head_change; q->block_info.chain_head_change_priv = &q->miniqp; - return tcf_block_get_ext(&q->block, sch, &q->block_info, extack); + err = tcf_block_get_ext(&q->block, sch, &q->block_info, extack); + if (err) + return err; + + mini_qdisc_pair_block_init(&q->miniqp, q->block); + + return 0; } static void ingress_destroy(struct Qdisc *sch) @@ -226,6 +233,8 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt, if (err) return err; + mini_qdisc_pair_block_init(&q->miniqp_ingress, q->ingress_block); + mini_qdisc_pair_init(&q->miniqp_egress, sch, &dev->miniq_egress); q->egress_block_info.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS;

From patchwork Sun Feb 16 10:01:23 2020
X-Patchwork-Id: 1238719
From: Paul Blakey <paulb@mellanox.com>
To: Paul Blakey, Saeed Mahameed, Oz Shlomo, Jakub Kicinski, Vlad Buslov, David Miller, netdev@vger.kernel.org, Jiri Pirko, Roi Dayan
Subject: [PATCH net-next v3 03/16] net: sched: Change the block's chain list to an rcu list
Date: Sun, 16 Feb 2020 12:01:23 +0200
Message-Id: <1581847296-19194-4-git-send-email-paulb@mellanox.com>
In-Reply-To: <1581847296-19194-1-git-send-email-paulb@mellanox.com>

To allow lookup of a block's chain in atomic context.

Co-developed-by: Vlad Buslov
Signed-off-by: Vlad Buslov
Signed-off-by: Paul Blakey
Reviewed-by: Jiri Pirko
---
Changelog: v2->v3
    Split from v2 first patch

 net/sched/cls_api.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 0a29747..79c4503 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -354,7 +355,7 @@ static struct tcf_chain *tcf_chain_create(struct tcf_block *block, chain = kzalloc(sizeof(*chain), GFP_KERNEL); if (!chain) return NULL; - list_add_tail(&chain->list, &block->chain_list); + list_add_tail_rcu(&chain->list, &block->chain_list); mutex_init(&chain->filter_chain_lock); chain->block = block; chain->index = chain_index; @@ -394,7 +395,7 @@ static bool tcf_chain_detach(struct tcf_chain *chain) ASSERT_BLOCK_LOCKED(block); - list_del(&chain->list); + list_del_rcu(&chain->list); if (!chain->index) block->chain0.chain = NULL;

From patchwork Sun Feb 16 10:01:24 2020
X-Patchwork-Id: 1238731
From: Paul Blakey <paulb@mellanox.com>
To: Paul Blakey, Saeed Mahameed, Oz Shlomo, Jakub Kicinski, Vlad Buslov, David Miller, netdev@vger.kernel.org, Jiri Pirko, Roi Dayan
Subject: [PATCH net-next v3 04/16] net: sched: Support specifying a starting chain via tc skb ext
Date: Sun, 16 Feb 2020 12:01:24 +0200
Message-Id: <1581847296-19194-5-git-send-email-paulb@mellanox.com>
In-Reply-To: <1581847296-19194-1-git-send-email-paulb@mellanox.com>

Set the starting chain from the tc skb ext chain value. Once we read the tc skb ext, delete it, so cloned/redirect packets won't inherit it.

In order to look up a chain by chain index on the ingress block at ingress classification, provide a lookup function.

Co-developed-by: Vlad Buslov
Signed-off-by: Vlad Buslov
Signed-off-by: Paul Blakey
Reviewed-by: Jiri Pirko
---
Changelog: v2->v3
    Split from v2 first patch
    Do skb extension processing for ingress classification only
    Consume skb extension

 net/sched/cls_api.c | 39 +++++++++++++++++++++++++++++++++++----
 1 file changed, 35 insertions(+), 4 deletions(-)

diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 79c4503..5331ad3 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -454,6 +454,20 @@ static struct tcf_chain *tcf_chain_lookup(struct tcf_block *block, return NULL; } +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) +static struct tcf_chain *tcf_chain_lookup_rcu(const struct tcf_block *block, + u32 chain_index) +{ + struct tcf_chain *chain; + + list_for_each_entry_rcu(chain, &block->chain_list, list) { + if (chain->index == chain_index) + return chain; + } + return NULL; +} +#endif + static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb, u32 seq, u16 flags, int event, bool unicast); @@ -1562,13 +1576,13 @@ static int tcf_block_setup(struct tcf_block *block, */ static inline int __tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, + const struct tcf_proto *orig_tp, struct tcf_result *res, bool compat_mode, u32 *last_executed_chain) { #ifdef CONFIG_NET_CLS_ACT const int max_reclassify_loop = 4; - const struct tcf_proto *orig_tp = tp; const struct tcf_proto *first_tp; int limit = 0; @@ -1619,7 +1633,7 @@ int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, { u32 last_executed_chain = 0; - return __tcf_classify(skb, tp, res, compat_mode, + return __tcf_classify(skb, tp, tp, res, compat_mode, &last_executed_chain); } EXPORT_SYMBOL(tcf_classify); @@ -1632,14 +1646,31 @@ int tcf_classify_ingress(struct sk_buff *skb, #if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT) u32 last_executed_chain = 0; - return __tcf_classify(skb, tp, res, compat_mode, + return __tcf_classify(skb, tp, tp, res, compat_mode, &last_executed_chain); #else u32 last_executed_chain = tp ?
tp->chain->index : 0; + const struct tcf_proto *orig_tp = tp; struct tc_skb_ext *ext; int ret; - ret = __tcf_classify(skb, tp, res, compat_mode, &last_executed_chain); + ext = skb_ext_find(skb, TC_SKB_EXT); + + if (ext && ext->chain) { + struct tcf_chain *fchain; + + fchain = tcf_chain_lookup_rcu(ingress_block, ext->chain); + if (!fchain) + return TC_ACT_SHOT; + + /* Consume, so cloned/redirect skbs won't inherit ext */ + skb_ext_del(skb, TC_SKB_EXT); + + tp = rcu_dereference_bh(fchain->filter_chain); + } + + ret = __tcf_classify(skb, tp, orig_tp, res, compat_mode, + &last_executed_chain); /* If we missed on some chain */ if (ret == TC_ACT_UNSPEC && last_executed_chain) {

From patchwork Sun Feb 16 10:01:25 2020
X-Patchwork-Id: 1238723
From: Paul Blakey <paulb@mellanox.com>
To: Paul Blakey, Saeed Mahameed, Oz Shlomo, Jakub Kicinski, Vlad Buslov, David Miller, netdev@vger.kernel.org, Jiri Pirko, Roi Dayan
Subject: [PATCH net-next-mlx5 v3 05/16] net/mlx5: Introduce mapping infra for mapping unique ids to data
Date: Sun, 16 Feb 2020 12:01:25 +0200
Message-Id: <1581847296-19194-6-git-send-email-paulb@mellanox.com>
In-Reply-To: <1581847296-19194-1-git-send-email-paulb@mellanox.com>

Add a new interface for mapping data to a given id range (max_id), and back again. It uses an xarray as the id allocator and for finding a given id. For locking it uses xa_lock (spin_lock) for add()/del(), and rcu_read_lock for find().

This mapping interface also supports delaying the mapping removal via a workqueue. This is for cases where we need the mapping to have some grace period with regard to finding it back again, for example for packets arriving from hardware that were marked by a rule with an old mapping that no longer exists.
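For clarity, a usage sketch of the interface added below (illustrative only, not part of the patch; error handling trimmed):

#include <linux/err.h>
#include <linux/types.h>

#include "mapping.h"

static int mapping_usage_example(void)
{
	struct mapping_ctx *ctx;
	u32 chain = 3, id, found;
	int err;

	/* ids allocated from [1, 0xFFFF]; removal delayed by grace period */
	ctx = mapping_create(sizeof(u32), 0xFFFF, true);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	err = mapping_add(ctx, &chain, &id);	/* same data yields same id, refcounted */
	if (!err) {
		mapping_find(ctx, id, &found);	/* found == 3 */
		mapping_remove(ctx, id);	/* freed after MAPPING_GRACE_PERIOD */
	}
	mapping_destroy(ctx);

	return err;
}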
Signed-off-by: Paul Blakey
Reviewed-by: Oz Shlomo
Reviewed-by: Mark Bloch
---
Changelog: v2->v3
    Scrapped the generic structure
    Removed typeof()
    Cleaner flush
    Defined data[0] as data[] so the comment isn't needed
    Changed from idr to xarray
    Use inline style definitions
    Moved from lib to en

 drivers/net/ethernet/mellanox/mlx5/core/Makefile |   2 +-
 .../net/ethernet/mellanox/mlx5/core/en/mapping.c | 218 +++++++++++++++++++++
 .../net/ethernet/mellanox/mlx5/core/en/mapping.h |  27 +++
 3 files changed, 246 insertions(+), 1 deletion(-)
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/mapping.c
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/mapping.h

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index d3e06ce..7f9de55 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -34,7 +34,7 @@ mlx5_core-$(CONFIG_MLX5_EN_ARFS) += en_arfs.o mlx5_core-$(CONFIG_MLX5_EN_RXNFC) += en_fs_ethtool.o mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o en/port_buffer.o mlx5_core-$(CONFIG_MLX5_ESWITCH) += en_rep.o en_tc.o en/tc_tun.o lib/port_tun.o lag_mp.o \ - lib/geneve.o en/tc_tun_vxlan.o en/tc_tun_gre.o \ + lib/geneve.o en/mapping.o en/tc_tun_vxlan.o en/tc_tun_gre.o \ en/tc_tun_geneve.o diag/en_tc_tracepoint.o mlx5_core-$(CONFIG_PCI_HYPERV_INTERFACE) += en/hv_vhca_stats.o
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.c b/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.c new file mode 100644 index 0000000..ea321e5 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.c @@ -0,0 +1,218 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2018 Mellanox Technologies */ + +#include +#include +#include +#include + +#include "mapping.h" + +#define MAPPING_GRACE_PERIOD 2000 + +struct mapping_ctx { + struct xarray xarray; + DECLARE_HASHTABLE(ht, 8); + struct mutex lock; /* Guards hashtable and xarray */ + unsigned long max_id; + size_t data_size; + bool delayed_removal; + struct delayed_work dwork; + struct list_head pending_list; + spinlock_t pending_list_lock; /* Guards pending list */ +}; + +struct mapping_item { + struct rcu_head rcu; + struct list_head list; + unsigned long timeout; + struct hlist_node node; + int cnt; + u32 id; + char data[]; +}; + +int mapping_add(struct mapping_ctx *ctx, void *data, u32 *id) +{ + struct mapping_item *mi; + int err = -ENOMEM; + u32 hash_key; + + mutex_lock(&ctx->lock); + + hash_key = jhash(data, ctx->data_size, 0); + hash_for_each_possible(ctx->ht, mi, node, hash_key) { + if (!memcmp(data, mi->data, ctx->data_size)) + goto attach; + } + + mi = kzalloc(sizeof(*mi) + ctx->data_size, GFP_KERNEL); + if (!mi) + goto err_alloc; + + memcpy(mi->data, data, ctx->data_size); + hash_add(ctx->ht, &mi->node, hash_key); + + err = xa_alloc(&ctx->xarray, &mi->id, mi, XA_LIMIT(1, ctx->max_id), + GFP_KERNEL); + if (err) + goto err_assign; +attach: + ++mi->cnt; + *id = mi->id; + + mutex_unlock(&ctx->lock); + + return 0; + +err_assign: + hash_del(&mi->node); + kfree(mi); +err_alloc: + mutex_unlock(&ctx->lock); + + return err; +} + +static void mapping_remove_and_free(struct mapping_ctx *ctx, + struct mapping_item *mi) +{ + xa_erase(&ctx->xarray, mi->id); + kfree_rcu(mi, rcu); +} + +static void mapping_free_item(struct mapping_ctx *ctx, + struct mapping_item *mi) +{ + if (!ctx->delayed_removal) { + mapping_remove_and_free(ctx, mi); + return; + } + + mi->timeout = jiffies +
msecs_to_jiffies(MAPPING_GRACE_PERIOD); + + spin_lock(&ctx->pending_list_lock); + list_add_tail(&mi->list, &ctx->pending_list); + spin_unlock(&ctx->pending_list_lock); + + schedule_delayed_work(&ctx->dwork, MAPPING_GRACE_PERIOD); +} + +int mapping_remove(struct mapping_ctx *ctx, u32 id) +{ + unsigned long index = id; + struct mapping_item *mi; + int err = -ENOENT; + + mutex_lock(&ctx->lock); + mi = xa_load(&ctx->xarray, index); + if (!mi) + goto out; + err = 0; + + if (--mi->cnt > 0) + goto out; + + hash_del(&mi->node); + mapping_free_item(ctx, mi); +out: + mutex_unlock(&ctx->lock); + + return err; +} + +int mapping_find(struct mapping_ctx *ctx, u32 id, void *data) +{ + unsigned long index = id; + struct mapping_item *mi; + int err = -ENOENT; + + rcu_read_lock(); + mi = xa_load(&ctx->xarray, index); + if (!mi) + goto err_find; + + memcpy(data, mi->data, ctx->data_size); + err = 0; + +err_find: + rcu_read_unlock(); + return err; +} + +static void +mapping_remove_and_free_list(struct mapping_ctx *ctx, struct list_head *list) +{ + struct mapping_item *mi; + + list_for_each_entry(mi, list, list) + mapping_remove_and_free(ctx, mi); +} + +static void mapping_work_handler(struct work_struct *work) +{ + unsigned long min_timeout = 0, now = jiffies; + struct mapping_item *mi, *next; + LIST_HEAD(pending_items); + struct mapping_ctx *ctx; + + ctx = container_of(work, struct mapping_ctx, dwork.work); + + spin_lock(&ctx->pending_list_lock); + list_for_each_entry_safe(mi, next, &ctx->pending_list, list) { + if (time_after(now, mi->timeout)) + list_move(&mi->list, &pending_items); + else if (!min_timeout || + time_before(mi->timeout, min_timeout)) + min_timeout = mi->timeout; + } + spin_unlock(&ctx->pending_list_lock); + + mapping_remove_and_free_list(ctx, &pending_items); + + if (min_timeout) + schedule_delayed_work(&ctx->dwork, abs(min_timeout - now)); +} + +static void mapping_flush_work(struct mapping_ctx *ctx) +{ + if (!ctx->delayed_removal) + return; + + cancel_delayed_work_sync(&ctx->dwork); + mapping_remove_and_free_list(ctx, &ctx->pending_list); +} + +struct mapping_ctx * +mapping_create(size_t data_size, u32 max_id, bool delayed_removal) +{ + struct mapping_ctx *ctx; + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return ERR_PTR(-ENOMEM); + + ctx->max_id = max_id ? max_id : UINT_MAX; + ctx->data_size = data_size; + + if (delayed_removal) { + INIT_DELAYED_WORK(&ctx->dwork, mapping_work_handler); + INIT_LIST_HEAD(&ctx->pending_list); + spin_lock_init(&ctx->pending_list_lock); + ctx->delayed_removal = true; + } + + mutex_init(&ctx->lock); + xa_init_flags(&ctx->xarray, XA_FLAGS_ALLOC1); + + return ctx; +} + +void mapping_destroy(struct mapping_ctx *ctx) +{ + mapping_flush_work(ctx); + xa_destroy(&ctx->xarray); + mutex_destroy(&ctx->lock); + + kfree(ctx); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.h b/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.h new file mode 100644 index 0000000..285525c --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies */ + +#ifndef __MLX5_MAPPING_H__ +#define __MLX5_MAPPING_H__ + +struct mapping_ctx; + +int mapping_add(struct mapping_ctx *ctx, void *data, u32 *id); +int mapping_remove(struct mapping_ctx *ctx, u32 id); +int mapping_find(struct mapping_ctx *ctx, u32 id, void *data); + +/* mapping uses an xarray to map data to ids in add(), and for find(). 
+ * For locking, it uses an internal xarray spin lock for add()/remove(), + * find() uses rcu_read_lock(). + * Choosing delayed_removal postpones the removal of a previously mapped + * id by MAPPING_GRACE_PERIOD milliseconds. + * This is to avoid races against hardware, where we mark the packet in + * hardware with a previous id, and a quick remove() and add() reuses the same + * previous id. Then find() will get the new mapping instead of the old + * one which was used to mark the packet. + */ +struct mapping_ctx *mapping_create(size_t data_size, u32 max_id, + bool delayed_removal); +void mapping_destroy(struct mapping_ctx *ctx); + +#endif /* __MLX5_MAPPING_H__ */

From patchwork Sun Feb 16 10:01:26 2020
X-Patchwork-Id: 1238718
From: Paul Blakey <paulb@mellanox.com>
To: Paul Blakey, Saeed Mahameed, Oz Shlomo, Jakub Kicinski, Vlad Buslov, David Miller, netdev@vger.kernel.org, Jiri Pirko, Roi Dayan
Subject: [PATCH net-next-mlx5 v3 06/16] net/mlx5: E-Switch, Move source port on reg_c0 to the upper 16 bits
Date: Sun, 16 Feb 2020 12:01:26 +0200
Message-Id: <1581847296-19194-7-git-send-email-paulb@mellanox.com>
In-Reply-To: <1581847296-19194-1-git-send-email-paulb@mellanox.com>

Multi chain support requires the miss path to continue processing from the last chain id, and for that we need to save the chain miss tag (a mapping for the 32bit chain id) on reg_c0, which will come in a next patch.

Currently reg_c0 is used exclusively to store the source port metadata, taking all 32 bits: 16 bits of vhca_id and 16 bits of vport number. We will move this source port metadata to the upper 16 bits, and leave the lower bits for the chain miss tag.

We compress the reg_c0 source port metadata to 16 bits by taking 8 bits from the vhca_id and 8 bits from the vport number. Since we compress the vport number to 8 bits statically, and leave two top ids for the special PF/ECPF numbers, we will only support a max of 254 vports with this strategy.
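To make the resulting bit layout concrete, here is a sketch of how the compressed source port metadata is composed (it mirrors mlx5_eswitch_get_vport_metadata_for_match() in the diff below; the standalone helper and its omission of the PF/ECPF special-casing are illustrative simplifications):

/* reg_c0: <vhca_id[7:0] | vport_num[7:0] | 16bit chain miss tag> */
static u32 example_source_port_metadata(u16 vhca_id, u16 vport_num)
{
	u32 val;

	/* 8 bits of vhca_id above 8 bits of vport number */
	val = ((vhca_id & 0xff) << 8) | (vport_num & 0xff);

	/* place the 16bit value in the upper half of the 32bit register */
	return val << 16;
}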
Signed-off-by: Paul Blakey Reviewed-by: Oz Shlomo Reviewed-by: Mark Bloch --- drivers/infiniband/hw/mlx5/main.c | 3 +- .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 64 ++++++++++++++++++---- include/linux/mlx5/eswitch.h | 29 +++++++++- 3 files changed, 83 insertions(+), 13 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index e874d68..230028c 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -3570,7 +3570,8 @@ static void mlx5_ib_set_rule_source_port(struct mlx5_ib_dev *dev, misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2); - MLX5_SET_TO_ONES(fte_match_set_misc2, misc, metadata_reg_c_0); + MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_mask()); } else { misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 979f13b..788bb83 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -85,7 +85,8 @@ static struct mlx5_eswitch_rep *mlx5_eswitch_get_rep(struct mlx5_eswitch *esw, attr->in_rep->vport)); misc2 = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2); - MLX5_SET_TO_ONES(fte_match_set_misc2, misc2, metadata_reg_c_0); + MLX5_SET(fte_match_set_misc2, misc2, metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_mask()); spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2; misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); @@ -621,7 +622,8 @@ static void peer_miss_rules_setup(struct mlx5_eswitch *esw, if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2); - MLX5_SET_TO_ONES(fte_match_set_misc2, misc, metadata_reg_c_0); + MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_mask()); spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2; } else { @@ -851,8 +853,9 @@ static void esw_set_flow_group_source_port(struct mlx5_eswitch *esw, match_criteria_enable, MLX5_MATCH_MISC_PARAMETERS_2); - MLX5_SET_TO_ONES(fte_match_param, match_criteria, - misc_parameters_2.metadata_reg_c_0); + MLX5_SET(fte_match_param, match_criteria, + misc_parameters_2.metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_mask()); } else { MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, @@ -1134,7 +1137,8 @@ struct mlx5_flow_handle * mlx5_eswitch_get_vport_metadata_for_match(esw, vport)); misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2); - MLX5_SET_TO_ONES(fte_match_set_misc2, misc, metadata_reg_c_0); + MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_mask()); spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2; } else { @@ -1604,11 +1608,19 @@ static int esw_vport_add_ingress_acl_modify_metadata(struct mlx5_eswitch *esw, static const struct mlx5_flow_spec spec = {}; struct mlx5_flow_act flow_act = {}; int err = 0; + u32 key; + + key = mlx5_eswitch_get_vport_metadata_for_match(esw, vport->vport); + key >>= ESW_SOURCE_PORT_METADATA_OFFSET; MLX5_SET(set_action_in, action, action_type, MLX5_ACTION_TYPE_SET); - MLX5_SET(set_action_in, action, field, MLX5_ACTION_IN_FIELD_METADATA_REG_C_0); - MLX5_SET(set_action_in, action, data, - 
mlx5_eswitch_get_vport_metadata_for_match(esw, vport->vport)); + MLX5_SET(set_action_in, action, field, + MLX5_ACTION_IN_FIELD_METADATA_REG_C_0); + MLX5_SET(set_action_in, action, data, key); + MLX5_SET(set_action_in, action, offset, + ESW_SOURCE_PORT_METADATA_OFFSET); + MLX5_SET(set_action_in, action, length, + ESW_SOURCE_PORT_METADATA_BITS); vport->ingress.offloads.modify_metadata = mlx5_modify_header_alloc(esw->dev, MLX5_FLOW_NAMESPACE_ESW_INGRESS, @@ -2470,9 +2482,41 @@ bool mlx5_eswitch_vport_match_metadata_enabled(const struct mlx5_eswitch *esw) } EXPORT_SYMBOL(mlx5_eswitch_vport_match_metadata_enabled); -u32 mlx5_eswitch_get_vport_metadata_for_match(const struct mlx5_eswitch *esw, +u32 mlx5_eswitch_get_vport_metadata_for_match(struct mlx5_eswitch *esw, u16 vport_num) { - return ((MLX5_CAP_GEN(esw->dev, vhca_id) & 0xffff) << 16) | vport_num; + u32 vport_num_mask = GENMASK(ESW_VPORT_BITS - 1, 0); + u32 vhca_id_mask = GENMASK(ESW_VHCA_ID_BITS - 1, 0); + u32 vhca_id = MLX5_CAP_GEN(esw->dev, vhca_id); + u32 val; + + /* Make sure the vhca_id fits the ESW_VHCA_ID_BITS */ + WARN_ON_ONCE(vhca_id >= BIT(ESW_VHCA_ID_BITS)); + + /* Trim vhca_id to ESW_VHCA_ID_BITS */ + vhca_id &= vhca_id_mask; + + /* Make sure pf and ecpf map to end of ESW_VPORT_BITS range so they + * don't overlap with VF numbers, and themselves, after trimming. + */ + WARN_ON_ONCE((MLX5_VPORT_UPLINK & vport_num_mask) < + vport_num_mask - 1); + WARN_ON_ONCE((MLX5_VPORT_ECPF & vport_num_mask) < + vport_num_mask - 1); + WARN_ON_ONCE((MLX5_VPORT_UPLINK & vport_num_mask) == + (MLX5_VPORT_ECPF & vport_num_mask)); + + /* Make sure that the VF vport_num fits ESW_VPORT_BITS and don't + * overlap with pf and ecpf. + */ + if (vport_num != MLX5_VPORT_UPLINK && + vport_num != MLX5_VPORT_ECPF) + WARN_ON_ONCE(vport_num >= vport_num_mask - 1); + + /* We can now trim vport_num to ESW_VPORT_BITS */ + vport_num &= vport_num_mask; + + val = (vhca_id << ESW_VPORT_BITS) | vport_num; + return val << (32 - ESW_SOURCE_PORT_METADATA_BITS); } EXPORT_SYMBOL(mlx5_eswitch_get_vport_metadata_for_match); diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h index 98e667b..dd1333f 100644 --- a/include/linux/mlx5/eswitch.h +++ b/include/linux/mlx5/eswitch.h @@ -71,7 +71,26 @@ enum devlink_eswitch_encap_mode mlx5_eswitch_get_encap_mode(const struct mlx5_core_dev *dev); bool mlx5_eswitch_vport_match_metadata_enabled(const struct mlx5_eswitch *esw); -u32 mlx5_eswitch_get_vport_metadata_for_match(const struct mlx5_eswitch *esw, + +/* Reg C0 usage: + * Reg C0 = < ESW_VHCA_ID_BITS(8) | ESW_VPORT BITS(8) | ESW_CHAIN_TAG(16) > + * + * Highest 8 bits of the reg c0 is the vhca_id, next 8 bits is vport_num, + * the rest (lowest 16 bits) is left for tc chain tag restoration. + * VHCA_ID + VPORT comprise the SOURCE_PORT matching. 
+ */ +#define ESW_VHCA_ID_BITS 8 +#define ESW_VPORT_BITS 8 +#define ESW_SOURCE_PORT_METADATA_BITS (ESW_VHCA_ID_BITS + ESW_VPORT_BITS) +#define ESW_SOURCE_PORT_METADATA_OFFSET (32 - ESW_SOURCE_PORT_METADATA_BITS) +#define ESW_CHAIN_TAG_METADATA_BITS (32 - ESW_SOURCE_PORT_METADATA_BITS) + +static inline u32 mlx5_eswitch_get_vport_metadata_mask(void) +{ + return GENMASK(31, 32 - ESW_SOURCE_PORT_METADATA_BITS); +} + +u32 mlx5_eswitch_get_vport_metadata_for_match(struct mlx5_eswitch *esw, u16 vport_num); u8 mlx5_eswitch_mode(struct mlx5_eswitch *esw); #else /* CONFIG_MLX5_ESWITCH */ @@ -94,11 +113,17 @@ static inline u8 mlx5_eswitch_mode(struct mlx5_eswitch *esw) }; static inline u32 -mlx5_eswitch_get_vport_metadata_for_match(const struct mlx5_eswitch *esw, +mlx5_eswitch_get_vport_metadata_for_match(struct mlx5_eswitch *esw, int vport_num) { return 0; }; + +static inline u32 +mlx5_eswitch_get_vport_metadata_mask(void) +{ + return 0; +} #endif /* CONFIG_MLX5_ESWITCH */ #endif

From patchwork Sun Feb 16 10:01:27 2020
X-Patchwork-Id: 1238724
From: Paul Blakey <paulb@mellanox.com>
To: Paul Blakey, Saeed Mahameed, Oz Shlomo, Jakub Kicinski, Vlad Buslov, David Miller, netdev@vger.kernel.org, Jiri Pirko, Roi Dayan
Subject: [PATCH net-next-mlx5 v3 07/16] net/mlx5: E-Switch, Get reg_c0 value on CQE
Date: Sun, 16 Feb 2020 12:01:27 +0200
Message-Id: <1581847296-19194-8-git-send-email-paulb@mellanox.com>
In-Reply-To: <1581847296-19194-1-git-send-email-paulb@mellanox.com>

On the RX side, create a restore table in the OFFLOADS namespace. This table matches on all the reg_c0 values we will use, and sets the matched value to the flow_tag, which can then be read on the CQE. As there is no copy action from reg_c0 to the flow tag, we have to set the flow tag explicitly.

We add an API so callers can add all the used reg_c0 values (tags), and for each of those we add a restore rule.

This will be used in a following patch to save the miss chain mapping tag on reg_c0 and from it restore the tc chain on the skb.

Signed-off-by: Paul Blakey
Reviewed-by: Roi Dayan
Reviewed-by: Oz Shlomo
Reviewed-by: Mark Bloch
---
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h  |  14 ++
 .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 147 +++++++++++++++++++--
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c  |   4 +-
 include/linux/mlx5/eswitch.h                       |   2 +
 4 files changed, 156 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 4472710..a94d91c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -189,6 +189,9 @@ struct mlx5_eswitch_fdb { }; struct mlx5_esw_offload { + struct mlx5_flow_table *ft_offloads_restore; + struct mlx5_flow_group *restore_group; + struct mlx5_flow_table *ft_offloads; struct mlx5_flow_group *vport_rx_group; struct mlx5_eswitch_rep *vport_reps; @@ -623,6 +626,11 @@ struct mlx5_vport *__must_check esw_vport_destroy_offloads_acl_tables(struct mlx5_eswitch *esw, struct mlx5_vport *vport); +struct mlx5_flow_handle * +esw_add_restore_rule(struct mlx5_eswitch *esw, u32 tag); +u32 +esw_get_max_restore_tag(struct mlx5_eswitch *esw); + #else /* CONFIG_MLX5_ESWITCH */ /* eswitch API stubs */ static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; } @@ -638,6 +646,12 @@ static inline const u32 *mlx5_esw_query_functions(struct mlx5_core_dev *dev) static inline void mlx5_eswitch_update_num_of_vfs(struct mlx5_eswitch *esw, const int num_vfs) {} +static struct mlx5_flow_handle * +esw_add_restore_rule(struct mlx5_eswitch *esw, u32 tag) +{ + return ERR_PTR(-EOPNOTSUPP); +} + #endif /* CONFIG_MLX5_ESWITCH */ #endif /* __MLX5_ESWITCH_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 788bb83..81c2cbf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -838,6 +838,54 @@ static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw) return err; } +struct mlx5_flow_handle * +esw_add_restore_rule(struct mlx5_eswitch *esw, u32 tag) +{ + struct mlx5_flow_act flow_act = { .flags = FLOW_ACT_NO_APPEND, }; + struct mlx5_flow_table *ft = esw->offloads.ft_offloads_restore; + struct mlx5_flow_context *flow_context; + struct mlx5_flow_handle *flow_rule; + struct mlx5_flow_destination dest; + struct mlx5_flow_spec *spec; + void *misc; + + spec = kzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return ERR_PTR(-ENOMEM); + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + misc_parameters_2); + MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, + ESW_CHAIN_TAG_METADATA_MASK); + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters_2); + MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, tag); + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + + flow_context = &spec->flow_context; + flow_context->flags |= FLOW_CONTEXT_HAS_TAG; + flow_context->flow_tag = tag; + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest.ft = esw->offloads.ft_offloads; + + flow_rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); + kfree(spec); + + if (IS_ERR(flow_rule)) + esw_warn(esw->dev, + "Failed to create restore rule for tag: %d, err(%d)\n", + tag,
(int)PTR_ERR(flow_rule)); + + return flow_rule; +} + +u32 +esw_get_max_restore_tag(struct mlx5_eswitch *esw) +{ + return ESW_CHAIN_TAG_METADATA_MASK; +} + #define MAX_PF_SQ 256 #define MAX_SQ_NVPORTS 32 @@ -1060,6 +1108,7 @@ static int esw_create_offloads_table(struct mlx5_eswitch *esw, int nvports) } ft_attr.max_fte = nvports + MLX5_ESW_MISS_FLOWS; + ft_attr.prio = 1; ft_offloads = mlx5_create_flow_table(ns, &ft_attr); if (IS_ERR(ft_offloads)) { @@ -1164,6 +1213,81 @@ struct mlx5_flow_handle * return flow_rule; } +static void esw_destroy_restore_table(struct mlx5_eswitch *esw) +{ + struct mlx5_esw_offload *offloads = &esw->offloads; + + mlx5_destroy_flow_group(offloads->restore_group); + mlx5_destroy_flow_table(offloads->ft_offloads_restore); +} + +static int esw_create_restore_table(struct mlx5_eswitch *esw) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_core_dev *dev = esw->dev; + struct mlx5_flow_namespace *ns; + void *match_criteria, *misc; + struct mlx5_flow_table *ft; + struct mlx5_flow_group *g; + u32 *flow_group_in; + int err = 0; + + ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_OFFLOADS); + if (!ns) { + esw_warn(esw->dev, "Failed to get offloads flow namespace\n"); + return -EOPNOTSUPP; + } + + flow_group_in = kvzalloc(inlen, GFP_KERNEL); + if (!flow_group_in) { + err = -ENOMEM; + goto out_free; + } + + ft_attr.max_fte = 1 << ESW_CHAIN_TAG_METADATA_BITS; + ft = mlx5_create_flow_table(ns, &ft_attr); + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + esw_warn(esw->dev, "Failed to create restore table, err %d\n", + err); + goto out_free; + } + + memset(flow_group_in, 0, inlen); + match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, + match_criteria); + misc = MLX5_ADDR_OF(fte_match_param, match_criteria, + misc_parameters_2); + + MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, + ESW_CHAIN_TAG_METADATA_MASK); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, + ft_attr.max_fte - 1); + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, + MLX5_MATCH_MISC_PARAMETERS_2); + g = mlx5_create_flow_group(ft, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + esw_warn(dev, "Failed to create restore flow group, err: %d\n", + err); + goto err_group; + } + + esw->offloads.ft_offloads_restore = ft; + esw->offloads.restore_group = g; + + return 0; + +err_group: + mlx5_destroy_flow_table(ft); +out_free: + kvfree(flow_group_in); + + return err; +} + static int esw_offloads_start(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack) { @@ -1923,13 +2047,17 @@ static int esw_offloads_steering_init(struct mlx5_eswitch *esw) if (err) return err; - err = esw_create_offloads_fdb_tables(esw, total_vports); + err = esw_create_offloads_table(esw, total_vports); if (err) - goto create_fdb_err; + goto create_offloads_err; - err = esw_create_offloads_table(esw, total_vports); + err = esw_create_restore_table(esw); if (err) - goto create_ft_err; + goto create_restore_err; + + err = esw_create_offloads_fdb_tables(esw, total_vports); + if (err) + goto create_fdb_err; err = esw_create_vport_rx_group(esw, total_vports); if (err) @@ -1938,12 +2066,12 @@ static int esw_offloads_steering_init(struct mlx5_eswitch *esw) return 0; create_fg_err: - esw_destroy_offloads_table(esw); - -create_ft_err: esw_destroy_offloads_fdb_tables(esw); - create_fdb_err: + esw_destroy_restore_table(esw); +create_restore_err: + 
esw_destroy_offloads_table(esw); +create_offloads_err: esw_destroy_uplink_offloads_acl_tables(esw); return err; @@ -1952,8 +2080,9 @@ static int esw_offloads_steering_init(struct mlx5_eswitch *esw) static void esw_offloads_steering_cleanup(struct mlx5_eswitch *esw) { esw_destroy_vport_rx_group(esw); - esw_destroy_offloads_table(esw); esw_destroy_offloads_fdb_tables(esw); + esw_destroy_restore_table(esw); + esw_destroy_offloads_table(esw); esw_destroy_uplink_offloads_acl_tables(esw); }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 9dc2424..2660ffa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -111,8 +111,8 @@ #define ANCHOR_MIN_LEVEL (BY_PASS_MIN_LEVEL + 1) #define OFFLOADS_MAX_FT 1 -#define OFFLOADS_NUM_PRIOS 1 -#define OFFLOADS_MIN_LEVEL (ANCHOR_MIN_LEVEL + 1) +#define OFFLOADS_NUM_PRIOS 2 +#define OFFLOADS_MIN_LEVEL (ANCHOR_MIN_LEVEL + OFFLOADS_NUM_PRIOS) #define LAG_PRIO_NUM_LEVELS 1 #define LAG_NUM_PRIOS 1
diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h index dd1333f..61705e7 100644 --- a/include/linux/mlx5/eswitch.h +++ b/include/linux/mlx5/eswitch.h @@ -84,6 +84,8 @@ enum devlink_eswitch_encap_mode #define ESW_SOURCE_PORT_METADATA_BITS (ESW_VHCA_ID_BITS + ESW_VPORT_BITS) #define ESW_SOURCE_PORT_METADATA_OFFSET (32 - ESW_SOURCE_PORT_METADATA_BITS) #define ESW_CHAIN_TAG_METADATA_BITS (32 - ESW_SOURCE_PORT_METADATA_BITS) +#define ESW_CHAIN_TAG_METADATA_MASK GENMASK(ESW_CHAIN_TAG_METADATA_BITS - 1,\ 0) static inline u32 mlx5_eswitch_get_vport_metadata_mask(void) {

From patchwork Sun Feb 16 10:01:28 2020
X-Patchwork-Id: 1238728
From: Paul Blakey <paulb@mellanox.com>
To: Paul Blakey, Saeed Mahameed, Oz Shlomo, Jakub Kicinski, Vlad Buslov, David Miller, netdev@vger.kernel.org, Jiri Pirko, Roi Dayan
Subject: [PATCH net-next-mlx5 v3 08/16] net/mlx5: E-Switch, Mark miss packets with new chain id mapping
Date: Sun, 16 Feb 2020 12:01:28 +0200
Message-Id: <1581847296-19194-9-git-send-email-paulb@mellanox.com>
In-Reply-To: <1581847296-19194-1-git-send-email-paulb@mellanox.com>

Currently, if we miss in hardware after jumping to some chain, we continue in chain 0 in software. This is wrong, and with the new tc skb extension we can now restore the chain id on the skb, so tc can continue in the correct chain.

To restore the chain id in software after a miss in hardware, we create a mapping from 32bit chain ids to the 16 bits of reg_c0 that survive loopback. We then mark packets that miss on some chain with the current chain id mapping on their reg_c0 field. Using this mapping, we will support up to 64K concurrent chains. The marked register value gets to the CQE on flow_tag via the eswitch restore rules.

In the next commit, we will reverse the mapping we got on the CQE to a chain id and tell tc to continue in the sw chain where we left off via the tc skb extension.

Signed-off-by: Paul Blakey
Reviewed-by: Roi Dayan
Reviewed-by: Oz Shlomo
Reviewed-by: Mark Bloch
---
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c    |   8 ++
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.h    |  12 ++
 .../mellanox/mlx5/core/eswitch_offloads_chains.c   | 130 ++++++++++++++++++++-
 .../mellanox/mlx5/core/eswitch_offloads_chains.h   |   4 +-
 4 files changed, 150 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 74091f7..2ee80f0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -153,6 +153,14 @@ struct mlx5e_tc_flow_parse_attr { #define MLX5E_TC_TABLE_NUM_GROUPS 4 #define MLX5E_TC_TABLE_MAX_GROUP_SIZE BIT(16) +struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[] = { + [CHAIN_TO_REG] = { + .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_0, + .moffset = 0, + .mlen = 2, + }, +}; + struct mlx5e_hairpin { struct mlx5_hairpin *pair;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h index 262cdb7..e2dbbae 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h @@ -91,6 +91,18 @@ void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv, void mlx5e_tc_reoffload_flows_work(struct work_struct *work); +enum mlx5e_tc_attr_to_reg { + CHAIN_TO_REG, +}; + +struct mlx5e_tc_attr_to_reg_mapping { + int mfield; /* rewrite field */ + int moffset; /* offset of mfield */ + int mlen; /* bytes to rewrite/match */ +}; + +extern struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[]; + bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv, struct net_device *out_dev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.c index c5a446e..b139a97 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.c @@ -6,14 +6,17 @@ #include #include "eswitch_offloads_chains.h" +#include "en/mapping.h" #include "mlx5_core.h" #include "fs_core.h" #include "eswitch.h" #include "en.h" +#include "en_tc.h" #define esw_chains_priv(esw) ((esw)->fdb_table.offloads.esw_chains_priv) #define esw_chains_lock(esw) (esw_chains_priv(esw)->lock) #define esw_chains_ht(esw)
(esw_chains_priv(esw)->chains_ht) +#define esw_chains_mapping(esw) (esw_chains_priv(esw)->chains_mapping) #define esw_prios_ht(esw) (esw_chains_priv(esw)->prios_ht) #define fdb_pool_left(esw) (esw_chains_priv(esw)->fdb_left) #define tc_slow_fdb(esw) ((esw)->fdb_table.offloads.slow_fdb) @@ -44,6 +47,7 @@ struct mlx5_esw_chains_priv { struct mutex lock; struct mlx5_flow_table *tc_end_fdb; + struct mapping_ctx *chains_mapping; int fdb_left[ARRAY_SIZE(ESW_POOLS)]; }; @@ -54,9 +58,12 @@ struct fdb_chain { u32 chain; int ref; + int id; struct mlx5_eswitch *esw; struct list_head prios_list; + struct mlx5_flow_handle *restore_rule; + struct mlx5_modify_hdr *miss_modify_hdr; }; struct fdb_prio_key { @@ -255,6 +262,70 @@ static unsigned int mlx5_esw_chains_get_level_range(struct mlx5_eswitch *esw) mlx5_destroy_flow_table(fdb); } +static int +create_fdb_chain_restore(struct fdb_chain *fdb_chain) +{ + char modact[MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto)]; + struct mlx5_eswitch *esw = fdb_chain->esw; + struct mlx5_modify_hdr *mod_hdr; + u32 index; + int err; + + if (fdb_chain->chain == mlx5_esw_chains_get_ft_chain(esw)) + return 0; + + err = mapping_add(esw_chains_mapping(esw), &fdb_chain->chain, &index); + if (err) + return err; + if (index == MLX5_FS_DEFAULT_FLOW_TAG) { + /* we got the special default flow tag id, so we won't know + * if we actually marked the packet with the restore rule + * we create. + * + * This case isn't possible with MLX5_FS_DEFAULT_FLOW_TAG = 0. + */ + err = mapping_add(esw_chains_mapping(esw), + &fdb_chain->chain, &index); + mapping_remove(esw_chains_mapping(esw), + MLX5_FS_DEFAULT_FLOW_TAG); + if (err) + return err; + } + + fdb_chain->id = index; + + MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET); + MLX5_SET(set_action_in, modact, field, + mlx5e_tc_attr_to_reg_mappings[CHAIN_TO_REG].mfield); + MLX5_SET(set_action_in, modact, offset, + mlx5e_tc_attr_to_reg_mappings[CHAIN_TO_REG].moffset * 8); + MLX5_SET(set_action_in, modact, length, + mlx5e_tc_attr_to_reg_mappings[CHAIN_TO_REG].mlen * 8); + MLX5_SET(set_action_in, modact, data, fdb_chain->id); + mod_hdr = mlx5_modify_header_alloc(esw->dev, MLX5_FLOW_NAMESPACE_FDB, + 1, modact); + if (IS_ERR(mod_hdr)) { + err = PTR_ERR(mod_hdr); + goto err_mod_hdr; + } + fdb_chain->miss_modify_hdr = mod_hdr; + + fdb_chain->restore_rule = esw_add_restore_rule(esw, fdb_chain->id); + if (IS_ERR(fdb_chain->restore_rule)) { + err = PTR_ERR(fdb_chain->restore_rule); + goto err_rule; + } + + return 0; + +err_rule: + mlx5_modify_header_dealloc(esw->dev, fdb_chain->miss_modify_hdr); +err_mod_hdr: + /* Datapath can't find this mapping, so we can safely remove it */ + mapping_remove(esw_chains_mapping(esw), fdb_chain->id); + return err; +} + static struct fdb_chain * mlx5_esw_chains_create_fdb_chain(struct mlx5_eswitch *esw, u32 chain) { @@ -269,6 +340,10 @@ static unsigned int mlx5_esw_chains_get_level_range(struct mlx5_eswitch *esw) fdb_chain->chain = chain; INIT_LIST_HEAD(&fdb_chain->prios_list); + err = create_fdb_chain_restore(fdb_chain); + if (err) + goto err_restore; + err = rhashtable_insert_fast(&esw_chains_ht(esw), &fdb_chain->node, chain_params); if (err) @@ -277,6 +352,12 @@ static unsigned int mlx5_esw_chains_get_level_range(struct mlx5_eswitch *esw) return fdb_chain; err_insert: + if (fdb_chain->chain != mlx5_esw_chains_get_ft_chain(esw)) { + mlx5_del_flow_rules(fdb_chain->restore_rule); + mlx5_modify_header_dealloc(esw->dev, + fdb_chain->miss_modify_hdr); + } +err_restore: kvfree(fdb_chain); return 
ERR_PTR(err); } @@ -288,6 +369,15 @@ static unsigned int mlx5_esw_chains_get_level_range(struct mlx5_eswitch *esw) rhashtable_remove_fast(&esw_chains_ht(esw), &fdb_chain->node, chain_params); + + if (fdb_chain->chain != mlx5_esw_chains_get_ft_chain(esw)) { + mlx5_del_flow_rules(fdb_chain->restore_rule); + mlx5_modify_header_dealloc(esw->dev, + fdb_chain->miss_modify_hdr); + + mapping_remove(esw_chains_mapping(esw), fdb_chain->id); + } + kvfree(fdb_chain); } @@ -310,10 +400,12 @@ static unsigned int mlx5_esw_chains_get_level_range(struct mlx5_eswitch *esw) } static struct mlx5_flow_handle * -mlx5_esw_chains_add_miss_rule(struct mlx5_flow_table *fdb, +mlx5_esw_chains_add_miss_rule(struct fdb_chain *fdb_chain, + struct mlx5_flow_table *fdb, struct mlx5_flow_table *next_fdb) { static const struct mlx5_flow_spec spec = {}; + struct mlx5_eswitch *esw = fdb_chain->esw; struct mlx5_flow_destination dest = {}; struct mlx5_flow_act act = {}; @@ -322,6 +414,11 @@ static unsigned int mlx5_esw_chains_get_level_range(struct mlx5_eswitch *esw) dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; dest.ft = next_fdb; + if (fdb_chain->chain != mlx5_esw_chains_get_ft_chain(esw)) { + act.modify_hdr = fdb_chain->miss_modify_hdr; + act.action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + } + return mlx5_add_flow_rules(fdb, &spec, &act, &dest, 1); } @@ -345,7 +442,8 @@ static unsigned int mlx5_esw_chains_get_level_range(struct mlx5_eswitch *esw) list_for_each_entry_continue_reverse(pos, &fdb_chain->prios_list, list) { - miss_rules[n] = mlx5_esw_chains_add_miss_rule(pos->fdb, + miss_rules[n] = mlx5_esw_chains_add_miss_rule(fdb_chain, + pos->fdb, next_fdb); if (IS_ERR(miss_rules[n])) { err = PTR_ERR(miss_rules[n]); @@ -459,7 +557,7 @@ static unsigned int mlx5_esw_chains_get_level_range(struct mlx5_eswitch *esw) } /* Add miss rule to next_fdb */ - miss_rule = mlx5_esw_chains_add_miss_rule(fdb, next_fdb); + miss_rule = mlx5_esw_chains_add_miss_rule(fdb_chain, fdb, next_fdb); if (IS_ERR(miss_rule)) { err = PTR_ERR(miss_rule); goto err_miss_rule; @@ -624,6 +722,7 @@ struct mlx5_flow_table * struct mlx5_esw_chains_priv *chains_priv; struct mlx5_core_dev *dev = esw->dev; u32 max_flow_counter, fdb_max; + struct mapping_ctx *mapping; int err; chains_priv = kzalloc(sizeof(*chains_priv), GFP_KERNEL); @@ -660,10 +759,20 @@ struct mlx5_flow_table * if (err) goto init_prios_ht_err; + mapping = mapping_create(sizeof(u32), esw_get_max_restore_tag(esw), + true); + if (IS_ERR(mapping)) { + err = PTR_ERR(mapping); + goto mapping_err; + } + esw_chains_mapping(esw) = mapping; + mutex_init(&esw_chains_lock(esw)); return 0; +mapping_err: + rhashtable_destroy(&esw_prios_ht(esw)); init_prios_ht_err: rhashtable_destroy(&esw_chains_ht(esw)); init_chains_ht_err: @@ -675,6 +784,7 @@ struct mlx5_flow_table * mlx5_esw_chains_cleanup(struct mlx5_eswitch *esw) { mutex_destroy(&esw_chains_lock(esw)); + mapping_destroy(esw_chains_mapping(esw)); rhashtable_destroy(&esw_prios_ht(esw)); rhashtable_destroy(&esw_chains_ht(esw)); @@ -756,3 +866,17 @@ struct mlx5_flow_table * mlx5_esw_chains_close(esw); mlx5_esw_chains_cleanup(esw); } + +int mlx5_eswitch_get_chain_for_tag(struct mlx5_eswitch *esw, u32 tag, + u32 *chain) +{ + int err; + + err = mapping_find(esw_chains_mapping(esw), tag, chain); + if (err) { + esw_warn(esw->dev, "Can't find chain for tag: %d\n", tag); + return -ENOENT; + } + + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.h index 
2e13097..da45e49 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.h @@ -26,5 +26,7 @@ struct mlx5_flow_table * int mlx5_esw_chains_create(struct mlx5_eswitch *esw); void mlx5_esw_chains_destroy(struct mlx5_eswitch *esw); -#endif /* __ML5_ESW_CHAINS_H__ */ +int +mlx5_eswitch_get_chain_for_tag(struct mlx5_eswitch *esw, u32 tag, u32 *chain); +#endif /* __ML5_ESW_CHAINS_H__ */ From patchwork Sun Feb 16 10:01:29 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Paul Blakey X-Patchwork-Id: 1238721 X-Patchwork-Delegate: davem@davemloft.net Return-Path: X-Original-To: patchwork-incoming-netdev@ozlabs.org Delivered-To: patchwork-incoming-netdev@ozlabs.org Authentication-Results: ozlabs.org; spf=none (no SPF record) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=netdev-owner@vger.kernel.org; receiver=) Authentication-Results: ozlabs.org; dmarc=fail (p=none dis=none) header.from=mellanox.com Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 48L2hn5zhjz9sP7 for ; Sun, 16 Feb 2020 21:02:01 +1100 (AEDT) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727880AbgBPKBu (ORCPT ); Sun, 16 Feb 2020 05:01:50 -0500 Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:52981 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1727668AbgBPKBr (ORCPT ); Sun, 16 Feb 2020 05:01:47 -0500 Received: from Internal Mail-Server by MTLPINE2 (envelope-from paulb@mellanox.com) with ESMTPS (AES256-SHA encrypted); 16 Feb 2020 12:01:39 +0200 Received: from reg-r-vrt-019-120.mtr.labs.mlnx (reg-r-vrt-019-120.mtr.labs.mlnx [10.213.19.120]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id 01GA1ce5007834; Sun, 16 Feb 2020 12:01:39 +0200 From: Paul Blakey To: Paul Blakey , Saeed Mahameed , Oz Shlomo , Jakub Kicinski , Vlad Buslov , David Miller , "netdev@vger.kernel.org" , Jiri Pirko , Roi Dayan Subject: [PATCH net-next-mlx5 v3 09/16] net/mlx5e: Rx, Split rep rx mpwqe handler from nic Date: Sun, 16 Feb 2020 12:01:29 +0200 Message-Id: <1581847296-19194-10-git-send-email-paulb@mellanox.com> X-Mailer: git-send-email 1.8.4.3 In-Reply-To: <1581847296-19194-1-git-send-email-paulb@mellanox.com> References: <1581847296-19194-1-git-send-email-paulb@mellanox.com> Sender: netdev-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org Copy the current rep mpwqe rx handler which is also used by nic profile. In the next patch, we will add rep specific logic, just for the rep profile rx handler. 
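To make the indirection concrete, here is a minimal userspace C sketch (not driver code) of the profile rx-handler dispatch this split builds on: each profile picks its handlers through a function-pointer table, so the rep copy can grow rep-only steps without touching the nic path. All types and names below are illustrative stand-ins, not mlx5 definitions.

#include <stdio.h>

struct rq;

struct cqe {
	int id;
};

struct rx_handlers {
	void (*handle_rx_cqe_mpwqe)(struct rq *rq, struct cqe *cqe);
};

struct rq {
	const struct rx_handlers *h;
};

static void common_complete(struct cqe *cqe)
{
	printf("complete cqe %d\n", cqe->id);
}

/* nic flavor: only the shared completion path */
static void nic_mpwqe(struct rq *rq, struct cqe *cqe)
{
	(void)rq;
	common_complete(cqe);
}

/* rep flavor: starts as a copy of the nic one, so rep-only steps
 * (e.g. the chain restore added later in this series) can be
 * inserted here without touching the nic path
 */
static void rep_mpwqe(struct rq *rq, struct cqe *cqe)
{
	(void)rq;
	common_complete(cqe);
	printf("rep-only post-processing for cqe %d\n", cqe->id);
}

static const struct rx_handlers nic_profile = { .handle_rx_cqe_mpwqe = nic_mpwqe };
static const struct rx_handlers rep_profile = { .handle_rx_cqe_mpwqe = rep_mpwqe };

int main(void)
{
	struct cqe c = { .id = 7 };
	struct rq nic_rq = { .h = &nic_profile };
	struct rq rep_rq = { .h = &rep_profile };

	nic_rq.h->handle_rx_cqe_mpwqe(&nic_rq, &c);
	rep_rq.h->handle_rx_cqe_mpwqe(&rep_rq, &c);
	return 0;
}

Duplicating the handler first and diverging afterwards keeps the hot nic path free of rep-only branches, at the cost of some temporary code duplication.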
Signed-off-by: Paul Blakey Reviewed-by: Oz Shlomo Reviewed-by: Mark Bloch --- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 4 +- drivers/net/ethernet/mellanox/mlx5/core/en_rep.h | 2 + drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 54 ++++++++++++++++++++++++ 3 files changed, 58 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 7b48cca..ad426e0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -1921,7 +1921,7 @@ static unsigned int mlx5e_ul_rep_stats_grps_num(struct mlx5e_priv *priv) .update_rx = mlx5e_update_rep_rx, .update_stats = mlx5e_update_ndo_stats, .rx_handlers.handle_rx_cqe = mlx5e_handle_rx_cqe_rep, - .rx_handlers.handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq, + .rx_handlers.handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq_rep, .max_tc = 1, .rq_groups = MLX5E_NUM_RQ_GROUPS(REGULAR), .stats_grps = mlx5e_rep_stats_grps, @@ -1941,7 +1941,7 @@ static unsigned int mlx5e_ul_rep_stats_grps_num(struct mlx5e_priv *priv) .update_stats = mlx5e_update_ndo_stats, .update_carrier = mlx5e_update_carrier, .rx_handlers.handle_rx_cqe = mlx5e_handle_rx_cqe_rep, - .rx_handlers.handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq, + .rx_handlers.handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq_rep, .max_tc = MLX5E_MAX_NUM_TC, .rq_groups = MLX5E_NUM_RQ_GROUPS(REGULAR), .stats_grps = mlx5e_ul_rep_stats_grps, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h index 3f756d5..6ff7d90 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h @@ -191,6 +191,8 @@ struct mlx5e_rep_sq { void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv); void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); +void mlx5e_handle_rx_cqe_mpwrq_rep(struct mlx5e_rq *rq, + struct mlx5_cqe64 *cqe); int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 1c3ab69..454ce4c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -1232,6 +1232,60 @@ void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) wq_cyc_pop: mlx5_wq_cyc_pop(wq); } + +void mlx5e_handle_rx_cqe_mpwrq_rep(struct mlx5e_rq *rq, + struct mlx5_cqe64 *cqe) +{ + u16 cstrides = mpwrq_get_cqe_consumed_strides(cqe); + u16 wqe_id = be16_to_cpu(cqe->wqe_id); + struct mlx5e_mpw_info *wi = &rq->mpwqe.info[wqe_id]; + u16 stride_ix = mpwrq_get_cqe_stride_index(cqe); + u32 wqe_offset = stride_ix << rq->mpwqe.log_stride_sz; + u32 head_offset = wqe_offset & (PAGE_SIZE - 1); + u32 page_idx = wqe_offset >> PAGE_SHIFT; + struct mlx5e_rx_wqe_ll *wqe; + struct mlx5_wq_ll *wq; + struct sk_buff *skb; + u16 cqe_bcnt; + + wi->consumed_strides += cstrides; + + if (unlikely(MLX5E_RX_ERR_CQE(cqe))) { + trigger_report(rq, cqe); + rq->stats->wqe_err++; + goto mpwrq_cqe_out; + } + + if (unlikely(mpwrq_is_filler_cqe(cqe))) { + struct mlx5e_rq_stats *stats = rq->stats; + + stats->mpwqe_filler_cqes++; + stats->mpwqe_filler_strides += cstrides; + goto mpwrq_cqe_out; + } + + cqe_bcnt = mpwrq_get_cqe_byte_cnt(cqe); + + skb = INDIRECT_CALL_2(rq->mpwqe.skb_from_cqe_mpwrq, + mlx5e_skb_from_cqe_mpwrq_linear, + mlx5e_skb_from_cqe_mpwrq_nonlinear, + rq, wi, cqe_bcnt, 
head_offset, page_idx);
+	if (!skb)
+		goto mpwrq_cqe_out;
+
+	mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);
+
+	napi_gro_receive(rq->cq.napi, skb);
+
+mpwrq_cqe_out:
+	if (likely(wi->consumed_strides < rq->mpwqe.num_strides))
+		return;
+
+	wq = &rq->mpwqe.wq;
+	wqe = mlx5_wq_ll_get_wqe(wq, wqe_id);
+	mlx5e_free_rx_mpwqe(rq, wi, true);
+	mlx5_wq_ll_pop(wq, cqe->wqe_id, &wqe->next.next_wqe_index);
+}
 #endif
 
 struct sk_buff *

From patchwork Sun Feb 16 10:01:30 2020
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: Paul Blakey
X-Patchwork-Id: 1238715
X-Patchwork-Delegate: davem@davemloft.net
Return-Path:
X-Original-To: patchwork-incoming-netdev@ozlabs.org
Delivered-To: patchwork-incoming-netdev@ozlabs.org
Authentication-Results: ozlabs.org; spf=none (no SPF record) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=netdev-owner@vger.kernel.org; receiver=)
Authentication-Results: ozlabs.org; dmarc=fail (p=none dis=none) header.from=mellanox.com
Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 48L2hl2GVMz9sP7 for ; Sun, 16 Feb 2020 21:01:58 +1100 (AEDT)
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726666AbgBPKBo (ORCPT ); Sun, 16 Feb 2020 05:01:44 -0500
Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:44768 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1726116AbgBPKBn (ORCPT ); Sun, 16 Feb 2020 05:01:43 -0500
Received: from Internal Mail-Server by MTLPINE1 (envelope-from paulb@mellanox.com) with ESMTPS (AES256-SHA encrypted); 16 Feb 2020 12:01:39 +0200
Received: from reg-r-vrt-019-120.mtr.labs.mlnx (reg-r-vrt-019-120.mtr.labs.mlnx [10.213.19.120]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id 01GA1ce6007834; Sun, 16 Feb 2020 12:01:39 +0200
From: Paul Blakey
To: Paul Blakey , Saeed Mahameed , Oz Shlomo , Jakub Kicinski , Vlad Buslov , David Miller , "netdev@vger.kernel.org" , Jiri Pirko , Roi Dayan
Subject: [PATCH net-next-mlx5 v3 10/16] net/mlx5: E-Switch, Restore chain id on miss
Date: Sun, 16 Feb 2020 12:01:30 +0200
Message-Id: <1581847296-19194-11-git-send-email-paulb@mellanox.com>
X-Mailer: git-send-email 1.8.4.3
In-Reply-To: <1581847296-19194-1-git-send-email-paulb@mellanox.com>
References: <1581847296-19194-1-git-send-email-paulb@mellanox.com>
Sender: netdev-owner@vger.kernel.org
Precedence: bulk
List-ID:
X-Mailing-List: netdev@vger.kernel.org

Chain ids are mapped to the lower part of reg C, and after loopback are copied to the CQE via a restore rule's flow_tag. To let tc continue in the correct chain, we find the corresponding chain id in the eswitch chain id <-> reg C mapping, and set the SKB's tc extension chain to it. That tells tc to continue processing from the restored chain.
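To make the restore step concrete, the following standalone C sketch models the reverse lookup described above: take the tag found in the CQE, map it back to a chain id, and record that chain on the packet. The table, constants, and names are assumptions for illustration only; the real lookup goes through mlx5_eswitch_get_chain_for_tag() and the tc skb extension.

#include <stdbool.h>
#include <stdio.h>

#define DEFAULT_FLOW_TAG 0xFFFFFFu	/* stand-in for MLX5_FS_DEFAULT_FLOW_TAG */
#define NUM_TAGS 16

struct tc_ext {
	unsigned int chain;
};

/* toy mapping: index = tag, value = chain id (0 = unused slot) */
static const unsigned int tag_to_chain[NUM_TAGS] = {
	[3] = 2,	/* tag 3 was handed out for chain 2 */
};

static bool restore_chain(unsigned int reg_c0, struct tc_ext *ext)
{
	if (reg_c0 == DEFAULT_FLOW_TAG)
		reg_c0 = 0;
	if (!reg_c0)			/* unmarked: packet missed in chain 0 */
		return true;
	if (reg_c0 >= NUM_TAGS || !tag_to_chain[reg_c0])
		return false;		/* unknown tag: drop the packet */
	ext->chain = tag_to_chain[reg_c0];
	return true;
}

int main(void)
{
	struct tc_ext ext = { .chain = 0 };

	if (restore_chain(3, &ext))
		printf("continue in sw chain %u\n", ext.chain);
	return 0;
}

Note how an unmarked packet (tag 0) is accepted and simply continues in chain 0, while a marked packet with an unknown tag is rejected, mirroring the driver's behavior.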
Signed-off-by: Paul Blakey Reviewed-by: Roi Dayan Reviewed-by: Oz Shlomo Reviewed-by: Mark Bloch --- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 6 ++++ drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 43 +++++++++++++++++++++++++ drivers/net/ethernet/mellanox/mlx5/core/en_tc.h | 2 ++ 3 files changed, 51 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 454ce4c..e0abe79 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -1225,6 +1225,9 @@ void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) if (rep->vlan && skb_vlan_tag_present(skb)) skb_vlan_pop(skb); + if (!mlx5e_tc_rep_update_skb(cqe, skb)) + goto free_wqe; + napi_gro_receive(rq->cq.napi, skb); free_wqe: @@ -1275,6 +1278,9 @@ void mlx5e_handle_rx_cqe_mpwrq_rep(struct mlx5e_rq *rq, mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); + if (!mlx5e_tc_rep_update_skb(cqe, skb)) + goto mpwrq_cqe_out; + napi_gro_receive(rq->cq.napi, skb); mpwrq_cqe_out: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 2ee80f0..ac1ecf8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -4339,3 +4339,46 @@ void mlx5e_tc_reoffload_flows_work(struct work_struct *work) } mutex_unlock(&rpriv->unready_flows_lock); } + +bool mlx5e_tc_rep_update_skb(struct mlx5_cqe64 *cqe, + struct sk_buff *skb) +{ +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) + struct tc_skb_ext *tc_skb_ext; + struct mlx5_eswitch *esw; + struct mlx5e_priv *priv; + u32 chain = 0, reg_c0; + int err; + + reg_c0 = (be32_to_cpu(cqe->sop_drop_qpn) & MLX5E_TC_FLOW_ID_MASK); + if (reg_c0 == MLX5_FS_DEFAULT_FLOW_TAG) + reg_c0 = 0; + + if (!reg_c0) + return true; + + priv = netdev_priv(skb->dev); + esw = priv->mdev->priv.eswitch; + + err = mlx5_eswitch_get_chain_for_tag(esw, reg_c0, &chain); + if (err) { + netdev_dbg(priv->netdev, + "Couldn't find chain for chain tag: %d, err: %d\n", + reg_c0, err); + return false; + } + + if (!chain) + return true; + + tc_skb_ext = skb_ext_add(skb, TC_SKB_EXT); + if (!tc_skb_ext) { + WARN_ON_ONCE(1); + return false; + } + + tc_skb_ext->chain = chain; +#endif /* CONFIG_NET_TC_SKB_EXT */ + + return true; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h index e2dbbae..9d5fcf6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h @@ -106,6 +106,8 @@ struct mlx5e_tc_attr_to_reg_mapping { bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv, struct net_device *out_dev); +bool mlx5e_tc_rep_update_skb(struct mlx5_cqe64 *cqe, struct sk_buff *skb); + #else /* CONFIG_MLX5_ESWITCH */ static inline int mlx5e_tc_nic_init(struct mlx5e_priv *priv) { return 0; } static inline void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) {} From patchwork Sun Feb 16 10:01:31 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Paul Blakey X-Patchwork-Id: 1238727 X-Patchwork-Delegate: davem@davemloft.net Return-Path: X-Original-To: patchwork-incoming-netdev@ozlabs.org Delivered-To: patchwork-incoming-netdev@ozlabs.org Authentication-Results: ozlabs.org; spf=none (no SPF record) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=netdev-owner@vger.kernel.org; receiver=) 
Authentication-Results: ozlabs.org; dmarc=fail (p=none dis=none) header.from=mellanox.com
Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 48L2ht0RH6z9sP7 for ; Sun, 16 Feb 2020 21:02:06 +1100 (AEDT)
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727935AbgBPKCE (ORCPT ); Sun, 16 Feb 2020 05:02:04 -0500
Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:52999 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1727720AbgBPKBr (ORCPT ); Sun, 16 Feb 2020 05:01:47 -0500
Received: from Internal Mail-Server by MTLPINE2 (envelope-from paulb@mellanox.com) with ESMTPS (AES256-SHA encrypted); 16 Feb 2020 12:01:39 +0200
Received: from reg-r-vrt-019-120.mtr.labs.mlnx (reg-r-vrt-019-120.mtr.labs.mlnx [10.213.19.120]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id 01GA1ce7007834; Sun, 16 Feb 2020 12:01:39 +0200
From: Paul Blakey
To: Paul Blakey , Saeed Mahameed , Oz Shlomo , Jakub Kicinski , Vlad Buslov , David Miller , "netdev@vger.kernel.org" , Jiri Pirko , Roi Dayan
Subject: [PATCH net-next-mlx5 v3 11/16] net/mlx5e: Allow re-allocating mod header actions
Date: Sun, 16 Feb 2020 12:01:31 +0200
Message-Id: <1581847296-19194-12-git-send-email-paulb@mellanox.com>
X-Mailer: git-send-email 1.8.4.3
In-Reply-To: <1581847296-19194-1-git-send-email-paulb@mellanox.com>
References: <1581847296-19194-1-git-send-email-paulb@mellanox.com>
Sender: netdev-owner@vger.kernel.org
Precedence: bulk
List-ID:
X-Mailing-List: netdev@vger.kernel.org

Currently, the size of the mod header actions array is deduced from the number of parsed TC header rewrite actions. However, mod header actions are also used for setting HW register values. Support dynamic reallocation of the mod header array as a pre-step for adding HW register mod actions.
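The reallocation scheme this patch introduces is a grow-by-doubling array capped by what the hardware namespace supports. Below is a minimal userspace sketch of that policy, with made-up sizes and a plain realloc() standing in for krealloc(); it mirrors, but is not, alloc_mod_hdr_actions().

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define ACTION_SZ      8	/* assumed per-action size */
#define MAX_HW_ACTIONS 16	/* assumed namespace cap */

struct mod_hdr_acts {
	int num_actions;
	int max_actions;
	void *actions;
};

static int grow_if_full(struct mod_hdr_acts *acts)
{
	size_t new_sz, old_sz;
	int new_max;
	void *p;

	if (acts->num_actions < acts->max_actions)
		return 0;			/* still room */

	/* start at 1, then double, clamped to the hardware cap */
	new_max = acts->actions ? acts->max_actions * 2 : 1;
	if (new_max > MAX_HW_ACTIONS)
		new_max = MAX_HW_ACTIONS;
	if (new_max == acts->max_actions)
		return -1;			/* cap reached (-ENOSPC) */

	new_sz = (size_t)new_max * ACTION_SZ;
	old_sz = (size_t)acts->max_actions * ACTION_SZ;
	p = realloc(acts->actions, new_sz);
	if (!p)
		return -1;			/* -ENOMEM */
	memset((char *)p + old_sz, 0, new_sz - old_sz);	/* zero the new tail */
	acts->actions = p;
	acts->max_actions = new_max;
	return 0;
}

int main(void)
{
	struct mod_hdr_acts acts = { 0, 0, NULL };
	int i;

	for (i = 0; i < 20 && !grow_if_full(&acts); i++)
		acts.num_actions++;		/* "emit" one action */
	printf("emitted %d actions (cap %d)\n", acts.num_actions, acts.max_actions);
	free(acts.actions);
	return 0;
}

Growing on demand removes the need to precompute a worst-case size from the pedit keys, which is what makes it possible to append register-set actions later.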
Signed-off-by: Paul Blakey Reviewed-by: Roi Dayan Reviewed-by: Oz Shlomo Reviewed-by: Mark Bloch --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 120 +++++++++++++----------- drivers/net/ethernet/mellanox/mlx5/core/en_tc.h | 11 +++ 2 files changed, 76 insertions(+), 55 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index ac1ecf8..d844c05 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -144,9 +144,7 @@ struct mlx5e_tc_flow_parse_attr { const struct ip_tunnel_info *tun_info[MLX5_MAX_FLOW_FWD_VPORTS]; struct net_device *filter_dev; struct mlx5_flow_spec spec; - int num_mod_hdr_actions; - int max_mod_hdr_actions; - void *mod_hdr_actions; + struct mlx5e_tc_mod_hdr_acts mod_hdr_acts; int mirred_ifindex[MLX5_MAX_FLOW_FWD_VPORTS]; }; @@ -369,10 +367,10 @@ static int mlx5e_attach_mod_hdr(struct mlx5e_priv *priv, struct mod_hdr_key key; u32 hash_key; - num_actions = parse_attr->num_mod_hdr_actions; + num_actions = parse_attr->mod_hdr_acts.num_actions; actions_size = MLX5_MH_ACT_SZ * num_actions; - key.actions = parse_attr->mod_hdr_actions; + key.actions = parse_attr->mod_hdr_acts.actions; key.num_actions = num_actions; hash_key = hash_mod_hdr_info(&key); @@ -962,7 +960,7 @@ static void mlx5e_hairpin_flow_del(struct mlx5e_priv *priv, if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { err = mlx5e_attach_mod_hdr(priv, flow, parse_attr); flow_act.modify_hdr = attr->modify_hdr; - kfree(parse_attr->mod_hdr_actions); + dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts); if (err) return err; } @@ -1228,7 +1226,7 @@ static void remove_unready_flow(struct mlx5e_tc_flow *flow) if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { err = mlx5e_attach_mod_hdr(priv, flow, parse_attr); - kfree(parse_attr->mod_hdr_actions); + dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts); if (err) return err; } @@ -2391,25 +2389,26 @@ static bool cmp_val_mask(void *valp, void *maskp, void *matchvalp, OFFLOAD(UDP_DPORT, 16, U16_MAX, udp.dest, 0, udp_dport), }; -/* On input attr->max_mod_hdr_actions tells how many HW actions can be parsed at - * max from the SW pedit action. On success, attr->num_mod_hdr_actions - * says how many HW actions were actually parsed. 
- */ -static int offload_pedit_fields(struct pedit_headers_action *hdrs, +static int offload_pedit_fields(struct mlx5e_priv *priv, + int namespace, + struct pedit_headers_action *hdrs, struct mlx5e_tc_flow_parse_attr *parse_attr, u32 *action_flags, struct netlink_ext_ack *extack) { struct pedit_headers *set_masks, *add_masks, *set_vals, *add_vals; - int i, action_size, nactions, max_actions, first, last, next_z; + int i, action_size, first, last, next_z; void *headers_c, *headers_v, *action, *vals_p; u32 *s_masks_p, *a_masks_p, s_mask, a_mask; + struct mlx5e_tc_mod_hdr_acts *mod_acts; struct mlx5_fields *f; unsigned long mask; __be32 mask_be32; __be16 mask_be16; + int err; u8 cmd; + mod_acts = &parse_attr->mod_hdr_acts; headers_c = get_match_headers_criteria(*action_flags, &parse_attr->spec); headers_v = get_match_headers_value(*action_flags, &parse_attr->spec); @@ -2419,11 +2418,6 @@ static int offload_pedit_fields(struct pedit_headers_action *hdrs, add_vals = &hdrs[1].vals; action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto); - action = parse_attr->mod_hdr_actions + - parse_attr->num_mod_hdr_actions * action_size; - - max_actions = parse_attr->max_mod_hdr_actions; - nactions = parse_attr->num_mod_hdr_actions; for (i = 0; i < ARRAY_SIZE(fields); i++) { bool skip; @@ -2449,13 +2443,6 @@ static int offload_pedit_fields(struct pedit_headers_action *hdrs, return -EOPNOTSUPP; } - if (nactions == max_actions) { - NL_SET_ERR_MSG_MOD(extack, - "too many pedit actions, can't offload"); - printk(KERN_WARNING "mlx5: parsed %d pedit actions, can't do more\n", nactions); - return -EOPNOTSUPP; - } - skip = false; if (s_mask) { void *match_mask = headers_c + f->match_offset; @@ -2502,6 +2489,18 @@ static int offload_pedit_fields(struct pedit_headers_action *hdrs, return -EOPNOTSUPP; } + err = alloc_mod_hdr_actions(priv->mdev, namespace, mod_acts); + if (err) { + NL_SET_ERR_MSG_MOD(extack, + "too many pedit actions, can't offload"); + mlx5_core_warn(priv->mdev, + "mlx5: parsed %d pedit actions, can't do more\n", + mod_acts->num_actions); + return err; + } + + action = mod_acts->actions + + (mod_acts->num_actions * action_size); MLX5_SET(set_action_in, action, action_type, cmd); MLX5_SET(set_action_in, action, field, f->field); @@ -2524,11 +2523,9 @@ static int offload_pedit_fields(struct pedit_headers_action *hdrs, else if (f->field_bsize == 8) MLX5_SET(set_action_in, action, data, *(u8 *)vals_p >> first); - action += action_size; - nactions++; + ++mod_acts->num_actions; } - parse_attr->num_mod_hdr_actions = nactions; return 0; } @@ -2541,29 +2538,48 @@ static int mlx5e_flow_namespace_max_modify_action(struct mlx5_core_dev *mdev, return MLX5_CAP_FLOWTABLE_NIC_RX(mdev, max_modify_header_actions); } -static int alloc_mod_hdr_actions(struct mlx5e_priv *priv, - struct pedit_headers_action *hdrs, - int namespace, - struct mlx5e_tc_flow_parse_attr *parse_attr) +int alloc_mod_hdr_actions(struct mlx5_core_dev *mdev, + int namespace, + struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts) { - int nkeys, action_size, max_actions; + int action_size, new_num_actions, max_hw_actions; + size_t new_sz, old_sz; + void *ret; - nkeys = hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits + - hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits; - action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto); + if (mod_hdr_acts->num_actions < mod_hdr_acts->max_actions) + return 0; - max_actions = mlx5e_flow_namespace_max_modify_action(priv->mdev, namespace); - /* can get up to crazingly 16 HW actions in 32 bits pedit SW key */ - max_actions = 
min(max_actions, nkeys * 16); + action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto); - parse_attr->mod_hdr_actions = kcalloc(max_actions, action_size, GFP_KERNEL); - if (!parse_attr->mod_hdr_actions) + max_hw_actions = mlx5e_flow_namespace_max_modify_action(mdev, + namespace); + new_num_actions = min(max_hw_actions, + mod_hdr_acts->actions ? + mod_hdr_acts->max_actions * 2 : 1); + if (mod_hdr_acts->max_actions == new_num_actions) + return -ENOSPC; + + new_sz = action_size * new_num_actions; + old_sz = mod_hdr_acts->max_actions * action_size; + ret = krealloc(mod_hdr_acts->actions, new_sz, GFP_KERNEL); + if (!ret) return -ENOMEM; - parse_attr->max_mod_hdr_actions = max_actions; + memset(ret + old_sz, 0, new_sz - old_sz); + mod_hdr_acts->actions = ret; + mod_hdr_acts->max_actions = new_num_actions; + return 0; } +void dealloc_mod_hdr_actions(struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts) +{ + kfree(mod_hdr_acts->actions); + mod_hdr_acts->actions = NULL; + mod_hdr_acts->num_actions = 0; + mod_hdr_acts->max_actions = 0; +} + static const struct pedit_headers zero_masks = {}; static int parse_tc_pedit_action(struct mlx5e_priv *priv, @@ -2616,13 +2632,8 @@ static int alloc_tc_pedit_action(struct mlx5e_priv *priv, int namespace, int err; u8 cmd; - if (!parse_attr->mod_hdr_actions) { - err = alloc_mod_hdr_actions(priv, hdrs, namespace, parse_attr); - if (err) - goto out_err; - } - - err = offload_pedit_fields(hdrs, parse_attr, action_flags, extack); + err = offload_pedit_fields(priv, namespace, hdrs, parse_attr, + action_flags, extack); if (err < 0) goto out_dealloc_parsed_actions; @@ -2642,8 +2653,7 @@ static int alloc_tc_pedit_action(struct mlx5e_priv *priv, int namespace, return 0; out_dealloc_parsed_actions: - kfree(parse_attr->mod_hdr_actions); -out_err: + dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts); return err; } @@ -2976,9 +2986,9 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, /* in case all pedit actions are skipped, remove the MOD_HDR * flag. */ - if (parse_attr->num_mod_hdr_actions == 0) { + if (parse_attr->mod_hdr_acts.num_actions == 0) { action &= ~MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; - kfree(parse_attr->mod_hdr_actions); + dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts); } } @@ -3564,9 +3574,9 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, * flag. we might have set split_count either by pedit or * pop/push. if there is no pop/push either, reset it too. 
*/ - if (parse_attr->num_mod_hdr_actions == 0) { + if (parse_attr->mod_hdr_acts.num_actions == 0) { action &= ~MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; - kfree(parse_attr->mod_hdr_actions); + dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts); if (!((action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) || (action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH))) attr->split_count = 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h index 9d5fcf6..3848ec7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h @@ -108,6 +108,17 @@ bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv, bool mlx5e_tc_rep_update_skb(struct mlx5_cqe64 *cqe, struct sk_buff *skb); +struct mlx5e_tc_mod_hdr_acts { + int num_actions; + int max_actions; + void *actions; +}; + +int alloc_mod_hdr_actions(struct mlx5_core_dev *mdev, + int namespace, + struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts); +void dealloc_mod_hdr_actions(struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts); + #else /* CONFIG_MLX5_ESWITCH */ static inline int mlx5e_tc_nic_init(struct mlx5e_priv *priv) { return 0; } static inline void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) {} From patchwork Sun Feb 16 10:01:32 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Paul Blakey X-Patchwork-Id: 1238717 X-Patchwork-Delegate: davem@davemloft.net Return-Path: X-Original-To: patchwork-incoming-netdev@ozlabs.org Delivered-To: patchwork-incoming-netdev@ozlabs.org Authentication-Results: ozlabs.org; spf=none (no SPF record) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=netdev-owner@vger.kernel.org; receiver=) Authentication-Results: ozlabs.org; dmarc=fail (p=none dis=none) header.from=mellanox.com Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 48L2hm0zrqz9sSG for ; Sun, 16 Feb 2020 21:02:00 +1100 (AEDT) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727640AbgBPKBp (ORCPT ); Sun, 16 Feb 2020 05:01:45 -0500 Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:44776 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1725951AbgBPKBo (ORCPT ); Sun, 16 Feb 2020 05:01:44 -0500 Received: from Internal Mail-Server by MTLPINE1 (envelope-from paulb@mellanox.com) with ESMTPS (AES256-SHA encrypted); 16 Feb 2020 12:01:39 +0200 Received: from reg-r-vrt-019-120.mtr.labs.mlnx (reg-r-vrt-019-120.mtr.labs.mlnx [10.213.19.120]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id 01GA1ce8007834; Sun, 16 Feb 2020 12:01:39 +0200 From: Paul Blakey To: Paul Blakey , Saeed Mahameed , Oz Shlomo , Jakub Kicinski , Vlad Buslov , David Miller , "netdev@vger.kernel.org" , Jiri Pirko , Roi Dayan Subject: [PATCH net-next-mlx5 v3 12/16] net/mlx5e: Move tc tunnel parsing logic with the rest at tc_tun module Date: Sun, 16 Feb 2020 12:01:32 +0200 Message-Id: <1581847296-19194-13-git-send-email-paulb@mellanox.com> X-Mailer: git-send-email 1.8.4.3 In-Reply-To: <1581847296-19194-1-git-send-email-paulb@mellanox.com> References: <1581847296-19194-1-git-send-email-paulb@mellanox.com> Sender: netdev-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org Currently, tunnel parsing is split between en_tc and tc_tun. The next patch will replace the tunnel fields matching with a register match, and will not need this parsing. 
Move the tunnel parsing logic to tc_tun as a pre-step for skipping it in the next patch. Signed-off-by: Paul Blakey Reviewed-by: Roi Dayan Reviewed-by: Oz Shlomo Reviewed-by: Mark Bloch --- .../net/ethernet/mellanox/mlx5/core/en/tc_tun.c | 112 ++++++++++++++++++++- .../net/ethernet/mellanox/mlx5/core/en/tc_tun.h | 3 +- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 109 +------------------- 3 files changed, 112 insertions(+), 112 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c index af4ebd2..608d0e07c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c @@ -469,10 +469,15 @@ int mlx5e_tc_tun_parse(struct net_device *filter_dev, struct mlx5e_priv *priv, struct mlx5_flow_spec *spec, struct flow_cls_offload *f, - void *headers_c, - void *headers_v, u8 *match_level) + u8 *match_level) { struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(filter_dev); + struct flow_rule *rule = flow_cls_offload_flow_rule(f); + void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + outer_headers); + void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers); + struct netlink_ext_ack *extack = f->common.extack; int err = 0; if (!tunnel) { @@ -499,6 +504,109 @@ int mlx5e_tc_tun_parse(struct net_device *filter_dev, goto out; } + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL)) { + struct flow_match_control match; + u16 addr_type; + + flow_rule_match_enc_control(rule, &match); + addr_type = match.key->addr_type; + + /* For tunnel addr_type used same key id`s as for non-tunnel */ + if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { + struct flow_match_ipv4_addrs match; + + flow_rule_match_enc_ipv4_addrs(rule, &match); + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + src_ipv4_src_ipv6.ipv4_layout.ipv4, + ntohl(match.mask->src)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + src_ipv4_src_ipv6.ipv4_layout.ipv4, + ntohl(match.key->src)); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4, + ntohl(match.mask->dst)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4, + ntohl(match.key->dst)); + + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, + ethertype); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, + ETH_P_IP); + } else if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { + struct flow_match_ipv6_addrs match; + + flow_rule_match_enc_ipv6_addrs(rule, &match); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + src_ipv4_src_ipv6.ipv6_layout.ipv6), + &match.mask->src, MLX5_FLD_SZ_BYTES(ipv6_layout, + ipv6)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + src_ipv4_src_ipv6.ipv6_layout.ipv6), + &match.key->src, MLX5_FLD_SZ_BYTES(ipv6_layout, + ipv6)); + + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + &match.mask->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, + ipv6)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + &match.key->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, + ipv6)); + + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, + ethertype); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, + ETH_P_IPV6); + } + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IP)) { + struct flow_match_ip match; + + flow_rule_match_enc_ip(rule, &match); + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn, + match.mask->tos & 0x3); + 
MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn, + match.key->tos & 0x3); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp, + match.mask->tos >> 2); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp, + match.key->tos >> 2); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit, + match.mask->ttl); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit, + match.key->ttl); + + if (match.mask->ttl && + !MLX5_CAP_ESW_FLOWTABLE_FDB + (priv->mdev, + ft_field_support.outer_ipv4_ttl)) { + NL_SET_ERR_MSG_MOD(extack, + "Matching on TTL is not supported"); + err = -EOPNOTSUPP; + goto out; + } + } + + /* Enforce DMAC when offloading incoming tunneled flows. + * Flow counters require a match on the DMAC. + */ + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, dmac_47_16); + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, dmac_15_0); + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + dmac_47_16), priv->netdev->dev_addr); + + /* let software handle IP fragments */ + MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag, 0); + + return 0; + out: return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h index 6f9a78c..1630f0e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h @@ -76,8 +76,7 @@ int mlx5e_tc_tun_parse(struct net_device *filter_dev, struct mlx5e_priv *priv, struct mlx5_flow_spec *spec, struct flow_cls_offload *f, - void *headers_c, - void *headers_v, u8 *match_level); + u8 *match_level); int mlx5e_tc_tun_parse_udp_ports(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index d844c05..1ddb360 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1677,122 +1677,15 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv, struct net_device *filter_dev, u8 *match_level) { struct netlink_ext_ack *extack = f->common.extack; - void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, - outer_headers); - void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, - outer_headers); - struct flow_rule *rule = flow_cls_offload_flow_rule(f); int err; - err = mlx5e_tc_tun_parse(filter_dev, priv, spec, f, - headers_c, headers_v, match_level); + err = mlx5e_tc_tun_parse(filter_dev, priv, spec, f, match_level); if (err) { NL_SET_ERR_MSG_MOD(extack, "failed to parse tunnel attributes"); return err; } - if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL)) { - struct flow_match_control match; - u16 addr_type; - - flow_rule_match_enc_control(rule, &match); - addr_type = match.key->addr_type; - - /* For tunnel addr_type used same key id`s as for non-tunnel */ - if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { - struct flow_match_ipv4_addrs match; - - flow_rule_match_enc_ipv4_addrs(rule, &match); - MLX5_SET(fte_match_set_lyr_2_4, headers_c, - src_ipv4_src_ipv6.ipv4_layout.ipv4, - ntohl(match.mask->src)); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, - src_ipv4_src_ipv6.ipv4_layout.ipv4, - ntohl(match.key->src)); - - MLX5_SET(fte_match_set_lyr_2_4, headers_c, - dst_ipv4_dst_ipv6.ipv4_layout.ipv4, - ntohl(match.mask->dst)); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, - dst_ipv4_dst_ipv6.ipv4_layout.ipv4, - ntohl(match.key->dst)); - - MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, 
headers_c, - ethertype); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, - ETH_P_IP); - } else if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { - struct flow_match_ipv6_addrs match; - - flow_rule_match_enc_ipv6_addrs(rule, &match); - memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, - src_ipv4_src_ipv6.ipv6_layout.ipv6), - &match.mask->src, MLX5_FLD_SZ_BYTES(ipv6_layout, - ipv6)); - memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, - src_ipv4_src_ipv6.ipv6_layout.ipv6), - &match.key->src, MLX5_FLD_SZ_BYTES(ipv6_layout, - ipv6)); - - memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, - dst_ipv4_dst_ipv6.ipv6_layout.ipv6), - &match.mask->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, - ipv6)); - memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, - dst_ipv4_dst_ipv6.ipv6_layout.ipv6), - &match.key->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, - ipv6)); - - MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, - ethertype); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, - ETH_P_IPV6); - } - } - - if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IP)) { - struct flow_match_ip match; - - flow_rule_match_enc_ip(rule, &match); - MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn, - match.mask->tos & 0x3); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn, - match.key->tos & 0x3); - - MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp, - match.mask->tos >> 2); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp, - match.key->tos >> 2); - - MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit, - match.mask->ttl); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit, - match.key->ttl); - - if (match.mask->ttl && - !MLX5_CAP_ESW_FLOWTABLE_FDB - (priv->mdev, - ft_field_support.outer_ipv4_ttl)) { - NL_SET_ERR_MSG_MOD(extack, - "Matching on TTL is not supported"); - return -EOPNOTSUPP; - } - - } - - /* Enforce DMAC when offloading incoming tunneled flows. - * Flow counters require a match on the DMAC. 
- */ - MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, dmac_47_16); - MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, dmac_15_0); - ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, - dmac_47_16), priv->netdev->dev_addr); - - /* let software handle IP fragments */ - MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag, 0); - return 0; } From patchwork Sun Feb 16 10:01:33 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Paul Blakey X-Patchwork-Id: 1238726 X-Patchwork-Delegate: davem@davemloft.net Return-Path: X-Original-To: patchwork-incoming-netdev@ozlabs.org Delivered-To: patchwork-incoming-netdev@ozlabs.org Authentication-Results: ozlabs.org; spf=none (no SPF record) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=netdev-owner@vger.kernel.org; receiver=) Authentication-Results: ozlabs.org; dmarc=fail (p=none dis=none) header.from=mellanox.com Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 48L2hr0Ylhz9sSG for ; Sun, 16 Feb 2020 21:02:04 +1100 (AEDT) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727833AbgBPKBs (ORCPT ); Sun, 16 Feb 2020 05:01:48 -0500 Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:52982 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1725951AbgBPKBq (ORCPT ); Sun, 16 Feb 2020 05:01:46 -0500 Received: from Internal Mail-Server by MTLPINE2 (envelope-from paulb@mellanox.com) with ESMTPS (AES256-SHA encrypted); 16 Feb 2020 12:01:40 +0200 Received: from reg-r-vrt-019-120.mtr.labs.mlnx (reg-r-vrt-019-120.mtr.labs.mlnx [10.213.19.120]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id 01GA1ce9007834; Sun, 16 Feb 2020 12:01:39 +0200 From: Paul Blakey To: Paul Blakey , Saeed Mahameed , Oz Shlomo , Jakub Kicinski , Vlad Buslov , David Miller , "netdev@vger.kernel.org" , Jiri Pirko , Roi Dayan Subject: [PATCH net-next-mlx5 v3 13/16] net/mlx5e: Disallow inserting vxlan/vlan egress rules without decap/pop Date: Sun, 16 Feb 2020 12:01:33 +0200 Message-Id: <1581847296-19194-14-git-send-email-paulb@mellanox.com> X-Mailer: git-send-email 1.8.4.3 In-Reply-To: <1581847296-19194-1-git-send-email-paulb@mellanox.com> References: <1581847296-19194-1-git-send-email-paulb@mellanox.com> Sender: netdev-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org Currently, rules on tunnel devices can be offloaded without decap action when a vlan pop action exists. Similarly, the driver will offload rules on vlan interfaces with no pop action when a decap action exists. Disallow the faulty behavior by checking that vlan egress rules do pop or drop and vxlan egress rules do decap, as intended. 
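The intended check reduces to a small predicate: an egress rule must drop, or decap on a tunnel device, or pop on a vlan device. A standalone C sketch of that predicate, with the device-type tests stubbed out as booleans and illustrative action flags (not the mlx5 definitions), matching the hunk below:

#include <stdbool.h>
#include <stdio.h>

#define ACT_DROP  (1u << 0)
#define ACT_DECAP (1u << 1)
#define ACT_POP   (1u << 2)

static bool egress_actions_supported(bool is_tun_dev, bool is_vlan_dev,
				     unsigned int actions)
{
	if (actions & ACT_DROP)
		return true;		/* drop is always acceptable */
	if (is_tun_dev)
		return actions & ACT_DECAP;	/* vxlan egress must decap */
	if (is_vlan_dev)
		return actions & ACT_POP;	/* vlan egress must pop */
	return false;			/* unexpected egress device */
}

int main(void)
{
	printf("%d\n", egress_actions_supported(true, false, ACT_DECAP));	/* 1 */
	printf("%d\n", egress_actions_supported(false, true, 0));		/* 0 */
	return 0;
}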
Signed-off-by: Paul Blakey
Reviewed-by: Oz Shlomo
Reviewed-by: Mark Bloch
---
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 23 ++++++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 1ddb360..17dba59 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -2675,6 +2675,8 @@ static bool actions_match_supported(struct mlx5e_priv *priv,
 				    struct mlx5e_tc_flow *flow,
 				    struct netlink_ext_ack *extack)
 {
+	struct net_device *filter_dev = parse_attr->filter_dev;
+	bool drop_action, decap_action, pop_action;
 	u32 actions;
 
 	if (mlx5e_is_eswitch_flow(flow))
@@ -2682,11 +2684,22 @@ static bool actions_match_supported(struct mlx5e_priv *priv,
 	else
 		actions = flow->nic_attr->action;
 
-	if (flow_flag_test(flow, EGRESS) &&
-	    !((actions & MLX5_FLOW_CONTEXT_ACTION_DECAP) ||
-	      (actions & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) ||
-	      (actions & MLX5_FLOW_CONTEXT_ACTION_DROP)))
-		return false;
+	drop_action = actions & MLX5_FLOW_CONTEXT_ACTION_DROP;
+	decap_action = actions & MLX5_FLOW_CONTEXT_ACTION_DECAP;
+	pop_action = actions & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
+
+	if (flow_flag_test(flow, EGRESS) && !drop_action) {
+		/* If no drop, we must decap (vxlan) or pop (vlan) */
+		if (mlx5e_get_tc_tun(filter_dev)) {
+			if (!decap_action)
+				return false;
+		} else if (is_vlan_dev(filter_dev)) {
+			if (!pop_action)
+				return false;
+		} else {
+			return false; /* Sanity: unexpected egress device */
+		}
+	}
 
 	if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
 		return modify_header_match_supported(&parse_attr->spec,

From patchwork Sun Feb 16 10:01:34 2020
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: Paul Blakey
X-Patchwork-Id: 1238725
X-Patchwork-Delegate: davem@davemloft.net
Return-Path:
X-Original-To: patchwork-incoming-netdev@ozlabs.org
Delivered-To: patchwork-incoming-netdev@ozlabs.org
Authentication-Results: ozlabs.org; spf=none (no SPF record) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=netdev-owner@vger.kernel.org; receiver=)
Authentication-Results: ozlabs.org; dmarc=fail (p=none dis=none) header.from=mellanox.com
Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 48L2hq4Z8fz9sRh for ; Sun, 16 Feb 2020 21:02:03 +1100 (AEDT)
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727921AbgBPKCA (ORCPT ); Sun, 16 Feb 2020 05:02:00 -0500
Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:44745 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1726802AbgBPKBs (ORCPT ); Sun, 16 Feb 2020 05:01:48 -0500
Received: from Internal Mail-Server by MTLPINE1 (envelope-from paulb@mellanox.com) with ESMTPS (AES256-SHA encrypted); 16 Feb 2020 12:01:40 +0200
Received: from reg-r-vrt-019-120.mtr.labs.mlnx (reg-r-vrt-019-120.mtr.labs.mlnx [10.213.19.120]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id 01GA1ceA007834; Sun, 16 Feb 2020 12:01:40 +0200
From: Paul Blakey
To: Paul Blakey , Saeed Mahameed , Oz Shlomo , Jakub Kicinski , Vlad Buslov , David Miller , "netdev@vger.kernel.org" , Jiri Pirko , Roi Dayan
Subject: [PATCH net-next-mlx5 v3 14/16] net/mlx5e: Support inner header rewrite with goto action
Date: Sun, 16 Feb 2020 12:01:34 +0200
Message-Id: <1581847296-19194-15-git-send-email-paulb@mellanox.com>
X-Mailer: git-send-email 1.8.4.3
In-Reply-To:
<1581847296-19194-1-git-send-email-paulb@mellanox.com>
References: <1581847296-19194-1-git-send-email-paulb@mellanox.com>
Sender: netdev-owner@vger.kernel.org
Precedence: bulk
List-ID:
X-Mailing-List: netdev@vger.kernel.org

The hardware supports header rewrite of outer headers only. To perform header rewrite on inner headers, we must first decapsulate the packet.

Currently, the hardware decap action is explicitly set by the tc tunnel_key unset action. However, with the goto action the user won't use the tunnel_key unset action. In addition, header rewrite actions will not apply to the inner header, as they would in the software model.

To support this, we will map each set of tunnel matches seen on a tc rule to a unique tunnel id, implicitly add a decap action on tc chain 0 flows, and mark the packets with this unique tunnel id. Tunnel matches on the decapsulated tunnel on later chains will match on this unique id instead of on the actual packet headers. We will also use this mapping to restore the tunnel info metadata on miss.

Signed-off-by: Paul Blakey
Reviewed-by: Roi Dayan
Reviewed-by: Oz Shlomo
Reviewed-by: Mark Bloch
---
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.h |   5 +
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c  | 472 ++++++++++++++++++++---
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.h  |  13 +
 3 files changed, 445 insertions(+), 45 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
index 6ff7d90..4cdb36e5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
@@ -81,6 +81,11 @@ struct mlx5_rep_uplink_priv {
 	struct mutex unready_flows_lock;
 	struct list_head unready_flows;
 	struct work_struct reoffload_flows_work;
+
+	/* maps tun_info to a unique id*/
+	struct mapping_ctx *tunnel_mapping;
+	/* maps tun_enc_opts to a unique id*/
+	struct mapping_ctx *tunnel_enc_opts_mapping;
 };
 
 struct mlx5e_rep_priv {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 17dba59..3f1b812 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -55,10 +55,13 @@
 #include "fs_core.h"
 #include "en/port.h"
 #include "en/tc_tun.h"
+#include "en/mapping.h"
 #include "lib/devcom.h"
 #include "lib/geneve.h"
 #include "diag/en_tc_tracepoint.h"
 
+#define MLX5_MH_ACT_SZ MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto)
+
 struct mlx5_nic_flow_attr {
 	u32 action;
 	u32 flow_tag;
@@ -134,6 +137,8 @@ struct mlx5e_tc_flow {
 	refcount_t refcnt;
 	struct rcu_head rcu_head;
 	struct completion init_done;
+	int tunnel_id; /* the mapped tunnel id of this flow */
+
 	union {
 		struct mlx5_esw_flow_attr esw_attr[0];
 		struct mlx5_nic_flow_attr nic_attr[0];
@@ -151,14 +156,105 @@ struct mlx5e_tc_flow_parse_attr {
 #define MLX5E_TC_TABLE_NUM_GROUPS 4
 #define MLX5E_TC_TABLE_MAX_GROUP_SIZE BIT(16)
 
+struct tunnel_match_key {
+	struct flow_dissector_key_control enc_control;
+	struct flow_dissector_key_keyid enc_key_id;
+	struct flow_dissector_key_ports enc_tp;
+	struct flow_dissector_key_ip enc_ip;
+	union {
+		struct flow_dissector_key_ipv4_addrs enc_ipv4;
+		struct flow_dissector_key_ipv6_addrs enc_ipv6;
+	};
+
+	int filter_ifindex;
+};
+
+/* Tunnel_id mapping is TUNNEL_INFO_BITS + ENC_OPTS_BITS.
+ * Upper TUNNEL_INFO_BITS for general tunnel info.
+ * Lower ENC_OPTS_BITS bits for enc_opts.
+ */ +#define TUNNEL_INFO_BITS 6 +#define TUNNEL_INFO_BITS_MASK GENMASK(TUNNEL_INFO_BITS - 1, 0) +#define ENC_OPTS_BITS 2 +#define ENC_OPTS_BITS_MASK GENMASK(ENC_OPTS_BITS - 1, 0) +#define TUNNEL_ID_BITS (TUNNEL_INFO_BITS + ENC_OPTS_BITS) +#define TUNNEL_ID_MASK GENMASK(TUNNEL_ID_BITS - 1, 0) + struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[] = { [CHAIN_TO_REG] = { .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_0, .moffset = 0, .mlen = 2, }, + [TUNNEL_TO_REG] = { + .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_1, + .moffset = 3, + .mlen = 1, + .soffset = MLX5_BYTE_OFF(fte_match_param, + misc_parameters_2.metadata_reg_c_1), + }, }; +static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow); + +void +mlx5e_tc_match_to_reg_match(struct mlx5_flow_spec *spec, + enum mlx5e_tc_attr_to_reg type, + u32 data, + u32 mask) +{ + int soffset = mlx5e_tc_attr_to_reg_mappings[type].soffset; + int match_len = mlx5e_tc_attr_to_reg_mappings[type].mlen; + void *headers_c = spec->match_criteria; + void *headers_v = spec->match_value; + void *fmask, *fval; + + fmask = headers_c + soffset; + fval = headers_v + soffset; + + mask = cpu_to_be32(mask) >> (32 - (match_len * 8)); + data = cpu_to_be32(data) >> (32 - (match_len * 8)); + + memcpy(fmask, &mask, match_len); + memcpy(fval, &data, match_len); + + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2; +} + +int +mlx5e_tc_match_to_reg_set(struct mlx5_core_dev *mdev, + struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts, + enum mlx5e_tc_attr_to_reg type, + u32 data) +{ + int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset; + int mfield = mlx5e_tc_attr_to_reg_mappings[type].mfield; + int mlen = mlx5e_tc_attr_to_reg_mappings[type].mlen; + char *modact; + int err; + + err = alloc_mod_hdr_actions(mdev, MLX5_FLOW_NAMESPACE_FDB, + mod_hdr_acts); + if (err) + return err; + + modact = mod_hdr_acts->actions + + (mod_hdr_acts->num_actions * MLX5_MH_ACT_SZ); + + /* Firmware has 5bit length field and 0 means 32bits */ + if (mlen == 4) + mlen = 0; + + MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET); + MLX5_SET(set_action_in, modact, field, mfield); + MLX5_SET(set_action_in, modact, offset, moffset * 8); + MLX5_SET(set_action_in, modact, length, mlen * 8); + MLX5_SET(set_action_in, modact, data, data); + mod_hdr_acts->num_actions++; + + return 0; +} + struct mlx5e_hairpin { struct mlx5_hairpin *pair; @@ -216,8 +312,6 @@ struct mlx5e_mod_hdr_entry { int compl_result; }; -#define MLX5_MH_ACT_SZ MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto) - static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow); @@ -1281,6 +1375,8 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, struct mlx5_esw_flow_attr slow_attr; int out_index; + mlx5e_put_flow_tunnel_id(flow); + if (flow_flag_test(flow, NOT_READY)) { remove_unready_flow(flow); kvfree(attr->parse_attr); @@ -1670,43 +1766,267 @@ static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, } } +static int flow_has_tc_fwd_action(struct flow_cls_offload *f) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(f); + struct flow_action *flow_action = &rule->action; + const struct flow_action_entry *act; + int i; + + flow_action_for_each(i, act, flow_action) { + switch (act->id) { + case FLOW_ACTION_GOTO: + return true; + default: + continue; + } + } + + return false; +} + +static int +enc_opts_is_dont_care_or_full_match(struct mlx5e_priv *priv, + struct flow_dissector_key_enc_opts *opts, + struct netlink_ext_ack *extack, + bool *dont_care) +{ + struct 
geneve_opt *opt; + int off = 0; + + *dont_care = true; + + while (opts->len > off) { + opt = (struct geneve_opt *)&opts->data[off]; + + if (!(*dont_care) || opt->opt_class || opt->type || + memchr_inv(opt->opt_data, 0, opt->length * 4)) { + *dont_care = false; + + if (opt->opt_class != U16_MAX || + opt->type != U8_MAX || + memchr_inv(opt->opt_data, 0xFF, + opt->length * 4)) { + NL_SET_ERR_MSG(extack, + "Partial match of tunnel options in chain > 0 isn't supported"); + netdev_warn(priv->netdev, + "Partial match of tunnel options in chain > 0 isn't supported"); + return -EOPNOTSUPP; + } + } + + off += sizeof(struct geneve_opt) + opt->length * 4; + } + + return 0; +} + +#define COPY_DISSECTOR(rule, diss_key, dst)\ +({ \ + struct flow_rule *__rule = (rule);\ + typeof(dst) __dst = dst;\ +\ + memcpy(__dst,\ + skb_flow_dissector_target(__rule->match.dissector,\ + diss_key,\ + __rule->match.key),\ + sizeof(*__dst));\ +}) + +static int mlx5e_get_flow_tunnel_id(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct flow_cls_offload *f, + struct net_device *filter_dev) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(f); + struct netlink_ext_ack *extack = f->common.extack; + struct mlx5_esw_flow_attr *attr = flow->esw_attr; + struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts; + struct flow_match_enc_opts enc_opts_match; + struct mlx5_rep_uplink_priv *uplink_priv; + struct mlx5e_rep_priv *uplink_rpriv; + struct tunnel_match_key tunnel_key; + bool enc_opts_is_dont_care = true; + u32 tun_id, enc_opts_id = 0; + struct mlx5_eswitch *esw; + u32 value, mask; + int err; + + esw = priv->mdev->priv.eswitch; + uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + uplink_priv = &uplink_rpriv->uplink_priv; + + memset(&tunnel_key, 0, sizeof(tunnel_key)); + COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL, + &tunnel_key.enc_control); + if (tunnel_key.enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) + COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS, + &tunnel_key.enc_ipv4); + else + COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS, + &tunnel_key.enc_ipv6); + COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IP, &tunnel_key.enc_ip); + COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_PORTS, + &tunnel_key.enc_tp); + COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_KEYID, + &tunnel_key.enc_key_id); + tunnel_key.filter_ifindex = filter_dev->ifindex; + + err = mapping_add(uplink_priv->tunnel_mapping, &tunnel_key, &tun_id); + if (err) + return err; + + flow_rule_match_enc_opts(rule, &enc_opts_match); + err = enc_opts_is_dont_care_or_full_match(priv, + enc_opts_match.mask, + extack, + &enc_opts_is_dont_care); + if (err) + goto err_enc_opts; + + if (!enc_opts_is_dont_care) { + err = mapping_add(uplink_priv->tunnel_enc_opts_mapping, + enc_opts_match.key, &enc_opts_id); + if (err) + goto err_enc_opts; + } + + value = tun_id << ENC_OPTS_BITS | enc_opts_id; + mask = enc_opts_id ? 
TUNNEL_ID_MASK : + (TUNNEL_ID_MASK & ~ENC_OPTS_BITS_MASK); + + if (attr->chain) { + mlx5e_tc_match_to_reg_match(&attr->parse_attr->spec, + TUNNEL_TO_REG, value, mask); + } else { + mod_hdr_acts = &attr->parse_attr->mod_hdr_acts; + err = mlx5e_tc_match_to_reg_set(priv->mdev, + mod_hdr_acts, + TUNNEL_TO_REG, value); + if (err) + goto err_set; + + attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + } + + flow->tunnel_id = value; + return 0; + +err_set: + if (enc_opts_id) + mapping_remove(uplink_priv->tunnel_enc_opts_mapping, + enc_opts_id); +err_enc_opts: + mapping_remove(uplink_priv->tunnel_mapping, tun_id); + return err; +} + +static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow) +{ + u32 enc_opts_id = flow->tunnel_id & ENC_OPTS_BITS_MASK; + u32 tun_id = flow->tunnel_id >> ENC_OPTS_BITS; + struct mlx5_rep_uplink_priv *uplink_priv; + struct mlx5e_rep_priv *uplink_rpriv; + struct mlx5_eswitch *esw; + + esw = flow->priv->mdev->priv.eswitch; + uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + uplink_priv = &uplink_rpriv->uplink_priv; + + if (tun_id) + mapping_remove(uplink_priv->tunnel_mapping, tun_id); + if (enc_opts_id) + mapping_remove(uplink_priv->tunnel_enc_opts_mapping, + enc_opts_id); +} static int parse_tunnel_attr(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, struct mlx5_flow_spec *spec, struct flow_cls_offload *f, - struct net_device *filter_dev, u8 *match_level) + struct net_device *filter_dev, + u8 *match_level, + bool *match_inner) { + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct netlink_ext_ack *extack = f->common.extack; + bool needs_mapping, sets_mapping; int err; - err = mlx5e_tc_tun_parse(filter_dev, priv, spec, f, match_level); - if (err) { - NL_SET_ERR_MSG_MOD(extack, - "failed to parse tunnel attributes"); - return err; + if (!mlx5e_is_eswitch_flow(flow)) + return -EOPNOTSUPP; + + needs_mapping = !!flow->esw_attr->chain; + sets_mapping = !flow->esw_attr->chain && flow_has_tc_fwd_action(f); + *match_inner = !needs_mapping; + + if ((needs_mapping || sets_mapping) && + !mlx5_eswitch_vport_match_metadata_enabled(esw)) { + NL_SET_ERR_MSG(extack, + "Chains on tunnel devices isn't supported without register metadata support"); + netdev_warn(priv->netdev, + "Chains on tunnel devices isn't supported without register metadata support"); + return -EOPNOTSUPP; } - return 0; + if (!flow->esw_attr->chain) { + err = mlx5e_tc_tun_parse(filter_dev, priv, spec, f, + match_level); + if (err) { + NL_SET_ERR_MSG_MOD(extack, + "Failed to parse tunnel attributes"); + netdev_warn(priv->netdev, + "Failed to parse tunnel attributes"); + return err; + } + + flow->esw_attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP; + } + + if (!needs_mapping && !sets_mapping) + return 0; + + return mlx5e_get_flow_tunnel_id(priv, flow, f, filter_dev); } -static void *get_match_headers_criteria(u32 flags, - struct mlx5_flow_spec *spec) +static void *get_match_inner_headers_criteria(struct mlx5_flow_spec *spec) { - return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ? 
- MLX5_ADDR_OF(fte_match_param, spec->match_criteria, - inner_headers) : - MLX5_ADDR_OF(fte_match_param, spec->match_criteria, - outer_headers); + return MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + inner_headers); +} + +static void *get_match_inner_headers_value(struct mlx5_flow_spec *spec) +{ + return MLX5_ADDR_OF(fte_match_param, spec->match_value, + inner_headers); +} + +static void *get_match_outer_headers_criteria(struct mlx5_flow_spec *spec) +{ + return MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + outer_headers); +} + +static void *get_match_outer_headers_value(struct mlx5_flow_spec *spec) +{ + return MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers); } static void *get_match_headers_value(u32 flags, struct mlx5_flow_spec *spec) { return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ? - MLX5_ADDR_OF(fte_match_param, spec->match_value, - inner_headers) : - MLX5_ADDR_OF(fte_match_param, spec->match_value, - outer_headers); + get_match_inner_headers_value(spec) : + get_match_outer_headers_value(spec); +} + +static void *get_match_headers_criteria(u32 flags, + struct mlx5_flow_spec *spec) +{ + return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ? + get_match_inner_headers_criteria(spec) : + get_match_outer_headers_criteria(spec); } static int mlx5e_flower_parse_meta(struct net_device *filter_dev, @@ -1744,6 +2064,7 @@ static int mlx5e_flower_parse_meta(struct net_device *filter_dev, } static int __parse_cls_flower(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, struct mlx5_flow_spec *spec, struct flow_cls_offload *f, struct net_device *filter_dev, @@ -1793,18 +2114,22 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, } if (mlx5e_get_tc_tun(filter_dev)) { - if (parse_tunnel_attr(priv, spec, f, filter_dev, - outer_match_level)) - return -EOPNOTSUPP; + bool match_inner = false; - /* At this point, header pointers should point to the inner - * headers, outer header were already set by parse_tunnel_attr - */ - match_level = inner_match_level; - headers_c = get_match_headers_criteria(MLX5_FLOW_CONTEXT_ACTION_DECAP, - spec); - headers_v = get_match_headers_value(MLX5_FLOW_CONTEXT_ACTION_DECAP, - spec); + err = parse_tunnel_attr(priv, flow, spec, f, filter_dev, + outer_match_level, &match_inner); + if (err) + return err; + + if (match_inner) { + /* header pointers should point to the inner headers + * if the packet was decapsulated already. + * outer headers are set by parse_tunnel_attr. + */ + match_level = inner_match_level; + headers_c = get_match_inner_headers_criteria(spec); + headers_v = get_match_inner_headers_value(spec); + } } err = mlx5e_flower_parse_meta(filter_dev, f); @@ -2121,8 +2446,8 @@ static int parse_cls_flower(struct mlx5e_priv *priv, inner_match_level = MLX5_MATCH_NONE; outer_match_level = MLX5_MATCH_NONE; - err = __parse_cls_flower(priv, spec, f, filter_dev, &inner_match_level, - &outer_match_level); + err = __parse_cls_flower(priv, flow, spec, f, filter_dev, + &inner_match_level, &outer_match_level); non_tunnel_match_level = (inner_match_level == MLX5_MATCH_NONE) ? 
outer_match_level : inner_match_level; @@ -2676,7 +3001,7 @@ static bool actions_match_supported(struct mlx5e_priv *priv, struct netlink_ext_ack *extack) { struct net_device *filter_dev = parse_attr->filter_dev; - bool drop_action, decap_action, pop_action; + bool drop_action, pop_action; u32 actions; if (mlx5e_is_eswitch_flow(flow)) @@ -2685,17 +3010,15 @@ static bool actions_match_supported(struct mlx5e_priv *priv, actions = flow->nic_attr->action; drop_action = actions & MLX5_FLOW_CONTEXT_ACTION_DROP; - decap_action = actions & MLX5_FLOW_CONTEXT_ACTION_DECAP; pop_action = actions & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP; if (flow_flag_test(flow, EGRESS) && !drop_action) { - /* If no drop, we must decap (vxlan) or pop (vlan) */ - if (mlx5e_get_tc_tun(filter_dev) && !decap_action) - return false; - else if (is_vlan_dev(filter_dev) && !pop_action) + /* We only support filters on tunnel device, or on vlan + * devices if they have pop/drop action + */ + if (!mlx5e_get_tc_tun(filter_dev) || + (is_vlan_dev(filter_dev) && !pop_action)) return false; - else - return false; /* Sanity */ } if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) @@ -3248,9 +3571,9 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, int ifindexes[MLX5_MAX_FLOW_FWD_VPORTS]; bool ft_flow = mlx5e_is_ft_flow(flow); const struct flow_action_entry *act; + bool encap = false, decap = false; + u32 action = attr->action; int err, i, if_count = 0; - bool encap = false; - u32 action = 0; if (!flow_action_has_entries(flow_action)) return -EINVAL; @@ -3427,7 +3750,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, attr->split_count = attr->out_count; break; case FLOW_ACTION_TUNNEL_DECAP: - action |= MLX5_FLOW_CONTEXT_ACTION_DECAP; + decap = true; break; case FLOW_ACTION_GOTO: { u32 dest_chain = act->chain_index; @@ -3491,6 +3814,22 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, return -EOPNOTSUPP; if (attr->dest_chain) { + if (decap) { + /* It can be supported if we'll create a mapping for + * the tunnel device only (without tunnel), and set + * this tunnel id with this decap flow. + * + * On restore (miss), we'll just set this saved tunnel + * device. 
+ */ + + NL_SET_ERR_MSG(extack, + "Decap with goto isn't supported"); + netdev_warn(priv->netdev, + "Decap with goto isn't supported"); + return -EOPNOTSUPP; + } + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { NL_SET_ERR_MSG(extack, "Mirroring goto chain rules isn't supported"); return -EOPNOTSUPP; @@ -4215,12 +4554,55 @@ void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) int mlx5e_tc_esw_init(struct rhashtable *tc_ht) { - return rhashtable_init(tc_ht, &tc_ht_params); + const size_t sz_enc_opts = sizeof(struct flow_dissector_key_enc_opts); + struct mlx5_rep_uplink_priv *uplink_priv; + struct mlx5e_rep_priv *priv; + struct mapping_ctx *mapping; + int err; + + uplink_priv = container_of(tc_ht, struct mlx5_rep_uplink_priv, tc_ht); + priv = container_of(uplink_priv, struct mlx5e_rep_priv, uplink_priv); + + mapping = mapping_create(sizeof(struct tunnel_match_key), + TUNNEL_INFO_BITS_MASK, true); + if (IS_ERR(mapping)) { + err = PTR_ERR(mapping); + goto err_tun_mapping; + } + uplink_priv->tunnel_mapping = mapping; + + mapping = mapping_create(sz_enc_opts, ENC_OPTS_BITS_MASK, true); + if (IS_ERR(mapping)) { + err = PTR_ERR(mapping); + goto err_enc_opts_mapping; + } + uplink_priv->tunnel_enc_opts_mapping = mapping; + + err = rhashtable_init(tc_ht, &tc_ht_params); + if (err) + goto err_ht_init; + + return err; + +err_ht_init: + mapping_destroy(uplink_priv->tunnel_enc_opts_mapping); +err_enc_opts_mapping: + mapping_destroy(uplink_priv->tunnel_mapping); +err_tun_mapping: + netdev_warn(priv->netdev, + "Failed to initialize tc (eswitch), err: %d", err); + return err; } void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht) { + struct mlx5_rep_uplink_priv *uplink_priv; + rhashtable_free_and_destroy(tc_ht, _mlx5e_tc_del_flow, NULL); + + uplink_priv = container_of(tc_ht, struct mlx5_rep_uplink_priv, tc_ht); + mapping_destroy(uplink_priv->tunnel_enc_opts_mapping); + mapping_destroy(uplink_priv->tunnel_mapping); } int mlx5e_tc_num_filters(struct mlx5e_priv *priv, unsigned long flags) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h index 3848ec7..2fab76b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h @@ -93,12 +93,15 @@ void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv, enum mlx5e_tc_attr_to_reg { CHAIN_TO_REG, + TUNNEL_TO_REG, }; struct mlx5e_tc_attr_to_reg_mapping { int mfield; /* rewrite field */ int moffset; /* offset of mfield */ int mlen; /* bytes to rewrite/match */ + + int soffset; /* offset of spec for match */ }; extern struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[]; @@ -114,6 +117,16 @@ struct mlx5e_tc_mod_hdr_acts { void *actions; }; +int mlx5e_tc_match_to_reg_set(struct mlx5_core_dev *mdev, + struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts, + enum mlx5e_tc_attr_to_reg type, + u32 data); + +void mlx5e_tc_match_to_reg_match(struct mlx5_flow_spec *spec, + enum mlx5e_tc_attr_to_reg type, + u32 data, + u32 mask); + int alloc_mod_hdr_actions(struct mlx5_core_dev *mdev, int namespace, struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts); From patchwork Sun Feb 16 10:01:35 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Paul Blakey X-Patchwork-Id: 1238729 X-Patchwork-Delegate: davem@davemloft.net Return-Path: X-Original-To: patchwork-incoming-netdev@ozlabs.org Delivered-To: patchwork-incoming-netdev@ozlabs.org Authentication-Results: ozlabs.org; spf=none (no SPF record) 
smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=netdev-owner@vger.kernel.org; receiver=) Authentication-Results: ozlabs.org; dmarc=fail (p=none dis=none) header.from=mellanox.com Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 48L2hx5B7dz9sP7 for ; Sun, 16 Feb 2020 21:02:09 +1100 (AEDT) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727778AbgBPKBr (ORCPT ); Sun, 16 Feb 2020 05:01:47 -0500 Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:53000 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1727723AbgBPKBq (ORCPT ); Sun, 16 Feb 2020 05:01:46 -0500 Received: from Internal Mail-Server by MTLPINE2 (envelope-from paulb@mellanox.com) with ESMTPS (AES256-SHA encrypted); 16 Feb 2020 12:01:40 +0200 Received: from reg-r-vrt-019-120.mtr.labs.mlnx (reg-r-vrt-019-120.mtr.labs.mlnx [10.213.19.120]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id 01GA1ceB007834; Sun, 16 Feb 2020 12:01:40 +0200 From: Paul Blakey To: Paul Blakey , Saeed Mahameed , Oz Shlomo , Jakub Kicinski , Vlad Buslov , David Miller , "netdev@vger.kernel.org" , Jiri Pirko , Roi Dayan Subject: [PATCH net-next-mlx5 v3 15/16] net/mlx5: E-Switch, Get reg_c1 value on miss Date: Sun, 16 Feb 2020 12:01:35 +0200 Message-Id: <1581847296-19194-16-git-send-email-paulb@mellanox.com> X-Mailer: git-send-email 1.8.4.3 In-Reply-To: <1581847296-19194-1-git-send-email-paulb@mellanox.com> References: <1581847296-19194-1-git-send-email-paulb@mellanox.com> Sender: netdev-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org The HW model implicitly decapsulates tunnels on chain 0 and sets reg_c1 with the mapped tunnel id. On miss, the packet does not have the outer header, and the driver restores the tunnel information from the tunnel id. Getting the reg_c1 value in software requires enabling reg_c1 loopback and copying reg_c1 to reg_b. reg_b comes up in the CQE as cqe->imm_inval_pkey. Use the reg_c0 restoration rules to also copy reg_c1 to reg_b. 
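For illustration only (plain userspace C, not part of this patch): once reg_c1 has been copied to reg_b, software reads reg_b from the CQE and unpacks the byte that TUNNEL_TO_REG wrote, assuming the layout from the en_tc.c patch earlier in this series (one byte at offset 3: 6 bits of tunnel mapping id, 2 bits of enc opts id). A minimal sketch of that unpacking, with a hypothetical register value:

#include <stdint.h>
#include <stdio.h>

#define ENC_OPTS_BITS		2
#define ENC_OPTS_BITS_MASK	((1u << ENC_OPTS_BITS) - 1)
#define TUNNEL_MOFFSET		3	/* byte offset of TUNNEL_TO_REG within reg_c_1 */

int main(void)
{
	/* Pretend reg_c_1 was looped back to reg_b and read from the CQE;
	 * 0x2b is a hypothetical mapped tunnel id byte. */
	uint32_t reg_b = 0x2bu << (8 * TUNNEL_MOFFSET);
	uint32_t tunnel_id = reg_b >> (8 * TUNNEL_MOFFSET);

	/* Split the byte the same way mlx5e_put_flow_tunnel_id() does. */
	printf("tun_id=%u enc_opts_id=%u\n",
	       tunnel_id >> ENC_OPTS_BITS, tunnel_id & ENC_OPTS_BITS_MASK);
	return 0;
}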
Signed-off-by: Paul Blakey Reviewed-by: Oz Shlomo Reviewed-by: Mark Bloch --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 1 + .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 31 +++++++++++++++++++--- 2 files changed, 29 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index a94d91c..e7dd2ca 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -191,6 +191,7 @@ struct mlx5_eswitch_fdb { struct mlx5_esw_offload { struct mlx5_flow_table *ft_offloads_restore; struct mlx5_flow_group *restore_group; + struct mlx5_modify_hdr *restore_copy_hdr_id; struct mlx5_flow_table *ft_offloads; struct mlx5_flow_group *vport_rx_group; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 81c2cbf..e4bd53e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -598,9 +598,11 @@ static int esw_set_passing_vport_metadata(struct mlx5_eswitch *esw, bool enable) esw_vport_context.fdb_to_vport_reg_c_id); if (enable) - fdb_to_vport_reg_c_id |= MLX5_FDB_TO_VPORT_REG_C_0; + fdb_to_vport_reg_c_id |= MLX5_FDB_TO_VPORT_REG_C_0 | + MLX5_FDB_TO_VPORT_REG_C_1; else - fdb_to_vport_reg_c_id &= ~MLX5_FDB_TO_VPORT_REG_C_0; + fdb_to_vport_reg_c_id &= ~(MLX5_FDB_TO_VPORT_REG_C_0 | + MLX5_FDB_TO_VPORT_REG_C_1); MLX5_SET(modify_esw_vport_context_in, in, esw_vport_context.fdb_to_vport_reg_c_id, fdb_to_vport_reg_c_id); @@ -861,7 +863,9 @@ struct mlx5_flow_handle * misc_parameters_2); MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, tag); spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2; - flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | + MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + flow_act.modify_hdr = esw->offloads.restore_copy_hdr_id; flow_context = &spec->flow_context; flow_context->flags |= FLOW_CONTEXT_HAS_TAG; @@ -1217,16 +1221,19 @@ static void esw_destroy_restore_table(struct mlx5_eswitch *esw) { struct mlx5_esw_offload *offloads = &esw->offloads; + mlx5_modify_header_dealloc(esw->dev, offloads->restore_copy_hdr_id); mlx5_destroy_flow_group(offloads->restore_group); mlx5_destroy_flow_table(offloads->ft_offloads_restore); } static int esw_create_restore_table(struct mlx5_eswitch *esw) { + u8 modact[MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto)] = {}; int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); struct mlx5_flow_table_attr ft_attr = {}; struct mlx5_core_dev *dev = esw->dev; struct mlx5_flow_namespace *ns; + struct mlx5_modify_hdr *mod_hdr; void *match_criteria, *misc; struct mlx5_flow_table *ft; struct mlx5_flow_group *g; @@ -1275,11 +1282,29 @@ static int esw_create_restore_table(struct mlx5_eswitch *esw) goto err_group; } + MLX5_SET(copy_action_in, modact, action_type, MLX5_ACTION_TYPE_COPY); + MLX5_SET(copy_action_in, modact, src_field, + MLX5_ACTION_IN_FIELD_METADATA_REG_C_1); + MLX5_SET(copy_action_in, modact, dst_field, + MLX5_ACTION_IN_FIELD_METADATA_REG_B); + mod_hdr = mlx5_modify_header_alloc(esw->dev, + MLX5_FLOW_NAMESPACE_KERNEL, 1, + modact); + if (IS_ERR(mod_hdr)) { + err = PTR_ERR(mod_hdr); + esw_warn(dev, "Failed to create restore mod header, err: %d\n", + err); + goto err_mod_hdr; + } + esw->offloads.ft_offloads_restore = ft; esw->offloads.restore_group = g; + esw->offloads.restore_copy_hdr_id = mod_hdr; return 0; +err_mod_hdr: + mlx5_destroy_flow_group(g); err_group: mlx5_destroy_flow_table(ft); out_free:
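Aside (illustrative only, not kernel code): the restore rule above attaches a single MLX5_ACTION_TYPE_COPY modify-header action. Conceptually it copies one metadata register into another, which a toy model can show; register names and values below are stand-ins, not firmware encodings:

#include <stdint.h>
#include <stdio.h>

/* Toy model of a copy modify-header action: move the contents of one
 * metadata register into another. Indices are illustrative only. */
enum { REG_C0, REG_C1, REG_B, NUM_REGS };

struct copy_action { int src_field, dst_field; };

static void apply_copy(uint32_t regs[NUM_REGS], struct copy_action a)
{
	regs[a.dst_field] = regs[a.src_field];
}

int main(void)
{
	uint32_t regs[NUM_REGS] = { [REG_C1] = 0x2b000000 };	/* mapped tunnel id */
	struct copy_action restore_copy = { .src_field = REG_C1,
					    .dst_field = REG_B };

	/* What the restore rule's copy action does in hardware: make the
	 * reg_c_1 value visible to software via reg_b. */
	apply_copy(regs, restore_copy);
	printf("reg_b = 0x%x\n", regs[REG_B]);
	return 0;
}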
From patchwork Sun Feb 16 10:01:36 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Paul Blakey X-Patchwork-Id: 1238716 X-Patchwork-Delegate: davem@davemloft.net Return-Path: X-Original-To: patchwork-incoming-netdev@ozlabs.org Delivered-To: patchwork-incoming-netdev@ozlabs.org Authentication-Results: ozlabs.org; spf=none (no SPF record) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=netdev-owner@vger.kernel.org; receiver=) Authentication-Results: ozlabs.org; dmarc=fail (p=none dis=none) header.from=mellanox.com Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 48L2hl59kBz9sRh for ; Sun, 16 Feb 2020 21:01:59 +1100 (AEDT) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726715AbgBPKBo (ORCPT ); Sun, 16 Feb 2020 05:01:44 -0500 Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:44769 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1726142AbgBPKBn (ORCPT ); Sun, 16 Feb 2020 05:01:43 -0500 Received: from Internal Mail-Server by MTLPINE1 (envelope-from paulb@mellanox.com) with ESMTPS (AES256-SHA encrypted); 16 Feb 2020 12:01:40 +0200 Received: from reg-r-vrt-019-120.mtr.labs.mlnx (reg-r-vrt-019-120.mtr.labs.mlnx [10.213.19.120]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id 01GA1ceC007834; Sun, 16 Feb 2020 12:01:40 +0200 From: Paul Blakey To: Paul Blakey , Saeed Mahameed , Oz Shlomo , Jakub Kicinski , Vlad Buslov , David Miller , "netdev@vger.kernel.org" , Jiri Pirko , Roi Dayan Subject: [PATCH net-next-mlx5 v3 16/16] net/mlx5e: Restore tunnel metadata on miss Date: Sun, 16 Feb 2020 12:01:36 +0200 Message-Id: <1581847296-19194-17-git-send-email-paulb@mellanox.com> X-Mailer: git-send-email 1.8.4.3 In-Reply-To: <1581847296-19194-1-git-send-email-paulb@mellanox.com> References: <1581847296-19194-1-git-send-email-paulb@mellanox.com> Sender: netdev-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org In a tunnel and chains setup, we decapsulate the packets on the first chain hop. If we miss on a later chain, the packet comes up without the tunnel header, so it won't be processed by the tunnel device that normally fills the tunnel metadata, and further tc tunnel matches won't work. On miss, we get the tunnel mapping id, which was set on the chain 0 rule that decapsulated the packet. This rule matched the tunnel outer headers. From the tunnel mapping id, we recover the tunnel matches and restore the equivalent tunnel info metadata dst on the skb. We also set skb->dev to the relevant (tunnel) device. Further tc processing can then be done on that device. 
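A rough userspace sketch of the restore logic described above (simplified and hypothetical types and lookup; the driver instead calls mapping_find() on the tunnel mapping tables and builds a metadata_dst with ip_tunnel_key_init()):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define ENC_OPTS_BITS		2
#define ENC_OPTS_BITS_MASK	((1u << ENC_OPTS_BITS) - 1)

/* Hypothetical stand-in for the saved tunnel match key. */
struct tunnel_key {
	uint32_t src_ip, dst_ip;
	int filter_ifindex;
};

/* Stand-in for mapping_find() on uplink_priv->tunnel_mapping. */
static bool lookup_tunnel(uint32_t tun_id, struct tunnel_key *key)
{
	if (tun_id != 1)
		return false;
	*key = (struct tunnel_key){ 0x0a000001, 0x0a000002, 5 };
	return true;
}

static bool restore_tunnel(uint32_t tunnel_id)
{
	uint32_t enc_opts_id = tunnel_id & ENC_OPTS_BITS_MASK;
	uint32_t tun_id = tunnel_id >> ENC_OPTS_BITS;
	struct tunnel_key key;

	if (!tun_id)		/* the chain 0 rule matched no tunnel */
		return true;
	if (!lookup_tunnel(tun_id, &key))
		return false;	/* unknown id: the caller drops the skb */
	if (enc_opts_id) {
		/* the driver additionally looks up the saved tunnel
		 * options and attaches them to the tunnel metadata */
	}
	printf("restore tunnel dst=0x%x, ifindex=%d\n",
	       key.dst_ip, key.filter_ifindex);
	return true;
}

int main(void)
{
	return restore_tunnel(1u << ENC_OPTS_BITS) ? 0 : 1;
}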
Signed-off-by: Paul Blakey Reviewed-by: Roi Dayan Reviewed-by: Oz Shlomo Reviewed-by: Mark Bloch --- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 10 ++- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 110 ++++++++++++++++++++++-- drivers/net/ethernet/mellanox/mlx5/core/en_tc.h | 9 +- 3 files changed, 117 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index e0abe79..135b8a5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -1193,6 +1193,7 @@ void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5e_rep_priv *rpriv = priv->ppriv; struct mlx5_eswitch_rep *rep = rpriv->rep; + struct mlx5e_tc_update_priv tc_priv = {}; struct mlx5_wq_cyc *wq = &rq->wqe.wq; struct mlx5e_wqe_frag_info *wi; struct sk_buff *skb; @@ -1225,11 +1226,13 @@ void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) if (rep->vlan && skb_vlan_tag_present(skb)) skb_vlan_pop(skb); - if (!mlx5e_tc_rep_update_skb(cqe, skb)) + if (!mlx5e_tc_rep_update_skb(cqe, skb, &tc_priv)) goto free_wqe; napi_gro_receive(rq->cq.napi, skb); + mlx5_tc_rep_post_napi_receive(&tc_priv); + free_wqe: mlx5e_free_rx_wqe(rq, wi, true); wq_cyc_pop: @@ -1246,6 +1249,7 @@ void mlx5e_handle_rx_cqe_mpwrq_rep(struct mlx5e_rq *rq, u32 wqe_offset = stride_ix << rq->mpwqe.log_stride_sz; u32 head_offset = wqe_offset & (PAGE_SIZE - 1); u32 page_idx = wqe_offset >> PAGE_SHIFT; + struct mlx5e_tc_update_priv tc_priv = {}; struct mlx5e_rx_wqe_ll *wqe; struct mlx5_wq_ll *wq; struct sk_buff *skb; @@ -1278,11 +1282,13 @@ void mlx5e_handle_rx_cqe_mpwrq_rep(struct mlx5e_rq *rq, mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); - if (!mlx5e_tc_rep_update_skb(cqe, skb)) + if (!mlx5e_tc_rep_update_skb(cqe, skb, &tc_priv)) goto mpwrq_cqe_out; napi_gro_receive(rq->cq.napi, skb); + mlx5_tc_rep_post_napi_receive(&tc_priv); + mpwrq_cqe_out: if (likely(wi->consumed_strides < rq->mpwqe.num_strides)) return; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 3f1b812..70b5fe2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -4635,19 +4635,102 @@ void mlx5e_tc_reoffload_flows_work(struct work_struct *work) mutex_unlock(&rpriv->unready_flows_lock); } +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) +static bool mlx5e_restore_tunnel(struct mlx5e_priv *priv, struct sk_buff *skb, + struct mlx5e_tc_update_priv *tc_priv, + u32 tunnel_id) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct flow_dissector_key_enc_opts enc_opts = {}; + struct mlx5_rep_uplink_priv *uplink_priv; + struct mlx5e_rep_priv *uplink_rpriv; + struct metadata_dst *tun_dst; + struct tunnel_match_key key; + u32 tun_id, enc_opts_id; + struct net_device *dev; + int err; + + enc_opts_id = tunnel_id & ENC_OPTS_BITS_MASK; + tun_id = tunnel_id >> ENC_OPTS_BITS; + + if (!tun_id) + return true; + + uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + uplink_priv = &uplink_rpriv->uplink_priv; + + err = mapping_find(uplink_priv->tunnel_mapping, tun_id, &key); + if (err) { + WARN_ON_ONCE(true); + netdev_dbg(priv->netdev, + "Couldn't find tunnel for tun_id: %d, err: %d\n", + tun_id, err); + return false; + } + + if (enc_opts_id) { + err = mapping_find(uplink_priv->tunnel_enc_opts_mapping, + enc_opts_id, &enc_opts); + if (err) { + 
netdev_dbg(priv->netdev, + "Couldn't find tunnel (opts) for tun_id: %d, err: %d\n", + enc_opts_id, err); + return false; + } + } + + tun_dst = tun_rx_dst(enc_opts.len); + if (!tun_dst) { + WARN_ON_ONCE(true); + return false; + } + + ip_tunnel_key_init(&tun_dst->u.tun_info.key, + key.enc_ipv4.src, key.enc_ipv4.dst, + key.enc_ip.tos, key.enc_ip.ttl, + 0, /* label */ + key.enc_tp.src, key.enc_tp.dst, + key32_to_tunnel_id(key.enc_key_id.keyid), + TUNNEL_KEY); + + if (enc_opts.len) + ip_tunnel_info_opts_set(&tun_dst->u.tun_info, enc_opts.data, + enc_opts.len, enc_opts.dst_opt_type); + + skb_dst_set(skb, (struct dst_entry *)tun_dst); + dev = dev_get_by_index(&init_net, key.filter_ifindex); + if (!dev) { + netdev_dbg(priv->netdev, + "Couldn't find tunnel device with ifindex: %d\n", + key.filter_ifindex); + return false; + } + + /* Set tun_dev so we do dev_put() after datapath */ + tc_priv->tun_dev = dev; + + skb->dev = dev; + + return true; +} +#endif /* CONFIG_NET_TC_SKB_EXT */ + bool mlx5e_tc_rep_update_skb(struct mlx5_cqe64 *cqe, - struct sk_buff *skb) + struct sk_buff *skb, + struct mlx5e_tc_update_priv *tc_priv) { #if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) + u32 chain = 0, reg_c0, reg_c1, tunnel_id; struct tc_skb_ext *tc_skb_ext; struct mlx5_eswitch *esw; struct mlx5e_priv *priv; - u32 chain = 0, reg_c0; + int tunnel_moffset; int err; reg_c0 = (be32_to_cpu(cqe->sop_drop_qpn) & MLX5E_TC_FLOW_ID_MASK); if (reg_c0 == MLX5_FS_DEFAULT_FLOW_TAG) reg_c0 = 0; + reg_c1 = be32_to_cpu(cqe->imm_inval_pkey); if (!reg_c0) return true; @@ -4663,17 +4746,26 @@ bool mlx5e_tc_rep_update_skb(struct mlx5_cqe64 *cqe, return false; } - if (!chain) - return true; + if (chain) { + tc_skb_ext = skb_ext_add(skb, TC_SKB_EXT); + if (!tc_skb_ext) { + WARN_ON(1); + return false; + } - tc_skb_ext = skb_ext_add(skb, TC_SKB_EXT); - if (!tc_skb_ext) { - WARN_ON_ONCE(1); - return false; + tc_skb_ext->chain = chain; } - tc_skb_ext->chain = chain; + tunnel_moffset = mlx5e_tc_attr_to_reg_mappings[TUNNEL_TO_REG].moffset; + tunnel_id = reg_c1 >> (8 * tunnel_moffset); + return mlx5e_restore_tunnel(priv, skb, tc_priv, tunnel_id); #endif /* CONFIG_NET_TC_SKB_EXT */ return true; } + +void mlx5_tc_rep_post_napi_receive(struct mlx5e_tc_update_priv *tc_priv) +{ + if (tc_priv->tun_dev) + dev_put(tc_priv->tun_dev); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h index 2fab76b..21cbde4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h @@ -109,7 +109,14 @@ struct mlx5e_tc_attr_to_reg_mapping { bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv, struct net_device *out_dev); -bool mlx5e_tc_rep_update_skb(struct mlx5_cqe64 *cqe, struct sk_buff *skb); +struct mlx5e_tc_update_priv { + struct net_device *tun_dev; +}; + +bool mlx5e_tc_rep_update_skb(struct mlx5_cqe64 *cqe, struct sk_buff *skb, + struct mlx5e_tc_update_priv *tc_priv); + +void mlx5_tc_rep_post_napi_receive(struct mlx5e_tc_update_priv *tc_priv); struct mlx5e_tc_mod_hdr_acts { int num_actions;