From patchwork Mon Aug 26 13:44:57 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Vlad Buslov X-Patchwork-Id: 1153187 X-Patchwork-Delegate: davem@davemloft.net Return-Path: X-Original-To: patchwork-incoming-netdev@ozlabs.org Delivered-To: patchwork-incoming-netdev@ozlabs.org Authentication-Results: ozlabs.org; spf=none (mailfrom) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=netdev-owner@vger.kernel.org; receiver=) Authentication-Results: ozlabs.org; dmarc=fail (p=none dis=none) header.from=mellanox.com Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 46HCv25TcTz9sP7 for ; Mon, 26 Aug 2019 23:45:34 +1000 (AEST) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1732151AbfHZNpe (ORCPT ); Mon, 26 Aug 2019 09:45:34 -0400 Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:52957 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1732070AbfHZNpZ (ORCPT ); Mon, 26 Aug 2019 09:45:25 -0400 Received: from Internal Mail-Server by MTLPINE1 (envelope-from vladbu@mellanox.com) with ESMTPS (AES256-SHA encrypted); 26 Aug 2019 16:45:15 +0300 Received: from reg-r-vrt-018-180.mtr.labs.mlnx. (reg-r-vrt-018-180.mtr.labs.mlnx [10.215.1.1]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id x7QDjEFP000366; Mon, 26 Aug 2019 16:45:14 +0300 From: Vlad Buslov To: netdev@vger.kernel.org Cc: jhs@mojatatu.com, xiyou.wangcong@gmail.com, jiri@resnulli.us, davem@davemloft.net, jakub.kicinski@netronome.com, pablo@netfilter.org, Vlad Buslov Subject: [PATCH net-next v3 01/10] net: sched: protect block offload-related fields with rw_semaphore Date: Mon, 26 Aug 2019 16:44:57 +0300 Message-Id: <20190826134506.9705-2-vladbu@mellanox.com> X-Mailer: git-send-email 2.21.0 In-Reply-To: <20190826134506.9705-1-vladbu@mellanox.com> References: <20190826134506.9705-1-vladbu@mellanox.com> MIME-Version: 1.0 Sender: netdev-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org In order to remove dependency on rtnl lock, extend tcf_block with 'cb_lock' rwsem and use it to protect flow_block->cb_list and related counters from concurrent modification. The lock is taken in read mode for read-only traversal of cb_list in tc_setup_cb_call() and write mode in all other cases. This approach ensures that: - cb_list is not changed concurrently while filters is being offloaded on block. - block->nooffloaddevcnt is checked while holding the lock in read mode, but is only changed by bind/unbind code when holding the cb_lock in write mode to prevent concurrent modification. Signed-off-by: Vlad Buslov Acked-by: Jiri Pirko --- Notes: Changes from V1 to V2: - Rename 'errout' label to 'err_unlock'. include/net/sch_generic.h | 2 ++ net/sched/cls_api.c | 45 +++++++++++++++++++++++++++++++-------- 2 files changed, 38 insertions(+), 9 deletions(-) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index d9f359af0b93..a3eaf5f9d28f 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -396,6 +397,7 @@ struct tcf_block { refcount_t refcnt; struct net *net; struct Qdisc *q; + struct rw_semaphore cb_lock; /* protects cb_list and offload counters */ struct flow_block flow_block; struct list_head owner_list; bool keep_dst; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index e0d8b456e9f5..959b7ca1ca02 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -568,9 +568,11 @@ static void tc_indr_block_ing_cmd(struct net_device *dev, bo.block = &block->flow_block; + down_write(&block->cb_lock); cb(dev, cb_priv, TC_SETUP_BLOCK, &bo); tcf_block_setup(block, &bo); + up_write(&block->cb_lock); } static struct tcf_block *tc_dev_ingress_block(struct net_device *dev) @@ -661,6 +663,7 @@ static int tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q, struct net_device *dev = q->dev_queue->dev; int err; + down_write(&block->cb_lock); if (!dev->netdev_ops->ndo_setup_tc) goto no_offload_dev_inc; @@ -669,24 +672,31 @@ static int tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q, */ if (!tc_can_offload(dev) && tcf_block_offload_in_use(block)) { NL_SET_ERR_MSG(extack, "Bind to offloaded block failed as dev has offload disabled"); - return -EOPNOTSUPP; + err = -EOPNOTSUPP; + goto err_unlock; } err = tcf_block_offload_cmd(block, dev, ei, FLOW_BLOCK_BIND, extack); if (err == -EOPNOTSUPP) goto no_offload_dev_inc; if (err) - return err; + goto err_unlock; tc_indr_block_call(block, dev, ei, FLOW_BLOCK_BIND, extack); + up_write(&block->cb_lock); return 0; no_offload_dev_inc: - if (tcf_block_offload_in_use(block)) - return -EOPNOTSUPP; + if (tcf_block_offload_in_use(block)) { + err = -EOPNOTSUPP; + goto err_unlock; + } + err = 0; block->nooffloaddevcnt++; tc_indr_block_call(block, dev, ei, FLOW_BLOCK_BIND, extack); - return 0; +err_unlock: + up_write(&block->cb_lock); + return err; } static void tcf_block_offload_unbind(struct tcf_block *block, struct Qdisc *q, @@ -695,6 +705,7 @@ static void tcf_block_offload_unbind(struct tcf_block *block, struct Qdisc *q, struct net_device *dev = q->dev_queue->dev; int err; + down_write(&block->cb_lock); tc_indr_block_call(block, dev, ei, FLOW_BLOCK_UNBIND, NULL); if (!dev->netdev_ops->ndo_setup_tc) @@ -702,10 +713,12 @@ static void tcf_block_offload_unbind(struct tcf_block *block, struct Qdisc *q, err = tcf_block_offload_cmd(block, dev, ei, FLOW_BLOCK_UNBIND, NULL); if (err == -EOPNOTSUPP) goto no_offload_dev_dec; + up_write(&block->cb_lock); return; no_offload_dev_dec: WARN_ON(block->nooffloaddevcnt-- == 0); + up_write(&block->cb_lock); } static int @@ -820,6 +833,7 @@ static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q, return ERR_PTR(-ENOMEM); } mutex_init(&block->lock); + init_rwsem(&block->cb_lock); flow_block_init(&block->flow_block); INIT_LIST_HEAD(&block->chain_list); INIT_LIST_HEAD(&block->owner_list); @@ -1355,6 +1369,8 @@ tcf_block_playback_offloads(struct tcf_block *block, flow_setup_cb_t *cb, struct tcf_proto *tp, *tp_prev; int err; + lockdep_assert_held(&block->cb_lock); + for (chain = __tcf_get_next_chain(block, NULL); chain; chain_prev = chain, @@ -1393,6 +1409,8 @@ static int tcf_block_bind(struct tcf_block *block, struct flow_block_cb *block_cb, *next; int err, i = 0; + lockdep_assert_held(&block->cb_lock); + list_for_each_entry(block_cb, &bo->cb_list, list) { err = tcf_block_playback_offloads(block, block_cb->cb, block_cb->cb_priv, true, @@ -1427,6 +1445,8 @@ static void tcf_block_unbind(struct tcf_block *block, { struct flow_block_cb *block_cb, *next; + lockdep_assert_held(&block->cb_lock); + list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) { tcf_block_playback_offloads(block, block_cb->cb, block_cb->cb_priv, false, @@ -2987,19 +3007,26 @@ int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type, int ok_count = 0; int err; + down_read(&block->cb_lock); /* Make sure all netdevs sharing this block are offload-capable. */ - if (block->nooffloaddevcnt && err_stop) - return -EOPNOTSUPP; + if (block->nooffloaddevcnt && err_stop) { + ok_count = -EOPNOTSUPP; + goto err_unlock; + } list_for_each_entry(block_cb, &block->flow_block.cb_list, list) { err = block_cb->cb(type, type_data, block_cb->cb_priv); if (err) { - if (err_stop) - return err; + if (err_stop) { + ok_count = err; + goto err_unlock; + } } else { ok_count++; } } +err_unlock: + up_read(&block->cb_lock); return ok_count; } EXPORT_SYMBOL(tc_setup_cb_call); From patchwork Mon Aug 26 13:44:58 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Vlad Buslov X-Patchwork-Id: 1153188 X-Patchwork-Delegate: davem@davemloft.net Return-Path: X-Original-To: patchwork-incoming-netdev@ozlabs.org Delivered-To: patchwork-incoming-netdev@ozlabs.org Authentication-Results: ozlabs.org; spf=none (mailfrom) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=netdev-owner@vger.kernel.org; receiver=) Authentication-Results: ozlabs.org; dmarc=fail (p=none dis=none) header.from=mellanox.com Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 46HCv41QlLz9sPK for ; Mon, 26 Aug 2019 23:45:36 +1000 (AEST) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1732130AbfHZNpa (ORCPT ); Mon, 26 Aug 2019 09:45:30 -0400 Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:52961 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1732072AbfHZNpZ (ORCPT ); Mon, 26 Aug 2019 09:45:25 -0400 Received: from Internal Mail-Server by MTLPINE1 (envelope-from vladbu@mellanox.com) with ESMTPS (AES256-SHA encrypted); 26 Aug 2019 16:45:15 +0300 Received: from reg-r-vrt-018-180.mtr.labs.mlnx. (reg-r-vrt-018-180.mtr.labs.mlnx [10.215.1.1]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id x7QDjEFQ000366; Mon, 26 Aug 2019 16:45:14 +0300 From: Vlad Buslov To: netdev@vger.kernel.org Cc: jhs@mojatatu.com, xiyou.wangcong@gmail.com, jiri@resnulli.us, davem@davemloft.net, jakub.kicinski@netronome.com, pablo@netfilter.org, Vlad Buslov , Jiri Pirko Subject: [PATCH net-next v3 02/10] net: sched: change tcf block offload counter type to atomic_t Date: Mon, 26 Aug 2019 16:44:58 +0300 Message-Id: <20190826134506.9705-3-vladbu@mellanox.com> X-Mailer: git-send-email 2.21.0 In-Reply-To: <20190826134506.9705-1-vladbu@mellanox.com> References: <20190826134506.9705-1-vladbu@mellanox.com> MIME-Version: 1.0 Sender: netdev-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org As a preparation for running proto ops functions without rtnl lock, change offload counter type to atomic. This is necessary to allow updating the counter by multiple concurrent users when offloading filters to hardware from unlocked classifiers. Signed-off-by: Vlad Buslov Acked-by: Jiri Pirko --- include/net/sch_generic.h | 7 ++++--- net/sched/cls_api.c | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index a3eaf5f9d28f..d778c502decd 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -401,7 +402,7 @@ struct tcf_block { struct flow_block flow_block; struct list_head owner_list; bool keep_dst; - unsigned int offloadcnt; /* Number of oddloaded filters */ + atomic_t offloadcnt; /* Number of oddloaded filters */ unsigned int nooffloaddevcnt; /* Number of devs unable to do offload */ struct { struct tcf_chain *chain; @@ -443,7 +444,7 @@ static inline void tcf_block_offload_inc(struct tcf_block *block, u32 *flags) if (*flags & TCA_CLS_FLAGS_IN_HW) return; *flags |= TCA_CLS_FLAGS_IN_HW; - block->offloadcnt++; + atomic_inc(&block->offloadcnt); } static inline void tcf_block_offload_dec(struct tcf_block *block, u32 *flags) @@ -451,7 +452,7 @@ static inline void tcf_block_offload_dec(struct tcf_block *block, u32 *flags) if (!(*flags & TCA_CLS_FLAGS_IN_HW)) return; *flags &= ~TCA_CLS_FLAGS_IN_HW; - block->offloadcnt--; + atomic_dec(&block->offloadcnt); } static inline void diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 959b7ca1ca02..f2c2f8159e35 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -629,7 +629,7 @@ static void tc_indr_block_call(struct tcf_block *block, static bool tcf_block_offload_in_use(struct tcf_block *block) { - return block->offloadcnt; + return atomic_read(&block->offloadcnt); } static int tcf_block_offload_cmd(struct tcf_block *block, From patchwork Mon Aug 26 13:44:59 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Vlad Buslov X-Patchwork-Id: 1153189 X-Patchwork-Delegate: davem@davemloft.net Return-Path: X-Original-To: patchwork-incoming-netdev@ozlabs.org Delivered-To: patchwork-incoming-netdev@ozlabs.org Authentication-Results: ozlabs.org; spf=none (mailfrom) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=netdev-owner@vger.kernel.org; receiver=) Authentication-Results: ozlabs.org; dmarc=fail (p=none dis=none) header.from=mellanox.com Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 46HCv652Ppz9sP7 for ; Mon, 26 Aug 2019 23:45:38 +1000 (AEST) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1732157AbfHZNph (ORCPT ); Mon, 26 Aug 2019 09:45:37 -0400 Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:52958 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1731819AbfHZNp0 (ORCPT ); Mon, 26 Aug 2019 09:45:26 -0400 Received: from Internal Mail-Server by MTLPINE1 (envelope-from vladbu@mellanox.com) with ESMTPS (AES256-SHA encrypted); 26 Aug 2019 16:45:15 +0300 Received: from reg-r-vrt-018-180.mtr.labs.mlnx. (reg-r-vrt-018-180.mtr.labs.mlnx [10.215.1.1]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id x7QDjEFR000366; Mon, 26 Aug 2019 16:45:15 +0300 From: Vlad Buslov To: netdev@vger.kernel.org Cc: jhs@mojatatu.com, xiyou.wangcong@gmail.com, jiri@resnulli.us, davem@davemloft.net, jakub.kicinski@netronome.com, pablo@netfilter.org, Vlad Buslov Subject: [PATCH net-next v3 03/10] net: sched: refactor block offloads counter usage Date: Mon, 26 Aug 2019 16:44:59 +0300 Message-Id: <20190826134506.9705-4-vladbu@mellanox.com> X-Mailer: git-send-email 2.21.0 In-Reply-To: <20190826134506.9705-1-vladbu@mellanox.com> References: <20190826134506.9705-1-vladbu@mellanox.com> MIME-Version: 1.0 Sender: netdev-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org Without rtnl lock protection filters can no longer safely manage block offloads counter themselves. Refactor cls API to protect block offloadcnt with tcf_block->cb_lock that is already used to protect driver callback list and nooffloaddevcnt counter. The counter can be modified by concurrent tasks by new functions that execute block callbacks (which is safe with previous patch that changed its type to atomic_t), however, block bind/unbind code that checks the counter value takes cb_lock in write mode to exclude any concurrent modifications. This approach prevents race conditions between bind/unbind and callback execution code but allows for concurrency for tc rule update path. Move block offload counter, filter in hardware counter and filter flags management from classifiers into cls hardware offloads API. Make functions tcf_block_offload_{inc|dec}() and tc_cls_offload_cnt_update() to be cls API private. Implement following new cls API to be used instead: tc_setup_cb_add() - non-destructive filter add. If filter that wasn't already in hardware is successfully offloaded, increment block offloads counter, set filter in hardware counter and flag. On failure, previously offloaded filter is considered to be intact and offloads counter is not decremented. tc_setup_cb_replace() - destructive filter replace. Release existing filter block offload counter and reset its in hardware counter and flag. Set new filter in hardware counter and flag. On failure, previously offloaded filter is considered to be destroyed and offload counter is decremented. tc_setup_cb_destroy() - filter destroy. Unconditionally decrement block offloads counter. tc_setup_cb_reoffload() - reoffload filter to single cb. Execute cb() and call tc_cls_offload_cnt_update() if cb() didn't return an error. Refactor all offload-capable classifiers to atomically offload filters to hardware, change block offload counter, and set filter in hardware counter and flag by means of the new cls API functions. Signed-off-by: Vlad Buslov Acked-by: Jiri Pirko --- Notes: Changes from V2 to V3: - Only return error from tc_setup_cb_reoffload() when adding new filter and when skip_sw flag is set. Changes from V1 to V2: - Remove redundant brackets around cnt. - Rename 'errout' label to 'err_unlock'. - Revert changing oldprog to cls_bpf.oldprog in conditional. - Add new helper tc_setup_cb_reoffload(). - Make tc_cls_offload_cnt_update() static and remove its export. - Change tc_setup_cb_*() helpers to only return zero or error code. include/net/pkt_cls.h | 17 +++- include/net/sch_generic.h | 31 ------- net/sched/cls_api.c | 176 +++++++++++++++++++++++++++++++++++--- net/sched/cls_bpf.c | 38 ++++---- net/sched/cls_flower.c | 38 +++----- net/sched/cls_matchall.c | 27 +++--- net/sched/cls_u32.c | 29 +++---- 7 files changed, 233 insertions(+), 123 deletions(-) diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 64999ffcb486..612232492f67 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -506,7 +506,22 @@ tcf_match_indev(struct sk_buff *skb, int ifindex) int tc_setup_flow_action(struct flow_action *flow_action, const struct tcf_exts *exts); int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type, - void *type_data, bool err_stop); + void *type_data, bool err_stop, bool rtnl_held); +int tc_setup_cb_add(struct tcf_block *block, struct tcf_proto *tp, + enum tc_setup_type type, void *type_data, bool err_stop, + u32 *flags, unsigned int *in_hw_count, bool rtnl_held); +int tc_setup_cb_replace(struct tcf_block *block, struct tcf_proto *tp, + enum tc_setup_type type, void *type_data, bool err_stop, + u32 *old_flags, unsigned int *old_in_hw_count, + u32 *new_flags, unsigned int *new_in_hw_count, + bool rtnl_held); +int tc_setup_cb_destroy(struct tcf_block *block, struct tcf_proto *tp, + enum tc_setup_type type, void *type_data, bool err_stop, + u32 *flags, unsigned int *in_hw_count, bool rtnl_held); +int tc_setup_cb_reoffload(struct tcf_block *block, struct tcf_proto *tp, + bool add, flow_setup_cb_t *cb, + enum tc_setup_type type, void *type_data, + void *cb_priv, u32 *flags, unsigned int *in_hw_count); unsigned int tcf_exts_num_actions(struct tcf_exts *exts); struct tc_cls_u32_knode { diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index d778c502decd..f90e3b2a3065 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -439,37 +439,6 @@ static inline bool lockdep_tcf_proto_is_locked(struct tcf_proto *tp) #define tcf_proto_dereference(p, tp) \ rcu_dereference_protected(p, lockdep_tcf_proto_is_locked(tp)) -static inline void tcf_block_offload_inc(struct tcf_block *block, u32 *flags) -{ - if (*flags & TCA_CLS_FLAGS_IN_HW) - return; - *flags |= TCA_CLS_FLAGS_IN_HW; - atomic_inc(&block->offloadcnt); -} - -static inline void tcf_block_offload_dec(struct tcf_block *block, u32 *flags) -{ - if (!(*flags & TCA_CLS_FLAGS_IN_HW)) - return; - *flags &= ~TCA_CLS_FLAGS_IN_HW; - atomic_dec(&block->offloadcnt); -} - -static inline void -tc_cls_offload_cnt_update(struct tcf_block *block, u32 *cnt, - u32 *flags, bool add) -{ - if (add) { - if (!*cnt) - tcf_block_offload_inc(block, flags); - (*cnt)++; - } else { - (*cnt)--; - if (!*cnt) - tcf_block_offload_dec(block, flags); - } -} - static inline void qdisc_cb_private_validate(const struct sk_buff *skb, int sz) { struct qdisc_skb_cb *qcb; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index f2c2f8159e35..6e612984e4a6 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -3000,37 +3000,185 @@ int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts) } EXPORT_SYMBOL(tcf_exts_dump_stats); -int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type, - void *type_data, bool err_stop) +static void tcf_block_offload_inc(struct tcf_block *block, u32 *flags) +{ + if (*flags & TCA_CLS_FLAGS_IN_HW) + return; + *flags |= TCA_CLS_FLAGS_IN_HW; + atomic_inc(&block->offloadcnt); +} + +static void tcf_block_offload_dec(struct tcf_block *block, u32 *flags) +{ + if (!(*flags & TCA_CLS_FLAGS_IN_HW)) + return; + *flags &= ~TCA_CLS_FLAGS_IN_HW; + atomic_dec(&block->offloadcnt); +} + +static void tc_cls_offload_cnt_update(struct tcf_block *block, + struct tcf_proto *tp, u32 *cnt, + u32 *flags, u32 diff, bool add) +{ + lockdep_assert_held(&block->cb_lock); + + spin_lock(&tp->lock); + if (add) { + if (!*cnt) + tcf_block_offload_inc(block, flags); + *cnt += diff; + } else { + *cnt -= diff; + if (!*cnt) + tcf_block_offload_dec(block, flags); + } + spin_unlock(&tp->lock); +} + +static void +tc_cls_offload_cnt_reset(struct tcf_block *block, struct tcf_proto *tp, + u32 *cnt, u32 *flags) +{ + lockdep_assert_held(&block->cb_lock); + + spin_lock(&tp->lock); + tcf_block_offload_dec(block, flags); + *cnt = 0; + spin_unlock(&tp->lock); +} + +static int +__tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type, + void *type_data, bool err_stop) { struct flow_block_cb *block_cb; int ok_count = 0; int err; - down_read(&block->cb_lock); - /* Make sure all netdevs sharing this block are offload-capable. */ - if (block->nooffloaddevcnt && err_stop) { - ok_count = -EOPNOTSUPP; - goto err_unlock; - } - list_for_each_entry(block_cb, &block->flow_block.cb_list, list) { err = block_cb->cb(type, type_data, block_cb->cb_priv); if (err) { - if (err_stop) { - ok_count = err; - goto err_unlock; - } + if (err_stop) + return err; } else { ok_count++; } } -err_unlock: + return ok_count; +} + +int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type, + void *type_data, bool err_stop, bool rtnl_held) +{ + int ok_count; + + down_read(&block->cb_lock); + ok_count = __tc_setup_cb_call(block, type, type_data, err_stop); up_read(&block->cb_lock); return ok_count; } EXPORT_SYMBOL(tc_setup_cb_call); +/* Non-destructive filter add. If filter that wasn't already in hardware is + * successfully offloaded, increment block offloads counter. On failure, + * previously offloaded filter is considered to be intact and offloads counter + * is not decremented. + */ + +int tc_setup_cb_add(struct tcf_block *block, struct tcf_proto *tp, + enum tc_setup_type type, void *type_data, bool err_stop, + u32 *flags, unsigned int *in_hw_count, bool rtnl_held) +{ + int ok_count; + + down_read(&block->cb_lock); + /* Make sure all netdevs sharing this block are offload-capable. */ + if (block->nooffloaddevcnt && err_stop) { + ok_count = -EOPNOTSUPP; + goto err_unlock; + } + + ok_count = __tc_setup_cb_call(block, type, type_data, err_stop); + if (ok_count > 0) + tc_cls_offload_cnt_update(block, tp, in_hw_count, flags, + ok_count, true); +err_unlock: + up_read(&block->cb_lock); + return ok_count < 0 ? ok_count : 0; +} +EXPORT_SYMBOL(tc_setup_cb_add); + +/* Destructive filter replace. If filter that wasn't already in hardware is + * successfully offloaded, increment block offload counter. On failure, + * previously offloaded filter is considered to be destroyed and offload counter + * is decremented. + */ + +int tc_setup_cb_replace(struct tcf_block *block, struct tcf_proto *tp, + enum tc_setup_type type, void *type_data, bool err_stop, + u32 *old_flags, unsigned int *old_in_hw_count, + u32 *new_flags, unsigned int *new_in_hw_count, + bool rtnl_held) +{ + int ok_count; + + down_read(&block->cb_lock); + /* Make sure all netdevs sharing this block are offload-capable. */ + if (block->nooffloaddevcnt && err_stop) { + ok_count = -EOPNOTSUPP; + goto err_unlock; + } + + tc_cls_offload_cnt_reset(block, tp, old_in_hw_count, old_flags); + + ok_count = __tc_setup_cb_call(block, type, type_data, err_stop); + if (ok_count > 0) + tc_cls_offload_cnt_update(block, tp, new_in_hw_count, new_flags, + ok_count, true); +err_unlock: + up_read(&block->cb_lock); + return ok_count < 0 ? ok_count : 0; +} +EXPORT_SYMBOL(tc_setup_cb_replace); + +/* Destroy filter and decrement block offload counter, if filter was previously + * offloaded. + */ + +int tc_setup_cb_destroy(struct tcf_block *block, struct tcf_proto *tp, + enum tc_setup_type type, void *type_data, bool err_stop, + u32 *flags, unsigned int *in_hw_count, bool rtnl_held) +{ + int ok_count; + + down_read(&block->cb_lock); + ok_count = __tc_setup_cb_call(block, type, type_data, err_stop); + + tc_cls_offload_cnt_reset(block, tp, in_hw_count, flags); + up_read(&block->cb_lock); + return ok_count < 0 ? ok_count : 0; +} +EXPORT_SYMBOL(tc_setup_cb_destroy); + +int tc_setup_cb_reoffload(struct tcf_block *block, struct tcf_proto *tp, + bool add, flow_setup_cb_t *cb, + enum tc_setup_type type, void *type_data, + void *cb_priv, u32 *flags, unsigned int *in_hw_count) +{ + int err = cb(type, type_data, cb_priv); + + if (err) { + if (add && tc_skip_sw(*flags)) + return err; + } else { + tc_cls_offload_cnt_update(block, tp, in_hw_count, flags, 1, + add); + } + + return 0; +} +EXPORT_SYMBOL(tc_setup_cb_reoffload); + int tc_setup_flow_action(struct flow_action *flow_action, const struct tcf_exts *exts) { diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 3f7a9c02b70c..bf10bdaf5012 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -163,17 +163,19 @@ static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog, cls_bpf.exts_integrated = obj->exts_integrated; if (oldprog) - tcf_block_offload_dec(block, &oldprog->gen_flags); + err = tc_setup_cb_replace(block, tp, TC_SETUP_CLSBPF, &cls_bpf, + skip_sw, &oldprog->gen_flags, + &oldprog->in_hw_count, + &prog->gen_flags, &prog->in_hw_count, + true); + else + err = tc_setup_cb_add(block, tp, TC_SETUP_CLSBPF, &cls_bpf, + skip_sw, &prog->gen_flags, + &prog->in_hw_count, true); - err = tc_setup_cb_call(block, TC_SETUP_CLSBPF, &cls_bpf, skip_sw); - if (prog) { - if (err < 0) { - cls_bpf_offload_cmd(tp, oldprog, prog, extack); - return err; - } else if (err > 0) { - prog->in_hw_count = err; - tcf_block_offload_inc(block, &prog->gen_flags); - } + if (prog && err) { + cls_bpf_offload_cmd(tp, oldprog, prog, extack); + return err; } if (prog && skip_sw && !(prog->gen_flags & TCA_CLS_FLAGS_IN_HW)) @@ -230,7 +232,7 @@ static void cls_bpf_offload_update_stats(struct tcf_proto *tp, cls_bpf.name = prog->bpf_name; cls_bpf.exts_integrated = prog->exts_integrated; - tc_setup_cb_call(block, TC_SETUP_CLSBPF, &cls_bpf, false); + tc_setup_cb_call(block, TC_SETUP_CLSBPF, &cls_bpf, false, true); } static int cls_bpf_init(struct tcf_proto *tp) @@ -673,15 +675,11 @@ static int cls_bpf_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb cls_bpf.name = prog->bpf_name; cls_bpf.exts_integrated = prog->exts_integrated; - err = cb(TC_SETUP_CLSBPF, &cls_bpf, cb_priv); - if (err) { - if (add && tc_skip_sw(prog->gen_flags)) - return err; - continue; - } - - tc_cls_offload_cnt_update(block, &prog->in_hw_count, - &prog->gen_flags, add); + err = tc_setup_cb_reoffload(block, tp, add, cb, TC_SETUP_CLSBPF, + &cls_bpf, cb_priv, &prog->gen_flags, + &prog->in_hw_count); + if (err) + return err; } return 0; diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 054123742e32..cb816bbbd376 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -419,10 +419,10 @@ static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f, cls_flower.command = FLOW_CLS_DESTROY; cls_flower.cookie = (unsigned long) f; - tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false); + tc_setup_cb_destroy(block, tp, TC_SETUP_CLSFLOWER, &cls_flower, false, + &f->flags, &f->in_hw_count, true); spin_lock(&tp->lock); list_del_init(&f->hw_list); - tcf_block_offload_dec(block, &f->flags); spin_unlock(&tp->lock); if (!rtnl_held) @@ -466,18 +466,13 @@ static int fl_hw_replace_filter(struct tcf_proto *tp, goto errout; } - err = tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, skip_sw); + err = tc_setup_cb_add(block, tp, TC_SETUP_CLSFLOWER, &cls_flower, + skip_sw, &f->flags, &f->in_hw_count, true); kfree(cls_flower.rule); - if (err < 0) { + if (err) { fl_hw_destroy_filter(tp, f, true, NULL); goto errout; - } else if (err > 0) { - f->in_hw_count = err; - err = 0; - spin_lock(&tp->lock); - tcf_block_offload_inc(block, &f->flags); - spin_unlock(&tp->lock); } if (skip_sw && !(f->flags & TCA_CLS_FLAGS_IN_HW)) { @@ -509,7 +504,7 @@ static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f, cls_flower.cookie = (unsigned long) f; cls_flower.classid = f->res.classid; - tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false); + tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false, true); tcf_exts_stats_update(&f->exts, cls_flower.stats.bytes, cls_flower.stats.pkts, @@ -1844,21 +1839,16 @@ static int fl_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb, cls_flower.classid = f->res.classid; - err = cb(TC_SETUP_CLSFLOWER, &cls_flower, cb_priv); + err = tc_setup_cb_reoffload(block, tp, add, cb, + TC_SETUP_CLSFLOWER, &cls_flower, + cb_priv, &f->flags, + &f->in_hw_count); kfree(cls_flower.rule); if (err) { - if (add && tc_skip_sw(f->flags)) { - __fl_put(f); - return err; - } - goto next_flow; + __fl_put(f); + return err; } - - spin_lock(&tp->lock); - tc_cls_offload_cnt_update(block, &f->in_hw_count, &f->flags, - add); - spin_unlock(&tp->lock); next_flow: __fl_put(f); } @@ -1886,7 +1876,7 @@ static int fl_hw_create_tmplt(struct tcf_chain *chain, /* We don't care if driver (any of them) fails to handle this * call. It serves just as a hint for it. */ - tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false); + tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false, true); kfree(cls_flower.rule); return 0; @@ -1902,7 +1892,7 @@ static void fl_hw_destroy_tmplt(struct tcf_chain *chain, cls_flower.command = FLOW_CLS_TMPLT_DESTROY; cls_flower.cookie = (unsigned long) tmplt; - tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false); + tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false, true); } static void *fl_tmplt_create(struct net *net, struct tcf_chain *chain, diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c index 455ea2793f9b..911d1ea28bb2 100644 --- a/net/sched/cls_matchall.c +++ b/net/sched/cls_matchall.c @@ -75,8 +75,8 @@ static void mall_destroy_hw_filter(struct tcf_proto *tp, cls_mall.command = TC_CLSMATCHALL_DESTROY; cls_mall.cookie = cookie; - tc_setup_cb_call(block, TC_SETUP_CLSMATCHALL, &cls_mall, false); - tcf_block_offload_dec(block, &head->flags); + tc_setup_cb_destroy(block, tp, TC_SETUP_CLSMATCHALL, &cls_mall, false, + &head->flags, &head->in_hw_count, true); } static int mall_replace_hw_filter(struct tcf_proto *tp, @@ -109,15 +109,13 @@ static int mall_replace_hw_filter(struct tcf_proto *tp, return err; } - err = tc_setup_cb_call(block, TC_SETUP_CLSMATCHALL, &cls_mall, skip_sw); + err = tc_setup_cb_add(block, tp, TC_SETUP_CLSMATCHALL, &cls_mall, + skip_sw, &head->flags, &head->in_hw_count, true); kfree(cls_mall.rule); - if (err < 0) { + if (err) { mall_destroy_hw_filter(tp, head, cookie, NULL); return err; - } else if (err > 0) { - head->in_hw_count = err; - tcf_block_offload_inc(block, &head->flags); } if (skip_sw && !(head->flags & TCA_CLS_FLAGS_IN_HW)) @@ -312,16 +310,13 @@ static int mall_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb, return 0; } - err = cb(TC_SETUP_CLSMATCHALL, &cls_mall, cb_priv); + err = tc_setup_cb_reoffload(block, tp, add, cb, TC_SETUP_CLSMATCHALL, + &cls_mall, cb_priv, &head->flags, + &head->in_hw_count); kfree(cls_mall.rule); - if (err) { - if (add && tc_skip_sw(head->flags)) - return err; - return 0; - } - - tc_cls_offload_cnt_update(block, &head->in_hw_count, &head->flags, add); + if (err) + return err; return 0; } @@ -337,7 +332,7 @@ static void mall_stats_hw_filter(struct tcf_proto *tp, cls_mall.command = TC_CLSMATCHALL_STATS; cls_mall.cookie = cookie; - tc_setup_cb_call(block, TC_SETUP_CLSMATCHALL, &cls_mall, false); + tc_setup_cb_call(block, TC_SETUP_CLSMATCHALL, &cls_mall, false, true); tcf_exts_stats_update(&head->exts, cls_mall.stats.bytes, cls_mall.stats.pkts, cls_mall.stats.lastused); diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 8614088edd1b..a0e6fac613de 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -480,7 +480,7 @@ static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h, cls_u32.hnode.handle = h->handle; cls_u32.hnode.prio = h->prio; - tc_setup_cb_call(block, TC_SETUP_CLSU32, &cls_u32, false); + tc_setup_cb_call(block, TC_SETUP_CLSU32, &cls_u32, false, true); } static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h, @@ -498,7 +498,7 @@ static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h, cls_u32.hnode.handle = h->handle; cls_u32.hnode.prio = h->prio; - err = tc_setup_cb_call(block, TC_SETUP_CLSU32, &cls_u32, skip_sw); + err = tc_setup_cb_call(block, TC_SETUP_CLSU32, &cls_u32, skip_sw, true); if (err < 0) { u32_clear_hw_hnode(tp, h, NULL); return err; @@ -522,8 +522,8 @@ static void u32_remove_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n, cls_u32.command = TC_CLSU32_DELETE_KNODE; cls_u32.knode.handle = n->handle; - tc_setup_cb_call(block, TC_SETUP_CLSU32, &cls_u32, false); - tcf_block_offload_dec(block, &n->flags); + tc_setup_cb_destroy(block, tp, TC_SETUP_CLSU32, &cls_u32, false, + &n->flags, &n->in_hw_count, true); } static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n, @@ -552,13 +552,11 @@ static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n, if (n->ht_down) cls_u32.knode.link_handle = ht->handle; - err = tc_setup_cb_call(block, TC_SETUP_CLSU32, &cls_u32, skip_sw); - if (err < 0) { + err = tc_setup_cb_add(block, tp, TC_SETUP_CLSU32, &cls_u32, skip_sw, + &n->flags, &n->in_hw_count, true); + if (err) { u32_remove_hw_knode(tp, n, NULL); return err; - } else if (err > 0) { - n->in_hw_count = err; - tcf_block_offload_inc(block, &n->flags); } if (skip_sw && !(n->flags & TCA_CLS_FLAGS_IN_HW)) @@ -1201,14 +1199,11 @@ static int u32_reoffload_knode(struct tcf_proto *tp, struct tc_u_knode *n, cls_u32.knode.link_handle = ht->handle; } - err = cb(TC_SETUP_CLSU32, &cls_u32, cb_priv); - if (err) { - if (add && tc_skip_sw(n->flags)) - return err; - return 0; - } - - tc_cls_offload_cnt_update(block, &n->in_hw_count, &n->flags, add); + err = tc_setup_cb_reoffload(block, tp, add, cb, TC_SETUP_CLSU32, + &cls_u32, cb_priv, &n->flags, + &n->in_hw_count); + if (err) + return err; return 0; } From patchwork Mon Aug 26 13:45:00 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Vlad Buslov X-Patchwork-Id: 1153185 X-Patchwork-Delegate: davem@davemloft.net Return-Path: X-Original-To: patchwork-incoming-netdev@ozlabs.org Delivered-To: patchwork-incoming-netdev@ozlabs.org Authentication-Results: ozlabs.org; spf=none (mailfrom) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=netdev-owner@vger.kernel.org; receiver=) Authentication-Results: ozlabs.org; dmarc=fail (p=none dis=none) header.from=mellanox.com Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 46HCv104z4z9sPP for ; Mon, 26 Aug 2019 23:45:33 +1000 (AEST) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1732138AbfHZNpb (ORCPT ); Mon, 26 Aug 2019 09:45:31 -0400 Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:52966 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1732060AbfHZNpZ (ORCPT ); Mon, 26 Aug 2019 09:45:25 -0400 Received: from Internal Mail-Server by MTLPINE1 (envelope-from vladbu@mellanox.com) with ESMTPS (AES256-SHA encrypted); 26 Aug 2019 16:45:15 +0300 Received: from reg-r-vrt-018-180.mtr.labs.mlnx. (reg-r-vrt-018-180.mtr.labs.mlnx [10.215.1.1]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id x7QDjEFS000366; Mon, 26 Aug 2019 16:45:15 +0300 From: Vlad Buslov To: netdev@vger.kernel.org Cc: jhs@mojatatu.com, xiyou.wangcong@gmail.com, jiri@resnulli.us, davem@davemloft.net, jakub.kicinski@netronome.com, pablo@netfilter.org, Vlad Buslov Subject: [PATCH net-next v3 04/10] net: sched: notify classifier on successful offload add/delete Date: Mon, 26 Aug 2019 16:45:00 +0300 Message-Id: <20190826134506.9705-5-vladbu@mellanox.com> X-Mailer: git-send-email 2.21.0 In-Reply-To: <20190826134506.9705-1-vladbu@mellanox.com> References: <20190826134506.9705-1-vladbu@mellanox.com> MIME-Version: 1.0 Sender: netdev-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org To remove dependency on rtnl lock, extend classifier ops with new ops->hw_add() and ops->hw_del() callbacks. Call them from cls API while holding cb_lock every time filter if successfully added to or deleted from hardware. Implement the new API in flower classifier. Use it to manage hw_filters list under cb_lock protection, instead of relying on rtnl lock to synchronize with concurrent fl_reoffload() call. Signed-off-by: Vlad Buslov Acked-by: Jiri Pirko --- Notes: Changes from V1 to V2: - Keep success flow unindented in tc_setup_cb_{add|replace}(). include/net/sch_generic.h | 4 ++++ net/sched/cls_api.c | 19 +++++++++++++++++-- net/sched/cls_flower.c | 33 ++++++++++++++++++++++++++------- 3 files changed, 47 insertions(+), 9 deletions(-) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index f90e3b2a3065..c4fbbaff30a2 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -312,6 +312,10 @@ struct tcf_proto_ops { int (*reoffload)(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb, void *cb_priv, struct netlink_ext_ack *extack); + void (*hw_add)(struct tcf_proto *tp, + void *type_data); + void (*hw_del)(struct tcf_proto *tp, + void *type_data); void (*bind_class)(void *, u32, unsigned long); void * (*tmplt_create)(struct net *net, struct tcf_chain *chain, diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 6e612984e4a6..8b807e75fae2 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -3099,6 +3099,11 @@ int tc_setup_cb_add(struct tcf_block *block, struct tcf_proto *tp, } ok_count = __tc_setup_cb_call(block, type, type_data, err_stop); + if (ok_count < 0) + goto err_unlock; + + if (tp->ops->hw_add) + tp->ops->hw_add(tp, type_data); if (ok_count > 0) tc_cls_offload_cnt_update(block, tp, in_hw_count, flags, ok_count, true); @@ -3130,11 +3135,18 @@ int tc_setup_cb_replace(struct tcf_block *block, struct tcf_proto *tp, } tc_cls_offload_cnt_reset(block, tp, old_in_hw_count, old_flags); + if (tp->ops->hw_del) + tp->ops->hw_del(tp, type_data); ok_count = __tc_setup_cb_call(block, type, type_data, err_stop); + if (ok_count < 0) + goto err_unlock; + + if (tp->ops->hw_add) + tp->ops->hw_add(tp, type_data); if (ok_count > 0) - tc_cls_offload_cnt_update(block, tp, new_in_hw_count, new_flags, - ok_count, true); + tc_cls_offload_cnt_update(block, tp, new_in_hw_count, + new_flags, ok_count, true); err_unlock: up_read(&block->cb_lock); return ok_count < 0 ? ok_count : 0; @@ -3155,6 +3167,9 @@ int tc_setup_cb_destroy(struct tcf_block *block, struct tcf_proto *tp, ok_count = __tc_setup_cb_call(block, type, type_data, err_stop); tc_cls_offload_cnt_reset(block, tp, in_hw_count, flags); + if (tp->ops->hw_del) + tp->ops->hw_del(tp, type_data); + up_read(&block->cb_lock); return ok_count < 0 ? ok_count : 0; } diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index cb816bbbd376..5cb694469b51 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -421,9 +421,6 @@ static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f, tc_setup_cb_destroy(block, tp, TC_SETUP_CLSFLOWER, &cls_flower, false, &f->flags, &f->in_hw_count, true); - spin_lock(&tp->lock); - list_del_init(&f->hw_list); - spin_unlock(&tp->lock); if (!rtnl_held) rtnl_unlock(); @@ -433,7 +430,6 @@ static int fl_hw_replace_filter(struct tcf_proto *tp, struct cls_fl_filter *f, bool rtnl_held, struct netlink_ext_ack *extack) { - struct cls_fl_head *head = fl_head_dereference(tp); struct tcf_block *block = tp->chain->block; struct flow_cls_offload cls_flower = {}; bool skip_sw = tc_skip_sw(f->flags); @@ -480,9 +476,6 @@ static int fl_hw_replace_filter(struct tcf_proto *tp, goto errout; } - spin_lock(&tp->lock); - list_add(&f->hw_list, &head->hw_filters); - spin_unlock(&tp->lock); errout: if (!rtnl_held) rtnl_unlock(); @@ -1856,6 +1849,30 @@ static int fl_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb, return 0; } +static void fl_hw_add(struct tcf_proto *tp, void *type_data) +{ + struct flow_cls_offload *cls_flower = type_data; + struct cls_fl_filter *f = + (struct cls_fl_filter *) cls_flower->cookie; + struct cls_fl_head *head = fl_head_dereference(tp); + + spin_lock(&tp->lock); + list_add(&f->hw_list, &head->hw_filters); + spin_unlock(&tp->lock); +} + +static void fl_hw_del(struct tcf_proto *tp, void *type_data) +{ + struct flow_cls_offload *cls_flower = type_data; + struct cls_fl_filter *f = + (struct cls_fl_filter *) cls_flower->cookie; + + spin_lock(&tp->lock); + if (!list_empty(&f->hw_list)) + list_del_init(&f->hw_list); + spin_unlock(&tp->lock); +} + static int fl_hw_create_tmplt(struct tcf_chain *chain, struct fl_flow_tmplt *tmplt) { @@ -2516,6 +2533,8 @@ static struct tcf_proto_ops cls_fl_ops __read_mostly = { .delete = fl_delete, .walk = fl_walk, .reoffload = fl_reoffload, + .hw_add = fl_hw_add, + .hw_del = fl_hw_del, .dump = fl_dump, .bind_class = fl_bind_class, .tmplt_create = fl_tmplt_create, From patchwork Mon Aug 26 13:45:01 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Vlad Buslov X-Patchwork-Id: 1153182 X-Patchwork-Delegate: davem@davemloft.net Return-Path: X-Original-To: patchwork-incoming-netdev@ozlabs.org Delivered-To: patchwork-incoming-netdev@ozlabs.org Authentication-Results: ozlabs.org; spf=none (mailfrom) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=netdev-owner@vger.kernel.org; receiver=) Authentication-Results: ozlabs.org; dmarc=fail (p=none dis=none) header.from=mellanox.com Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 46HCtv1KNHz9sP7 for ; Mon, 26 Aug 2019 23:45:27 +1000 (AEST) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1732092AbfHZNpZ (ORCPT ); Mon, 26 Aug 2019 09:45:25 -0400 Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:52988 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1732068AbfHZNpZ (ORCPT ); Mon, 26 Aug 2019 09:45:25 -0400 Received: from Internal Mail-Server by MTLPINE1 (envelope-from vladbu@mellanox.com) with ESMTPS (AES256-SHA encrypted); 26 Aug 2019 16:45:15 +0300 Received: from reg-r-vrt-018-180.mtr.labs.mlnx. (reg-r-vrt-018-180.mtr.labs.mlnx [10.215.1.1]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id x7QDjEFT000366; Mon, 26 Aug 2019 16:45:15 +0300 From: Vlad Buslov To: netdev@vger.kernel.org Cc: jhs@mojatatu.com, xiyou.wangcong@gmail.com, jiri@resnulli.us, davem@davemloft.net, jakub.kicinski@netronome.com, pablo@netfilter.org, Vlad Buslov , Jiri Pirko Subject: [PATCH net-next v3 05/10] net: sched: add API for registering unlocked offload block callbacks Date: Mon, 26 Aug 2019 16:45:01 +0300 Message-Id: <20190826134506.9705-6-vladbu@mellanox.com> X-Mailer: git-send-email 2.21.0 In-Reply-To: <20190826134506.9705-1-vladbu@mellanox.com> References: <20190826134506.9705-1-vladbu@mellanox.com> MIME-Version: 1.0 Sender: netdev-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org Extend struct flow_block_offload with "unlocked_driver_cb" flag to allow registering and unregistering block hardware offload callbacks that do not require caller to hold rtnl lock. Extend tcf_block with additional lockeddevcnt counter that is incremented for each non-unlocked driver callback attached to device. This counter is necessary to conditionally obtain rtnl lock before calling hardware callbacks in following patches. Register mlx5 tc block offload callbacks as "unlocked". Signed-off-by: Vlad Buslov Acked-by: Jiri Pirko --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 2 ++ drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 3 +++ include/net/flow_offload.h | 1 + include/net/sch_generic.h | 1 + net/sched/cls_api.c | 6 ++++++ 5 files changed, 13 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index fa4bf2d4bcd4..8592b98d0e70 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3470,10 +3470,12 @@ static int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type, void *type_data) { struct mlx5e_priv *priv = netdev_priv(dev); + struct flow_block_offload *f = type_data; switch (type) { #ifdef CONFIG_MLX5_ESWITCH case TC_SETUP_BLOCK: + f->unlocked_driver_cb = true; return flow_block_cb_setup_simple(type_data, &mlx5e_block_cb_list, mlx5e_setup_tc_block_cb, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 3c0d36b2b91c..e7ac6233037d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -763,6 +763,7 @@ mlx5e_rep_indr_setup_tc_block(struct net_device *netdev, if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS) return -EOPNOTSUPP; + f->unlocked_driver_cb = true; f->driver_block_list = &mlx5e_block_cb_list; switch (f->command) { @@ -1245,9 +1246,11 @@ static int mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type, void *type_data) { struct mlx5e_priv *priv = netdev_priv(dev); + struct flow_block_offload *f = type_data; switch (type) { case TC_SETUP_BLOCK: + f->unlocked_driver_cb = true; return flow_block_cb_setup_simple(type_data, &mlx5e_rep_block_cb_list, mlx5e_rep_setup_tc_cb, diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index 757fa84de654..fc881875f856 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -284,6 +284,7 @@ struct flow_block_offload { enum flow_block_command command; enum flow_block_binder_type binder_type; bool block_shared; + bool unlocked_driver_cb; struct net *net; struct flow_block *block; struct list_head cb_list; diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index c4fbbaff30a2..43f5b7ed02bd 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -408,6 +408,7 @@ struct tcf_block { bool keep_dst; atomic_t offloadcnt; /* Number of oddloaded filters */ unsigned int nooffloaddevcnt; /* Number of devs unable to do offload */ + unsigned int lockeddevcnt; /* Number of devs that require rtnl lock. */ struct { struct tcf_chain *chain; struct list_head filter_chain_list; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 8b807e75fae2..1a39779bdbad 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -1418,6 +1418,8 @@ static int tcf_block_bind(struct tcf_block *block, bo->extack); if (err) goto err_unroll; + if (!bo->unlocked_driver_cb) + block->lockeddevcnt++; i++; } @@ -1433,6 +1435,8 @@ static int tcf_block_bind(struct tcf_block *block, block_cb->cb_priv, false, tcf_block_offload_in_use(block), NULL); + if (!bo->unlocked_driver_cb) + block->lockeddevcnt--; } flow_block_cb_free(block_cb); } @@ -1454,6 +1458,8 @@ static void tcf_block_unbind(struct tcf_block *block, NULL); list_del(&block_cb->list); flow_block_cb_free(block_cb); + if (!bo->unlocked_driver_cb) + block->lockeddevcnt--; } } From patchwork Mon Aug 26 13:45:02 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Vlad Buslov X-Patchwork-Id: 1153183 X-Patchwork-Delegate: davem@davemloft.net Return-Path: X-Original-To: patchwork-incoming-netdev@ozlabs.org Delivered-To: patchwork-incoming-netdev@ozlabs.org Authentication-Results: ozlabs.org; spf=none (mailfrom) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=netdev-owner@vger.kernel.org; receiver=) Authentication-Results: ozlabs.org; dmarc=fail (p=none dis=none) header.from=mellanox.com Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 46HCty3CVYz9sP9 for ; Mon, 26 Aug 2019 23:45:28 +1000 (AEST) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1732118AbfHZNp0 (ORCPT ); Mon, 26 Aug 2019 09:45:26 -0400 Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:52960 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1732062AbfHZNpZ (ORCPT ); Mon, 26 Aug 2019 09:45:25 -0400 Received: from Internal Mail-Server by MTLPINE1 (envelope-from vladbu@mellanox.com) with ESMTPS (AES256-SHA encrypted); 26 Aug 2019 16:45:15 +0300 Received: from reg-r-vrt-018-180.mtr.labs.mlnx. (reg-r-vrt-018-180.mtr.labs.mlnx [10.215.1.1]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id x7QDjEFU000366; Mon, 26 Aug 2019 16:45:15 +0300 From: Vlad Buslov To: netdev@vger.kernel.org Cc: jhs@mojatatu.com, xiyou.wangcong@gmail.com, jiri@resnulli.us, davem@davemloft.net, jakub.kicinski@netronome.com, pablo@netfilter.org, Vlad Buslov Subject: [PATCH net-next v3 06/10] net: sched: conditionally obtain rtnl lock in cls hw offloads API Date: Mon, 26 Aug 2019 16:45:02 +0300 Message-Id: <20190826134506.9705-7-vladbu@mellanox.com> X-Mailer: git-send-email 2.21.0 In-Reply-To: <20190826134506.9705-1-vladbu@mellanox.com> References: <20190826134506.9705-1-vladbu@mellanox.com> MIME-Version: 1.0 Sender: netdev-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org In order to remove dependency on rtnl lock from offloads code of classifiers, take rtnl lock conditionally before executing driver callbacks. Only obtain rtnl lock if block is bound to devices that require it. Block bind/unbind code is rtnl-locked and obtains block->cb_lock while holding rtnl lock. Obtain locks in same order in tc_setup_cb_*() functions to prevent deadlock. Signed-off-by: Vlad Buslov Acked-by: Jiri Pirko --- Notes: Changes from V2 to V3: - Don't speculatively take rtnl lock when caller already has it. Changes from V1 to V2: - Speculatively read block->lockeddevcnt in tc_setup_cb_*() to obtain rtnl mutex without retry when block is bound to locked device. net/sched/cls_api.c | 65 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 1a39779bdbad..3c103cf9fd0d 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -3076,11 +3076,28 @@ __tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type, int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type, void *type_data, bool err_stop, bool rtnl_held) { + bool take_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held; int ok_count; +retry: + if (take_rtnl) + rtnl_lock(); down_read(&block->cb_lock); + /* Need to obtain rtnl lock if block is bound to devs that require it. + * In block bind code cb_lock is obtained while holding rtnl, so we must + * obtain the locks in same order here. + */ + if (!rtnl_held && !take_rtnl && block->lockeddevcnt) { + up_read(&block->cb_lock); + take_rtnl = true; + goto retry; + } + ok_count = __tc_setup_cb_call(block, type, type_data, err_stop); + up_read(&block->cb_lock); + if (take_rtnl) + rtnl_unlock(); return ok_count; } EXPORT_SYMBOL(tc_setup_cb_call); @@ -3095,9 +3112,23 @@ int tc_setup_cb_add(struct tcf_block *block, struct tcf_proto *tp, enum tc_setup_type type, void *type_data, bool err_stop, u32 *flags, unsigned int *in_hw_count, bool rtnl_held) { + bool take_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held; int ok_count; +retry: + if (take_rtnl) + rtnl_lock(); down_read(&block->cb_lock); + /* Need to obtain rtnl lock if block is bound to devs that require it. + * In block bind code cb_lock is obtained while holding rtnl, so we must + * obtain the locks in same order here. + */ + if (!rtnl_held && !take_rtnl && block->lockeddevcnt) { + up_read(&block->cb_lock); + take_rtnl = true; + goto retry; + } + /* Make sure all netdevs sharing this block are offload-capable. */ if (block->nooffloaddevcnt && err_stop) { ok_count = -EOPNOTSUPP; @@ -3115,6 +3146,8 @@ int tc_setup_cb_add(struct tcf_block *block, struct tcf_proto *tp, ok_count, true); err_unlock: up_read(&block->cb_lock); + if (take_rtnl) + rtnl_unlock(); return ok_count < 0 ? ok_count : 0; } EXPORT_SYMBOL(tc_setup_cb_add); @@ -3131,9 +3164,23 @@ int tc_setup_cb_replace(struct tcf_block *block, struct tcf_proto *tp, u32 *new_flags, unsigned int *new_in_hw_count, bool rtnl_held) { + bool take_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held; int ok_count; +retry: + if (take_rtnl) + rtnl_lock(); down_read(&block->cb_lock); + /* Need to obtain rtnl lock if block is bound to devs that require it. + * In block bind code cb_lock is obtained while holding rtnl, so we must + * obtain the locks in same order here. + */ + if (!rtnl_held && !take_rtnl && block->lockeddevcnt) { + up_read(&block->cb_lock); + take_rtnl = true; + goto retry; + } + /* Make sure all netdevs sharing this block are offload-capable. */ if (block->nooffloaddevcnt && err_stop) { ok_count = -EOPNOTSUPP; @@ -3155,6 +3202,8 @@ int tc_setup_cb_replace(struct tcf_block *block, struct tcf_proto *tp, new_flags, ok_count, true); err_unlock: up_read(&block->cb_lock); + if (take_rtnl) + rtnl_unlock(); return ok_count < 0 ? ok_count : 0; } EXPORT_SYMBOL(tc_setup_cb_replace); @@ -3167,9 +3216,23 @@ int tc_setup_cb_destroy(struct tcf_block *block, struct tcf_proto *tp, enum tc_setup_type type, void *type_data, bool err_stop, u32 *flags, unsigned int *in_hw_count, bool rtnl_held) { + bool take_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held; int ok_count; +retry: + if (take_rtnl) + rtnl_lock(); down_read(&block->cb_lock); + /* Need to obtain rtnl lock if block is bound to devs that require it. + * In block bind code cb_lock is obtained while holding rtnl, so we must + * obtain the locks in same order here. + */ + if (!rtnl_held && !take_rtnl && block->lockeddevcnt) { + up_read(&block->cb_lock); + take_rtnl = true; + goto retry; + } + ok_count = __tc_setup_cb_call(block, type, type_data, err_stop); tc_cls_offload_cnt_reset(block, tp, in_hw_count, flags); @@ -3177,6 +3240,8 @@ int tc_setup_cb_destroy(struct tcf_block *block, struct tcf_proto *tp, tp->ops->hw_del(tp, type_data); up_read(&block->cb_lock); + if (take_rtnl) + rtnl_unlock(); return ok_count < 0 ? ok_count : 0; } EXPORT_SYMBOL(tc_setup_cb_destroy); From patchwork Mon Aug 26 13:45:03 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Vlad Buslov X-Patchwork-Id: 1153190 X-Patchwork-Delegate: davem@davemloft.net Return-Path: X-Original-To: patchwork-incoming-netdev@ozlabs.org Delivered-To: patchwork-incoming-netdev@ozlabs.org Authentication-Results: ozlabs.org; spf=none (mailfrom) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=netdev-owner@vger.kernel.org; receiver=) Authentication-Results: ozlabs.org; dmarc=fail (p=none dis=none) header.from=mellanox.com Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 46HCv75JrSz9sP9 for ; Mon, 26 Aug 2019 23:45:39 +1000 (AEST) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1732161AbfHZNpi (ORCPT ); Mon, 26 Aug 2019 09:45:38 -0400 Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:52964 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1730502AbfHZNp0 (ORCPT ); Mon, 26 Aug 2019 09:45:26 -0400 Received: from Internal Mail-Server by MTLPINE1 (envelope-from vladbu@mellanox.com) with ESMTPS (AES256-SHA encrypted); 26 Aug 2019 16:45:15 +0300 Received: from reg-r-vrt-018-180.mtr.labs.mlnx. (reg-r-vrt-018-180.mtr.labs.mlnx [10.215.1.1]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id x7QDjEFV000366; Mon, 26 Aug 2019 16:45:15 +0300 From: Vlad Buslov To: netdev@vger.kernel.org Cc: jhs@mojatatu.com, xiyou.wangcong@gmail.com, jiri@resnulli.us, davem@davemloft.net, jakub.kicinski@netronome.com, pablo@netfilter.org, Vlad Buslov , Jiri Pirko Subject: [PATCH net-next v3 07/10] net: sched: take rtnl lock in tc_setup_flow_action() Date: Mon, 26 Aug 2019 16:45:03 +0300 Message-Id: <20190826134506.9705-8-vladbu@mellanox.com> X-Mailer: git-send-email 2.21.0 In-Reply-To: <20190826134506.9705-1-vladbu@mellanox.com> References: <20190826134506.9705-1-vladbu@mellanox.com> MIME-Version: 1.0 Sender: netdev-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org In order to allow using new flow_action infrastructure from unlocked classifiers, modify tc_setup_flow_action() to accept new 'rtnl_held' argument. Take rtnl lock before accessing tc_action data. This is necessary to protect from concurrent action replace. Signed-off-by: Vlad Buslov Acked-by: Jiri Pirko --- include/net/pkt_cls.h | 2 +- net/sched/cls_api.c | 17 +++++++++++++---- net/sched/cls_flower.c | 6 ++++-- net/sched/cls_matchall.c | 4 ++-- 4 files changed, 20 insertions(+), 9 deletions(-) diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 612232492f67..a48824bc1489 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -504,7 +504,7 @@ tcf_match_indev(struct sk_buff *skb, int ifindex) } int tc_setup_flow_action(struct flow_action *flow_action, - const struct tcf_exts *exts); + const struct tcf_exts *exts, bool rtnl_held); int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type, void *type_data, bool err_stop, bool rtnl_held); int tc_setup_cb_add(struct tcf_block *block, struct tcf_proto *tp, diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 3c103cf9fd0d..8751bb8a682f 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -3266,14 +3266,17 @@ int tc_setup_cb_reoffload(struct tcf_block *block, struct tcf_proto *tp, EXPORT_SYMBOL(tc_setup_cb_reoffload); int tc_setup_flow_action(struct flow_action *flow_action, - const struct tcf_exts *exts) + const struct tcf_exts *exts, bool rtnl_held) { const struct tc_action *act; - int i, j, k; + int i, j, k, err = 0; if (!exts) return 0; + if (!rtnl_held) + rtnl_lock(); + j = 0; tcf_exts_for_each_action(i, act, exts) { struct flow_action_entry *entry; @@ -3318,6 +3321,7 @@ int tc_setup_flow_action(struct flow_action *flow_action, entry->vlan.prio = tcf_vlan_push_prio(act); break; default: + err = -EOPNOTSUPP; goto err_out; } } else if (is_tcf_tunnel_set(act)) { @@ -3335,6 +3339,7 @@ int tc_setup_flow_action(struct flow_action *flow_action, entry->id = FLOW_ACTION_ADD; break; default: + err = -EOPNOTSUPP; goto err_out; } entry->mangle.htype = tcf_pedit_htype(act, k); @@ -3393,15 +3398,19 @@ int tc_setup_flow_action(struct flow_action *flow_action, entry->id = FLOW_ACTION_PTYPE; entry->ptype = tcf_skbedit_ptype(act); } else { + err = -EOPNOTSUPP; goto err_out; } if (!is_tcf_pedit(act)) j++; } - return 0; + err_out: - return -EOPNOTSUPP; + if (!rtnl_held) + rtnl_unlock(); + + return err; } EXPORT_SYMBOL(tc_setup_flow_action); diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 5cb694469b51..fb305bd45d93 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -452,7 +452,8 @@ static int fl_hw_replace_filter(struct tcf_proto *tp, cls_flower.rule->match.key = &f->mkey; cls_flower.classid = f->res.classid; - err = tc_setup_flow_action(&cls_flower.rule->action, &f->exts); + err = tc_setup_flow_action(&cls_flower.rule->action, &f->exts, + true); if (err) { kfree(cls_flower.rule); if (skip_sw) @@ -1819,7 +1820,8 @@ static int fl_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb, cls_flower.rule->match.mask = &f->mask->key; cls_flower.rule->match.key = &f->mkey; - err = tc_setup_flow_action(&cls_flower.rule->action, &f->exts); + err = tc_setup_flow_action(&cls_flower.rule->action, &f->exts, + true); if (err) { kfree(cls_flower.rule); if (tc_skip_sw(f->flags)) { diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c index 911d1ea28bb2..3266f25011cc 100644 --- a/net/sched/cls_matchall.c +++ b/net/sched/cls_matchall.c @@ -97,7 +97,7 @@ static int mall_replace_hw_filter(struct tcf_proto *tp, cls_mall.command = TC_CLSMATCHALL_REPLACE; cls_mall.cookie = cookie; - err = tc_setup_flow_action(&cls_mall.rule->action, &head->exts); + err = tc_setup_flow_action(&cls_mall.rule->action, &head->exts, true); if (err) { kfree(cls_mall.rule); mall_destroy_hw_filter(tp, head, cookie, NULL); @@ -300,7 +300,7 @@ static int mall_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb, TC_CLSMATCHALL_REPLACE : TC_CLSMATCHALL_DESTROY; cls_mall.cookie = (unsigned long)head; - err = tc_setup_flow_action(&cls_mall.rule->action, &head->exts); + err = tc_setup_flow_action(&cls_mall.rule->action, &head->exts, true); if (err) { kfree(cls_mall.rule); if (add && tc_skip_sw(head->flags)) { From patchwork Mon Aug 26 13:45:04 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Vlad Buslov X-Patchwork-Id: 1153184 X-Patchwork-Delegate: davem@davemloft.net Return-Path: X-Original-To: patchwork-incoming-netdev@ozlabs.org Delivered-To: patchwork-incoming-netdev@ozlabs.org Authentication-Results: ozlabs.org; spf=none (mailfrom) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=netdev-owner@vger.kernel.org; receiver=) Authentication-Results: ozlabs.org; dmarc=fail (p=none dis=none) header.from=mellanox.com Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 46HCtz7331z9sPK for ; Mon, 26 Aug 2019 23:45:31 +1000 (AEST) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1732123AbfHZNp1 (ORCPT ); Mon, 26 Aug 2019 09:45:27 -0400 Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:52965 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1732050AbfHZNpZ (ORCPT ); Mon, 26 Aug 2019 09:45:25 -0400 Received: from Internal Mail-Server by MTLPINE1 (envelope-from vladbu@mellanox.com) with ESMTPS (AES256-SHA encrypted); 26 Aug 2019 16:45:15 +0300 Received: from reg-r-vrt-018-180.mtr.labs.mlnx. (reg-r-vrt-018-180.mtr.labs.mlnx [10.215.1.1]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id x7QDjEFW000366; Mon, 26 Aug 2019 16:45:15 +0300 From: Vlad Buslov To: netdev@vger.kernel.org Cc: jhs@mojatatu.com, xiyou.wangcong@gmail.com, jiri@resnulli.us, davem@davemloft.net, jakub.kicinski@netronome.com, pablo@netfilter.org, Vlad Buslov , Jiri Pirko Subject: [PATCH net-next v3 08/10] net: sched: take reference to action dev before calling offloads Date: Mon, 26 Aug 2019 16:45:04 +0300 Message-Id: <20190826134506.9705-9-vladbu@mellanox.com> X-Mailer: git-send-email 2.21.0 In-Reply-To: <20190826134506.9705-1-vladbu@mellanox.com> References: <20190826134506.9705-1-vladbu@mellanox.com> MIME-Version: 1.0 Sender: netdev-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org In order to remove dependency on rtnl lock when calling hardware offload API, take reference to action mirred dev when initializing flow_action structure in tc_setup_flow_action(). Implement function tc_cleanup_flow_action(), use it to release the device after hardware offload API is done using it. Signed-off-by: Vlad Buslov Acked-by: Jiri Pirko --- include/net/pkt_cls.h | 2 ++ net/sched/cls_api.c | 32 ++++++++++++++++++++++++++++++++ net/sched/cls_flower.c | 2 ++ 3 files changed, 36 insertions(+) diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index a48824bc1489..e553fc80eb23 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -505,6 +505,8 @@ tcf_match_indev(struct sk_buff *skb, int ifindex) int tc_setup_flow_action(struct flow_action *flow_action, const struct tcf_exts *exts, bool rtnl_held); +void tc_cleanup_flow_action(struct flow_action *flow_action); + int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type, void *type_data, bool err_stop, bool rtnl_held); int tc_setup_cb_add(struct tcf_block *block, struct tcf_proto *tp, diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 8751bb8a682f..d988737693e4 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -3265,6 +3265,27 @@ int tc_setup_cb_reoffload(struct tcf_block *block, struct tcf_proto *tp, } EXPORT_SYMBOL(tc_setup_cb_reoffload); +void tc_cleanup_flow_action(struct flow_action *flow_action) +{ + struct flow_action_entry *entry; + int i; + + flow_action_for_each(i, entry, flow_action) { + switch (entry->id) { + case FLOW_ACTION_REDIRECT: + case FLOW_ACTION_MIRRED: + case FLOW_ACTION_REDIRECT_INGRESS: + case FLOW_ACTION_MIRRED_INGRESS: + if (entry->dev) + dev_put(entry->dev); + break; + default: + break; + } + } +} +EXPORT_SYMBOL(tc_cleanup_flow_action); + int tc_setup_flow_action(struct flow_action *flow_action, const struct tcf_exts *exts, bool rtnl_held) { @@ -3294,15 +3315,23 @@ int tc_setup_flow_action(struct flow_action *flow_action, } else if (is_tcf_mirred_egress_redirect(act)) { entry->id = FLOW_ACTION_REDIRECT; entry->dev = tcf_mirred_dev(act); + if (entry->dev) + dev_hold(entry->dev); } else if (is_tcf_mirred_egress_mirror(act)) { entry->id = FLOW_ACTION_MIRRED; entry->dev = tcf_mirred_dev(act); + if (entry->dev) + dev_hold(entry->dev); } else if (is_tcf_mirred_ingress_redirect(act)) { entry->id = FLOW_ACTION_REDIRECT_INGRESS; entry->dev = tcf_mirred_dev(act); + if (entry->dev) + dev_hold(entry->dev); } else if (is_tcf_mirred_ingress_mirror(act)) { entry->id = FLOW_ACTION_MIRRED_INGRESS; entry->dev = tcf_mirred_dev(act); + if (entry->dev) + dev_hold(entry->dev); } else if (is_tcf_vlan(act)) { switch (tcf_vlan_action(act)) { case TCA_VLAN_ACT_PUSH: @@ -3410,6 +3439,9 @@ int tc_setup_flow_action(struct flow_action *flow_action, if (!rtnl_held) rtnl_unlock(); + if (err) + tc_cleanup_flow_action(flow_action); + return err; } EXPORT_SYMBOL(tc_setup_flow_action); diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index fb305bd45d93..2852fe6f50d2 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -465,6 +465,7 @@ static int fl_hw_replace_filter(struct tcf_proto *tp, err = tc_setup_cb_add(block, tp, TC_SETUP_CLSFLOWER, &cls_flower, skip_sw, &f->flags, &f->in_hw_count, true); + tc_cleanup_flow_action(&cls_flower.rule->action); kfree(cls_flower.rule); if (err) { @@ -1838,6 +1839,7 @@ static int fl_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb, TC_SETUP_CLSFLOWER, &cls_flower, cb_priv, &f->flags, &f->in_hw_count); + tc_cleanup_flow_action(&cls_flower.rule->action); kfree(cls_flower.rule); if (err) { From patchwork Mon Aug 26 13:45:05 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Vlad Buslov X-Patchwork-Id: 1153192 X-Patchwork-Delegate: davem@davemloft.net Return-Path: X-Original-To: patchwork-incoming-netdev@ozlabs.org Delivered-To: patchwork-incoming-netdev@ozlabs.org Authentication-Results: ozlabs.org; spf=none (mailfrom) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=netdev-owner@vger.kernel.org; receiver=) Authentication-Results: ozlabs.org; dmarc=fail (p=none dis=none) header.from=mellanox.com Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 46HCvD4Z5Bz9sP7 for ; Mon, 26 Aug 2019 23:45:43 +1000 (AEST) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1732083AbfHZNpZ (ORCPT ); Mon, 26 Aug 2019 09:45:25 -0400 Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:52986 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1732058AbfHZNpZ (ORCPT ); Mon, 26 Aug 2019 09:45:25 -0400 Received: from Internal Mail-Server by MTLPINE1 (envelope-from vladbu@mellanox.com) with ESMTPS (AES256-SHA encrypted); 26 Aug 2019 16:45:15 +0300 Received: from reg-r-vrt-018-180.mtr.labs.mlnx. (reg-r-vrt-018-180.mtr.labs.mlnx [10.215.1.1]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id x7QDjEFX000366; Mon, 26 Aug 2019 16:45:15 +0300 From: Vlad Buslov To: netdev@vger.kernel.org Cc: jhs@mojatatu.com, xiyou.wangcong@gmail.com, jiri@resnulli.us, davem@davemloft.net, jakub.kicinski@netronome.com, pablo@netfilter.org, Vlad Buslov , Jiri Pirko Subject: [PATCH net-next v3 09/10] net: sched: copy tunnel info when setting flow_action entry->tunnel Date: Mon, 26 Aug 2019 16:45:05 +0300 Message-Id: <20190826134506.9705-10-vladbu@mellanox.com> X-Mailer: git-send-email 2.21.0 In-Reply-To: <20190826134506.9705-1-vladbu@mellanox.com> References: <20190826134506.9705-1-vladbu@mellanox.com> MIME-Version: 1.0 Sender: netdev-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org In order to remove dependency on rtnl lock, modify tc_setup_flow_action() to copy tunnel info, instead of just saving pointer to tunnel_key action tunnel info. This is necessary to prevent concurrent action overwrite from releasing tunnel info while it is being used by rtnl-unlocked driver. Implement helper tcf_tunnel_info_copy() that is used to copy tunnel info with all its options to dynamically allocated memory block. Modify tc_cleanup_flow_action() to free dynamically allocated tunnel info. Signed-off-by: Vlad Buslov Acked-by: Jiri Pirko --- include/net/tc_act/tc_tunnel_key.h | 17 +++++++++++++++++ net/sched/cls_api.c | 9 ++++++++- 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/include/net/tc_act/tc_tunnel_key.h b/include/net/tc_act/tc_tunnel_key.h index 7c3f777c168c..0689d9bcdf84 100644 --- a/include/net/tc_act/tc_tunnel_key.h +++ b/include/net/tc_act/tc_tunnel_key.h @@ -59,4 +59,21 @@ static inline struct ip_tunnel_info *tcf_tunnel_info(const struct tc_action *a) return NULL; #endif } + +static inline struct ip_tunnel_info * +tcf_tunnel_info_copy(const struct tc_action *a) +{ +#ifdef CONFIG_NET_CLS_ACT + struct ip_tunnel_info *tun = tcf_tunnel_info(a); + + if (tun) { + size_t tun_size = sizeof(*tun) + tun->options_len; + struct ip_tunnel_info *tun_copy = kmemdup(tun, tun_size, + GFP_KERNEL); + + return tun_copy; + } +#endif + return NULL; +} #endif /* __NET_TC_TUNNEL_KEY_H */ diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index d988737693e4..671ca905dbb5 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -3279,6 +3279,9 @@ void tc_cleanup_flow_action(struct flow_action *flow_action) if (entry->dev) dev_put(entry->dev); break; + case FLOW_ACTION_TUNNEL_ENCAP: + kfree(entry->tunnel); + break; default: break; } @@ -3355,7 +3358,11 @@ int tc_setup_flow_action(struct flow_action *flow_action, } } else if (is_tcf_tunnel_set(act)) { entry->id = FLOW_ACTION_TUNNEL_ENCAP; - entry->tunnel = tcf_tunnel_info(act); + entry->tunnel = tcf_tunnel_info_copy(act); + if (!entry->tunnel) { + err = -ENOMEM; + goto err_out; + } } else if (is_tcf_tunnel_release(act)) { entry->id = FLOW_ACTION_TUNNEL_DECAP; } else if (is_tcf_pedit(act)) { From patchwork Mon Aug 26 13:45:06 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Vlad Buslov X-Patchwork-Id: 1153186 X-Patchwork-Delegate: davem@davemloft.net Return-Path: X-Original-To: patchwork-incoming-netdev@ozlabs.org Delivered-To: patchwork-incoming-netdev@ozlabs.org Authentication-Results: ozlabs.org; spf=none (mailfrom) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=netdev-owner@vger.kernel.org; receiver=) Authentication-Results: ozlabs.org; dmarc=fail (p=none dis=none) header.from=mellanox.com Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 46HCv20X7nz9sP9 for ; Mon, 26 Aug 2019 23:45:34 +1000 (AEST) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1732142AbfHZNpc (ORCPT ); Mon, 26 Aug 2019 09:45:32 -0400 Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:52987 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1732059AbfHZNpZ (ORCPT ); Mon, 26 Aug 2019 09:45:25 -0400 Received: from Internal Mail-Server by MTLPINE1 (envelope-from vladbu@mellanox.com) with ESMTPS (AES256-SHA encrypted); 26 Aug 2019 16:45:15 +0300 Received: from reg-r-vrt-018-180.mtr.labs.mlnx. (reg-r-vrt-018-180.mtr.labs.mlnx [10.215.1.1]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id x7QDjEFY000366; Mon, 26 Aug 2019 16:45:15 +0300 From: Vlad Buslov To: netdev@vger.kernel.org Cc: jhs@mojatatu.com, xiyou.wangcong@gmail.com, jiri@resnulli.us, davem@davemloft.net, jakub.kicinski@netronome.com, pablo@netfilter.org, Vlad Buslov , Jiri Pirko Subject: [PATCH net-next v3 10/10] net: sched: flower: don't take rtnl lock for cls hw offloads API Date: Mon, 26 Aug 2019 16:45:06 +0300 Message-Id: <20190826134506.9705-11-vladbu@mellanox.com> X-Mailer: git-send-email 2.21.0 In-Reply-To: <20190826134506.9705-1-vladbu@mellanox.com> References: <20190826134506.9705-1-vladbu@mellanox.com> MIME-Version: 1.0 Sender: netdev-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org Don't manually take rtnl lock in flower classifier before calling cls hardware offloads API. Instead, pass rtnl lock status via 'rtnl_held' parameter. Signed-off-by: Vlad Buslov Acked-by: Jiri Pirko --- net/sched/cls_flower.c | 53 +++++++++++++----------------------------- 1 file changed, 16 insertions(+), 37 deletions(-) diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 2852fe6f50d2..74221e3351c3 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -412,18 +412,13 @@ static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f, struct tcf_block *block = tp->chain->block; struct flow_cls_offload cls_flower = {}; - if (!rtnl_held) - rtnl_lock(); - tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, extack); cls_flower.command = FLOW_CLS_DESTROY; cls_flower.cookie = (unsigned long) f; tc_setup_cb_destroy(block, tp, TC_SETUP_CLSFLOWER, &cls_flower, false, - &f->flags, &f->in_hw_count, true); + &f->flags, &f->in_hw_count, rtnl_held); - if (!rtnl_held) - rtnl_unlock(); } static int fl_hw_replace_filter(struct tcf_proto *tp, @@ -435,14 +430,9 @@ static int fl_hw_replace_filter(struct tcf_proto *tp, bool skip_sw = tc_skip_sw(f->flags); int err = 0; - if (!rtnl_held) - rtnl_lock(); - cls_flower.rule = flow_rule_alloc(tcf_exts_num_actions(&f->exts)); - if (!cls_flower.rule) { - err = -ENOMEM; - goto errout; - } + if (!cls_flower.rule) + return -ENOMEM; tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, extack); cls_flower.command = FLOW_CLS_REPLACE; @@ -453,36 +443,30 @@ static int fl_hw_replace_filter(struct tcf_proto *tp, cls_flower.classid = f->res.classid; err = tc_setup_flow_action(&cls_flower.rule->action, &f->exts, - true); + rtnl_held); if (err) { kfree(cls_flower.rule); - if (skip_sw) + if (skip_sw) { NL_SET_ERR_MSG_MOD(extack, "Failed to setup flow action"); - else - err = 0; - goto errout; + return err; + } + return 0; } err = tc_setup_cb_add(block, tp, TC_SETUP_CLSFLOWER, &cls_flower, - skip_sw, &f->flags, &f->in_hw_count, true); + skip_sw, &f->flags, &f->in_hw_count, rtnl_held); tc_cleanup_flow_action(&cls_flower.rule->action); kfree(cls_flower.rule); if (err) { - fl_hw_destroy_filter(tp, f, true, NULL); - goto errout; - } - - if (skip_sw && !(f->flags & TCA_CLS_FLAGS_IN_HW)) { - err = -EINVAL; - goto errout; + fl_hw_destroy_filter(tp, f, rtnl_held, NULL); + return err; } -errout: - if (!rtnl_held) - rtnl_unlock(); + if (skip_sw && !(f->flags & TCA_CLS_FLAGS_IN_HW)) + return -EINVAL; - return err; + return 0; } static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f, @@ -491,22 +475,17 @@ static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f, struct tcf_block *block = tp->chain->block; struct flow_cls_offload cls_flower = {}; - if (!rtnl_held) - rtnl_lock(); - tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, NULL); cls_flower.command = FLOW_CLS_STATS; cls_flower.cookie = (unsigned long) f; cls_flower.classid = f->res.classid; - tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false, true); + tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false, + rtnl_held); tcf_exts_stats_update(&f->exts, cls_flower.stats.bytes, cls_flower.stats.pkts, cls_flower.stats.lastused); - - if (!rtnl_held) - rtnl_unlock(); } static void __fl_put(struct cls_fl_filter *f)