From patchwork Tue May 26 18:56:14 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: David Ahern X-Patchwork-Id: 1298402 X-Patchwork-Delegate: davem@davemloft.net Return-Path: X-Original-To: patchwork-incoming-netdev@ozlabs.org Delivered-To: patchwork-incoming-netdev@ozlabs.org Authentication-Results: ozlabs.org; spf=pass (sender SPF authorized) smtp.mailfrom=vger.kernel.org (client-ip=23.128.96.18; helo=vger.kernel.org; envelope-from=netdev-owner@vger.kernel.org; receiver=) Authentication-Results: ozlabs.org; dmarc=pass (p=none dis=none) header.from=kernel.org Authentication-Results: ozlabs.org; dkim=pass (1024-bit key; unprotected) header.d=kernel.org header.i=@kernel.org header.a=rsa-sha256 header.s=default header.b=VxmNFSxr; dkim-atps=neutral Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by ozlabs.org (Postfix) with ESMTP id 49Wkdp6t6Pz9sWq for ; Wed, 27 May 2020 05:33:18 +1000 (AEST) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S2390411AbgEZTdR (ORCPT ); Tue, 26 May 2020 15:33:17 -0400 Received: from mail.kernel.org ([198.145.29.99]:49146 "EHLO mail.kernel.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S2389797AbgEZS4W (ORCPT ); Tue, 26 May 2020 14:56:22 -0400 Received: from C02YQ0RWLVCF.internal.digitalocean.com (c-73-181-34-237.hsd1.co.comcast.net [73.181.34.237]) (using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPSA id 6B544208B6; Tue, 26 May 2020 18:56:21 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=default; t=1590519381; bh=zeB4mtttKcrEXgCO0k4Jm6TfTcjFPut91gntKbxyIQA=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=VxmNFSxr690I3/91/3edFgHROpPrDrzA4yk1JG1jOMekU47ADT+nOgOjJuWrlWkx1 XtwCF9p+y/VarGxIecuI+A9sVg0SmINiHDwzsFqOqeo3PtMbNhoKxJlJVOkRDuW3S2 Op7avDYJlEt2VbWRSLpWpgs1cLCXMywWjfWRFjO4= From: David Ahern To: netdev@vger.kernel.org Cc: davem@davemloft.net, kuba@kernel.org, nikolay@cumulusnetworks.com, David Ahern Subject: [PATCH net v2 1/5] nexthops: Move code from remove_nexthop_from_groups to remove_nh_grp_entry Date: Tue, 26 May 2020 12:56:14 -0600 Message-Id: <20200526185618.43748-2-dsahern@kernel.org> X-Mailer: git-send-email 2.21.1 (Apple Git-122.3) In-Reply-To: <20200526185618.43748-1-dsahern@kernel.org> References: <20200526185618.43748-1-dsahern@kernel.org> MIME-Version: 1.0 Sender: netdev-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org From: David Ahern Move nh_grp dereference and check for removing nexthop group due to all members gone into remove_nh_grp_entry. Fixes: 430a049190de ("nexthop: Add support for nexthop groups") Signed-off-by: David Ahern Acked-by: Nikolay Aleksandrov --- net/ipv4/nexthop.c | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index 715e14475220..b4b772f120a7 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -694,17 +694,21 @@ static void nh_group_rebalance(struct nh_group *nhg) } } -static void remove_nh_grp_entry(struct nh_grp_entry *nhge, - struct nh_group *nhg, +static void remove_nh_grp_entry(struct net *net, struct nh_grp_entry *nhge, struct nl_info *nlinfo) { + struct nexthop *nhp = nhge->nh_parent; struct nexthop *nh = nhge->nh; struct nh_grp_entry *nhges; + struct nh_group *nhg; bool found = false; int i; WARN_ON(!nh); + list_del(&nhge->nh_list); + + nhg = rtnl_dereference(nhp->nh_grp); nhges = nhg->nh_entries; for (i = 0; i < nhg->num_nh; ++i) { if (found) { @@ -728,7 +732,11 @@ static void remove_nh_grp_entry(struct nh_grp_entry *nhge, nexthop_put(nh); if (nlinfo) - nexthop_notify(RTM_NEWNEXTHOP, nhge->nh_parent, nlinfo); + nexthop_notify(RTM_NEWNEXTHOP, nhp, nlinfo); + + /* if this group has no more entries then remove it */ + if (!nhg->num_nh) + remove_nexthop(net, nhp, nlinfo); } static void remove_nexthop_from_groups(struct net *net, struct nexthop *nh, @@ -736,17 +744,8 @@ static void remove_nexthop_from_groups(struct net *net, struct nexthop *nh, { struct nh_grp_entry *nhge, *tmp; - list_for_each_entry_safe(nhge, tmp, &nh->grp_list, nh_list) { - struct nh_group *nhg; - - list_del(&nhge->nh_list); - nhg = rtnl_dereference(nhge->nh_parent->nh_grp); - remove_nh_grp_entry(nhge, nhg, nlinfo); - - /* if this group has no more entries then remove it */ - if (!nhg->num_nh) - remove_nexthop(net, nhge->nh_parent, nlinfo); - } + list_for_each_entry_safe(nhge, tmp, &nh->grp_list, nh_list) + remove_nh_grp_entry(net, nhge, nlinfo); } static void remove_nexthop_group(struct nexthop *nh, struct nl_info *nlinfo) From patchwork Tue May 26 18:56:15 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: David Ahern X-Patchwork-Id: 1298401 X-Patchwork-Delegate: davem@davemloft.net Return-Path: X-Original-To: patchwork-incoming-netdev@ozlabs.org Delivered-To: patchwork-incoming-netdev@ozlabs.org Authentication-Results: ozlabs.org; spf=pass (sender SPF authorized) smtp.mailfrom=vger.kernel.org (client-ip=23.128.96.18; helo=vger.kernel.org; envelope-from=netdev-owner@vger.kernel.org; receiver=) Authentication-Results: ozlabs.org; dmarc=pass (p=none dis=none) header.from=kernel.org Authentication-Results: ozlabs.org; dkim=pass (1024-bit key; unprotected) header.d=kernel.org header.i=@kernel.org header.a=rsa-sha256 header.s=default header.b=c2Frl8Cy; dkim-atps=neutral Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by ozlabs.org (Postfix) with ESMTP id 49Wkdn4VThz9sWw for ; Wed, 27 May 2020 05:33:17 +1000 (AEST) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S2390908AbgEZTdQ (ORCPT ); Tue, 26 May 2020 15:33:16 -0400 Received: from mail.kernel.org ([198.145.29.99]:49186 "EHLO mail.kernel.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S2389802AbgEZS4X (ORCPT ); Tue, 26 May 2020 14:56:23 -0400 Received: from C02YQ0RWLVCF.internal.digitalocean.com (c-73-181-34-237.hsd1.co.comcast.net [73.181.34.237]) (using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPSA id E3750208DB; Tue, 26 May 2020 18:56:21 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=default; t=1590519382; bh=JHG4u04g44eiuHlgDVIQOAvUt/HD3vMhtyq+qyyYGjM=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=c2Frl8CyPeG4VaPQMlZpHfR1iGUOg7lt/7Hc+41dBrt3VvIidDgmSGYGvbY5vn+3c GUIZ2SF8fnowqH4oyaOVYu6lSJnYE9Wa1RxDw6qJvJvW8QJzwLS5QmZofZXFmh5wzU CoeHsWG4FQ07b2oKsCe4qQDNQs63RVAyuvu2vr8o= From: David Ahern To: netdev@vger.kernel.org Cc: davem@davemloft.net, kuba@kernel.org, nikolay@cumulusnetworks.com, David Ahern Subject: [PATCH net v2 2/5] nexthops: don't modify published nexthop groups Date: Tue, 26 May 2020 12:56:15 -0600 Message-Id: <20200526185618.43748-3-dsahern@kernel.org> X-Mailer: git-send-email 2.21.1 (Apple Git-122.3) In-Reply-To: <20200526185618.43748-1-dsahern@kernel.org> References: <20200526185618.43748-1-dsahern@kernel.org> MIME-Version: 1.0 Sender: netdev-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org From: Nikolay Aleksandrov We must avoid modifying published nexthop groups while they might be in use, otherwise we might see NULL ptr dereferences. In order to do that we allocate 2 nexthoup group structures upon nexthop creation and swap between them when we have to delete an entry. The reason is that we can't fail nexthop group removal, so we can't handle allocation failure thus we move the extra allocation on creation where we can safely fail and return ENOMEM. Fixes: 430a049190de ("nexthop: Add support for nexthop groups") Signed-off-by: Nikolay Aleksandrov Signed-off-by: David Ahern --- include/net/nexthop.h | 1 + net/ipv4/nexthop.c | 91 +++++++++++++++++++++++++++---------------- 2 files changed, 59 insertions(+), 33 deletions(-) diff --git a/include/net/nexthop.h b/include/net/nexthop.h index c440ccc861fc..8a343519ed7a 100644 --- a/include/net/nexthop.h +++ b/include/net/nexthop.h @@ -70,6 +70,7 @@ struct nh_grp_entry { }; struct nh_group { + struct nh_group *spare; /* spare group for removals */ u16 num_nh; bool mpath; bool has_v4; diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index b4b772f120a7..563f71bcb2d7 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -63,9 +63,16 @@ static void nexthop_free_mpath(struct nexthop *nh) int i; nhg = rcu_dereference_raw(nh->nh_grp); - for (i = 0; i < nhg->num_nh; ++i) - WARN_ON(nhg->nh_entries[i].nh); + for (i = 0; i < nhg->num_nh; ++i) { + struct nh_grp_entry *nhge = &nhg->nh_entries[i]; + WARN_ON(!list_empty(&nhge->nh_list)); + nexthop_put(nhge->nh); + } + + WARN_ON(nhg->spare == nhg); + + kfree(nhg->spare); kfree(nhg); } @@ -697,46 +704,53 @@ static void nh_group_rebalance(struct nh_group *nhg) static void remove_nh_grp_entry(struct net *net, struct nh_grp_entry *nhge, struct nl_info *nlinfo) { + struct nh_grp_entry *nhges, *new_nhges; struct nexthop *nhp = nhge->nh_parent; struct nexthop *nh = nhge->nh; - struct nh_grp_entry *nhges; - struct nh_group *nhg; - bool found = false; - int i; + struct nh_group *nhg, *newg; + int i, j; WARN_ON(!nh); - list_del(&nhge->nh_list); - nhg = rtnl_dereference(nhp->nh_grp); - nhges = nhg->nh_entries; - for (i = 0; i < nhg->num_nh; ++i) { - if (found) { - nhges[i-1].nh = nhges[i].nh; - nhges[i-1].weight = nhges[i].weight; - list_del(&nhges[i].nh_list); - list_add(&nhges[i-1].nh_list, &nhges[i-1].nh->grp_list); - } else if (nhg->nh_entries[i].nh == nh) { - found = true; - } - } + newg = nhg->spare; - if (WARN_ON(!found)) + /* last entry, keep it visible and remove the parent */ + if (nhg->num_nh == 1) { + remove_nexthop(net, nhp, nlinfo); return; + } - nhg->num_nh--; - nhg->nh_entries[nhg->num_nh].nh = NULL; + newg->has_v4 = nhg->has_v4; + newg->mpath = nhg->mpath; + newg->num_nh = nhg->num_nh; - nh_group_rebalance(nhg); + /* copy old entries to new except the one getting removed */ + nhges = nhg->nh_entries; + new_nhges = newg->nh_entries; + for (i = 0, j = 0; i < nhg->num_nh; ++i) { + /* current nexthop getting removed */ + if (nhg->nh_entries[i].nh == nh) { + newg->num_nh--; + continue; + } - nexthop_put(nh); + list_del(&nhges[i].nh_list); + new_nhges[j].nh_parent = nhges[i].nh_parent; + new_nhges[j].nh = nhges[i].nh; + new_nhges[j].weight = nhges[i].weight; + list_add(&new_nhges[j].nh_list, &new_nhges[j].nh->grp_list); + j++; + } + + nh_group_rebalance(newg); + rcu_assign_pointer(nhp->nh_grp, newg); + + list_del(&nhge->nh_list); + nexthop_put(nhge->nh); if (nlinfo) nexthop_notify(RTM_NEWNEXTHOP, nhp, nlinfo); - - /* if this group has no more entries then remove it */ - if (!nhg->num_nh) - remove_nexthop(net, nhp, nlinfo); } static void remove_nexthop_from_groups(struct net *net, struct nexthop *nh, @@ -746,6 +760,9 @@ static void remove_nexthop_from_groups(struct net *net, struct nexthop *nh, list_for_each_entry_safe(nhge, tmp, &nh->grp_list, nh_list) remove_nh_grp_entry(net, nhge, nlinfo); + + /* make sure all see the newly published array before releasing rtnl */ + synchronize_rcu(); } static void remove_nexthop_group(struct nexthop *nh, struct nl_info *nlinfo) @@ -759,10 +776,7 @@ static void remove_nexthop_group(struct nexthop *nh, struct nl_info *nlinfo) if (WARN_ON(!nhge->nh)) continue; - list_del(&nhge->nh_list); - nexthop_put(nhge->nh); - nhge->nh = NULL; - nhg->num_nh--; + list_del_init(&nhge->nh_list); } } @@ -1085,6 +1099,7 @@ static struct nexthop *nexthop_create_group(struct net *net, { struct nlattr *grps_attr = cfg->nh_grp; struct nexthop_grp *entry = nla_data(grps_attr); + u16 num_nh = nla_len(grps_attr) / sizeof(*entry); struct nh_group *nhg; struct nexthop *nh; int i; @@ -1095,12 +1110,21 @@ static struct nexthop *nexthop_create_group(struct net *net, nh->is_group = 1; - nhg = nexthop_grp_alloc(nla_len(grps_attr) / sizeof(*entry)); + nhg = nexthop_grp_alloc(num_nh); if (!nhg) { kfree(nh); return ERR_PTR(-ENOMEM); } + /* spare group used for removals */ + nhg->spare = nexthop_grp_alloc(num_nh); + if (!nhg) { + kfree(nhg); + kfree(nh); + return NULL; + } + nhg->spare->spare = nhg; + for (i = 0; i < nhg->num_nh; ++i) { struct nexthop *nhe; struct nh_info *nhi; @@ -1132,6 +1156,7 @@ static struct nexthop *nexthop_create_group(struct net *net, for (; i >= 0; --i) nexthop_put(nhg->nh_entries[i].nh); + kfree(nhg->spare); kfree(nhg); kfree(nh); From patchwork Tue May 26 18:56:16 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: David Ahern X-Patchwork-Id: 1298398 X-Patchwork-Delegate: davem@davemloft.net Return-Path: X-Original-To: patchwork-incoming-netdev@ozlabs.org Delivered-To: patchwork-incoming-netdev@ozlabs.org Authentication-Results: ozlabs.org; spf=pass (sender SPF authorized) smtp.mailfrom=vger.kernel.org (client-ip=23.128.96.18; helo=vger.kernel.org; envelope-from=netdev-owner@vger.kernel.org; receiver=) Authentication-Results: ozlabs.org; dmarc=pass (p=none dis=none) header.from=kernel.org Authentication-Results: ozlabs.org; dkim=pass (1024-bit key; unprotected) header.d=kernel.org header.i=@kernel.org header.a=rsa-sha256 header.s=default header.b=rBijDMVe; dkim-atps=neutral Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by ozlabs.org (Postfix) with ESMTP id 49Wkdd1TClz9sWl for ; Wed, 27 May 2020 05:33:09 +1000 (AEST) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S2389806AbgEZS41 (ORCPT ); Tue, 26 May 2020 14:56:27 -0400 Received: from mail.kernel.org ([198.145.29.99]:49216 "EHLO mail.kernel.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S2389808AbgEZS4X (ORCPT ); Tue, 26 May 2020 14:56:23 -0400 Received: from C02YQ0RWLVCF.internal.digitalocean.com (c-73-181-34-237.hsd1.co.comcast.net [73.181.34.237]) (using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPSA id 6A3D12158C; Tue, 26 May 2020 18:56:22 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=default; t=1590519382; bh=6N28XbjQ7Rh/AuwOgk3EG7NoGIEnJ4WNZJvhxBs0Pek=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=rBijDMVe2gM0AApUsHcEPc/WPLx5RVaL85T9qBw+47GlYsLO4Q+sP1NmUXhS/MdzG kHyGrelVqqrup8Yj1WhwXA4+JMDpLhAFpqybgcbgeNZTnFXLEgmC+ih+S8RdI3nOsL KyzK2m6FNc6ihZgGz9olSvBI8nEydtyE6ExyJKsY= From: David Ahern To: netdev@vger.kernel.org Cc: davem@davemloft.net, kuba@kernel.org, nikolay@cumulusnetworks.com, David Ahern Subject: [PATCH net v2 3/5] nexthop: Expand nexthop_is_multipath in a few places Date: Tue, 26 May 2020 12:56:16 -0600 Message-Id: <20200526185618.43748-4-dsahern@kernel.org> X-Mailer: git-send-email 2.21.1 (Apple Git-122.3) In-Reply-To: <20200526185618.43748-1-dsahern@kernel.org> References: <20200526185618.43748-1-dsahern@kernel.org> MIME-Version: 1.0 Sender: netdev-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org From: David Ahern I got too fancy consolidating checks on multipath type. The result is that path lookups can access 2 different nh_grp structs as exposed by Nik's torture tests. Expand nexthop_is_multipath within nexthop.h to avoid multiple, nh_grp dereferences and make decisions based on the consistent struct. Only 2 places left using nexthop_is_multipath are within IPv6, both only check that the nexthop is a multipath for a branching decision which are acceptable. Fixes: 430a049190de ("nexthop: Add support for nexthop groups") Signed-off-by: David Ahern Acked-by: Nikolay Aleksandrov --- include/net/nexthop.h | 41 +++++++++++++++++++++++++---------------- 1 file changed, 25 insertions(+), 16 deletions(-) diff --git a/include/net/nexthop.h b/include/net/nexthop.h index 8a343519ed7a..f09e8d7d9886 100644 --- a/include/net/nexthop.h +++ b/include/net/nexthop.h @@ -137,21 +137,20 @@ static inline unsigned int nexthop_num_path(const struct nexthop *nh) { unsigned int rc = 1; - if (nexthop_is_multipath(nh)) { + if (nh->is_group) { struct nh_group *nh_grp; nh_grp = rcu_dereference_rtnl(nh->nh_grp); - rc = nh_grp->num_nh; + if (nh_grp->mpath) + rc = nh_grp->num_nh; } return rc; } static inline -struct nexthop *nexthop_mpath_select(const struct nexthop *nh, int nhsel) +struct nexthop *nexthop_mpath_select(const struct nh_group *nhg, int nhsel) { - const struct nh_group *nhg = rcu_dereference_rtnl(nh->nh_grp); - /* for_nexthops macros in fib_semantics.c grabs a pointer to * the nexthop before checking nhsel */ @@ -186,12 +185,14 @@ static inline bool nexthop_is_blackhole(const struct nexthop *nh) { const struct nh_info *nhi; - if (nexthop_is_multipath(nh)) { - if (nexthop_num_path(nh) > 1) - return false; - nh = nexthop_mpath_select(nh, 0); - if (!nh) + if (nh->is_group) { + struct nh_group *nh_grp; + + nh_grp = rcu_dereference_rtnl(nh->nh_grp); + if (nh_grp->num_nh > 1) return false; + + nh = nh_grp->nh_entries[0].nh; } nhi = rcu_dereference_rtnl(nh->nh_info); @@ -217,10 +218,15 @@ struct fib_nh_common *nexthop_fib_nhc(struct nexthop *nh, int nhsel) BUILD_BUG_ON(offsetof(struct fib_nh, nh_common) != 0); BUILD_BUG_ON(offsetof(struct fib6_nh, nh_common) != 0); - if (nexthop_is_multipath(nh)) { - nh = nexthop_mpath_select(nh, nhsel); - if (!nh) - return NULL; + if (nh->is_group) { + struct nh_group *nh_grp; + + nh_grp = rcu_dereference_rtnl(nh->nh_grp); + if (nh_grp->mpath) { + nh = nexthop_mpath_select(nh_grp, nhsel); + if (!nh) + return NULL; + } } nhi = rcu_dereference_rtnl(nh->nh_info); @@ -264,8 +270,11 @@ static inline struct fib6_nh *nexthop_fib6_nh(struct nexthop *nh) { struct nh_info *nhi; - if (nexthop_is_multipath(nh)) { - nh = nexthop_mpath_select(nh, 0); + if (nh->is_group) { + struct nh_group *nh_grp; + + nh_grp = rcu_dereference_rtnl(nh->nh_grp); + nh = nexthop_mpath_select(nh_grp, 0); if (!nh) return NULL; } From patchwork Tue May 26 18:56:17 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: David Ahern X-Patchwork-Id: 1298399 X-Patchwork-Delegate: davem@davemloft.net Return-Path: X-Original-To: patchwork-incoming-netdev@ozlabs.org Delivered-To: patchwork-incoming-netdev@ozlabs.org Authentication-Results: ozlabs.org; spf=pass (sender SPF authorized) smtp.mailfrom=vger.kernel.org (client-ip=23.128.96.18; helo=vger.kernel.org; envelope-from=netdev-owner@vger.kernel.org; receiver=) Authentication-Results: ozlabs.org; dmarc=pass (p=none dis=none) header.from=kernel.org Authentication-Results: ozlabs.org; dkim=pass (1024-bit key; unprotected) header.d=kernel.org header.i=@kernel.org header.a=rsa-sha256 header.s=default header.b=k2wPjkAn; dkim-atps=neutral Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by ozlabs.org (Postfix) with ESMTP id 49Wkdh4Zjsz9sWl for ; Wed, 27 May 2020 05:33:12 +1000 (AEST) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S2389828AbgEZS40 (ORCPT ); Tue, 26 May 2020 14:56:26 -0400 Received: from mail.kernel.org ([198.145.29.99]:49220 "EHLO mail.kernel.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S2388663AbgEZS4X (ORCPT ); Tue, 26 May 2020 14:56:23 -0400 Received: from C02YQ0RWLVCF.internal.digitalocean.com (c-73-181-34-237.hsd1.co.comcast.net [73.181.34.237]) (using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPSA id E15E7208B6; Tue, 26 May 2020 18:56:22 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=default; t=1590519383; bh=yNsGdnkwi3MCGrz0nWhBWVb160kiXOUhwsIe+xWy+Nk=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=k2wPjkAnjTgy8FGa3OIK+7kCsLlu0k5YIGKzH4q3weKosgesZUKgaZ15YeiDmWeFL c8XdyH4g+xi7TLx/zUlK/FUJe8LfWr1Hsw1fJZsORddcDgB3Kp1jrfCoFhEI/DO0sE WM7Hxgw07+zyjPbLs59m/ivqCpKogyYhIIyjdLJo= From: David Ahern To: netdev@vger.kernel.org Cc: davem@davemloft.net, kuba@kernel.org, nikolay@cumulusnetworks.com, David Ahern Subject: [PATCH net v2 4/5] ipv4: Refactor nhc evaluation in fib_table_lookup Date: Tue, 26 May 2020 12:56:17 -0600 Message-Id: <20200526185618.43748-5-dsahern@kernel.org> X-Mailer: git-send-email 2.21.1 (Apple Git-122.3) In-Reply-To: <20200526185618.43748-1-dsahern@kernel.org> References: <20200526185618.43748-1-dsahern@kernel.org> MIME-Version: 1.0 Sender: netdev-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org From: David Ahern FIB lookups can return an entry that references an external nexthop. While walking the nexthop struct we do not want to make multiple calls into the nexthop code which can result in 2 different structs getting accessed - one returning the number of paths the rest of the loop seeing a different nh_grp struct. If the nexthop group shrunk, the result is an attempt to access a fib_nh_common that does not exist for the new nh_grp struct but did for the old one. To fix that move the device evaluation code to a helper that can be used for inline fib_nh path as well as external nexthops. Update the existing check for fi->nh in fib_table_lookup to call a new helper, nexthop_get_nhc_lookup, which walks the external nexthop with a single rcu dereference. Fixes: 430a049190de ("nexthop: Add support for nexthop groups") Signed-off-by: David Ahern Acked-by: Nikolay Aleksandrov --- include/net/ip_fib.h | 2 ++ include/net/nexthop.h | 33 ++++++++++++++++++++++++++++ net/ipv4/fib_trie.c | 51 ++++++++++++++++++++++++++++++------------- 3 files changed, 71 insertions(+), 15 deletions(-) diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index b219a8fe0950..771ce068bc96 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -479,6 +479,8 @@ void fib_nh_common_release(struct fib_nh_common *nhc); void fib_alias_hw_flags_set(struct net *net, const struct fib_rt_info *fri); void fib_trie_init(void); struct fib_table *fib_trie_table(u32 id, struct fib_table *alias); +bool fib_lookup_good_nhc(const struct fib_nh_common *nhc, int fib_flags, + const struct flowi4 *flp); static inline void fib_combine_itag(u32 *itag, const struct fib_result *res) { diff --git a/include/net/nexthop.h b/include/net/nexthop.h index f09e8d7d9886..9414ae46fc1c 100644 --- a/include/net/nexthop.h +++ b/include/net/nexthop.h @@ -233,6 +233,39 @@ struct fib_nh_common *nexthop_fib_nhc(struct nexthop *nh, int nhsel) return &nhi->fib_nhc; } +/* called from fib_table_lookup with rcu_lock */ +static inline +struct fib_nh_common *nexthop_get_nhc_lookup(const struct nexthop *nh, + int fib_flags, + const struct flowi4 *flp, + int *nhsel) +{ + struct nh_info *nhi; + + if (nh->is_group) { + struct nh_group *nhg = rcu_dereference(nh->nh_grp); + int i; + + for (i = 0; i < nhg->num_nh; i++) { + struct nexthop *nhe = nhg->nh_entries[i].nh; + + nhi = rcu_dereference(nhe->nh_info); + if (fib_lookup_good_nhc(&nhi->fib_nhc, fib_flags, flp)) { + *nhsel = i; + return &nhi->fib_nhc; + } + } + } else { + nhi = rcu_dereference(nh->nh_info); + if (fib_lookup_good_nhc(&nhi->fib_nhc, fib_flags, flp)) { + *nhsel = 0; + return &nhi->fib_nhc; + } + } + + return NULL; +} + static inline unsigned int fib_info_num_path(const struct fib_info *fi) { if (unlikely(fi->nh)) diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 4f334b425538..248f1c1959a6 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1371,6 +1371,26 @@ static inline t_key prefix_mismatch(t_key key, struct key_vector *n) return (key ^ prefix) & (prefix | -prefix); } +bool fib_lookup_good_nhc(const struct fib_nh_common *nhc, int fib_flags, + const struct flowi4 *flp) +{ + if (nhc->nhc_flags & RTNH_F_DEAD) + return false; + + if (ip_ignore_linkdown(nhc->nhc_dev) && + nhc->nhc_flags & RTNH_F_LINKDOWN && + !(fib_flags & FIB_LOOKUP_IGNORE_LINKSTATE)) + return false; + + if (!(flp->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF)) { + if (flp->flowi4_oif && + flp->flowi4_oif != nhc->nhc_oif) + return false; + } + + return true; +} + /* should be called with rcu_read_lock */ int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp, struct fib_result *res, int fib_flags) @@ -1503,6 +1523,7 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp, /* Step 3: Process the leaf, if that fails fall back to backtracing */ hlist_for_each_entry_rcu(fa, &n->leaf, fa_list) { struct fib_info *fi = fa->fa_info; + struct fib_nh_common *nhc; int nhsel, err; if ((BITS_PER_LONG > KEYLENGTH) || (fa->fa_slen < KEYLENGTH)) { @@ -1528,26 +1549,25 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp, if (fi->fib_flags & RTNH_F_DEAD) continue; - if (unlikely(fi->nh && nexthop_is_blackhole(fi->nh))) { - err = fib_props[RTN_BLACKHOLE].error; - goto out_reject; + if (unlikely(fi->nh)) { + if (nexthop_is_blackhole(fi->nh)) { + err = fib_props[RTN_BLACKHOLE].error; + goto out_reject; + } + + nhc = nexthop_get_nhc_lookup(fi->nh, fib_flags, flp, + &nhsel); + if (nhc) + goto set_result; + goto miss; } for (nhsel = 0; nhsel < fib_info_num_path(fi); nhsel++) { - struct fib_nh_common *nhc = fib_info_nhc(fi, nhsel); + nhc = fib_info_nhc(fi, nhsel); - if (nhc->nhc_flags & RTNH_F_DEAD) + if (!fib_lookup_good_nhc(nhc, fib_flags, flp)) continue; - if (ip_ignore_linkdown(nhc->nhc_dev) && - nhc->nhc_flags & RTNH_F_LINKDOWN && - !(fib_flags & FIB_LOOKUP_IGNORE_LINKSTATE)) - continue; - if (!(flp->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF)) { - if (flp->flowi4_oif && - flp->flowi4_oif != nhc->nhc_oif) - continue; - } - +set_result: if (!(fib_flags & FIB_LOOKUP_NOREF)) refcount_inc(&fi->fib_clntref); @@ -1568,6 +1588,7 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp, return err; } } +miss: #ifdef CONFIG_IP_FIB_TRIE_STATS this_cpu_inc(stats->semantic_match_miss); #endif From patchwork Tue May 26 18:56:18 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: David Ahern X-Patchwork-Id: 1298400 X-Patchwork-Delegate: davem@davemloft.net Return-Path: X-Original-To: patchwork-incoming-netdev@ozlabs.org Delivered-To: patchwork-incoming-netdev@ozlabs.org Authentication-Results: ozlabs.org; spf=pass (sender SPF authorized) smtp.mailfrom=vger.kernel.org (client-ip=23.128.96.18; helo=vger.kernel.org; envelope-from=netdev-owner@vger.kernel.org; receiver=) Authentication-Results: ozlabs.org; dmarc=pass (p=none dis=none) header.from=kernel.org Authentication-Results: ozlabs.org; dkim=pass (1024-bit key; unprotected) header.d=kernel.org header.i=@kernel.org header.a=rsa-sha256 header.s=default header.b=uNrbjWeD; dkim-atps=neutral Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by ozlabs.org (Postfix) with ESMTP id 49Wkdm0W3fz9sWl for ; Wed, 27 May 2020 05:33:15 +1000 (AEST) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S2390137AbgEZTdN (ORCPT ); Tue, 26 May 2020 15:33:13 -0400 Received: from mail.kernel.org ([198.145.29.99]:49200 "EHLO mail.kernel.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S2389789AbgEZS4Y (ORCPT ); Tue, 26 May 2020 14:56:24 -0400 Received: from C02YQ0RWLVCF.internal.digitalocean.com (c-73-181-34-237.hsd1.co.comcast.net [73.181.34.237]) (using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPSA id 678F82086A; Tue, 26 May 2020 18:56:23 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=default; t=1590519383; bh=EDdkK6dIOQTKwvtUxwXYy4/4X/dxjHM+E4M4/arruR0=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=uNrbjWeDR4TCYwBOSs4WRIv4ifr8HDt/kdpUbPMpRebOZkfmLC/pKHZFmtw5vDaBz IYXXp9oz+BmQbnTcejTwGcdN7hQ9boQkA15mfrguZiD9SYT89xBx018ipixMpZe4eB cn3Z9CLigy2Ifo6wqfg2o7QTJE7LvUPeX/WPZjIw= From: David Ahern To: netdev@vger.kernel.org Cc: davem@davemloft.net, kuba@kernel.org, nikolay@cumulusnetworks.com, David Ahern Subject: [PATCH net v2 5/5] ipv4: nexthop version of fib_info_nh_uses_dev Date: Tue, 26 May 2020 12:56:18 -0600 Message-Id: <20200526185618.43748-6-dsahern@kernel.org> X-Mailer: git-send-email 2.21.1 (Apple Git-122.3) In-Reply-To: <20200526185618.43748-1-dsahern@kernel.org> References: <20200526185618.43748-1-dsahern@kernel.org> MIME-Version: 1.0 Sender: netdev-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org From: David Ahern Similar to the last path, need to fix fib_info_nh_uses_dev for external nexthops to avoid referencing multiple nh_grp structs. Move the device check in fib_info_nh_uses_dev to a helper and create a nexthop version that is called if the fib_info uses an external nexthop. Fixes: 430a049190de ("nexthop: Add support for nexthop groups") Signed-off-by: David Ahern Acked-by: Nikolay Aleksandrov --- include/net/ip_fib.h | 10 ++++++++++ include/net/nexthop.h | 25 +++++++++++++++++++++++++ net/ipv4/fib_frontend.c | 19 ++++++++++--------- 3 files changed, 45 insertions(+), 9 deletions(-) diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 771ce068bc96..2ec062aaa978 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -447,6 +447,16 @@ static inline int fib_num_tclassid_users(struct net *net) #endif int fib_unmerge(struct net *net); +static inline bool nhc_l3mdev_matches_dev(const struct fib_nh_common *nhc, +const struct net_device *dev) +{ + if (nhc->nhc_dev == dev || + l3mdev_master_ifindex_rcu(nhc->nhc_dev) == dev->ifindex) + return true; + + return false; +} + /* Exported by fib_semantics.c */ int ip_fib_check_default(__be32 gw, struct net_device *dev); int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force); diff --git a/include/net/nexthop.h b/include/net/nexthop.h index 9414ae46fc1c..8c9f1a718859 100644 --- a/include/net/nexthop.h +++ b/include/net/nexthop.h @@ -266,6 +266,31 @@ struct fib_nh_common *nexthop_get_nhc_lookup(const struct nexthop *nh, return NULL; } +static inline bool nexthop_uses_dev(const struct nexthop *nh, + const struct net_device *dev) +{ + struct nh_info *nhi; + + if (nh->is_group) { + struct nh_group *nhg = rcu_dereference(nh->nh_grp); + int i; + + for (i = 0; i < nhg->num_nh; i++) { + struct nexthop *nhe = nhg->nh_entries[i].nh; + + nhi = rcu_dereference(nhe->nh_info); + if (nhc_l3mdev_matches_dev(&nhi->fib_nhc, dev)) + return true; + } + } else { + nhi = rcu_dereference(nh->nh_info); + if (nhc_l3mdev_matches_dev(&nhi->fib_nhc, dev)) + return true; + } + + return false; +} + static inline unsigned int fib_info_num_path(const struct fib_info *fi) { if (unlikely(fi->nh)) diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 1bf9da3a75f9..41079490a118 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -309,17 +309,18 @@ bool fib_info_nh_uses_dev(struct fib_info *fi, const struct net_device *dev) { bool dev_match = false; #ifdef CONFIG_IP_ROUTE_MULTIPATH - int ret; + if (unlikely(fi->nh)) { + dev_match = nexthop_uses_dev(fi->nh, dev); + } else { + int ret; - for (ret = 0; ret < fib_info_num_path(fi); ret++) { - const struct fib_nh_common *nhc = fib_info_nhc(fi, ret); + for (ret = 0; ret < fib_info_num_path(fi); ret++) { + const struct fib_nh_common *nhc = fib_info_nhc(fi, ret); - if (nhc->nhc_dev == dev) { - dev_match = true; - break; - } else if (l3mdev_master_ifindex_rcu(nhc->nhc_dev) == dev->ifindex) { - dev_match = true; - break; + if (nhc_l3mdev_matches_dev(nhc, dev)) { + dev_match = true; + break; + } } } #else