From patchwork Mon Mar 7 22:50:40 2016 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Kamal Mostafa X-Patchwork-Id: 593670 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from huckleberry.canonical.com (huckleberry.canonical.com [91.189.94.19]) by ozlabs.org (Postfix) with ESMTP id 35D691402A8; Tue, 8 Mar 2016 10:02:43 +1100 (AEDT) Received: from localhost ([127.0.0.1] helo=huckleberry.canonical.com) by huckleberry.canonical.com with esmtp (Exim 4.76) (envelope-from ) id 1ad4Ai-0004wi-2d; Mon, 07 Mar 2016 23:02:40 +0000 Received: from youngberry.canonical.com ([91.189.89.112]) by huckleberry.canonical.com with esmtps (TLS1.0:RSA_AES_256_CBC_SHA1:32) (Exim 4.76) (envelope-from ) id 1ad44D-0001CY-Pj for kernel-team@lists.ubuntu.com; Mon, 07 Mar 2016 22:55:57 +0000 Received: from 1.general.kamal.us.vpn ([10.172.68.52] helo=fourier) by youngberry.canonical.com with esmtpsa (TLS1.0:RSA_AES_128_CBC_SHA1:16) (Exim 4.76) (envelope-from ) id 1ad44D-0000bs-BY; Mon, 07 Mar 2016 22:55:57 +0000 Received: from kamal by fourier with local (Exim 4.86) (envelope-from ) id 1ad44A-0002BL-M4; Mon, 07 Mar 2016 14:55:54 -0800 From: Kamal Mostafa To: linux-kernel@vger.kernel.org, stable@vger.kernel.org, kernel-team@lists.ubuntu.com Subject: [PATCH 4.2.y-ckt 249/273] route: check and remove route cache when we get route Date: Mon, 7 Mar 2016 14:50:40 -0800 Message-Id: <1457391064-6660-250-git-send-email-kamal@canonical.com> X-Mailer: git-send-email 2.7.0 In-Reply-To: <1457391064-6660-1-git-send-email-kamal@canonical.com> References: <1457391064-6660-1-git-send-email-kamal@canonical.com> X-Extended-Stable: 4.2 Cc: Kamal Mostafa , Xin Long , "David S . Miller" X-BeenThere: kernel-team@lists.ubuntu.com X-Mailman-Version: 2.1.14 Precedence: list List-Id: Kernel team discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , MIME-Version: 1.0 Errors-To: kernel-team-bounces@lists.ubuntu.com Sender: kernel-team-bounces@lists.ubuntu.com 4.2.8-ckt5 -stable review patch. If anyone has any objections, please let me know. ---8<------------------------------------------------------------ From: Xin Long [ Upstream commit deed49df7390d5239024199e249190328f1651e7 ] Since the gc of ipv4 route was removed, the route cached would has no chance to be removed, and even it has been timeout, it still could be used, cause no code to check it's expires. Fix this issue by checking and removing route cache when we get route. Signed-off-by: Xin Long Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller Signed-off-by: Kamal Mostafa --- include/net/ip_fib.h | 1 + net/ipv4/route.c | 77 ++++++++++++++++++++++++++++++++++++++++++---------- 2 files changed, 64 insertions(+), 14 deletions(-) diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index ff6d78f..468cc2a 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -59,6 +59,7 @@ struct fib_nh_exception { struct rtable __rcu *fnhe_rth_input; struct rtable __rcu *fnhe_rth_output; unsigned long fnhe_stamp; + struct rcu_head rcu; }; struct fnhe_hash_bucket { diff --git a/net/ipv4/route.c b/net/ipv4/route.c index e681b85..eca349c 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -125,6 +125,7 @@ static int ip_rt_mtu_expires __read_mostly = 10 * 60 * HZ; static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20; static int ip_rt_min_advmss __read_mostly = 256; +static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT; /* * Interface to generic destination cache. */ @@ -751,7 +752,7 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow struct fib_nh *nh = &FIB_RES_NH(res); update_or_create_fnhe(nh, fl4->daddr, new_gw, - 0, 0); + 0, jiffies + ip_rt_gc_timeout); } if (kill_route) rt->dst.obsolete = DST_OBSOLETE_KILL; @@ -1536,6 +1537,36 @@ static void ip_handle_martian_source(struct net_device *dev, #endif } +static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr) +{ + struct fnhe_hash_bucket *hash; + struct fib_nh_exception *fnhe, __rcu **fnhe_p; + u32 hval = fnhe_hashfun(daddr); + + spin_lock_bh(&fnhe_lock); + + hash = rcu_dereference_protected(nh->nh_exceptions, + lockdep_is_held(&fnhe_lock)); + hash += hval; + + fnhe_p = &hash->chain; + fnhe = rcu_dereference_protected(*fnhe_p, lockdep_is_held(&fnhe_lock)); + while (fnhe) { + if (fnhe->fnhe_daddr == daddr) { + rcu_assign_pointer(*fnhe_p, rcu_dereference_protected( + fnhe->fnhe_next, lockdep_is_held(&fnhe_lock))); + fnhe_flush_routes(fnhe); + kfree_rcu(fnhe, rcu); + break; + } + fnhe_p = &fnhe->fnhe_next; + fnhe = rcu_dereference_protected(fnhe->fnhe_next, + lockdep_is_held(&fnhe_lock)); + } + + spin_unlock_bh(&fnhe_lock); +} + /* called in rcu_read_lock() section */ static int __mkroute_input(struct sk_buff *skb, const struct fib_result *res, @@ -1590,11 +1621,20 @@ static int __mkroute_input(struct sk_buff *skb, fnhe = find_exception(&FIB_RES_NH(*res), daddr); if (do_cache) { - if (fnhe) + if (fnhe) { rth = rcu_dereference(fnhe->fnhe_rth_input); - else - rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input); + if (rth && rth->dst.expires && + time_after(jiffies, rth->dst.expires)) { + ip_del_fnhe(&FIB_RES_NH(*res), daddr); + fnhe = NULL; + } else { + goto rt_cache; + } + } + + rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input); +rt_cache: if (rt_cache_valid(rth)) { skb_dst_set_noref(skb, &rth->dst); goto out; @@ -1943,19 +1983,29 @@ static struct rtable *__mkroute_output(const struct fib_result *res, struct fib_nh *nh = &FIB_RES_NH(*res); fnhe = find_exception(nh, fl4->daddr); - if (fnhe) + if (fnhe) { prth = &fnhe->fnhe_rth_output; - else { - if (unlikely(fl4->flowi4_flags & - FLOWI_FLAG_KNOWN_NH && - !(nh->nh_gw && - nh->nh_scope == RT_SCOPE_LINK))) { - do_cache = false; - goto add; + rth = rcu_dereference(*prth); + if (rth && rth->dst.expires && + time_after(jiffies, rth->dst.expires)) { + ip_del_fnhe(nh, fl4->daddr); + fnhe = NULL; + } else { + goto rt_cache; } - prth = raw_cpu_ptr(nh->nh_pcpu_rth_output); } + + if (unlikely(fl4->flowi4_flags & + FLOWI_FLAG_KNOWN_NH && + !(nh->nh_gw && + nh->nh_scope == RT_SCOPE_LINK))) { + do_cache = false; + goto add; + } + prth = raw_cpu_ptr(nh->nh_pcpu_rth_output); rth = rcu_dereference(*prth); + +rt_cache: if (rt_cache_valid(rth)) { dst_hold(&rth->dst); return rth; @@ -2503,7 +2553,6 @@ void ip_rt_multicast_event(struct in_device *in_dev) } #ifdef CONFIG_SYSCTL -static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT; static int ip_rt_gc_interval __read_mostly = 60 * HZ; static int ip_rt_gc_min_interval __read_mostly = HZ / 2; static int ip_rt_gc_elasticity __read_mostly = 8;