From patchwork Mon Jan 4 18:51:09 2010 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Patchwork-Submitter: Flavio Leitner X-Patchwork-Id: 42085 X-Patchwork-Delegate: davem@davemloft.net Return-Path: X-Original-To: patchwork-incoming@ozlabs.org Delivered-To: patchwork-incoming@ozlabs.org Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 84F12B7BEE for ; Tue, 5 Jan 2010 05:51:38 +1100 (EST) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1753843Ab0ADSvV (ORCPT ); Mon, 4 Jan 2010 13:51:21 -0500 Received: (majordomo@vger.kernel.org) by vger.kernel.org id S1753790Ab0ADSvS (ORCPT ); Mon, 4 Jan 2010 13:51:18 -0500 Received: from caiajhbdcbhh.dreamhost.com ([208.97.132.177]:55558 "EHLO homiemail-a21.g.dreamhost.com" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1753826Ab0ADSvO (ORCPT ); Mon, 4 Jan 2010 13:51:14 -0500 X-Greylist: delayed 26472 seconds by postgrey-1.27 at vger.kernel.org; Mon, 04 Jan 2010 13:51:14 EST Received: from localhost (unknown [189.101.84.146]) by homiemail-a21.g.dreamhost.com (Postfix) with ESMTPA id 7FD91300064; Mon, 4 Jan 2010 10:51:12 -0800 (PST) Date: Mon, 4 Jan 2010 16:51:09 -0200 From: Flavio Leitner To: Eric Dumazet Cc: David Miller , netdev@vger.kernel.org Subject: Re: [PATCH] igmp: fix ip_mc_sf_allow race Message-ID: <20100104185109.GA2706@sysclose.org> References: <1262183005-28406-1-git-send-email-fleitner@redhat.com> <20100103.215441.43026709.davem@davemloft.net> <20100104112957.GA2573@sysclose.org> <4B41E7F7.2030003@gmail.com> MIME-Version: 1.0 Content-Disposition: inline In-Reply-To: <4B41E7F7.2030003@gmail.com> User-Agent: Mutt/1.5.20 (2009-08-17) Sender: netdev-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org On Mon, Jan 04, 2010 at 02:07:03PM +0100, Eric Dumazet wrote: > Le 04/01/2010 12:29, Flavio Leitner a écrit : > > > Then, I tried using call_rcu() to avoid the problem you are saying, > > but when you stop the reproducer, sk_free() will warn printing > > "optmem leakage.." because the rcu callback didn't run yet. > > > > > > This is probably because your call_rcu() callback was trying to call sock_kfree_s() ? yes, correct. > > rtnl_unlock(); > call_rcu(&iml->lock, callback_func) > > callback_func() > { > sock_kfree_s(sk, iml, sizeof(*iml)); > } > > > > Take a look at sock_kfree_s() definition : > > void sock_kfree_s(struct sock *sk, void *mem, int size) > { > kfree(mem); > atomic_sub(size, &sk->sk_omem_alloc); > } > > > You can certainly try : > > rtnl_unlock(); > atomic_sub(sizeof(*iml), sk->sk_omem_alloc); > call_rcu(&iml->rcu, kfree); > > (immediate sk_omem_alloc handling, but deferred kfree()) Ok, below is the new version using call_rcu(). I'm still running my tests here, so I'm planning to resubmit it later if this version is okay with you. thanks! Acked-by: David L Stevens diff --git a/include/linux/igmp.h b/include/linux/igmp.h index 724c27e..9cccd16 100644 --- a/include/linux/igmp.h +++ b/include/linux/igmp.h @@ -170,6 +170,7 @@ struct ip_mc_socklist { struct ip_mreqn multi; unsigned int sfmode; /* MCAST_{INCLUDE,EXCLUDE} */ struct ip_sf_socklist *sflist; + struct rcu_head rcu; }; struct ip_sf_list { diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 76c0840..ed154db 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -1799,7 +1799,7 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr) iml->next = inet->mc_list; iml->sflist = NULL; iml->sfmode = MCAST_EXCLUDE; - inet->mc_list = iml; + rcu_assign_pointer(inet->mc_list, iml); ip_mc_inc_group(in_dev, addr); err = 0; done: @@ -1825,6 +1825,17 @@ static int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml, return err; } + +void ip_mc_socklist_reclaim(struct rcu_head *rp) +{ + struct ip_mc_socklist *iml; + + iml = container_of(rp, struct ip_mc_socklist, rcu); + /* sk_omem_alloc should have been decreased by the caller*/ + kfree(iml); +} + + /* * Ask a socket to leave a group. */ @@ -1854,12 +1865,15 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr) (void) ip_mc_leave_src(sk, iml, in_dev); - *imlp = iml->next; + rcu_assign_pointer(*imlp, iml->next); if (in_dev) ip_mc_dec_group(in_dev, group); + rtnl_unlock(); - sock_kfree_s(sk, iml, sizeof(*iml)); + /* decrease mem now to avoid the memleak warning */ + atomic_sub(sizeof(*iml), &sk->sk_omem_alloc); + call_rcu(&iml->rcu, ip_mc_socklist_reclaim); return 0; } if (!in_dev) @@ -2209,30 +2223,40 @@ int ip_mc_sf_allow(struct sock *sk, __be32 loc_addr, __be32 rmt_addr, int dif) struct ip_mc_socklist *pmc; struct ip_sf_socklist *psl; int i; + int ret; + ret = 1; if (!ipv4_is_multicast(loc_addr)) - return 1; + goto out; - for (pmc=inet->mc_list; pmc; pmc=pmc->next) { + rcu_read_lock(); + for (pmc=rcu_dereference(inet->mc_list); pmc; pmc=rcu_dereference(pmc->next)) { if (pmc->multi.imr_multiaddr.s_addr == loc_addr && pmc->multi.imr_ifindex == dif) break; } + ret = inet->mc_all; if (!pmc) - return inet->mc_all; + goto unlock; psl = pmc->sflist; + ret = (pmc->sfmode == MCAST_EXCLUDE); if (!psl) - return pmc->sfmode == MCAST_EXCLUDE; + goto unlock; for (i=0; isl_count; i++) { if (psl->sl_addr[i] == rmt_addr) break; } + ret = 0; if (pmc->sfmode == MCAST_INCLUDE && i >= psl->sl_count) - return 0; + goto unlock; if (pmc->sfmode == MCAST_EXCLUDE && i < psl->sl_count) - return 0; - return 1; + goto unlock; + ret = 1; +unlock: + rcu_read_unlock(); +out: + return ret; } /* @@ -2245,13 +2269,17 @@ void ip_mc_drop_socket(struct sock *sk) struct ip_mc_socklist *iml; struct net *net = sock_net(sk); - if (inet->mc_list == NULL) + rcu_read_lock(); + if (rcu_dereference(inet->mc_list) == NULL) { + rcu_read_unlock(); return; + } + rcu_read_unlock(); rtnl_lock(); - while ((iml = inet->mc_list) != NULL) { + while ((iml = rcu_dereference(inet->mc_list)) != NULL) { struct in_device *in_dev; - inet->mc_list = iml->next; + rcu_assign_pointer(inet->mc_list, iml->next); in_dev = inetdev_by_index(net, iml->multi.imr_ifindex); (void) ip_mc_leave_src(sk, iml, in_dev); @@ -2259,7 +2287,9 @@ void ip_mc_drop_socket(struct sock *sk) ip_mc_dec_group(in_dev, iml->multi.imr_multiaddr.s_addr); in_dev_put(in_dev); } - sock_kfree_s(sk, iml, sizeof(*iml)); + /* decrease mem now to avoid the memleak warning */ + atomic_sub(sizeof(*iml), &sk->sk_omem_alloc); + call_rcu(&iml->rcu, ip_mc_socklist_reclaim); } rtnl_unlock(); }