diff mbox

Multicast: Avoid useless duplication of multicast messages

Message ID alpine.DEB.1.10.0904141447310.30596@qirst.com
State RFC, archived
Delegated to: David Miller
Headers show

Commit Message

Christoph Lameter (Ampere) April 14, 2009, 6:48 p.m. UTC
Neil: Could you test this?


Subject: Multicast: Avoid useless duplication of multicast messages

If two processes open the same port as a multicast socket and then
join two different multicast groups then traffic for both multicast groups
is forwarded to either process. This means that application will get surprising
data that they did not ask for. Applications will have to filter these out in
order to work correctly if multiple apps run on the same system.

These are pretty strange semantics but they have been around since the
beginning of multicast support on Unix systems.

Add an option

	igmp_mc_socket_based_filtering

that is off by default so that the default behavior stays as is.

If one wants to have sane multicast behavior for the above case
then this option can be set. Thereupon applications will not get
additional traffic forwarded to them if they happen to run on a host
where another application also receives multicast traffic from a
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Neil Horman April 14, 2009, 8:44 p.m. UTC | #1
On Tue, Apr 14, 2009 at 02:48:48PM -0400, Christoph Lameter wrote:
> Neil: Could you test this?
> 
I can and will, although it will take me a few days to get a system I can play
with it on.  I really don't think it needs much testing as it clearly provides
the functionality that you describe. 

That said, I still disagree with the addition of this switch, as its
superfolous.  Programatically an application can do what you want without this
change already .If you provide me with the test application that you've been working with, I can
demonstrate exactly how.

Regards
Neil

> 
> Subject: Multicast: Avoid useless duplication of multicast messages
> 
> If two processes open the same port as a multicast socket and then
> join two different multicast groups then traffic for both multicast groups
> is forwarded to either process. This means that application will get surprising
> data that they did not ask for. Applications will have to filter these out in
> order to work correctly if multiple apps run on the same system.
> 
> These are pretty strange semantics but they have been around since the
> beginning of multicast support on Unix systems.
> 
> Add an option
> 
> 	igmp_mc_socket_based_filtering
> 
> that is off by default so that the default behavior stays as is.
> 
> If one wants to have sane multicast behavior for the above case
> then this option can be set. Thereupon applications will not get
> additional traffic forwarded to them if they happen to run on a host
> where another application also receives multicast traffic from a
> different multicast group.
> 
> Signed-off-by: Christoph Lameter <cl@linux.com>
> 
> ---
>  Documentation/networking/ip-sysctl.txt |   10 ++++++++++
>  include/linux/igmp.h                   |    1 +
>  include/linux/sysctl.h                 |    1 +
>  net/ipv4/igmp.c                        |    6 +++---
>  net/ipv4/sysctl_net_ipv4.c             |    8 ++++++++
>  5 files changed, 23 insertions(+), 3 deletions(-)
> 
> Index: linux-2.6/net/ipv4/igmp.c
> ===================================================================
> --- linux-2.6.orig/net/ipv4/igmp.c	2009-04-14 13:03:14.000000000 -0500
> +++ linux-2.6/net/ipv4/igmp.c	2009-04-14 13:11:38.000000000 -0500
> @@ -1419,7 +1419,7 @@ static struct in_device *ip_mc_find_dev(
>   */
>  int sysctl_igmp_max_memberships __read_mostly = IP_MAX_MEMBERSHIPS;
>  int sysctl_igmp_max_msf __read_mostly = IP_MAX_MSF;
> -
> +int sysctl_igmp_mc_socket_based_filtering = 0;
> 
>  static int ip_mc_del1_src(struct ip_mc_list *pmc, int sfmode,
>  	__be32 *psfsrc)
> @@ -2187,7 +2187,7 @@ int ip_mc_sf_allow(struct sock *sk, __be
>  	struct ip_sf_socklist *psl;
>  	int i;
> 
> -	if (!ipv4_is_multicast(loc_addr))
> +	if (ipv4_is_lbcast(loc_addr) || !ipv4_is_multicast(loc_addr))
>  		return 1;
> 
>  	for (pmc=inet->mc_list; pmc; pmc=pmc->next) {
> @@ -2196,7 +2196,7 @@ int ip_mc_sf_allow(struct sock *sk, __be
>  			break;
>  	}
>  	if (!pmc)
> -		return 1;
> +		return !sysctl_igmp_mc_socket_based_filtering;
>  	psl = pmc->sflist;
>  	if (!psl)
>  		return pmc->sfmode == MCAST_EXCLUDE;
> Index: linux-2.6/include/linux/igmp.h
> ===================================================================
> --- linux-2.6.orig/include/linux/igmp.h	2009-04-14 13:13:14.000000000 -0500
> +++ linux-2.6/include/linux/igmp.h	2009-04-14 13:41:14.000000000 -0500
> @@ -150,6 +150,7 @@ static inline struct igmpv3_query *
> 
>  extern int sysctl_igmp_max_memberships;
>  extern int sysctl_igmp_max_msf;
> +extern int sysctl_igmp_mc_socket_based_filtering;
> 
>  struct ip_sf_socklist
>  {
> Index: linux-2.6/include/linux/sysctl.h
> ===================================================================
> --- linux-2.6.orig/include/linux/sysctl.h	2009-04-14 13:15:57.000000000 -0500
> +++ linux-2.6/include/linux/sysctl.h	2009-04-14 13:16:49.000000000 -0500
> @@ -435,6 +435,7 @@ enum
>  	NET_TCP_ALLOWED_CONG_CONTROL=123,
>  	NET_TCP_MAX_SSTHRESH=124,
>  	NET_TCP_FRTO_RESPONSE=125,
> +	NET_IPV4_IGMP_MC_SOCKET_BASED_FILTERING=126,
>  };
> 
>  enum {
> Index: linux-2.6/net/ipv4/sysctl_net_ipv4.c
> ===================================================================
> --- linux-2.6.orig/net/ipv4/sysctl_net_ipv4.c	2009-04-14 13:13:53.000000000 -0500
> +++ linux-2.6/net/ipv4/sysctl_net_ipv4.c	2009-04-14 13:15:44.000000000 -0500
> @@ -408,6 +408,14 @@ static struct ctl_table ipv4_table[] = {
>  		.mode		= 0644,
>  		.proc_handler	= proc_dointvec
>  	},
> +	{
> +		.ctl_name	= NET_IPV4_IGMP_MC_SOCKET_BASED_FILTERING,
> +		.procname	= "igmp_mc_socked_based_filtering",
> +		.data		= &sysctl_igmp_mc_socket_based_filtering,
> +		.maxlen		= sizeof(int),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec
> +	},
> 
>  #endif
>  	{
> Index: linux-2.6/Documentation/networking/ip-sysctl.txt
> ===================================================================
> --- linux-2.6.orig/Documentation/networking/ip-sysctl.txt	2009-04-14 13:48:09.000000000 -0500
> +++ linux-2.6/Documentation/networking/ip-sysctl.txt	2009-04-14 13:53:10.000000000 -0500
> @@ -611,6 +611,16 @@ igmp_max_memberships - INTEGER
>  	Change the maximum number of multicast groups we can subscribe to.
>  	Default: 20
> 
> +igmp_mc_socket_based_filtering - INTEGER
> +	Use the list of subscribed multicast addresses to filter the traffic
> +	going to a multicast socket. If set to zero then multicast traffic
> +        is forwarded to any socket subscribed to a port number ignoring the
> +        list of multicast groups that a socket has been subscribed to. This mode
> +        is the default since it has been done that way in the past.
> +	If set to one then only multicast traffic of the multicast groups
> +        that a socket has joined are forwarded to the socket.
> +	Default: 0
> +
>  conf/interface/*  changes special settings per interface (where "interface" is
>  		  the name of your network interface)
>  conf/all/*	  is special, changes the settings for all interfaces
> 
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Christoph Lameter (Ampere) April 14, 2009, 9:45 p.m. UTC | #2
On Tue, 14 Apr 2009, Neil Horman wrote:

> That said, I still disagree with the addition of this switch, as its
> superfolous.  Programatically an application can do what you want without this
> change already .If you provide me with the test application that you've been working with, I can
> demonstrate exactly how.

Ok. First applications listens to multicast groups A and B. Second one
uses C and D. All use a port 4711 for communication which is the
standard for a certain application. Both applications are very latency
sensitive. You do not want the data to be duplicated into the user space
of both processes.

Both processes need to reply to messages on the
multicast groups. The receiver always needs the source IP address for
authentication otherwise messages are ignored

How are you going to do this? Its trivial to do with this patch and one
socket in each process listening to port 4711 that subscribes to the
necessary multicast groups. Plus one the problem is that some of the
applications here are only available in binary form and they were naively
coded assuming that the socket layer would not need exotic tricks to
convince it to do the right thing.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Neil Horman April 15, 2009, 11:07 a.m. UTC | #3
On Tue, Apr 14, 2009 at 05:45:58PM -0400, Christoph Lameter wrote:
> On Tue, 14 Apr 2009, Neil Horman wrote:
> 
> > That said, I still disagree with the addition of this switch, as its
> > superfolous.  Programatically an application can do what you want without this
> > change already .If you provide me with the test application that you've been working with, I can
> > demonstrate exactly how.
> 
> Ok. First applications listens to multicast groups A and B. Second one
> uses C and D. All use a port 4711 for communication which is the
> standard for a certain application. Both applications are very latency
> sensitive. You do not want the data to be duplicated into the user space
> of both processes.
> 
> Both processes need to reply to messages on the
> multicast groups. The receiver always needs the source IP address for
> authentication otherwise messages are ignored
> 
> How are you going to do this? 
I'm going to write each application to use 2 sockets, one bound to each
multicast group.  Thats the way it works now.  I think you missed the obvious in
your construction of this example.

> Its trivial to do with this patch and one
> socket in each process listening to port 4711 that subscribes to the
> necessary multicast groups. 
Its trivial without the patch as well.

> Plus one the problem is that some of the
> applications here are only available in binary form and they were naively
> coded assuming that the socket layer would not need exotic tricks to
> convince it to do the right thing.
What?  If you have binary only software thats written for Linux and it
malfunctions,  This has been the accepted and standard multicast behavior on
Linux and various other unicies for decades.  What you are describing is a
vendor bug in your application, something you need to either contact the vendor
about, or find a new vendor.  They made an application based on an assumption,
and their assumption was wrong, they need to fix it.  We could do them a favor,
I suppose, but I don't see why.

It boils down to this:  This is the way multicast subscriptions have worked in
bsd, linux, and presumably various other unix and non-unix operating systems for
lord only knows how long.  Provide some documentation that shows its in
violation of a newer standard, or that it is common practice to behave
differently on another OS (such that including this directive would make porting
easier).  As it stands currently, this patch only serves to create a crutch to
perpetuate misundersandings about how the behavior currently works.

Neil
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Christoph Lameter (Ampere) April 15, 2009, 12:51 p.m. UTC | #4
On Wed, 15 Apr 2009, Neil Horman wrote:

> > How are you going to do this?
> I'm going to write each application to use 2 sockets, one bound to each
> multicast group.  Thats the way it works now.  I think you missed the obvious in
> your construction of this example.

Ok it could be done with binding. But you would need 3 sockets. One per MC
groups bound to a MC group each and then one for the replies (hmmm...
looks like you could use SO_BINDTODEVICE on one socket to get around the
third one --there is even an exception case for this in inet_bind causing
more weird semantics-- but then the application needs to know the device
name of the NIC, argh)

> > Its trivial to do with this patch and one
> > socket in each process listening to port 4711 that subscribes to the
> > necessary multicast groups.
> Its trivial without the patch as well.

I do not see how you can justify making such a statement.

> It boils down to this:  This is the way multicast subscriptions have worked in
> bsd, linux, and presumably various other unix and non-unix operating systems for
> lord only knows how long.  Provide some documentation that shows its in
> violation of a newer standard, or that it is common practice to behave
> differently on another OS (such that including this directive would make porting
> easier).  As it stands currently, this patch only serves to create a crutch to
> perpetuate misundersandings about how the behavior currently works.

The way things work is counterintuitive and leads to weird code constructs
with the application having to manage multiple sockets because weird
semantics have developed over the years.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Neil Horman April 15, 2009, 2:22 p.m. UTC | #5
On Wed, Apr 15, 2009 at 08:51:35AM -0400, Christoph Lameter wrote:
> On Wed, 15 Apr 2009, Neil Horman wrote:
> 
> > > How are you going to do this?
> > I'm going to write each application to use 2 sockets, one bound to each
> > multicast group.  Thats the way it works now.  I think you missed the obvious in
> > your construction of this example.
> 
> Ok it could be done with binding. But you would need 3 sockets. One per MC
> groups bound to a MC group each and then one for the replies (hmmm...
> looks like you could use SO_BINDTODEVICE on one socket to get around the
Depending on your setup, 2 is perfectly sufficient.  In fact 1 can be sufficient
if you want to filter in your application, but we've been over that.

> third one --there is even an exception case for this in inet_bind causing
> more weird semantics-- but then the application needs to know the device
> name of the NIC, argh)
Of course it does, but thats zero incremental cost, since you need to know the
device name anyway, when specifying the ifindex to the join request.

> > > Its trivial to do with this patch and one
> > > socket in each process listening to port 4711 that subscribes to the
> > > necessary multicast groups.
> > Its trivial without the patch as well.
> 
> I do not see how you can justify making such a statement.
> 
I find it justified because I don't see an application using 2 or 3 sockets and a poll or
select call as anything more than trivial.  If you find that to be non-trivial,
perhaps a refresher programming course might be in order for you?

> > It boils down to this:  This is the way multicast subscriptions have worked in
> > bsd, linux, and presumably various other unix and non-unix operating systems for
> > lord only knows how long.  Provide some documentation that shows its in
> > violation of a newer standard, or that it is common practice to behave
> > differently on another OS (such that including this directive would make porting
> > easier).  As it stands currently, this patch only serves to create a crutch to
> > perpetuate misundersandings about how the behavior currently works.
> 
> The way things work is counterintuitive and leads to weird code constructs
> with the application having to manage multiple sockets because weird
> semantics have developed over the years.

The way things work is counterintuitive to _you_ (is it was to me a few months
ago).  That asside, I came to understand how this actually works, how it has
worked for decades, and how programmers have successfully written applications
that use this model over that time period.  Can we modify the model?  Sure.
Should we?  I certainly don't see any need, given that it does little except
change the model.  For those who understand it, its compltely useless.  I'm
willing to concede that I'm wrong, but not without some modicum of evidence that
this change will benefit existing applications.  If some other operating system
adheres to the model you expect it to, perhaps this has legs, but I don't know
of any that do.  The current model, even if counter intuitive, is well defined,
well understood, and documented.  I fail to see how adding an alternate,
undocumented model (that may itself be counterintuitive to all the developers
who have developed under the current model) adds anything significant.

Neil

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Vlad Yasevich April 15, 2009, 2:41 p.m. UTC | #6
Neil Horman wrote:
> On Wed, Apr 15, 2009 at 08:51:35AM -0400, Christoph Lameter wrote:
>> On Wed, 15 Apr 2009, Neil Horman wrote:
>>
>>>> How are you going to do this?
>>> I'm going to write each application to use 2 sockets, one bound to each
>>> multicast group.  Thats the way it works now.  I think you missed the obvious in
>>> your construction of this example.
>> Ok it could be done with binding. But you would need 3 sockets. One per MC
>> groups bound to a MC group each and then one for the replies (hmmm...
>> looks like you could use SO_BINDTODEVICE on one socket to get around the
> Depending on your setup, 2 is perfectly sufficient.  In fact 1 can be sufficient
> if you want to filter in your application, but we've been over that.
> 
>> third one --there is even an exception case for this in inet_bind causing
>> more weird semantics-- but then the application needs to know the device
>> name of the NIC, argh)
> Of course it does, but thats zero incremental cost, since you need to know the
> device name anyway, when specifying the ifindex to the join request.
> 
>>>> Its trivial to do with this patch and one
>>>> socket in each process listening to port 4711 that subscribes to the
>>>> necessary multicast groups.
>>> Its trivial without the patch as well.
>> I do not see how you can justify making such a statement.
>>
> I find it justified because I don't see an application using 2 or 3 sockets and a poll or
> select call as anything more than trivial.  If you find that to be non-trivial,
> perhaps a refresher programming course might be in order for you?
> 
>>> It boils down to this:  This is the way multicast subscriptions have worked in
>>> bsd, linux, and presumably various other unix and non-unix operating systems for
>>> lord only knows how long.  Provide some documentation that shows its in
>>> violation of a newer standard, or that it is common practice to behave
>>> differently on another OS (such that including this directive would make porting
>>> easier).  As it stands currently, this patch only serves to create a crutch to
>>> perpetuate misundersandings about how the behavior currently works.
>> The way things work is counterintuitive and leads to weird code constructs
>> with the application having to manage multiple sockets because weird
>> semantics have developed over the years.
> 
> The way things work is counterintuitive to _you_ (is it was to me a few months
> ago).  That asside, I came to understand how this actually works, how it has
> worked for decades, and how programmers have successfully written applications
> that use this model over that time period.  Can we modify the model?  Sure.
> Should we?  I certainly don't see any need, given that it does little except
> change the model.  For those who understand it, its compltely useless.  I'm
> willing to concede that I'm wrong, but not without some modicum of evidence that
> this change will benefit existing applications.  If some other operating system
> adheres to the model you expect it to, perhaps this has legs, but I don't know
> of any that do.  The current model, even if counter intuitive, is well defined,
> well understood, and documented.  I fail to see how adding an alternate,
> undocumented model (that may itself be counterintuitive to all the developers
> who have developed under the current model) adds anything significant.
> 
> Neil

Hi Neil

This has been somewhat bugging me for a while, so I went digging.

Here is a rather pertinent text that points out that we "might" have a bug.
RFC 4607:

4.2.  Requirements on the Host IP Module

   An incoming datagram destined to an SSM address MUST be delivered by
   the IP module to all sockets that have indicated (via Subscribe) a
   desire to receive data that matches the datagram's source address,
   destination address, and arriving interface.  It MUST NOT be
   delivered to other sockets.


Additionally, RFC 3678 describes IP_ADD_MEMBERSHIP as an 'any-source group'
and is allowed by the SSM spec.  This is also how it is implemented in the kernel.
However, we do not appear to perform the filtering required by the above quoted
section 4.2.  In particular, if we fail to match the 'datagram's destination address',
we deliver the packet, which I believe is in violation of the "MUST NOT" above.

I've CC'd Dave Stevens, since I'd like to hear his opinion regarding this text.

Thanks
-vlad
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Neil Horman April 15, 2009, 3:57 p.m. UTC | #7
On Wed, Apr 15, 2009 at 10:41:03AM -0400, Vlad Yasevich wrote:
> Neil Horman wrote:
> > On Wed, Apr 15, 2009 at 08:51:35AM -0400, Christoph Lameter wrote:
> >> On Wed, 15 Apr 2009, Neil Horman wrote:
> >>
> >>>> How are you going to do this?
> >>> I'm going to write each application to use 2 sockets, one bound to each
> >>> multicast group.  Thats the way it works now.  I think you missed the obvious in
> >>> your construction of this example.
> >> Ok it could be done with binding. But you would need 3 sockets. One per MC
> >> groups bound to a MC group each and then one for the replies (hmmm...
> >> looks like you could use SO_BINDTODEVICE on one socket to get around the
> > Depending on your setup, 2 is perfectly sufficient.  In fact 1 can be sufficient
> > if you want to filter in your application, but we've been over that.
> > 
> >> third one --there is even an exception case for this in inet_bind causing
> >> more weird semantics-- but then the application needs to know the device
> >> name of the NIC, argh)
> > Of course it does, but thats zero incremental cost, since you need to know the
> > device name anyway, when specifying the ifindex to the join request.
> > 
> >>>> Its trivial to do with this patch and one
> >>>> socket in each process listening to port 4711 that subscribes to the
> >>>> necessary multicast groups.
> >>> Its trivial without the patch as well.
> >> I do not see how you can justify making such a statement.
> >>
> > I find it justified because I don't see an application using 2 or 3 sockets and a poll or
> > select call as anything more than trivial.  If you find that to be non-trivial,
> > perhaps a refresher programming course might be in order for you?
> > 
> >>> It boils down to this:  This is the way multicast subscriptions have worked in
> >>> bsd, linux, and presumably various other unix and non-unix operating systems for
> >>> lord only knows how long.  Provide some documentation that shows its in
> >>> violation of a newer standard, or that it is common practice to behave
> >>> differently on another OS (such that including this directive would make porting
> >>> easier).  As it stands currently, this patch only serves to create a crutch to
> >>> perpetuate misundersandings about how the behavior currently works.
> >> The way things work is counterintuitive and leads to weird code constructs
> >> with the application having to manage multiple sockets because weird
> >> semantics have developed over the years.
> > 
> > The way things work is counterintuitive to _you_ (is it was to me a few months
> > ago).  That asside, I came to understand how this actually works, how it has
> > worked for decades, and how programmers have successfully written applications
> > that use this model over that time period.  Can we modify the model?  Sure.
> > Should we?  I certainly don't see any need, given that it does little except
> > change the model.  For those who understand it, its compltely useless.  I'm
> > willing to concede that I'm wrong, but not without some modicum of evidence that
> > this change will benefit existing applications.  If some other operating system
> > adheres to the model you expect it to, perhaps this has legs, but I don't know
> > of any that do.  The current model, even if counter intuitive, is well defined,
> > well understood, and documented.  I fail to see how adding an alternate,
> > undocumented model (that may itself be counterintuitive to all the developers
> > who have developed under the current model) adds anything significant.
> > 
> > Neil
> 
> Hi Neil
> 
> This has been somewhat bugging me for a while, so I went digging.
> 
> Here is a rather pertinent text that points out that we "might" have a bug.
> RFC 4607:
> 
> 4.2.  Requirements on the Host IP Module
> 
>    An incoming datagram destined to an SSM address MUST be delivered by
>    the IP module to all sockets that have indicated (via Subscribe) a
>    desire to receive data that matches the datagram's source address,
>    destination address, and arriving interface.  It MUST NOT be
>    delivered to other sockets.
> 
I'll let David respond more fully, since I'm not familiar with this RFC, but a
quick read would suggest that (from the abstract), this only applies to a subset
of addresses, which are not being used in the application in question here.
From what I read, the RFC defines an extenstion to the sockets api which allows
you to subscribe to a multicast group from a specific source, using one of the
reserved muticast ranges provided in the abstract.  It appears that we support
this RFC via the IP_ADD_SOURCE_MEMBERSHIP socket option.  Now, if we allow
sockets that issue IP_ADD_SOURCE_MEMBERSHIP calls to receive datagrams from
multicast addresses within the range defined by the rfc from other sources that
they have not subscribed to, yes we have a bug, but thats not overly relevant I
think to Christophs problem, since he's using the any-source model, and its
corresponding addresses.  Switching to the specific-source model would solve his
immeidate problem here that we've been debating, but would likely introduce a
new set, in that he would then have to write his app to subscribe to the myrriad
of sources that are sending to that multicast group.

> 
> Additionally, RFC 3678 describes IP_ADD_MEMBERSHIP as an 'any-source group'
> and is allowed by the SSM spec.  This is also how it is implemented in the kernel.
> However, we do not appear to perform the filtering required by the above quoted
> section 4.2.  
Very true, so we may have a bug in the SSM model, but again, thats not what
Christoph is using, its the any-source model, using group address unrelated to
the ssm RFC.

In particular, if we fail to match the 'datagram's destination address',
> we deliver the packet, which I believe is in violation of the "MUST NOT" above.
> 
I think only if the SSM model is used via the socket extensions the RFC
describes.  If Christophs app is subscribing via IP_ADD_SOURCE_MEMBERSHIP, then
yes, we have a problem.  But everything I've read says he uses the standard, any-source
IP_ADD_MEMBERSHIP option which I think makes assertions from RFC 4607 void.
Christoph, are you using IP_ADD_SOURCE_MEMBERSHIP?

Neil

> I've CC'd Dave Stevens, since I'd like to hear his opinion regarding this text.
> 
> Thanks
> -vlad
> 
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Vlad Yasevich April 15, 2009, 4:07 p.m. UTC | #8
Neil Horman wrote:
> On Wed, Apr 15, 2009 at 10:41:03AM -0400, Vlad Yasevich wrote:
>> Neil Horman wrote:
>>> On Wed, Apr 15, 2009 at 08:51:35AM -0400, Christoph Lameter wrote:
>>>> On Wed, 15 Apr 2009, Neil Horman wrote:
>>>>
>>>>>> How are you going to do this?
>>>>> I'm going to write each application to use 2 sockets, one bound to each
>>>>> multicast group.  Thats the way it works now.  I think you missed the obvious in
>>>>> your construction of this example.
>>>> Ok it could be done with binding. But you would need 3 sockets. One per MC
>>>> groups bound to a MC group each and then one for the replies (hmmm...
>>>> looks like you could use SO_BINDTODEVICE on one socket to get around the
>>> Depending on your setup, 2 is perfectly sufficient.  In fact 1 can be sufficient
>>> if you want to filter in your application, but we've been over that.
>>>
>>>> third one --there is even an exception case for this in inet_bind causing
>>>> more weird semantics-- but then the application needs to know the device
>>>> name of the NIC, argh)
>>> Of course it does, but thats zero incremental cost, since you need to know the
>>> device name anyway, when specifying the ifindex to the join request.
>>>
>>>>>> Its trivial to do with this patch and one
>>>>>> socket in each process listening to port 4711 that subscribes to the
>>>>>> necessary multicast groups.
>>>>> Its trivial without the patch as well.
>>>> I do not see how you can justify making such a statement.
>>>>
>>> I find it justified because I don't see an application using 2 or 3 sockets and a poll or
>>> select call as anything more than trivial.  If you find that to be non-trivial,
>>> perhaps a refresher programming course might be in order for you?
>>>
>>>>> It boils down to this:  This is the way multicast subscriptions have worked in
>>>>> bsd, linux, and presumably various other unix and non-unix operating systems for
>>>>> lord only knows how long.  Provide some documentation that shows its in
>>>>> violation of a newer standard, or that it is common practice to behave
>>>>> differently on another OS (such that including this directive would make porting
>>>>> easier).  As it stands currently, this patch only serves to create a crutch to
>>>>> perpetuate misundersandings about how the behavior currently works.
>>>> The way things work is counterintuitive and leads to weird code constructs
>>>> with the application having to manage multiple sockets because weird
>>>> semantics have developed over the years.
>>> The way things work is counterintuitive to _you_ (is it was to me a few months
>>> ago).  That asside, I came to understand how this actually works, how it has
>>> worked for decades, and how programmers have successfully written applications
>>> that use this model over that time period.  Can we modify the model?  Sure.
>>> Should we?  I certainly don't see any need, given that it does little except
>>> change the model.  For those who understand it, its compltely useless.  I'm
>>> willing to concede that I'm wrong, but not without some modicum of evidence that
>>> this change will benefit existing applications.  If some other operating system
>>> adheres to the model you expect it to, perhaps this has legs, but I don't know
>>> of any that do.  The current model, even if counter intuitive, is well defined,
>>> well understood, and documented.  I fail to see how adding an alternate,
>>> undocumented model (that may itself be counterintuitive to all the developers
>>> who have developed under the current model) adds anything significant.
>>>
>>> Neil
>> Hi Neil
>>
>> This has been somewhat bugging me for a while, so I went digging.
>>
>> Here is a rather pertinent text that points out that we "might" have a bug.
>> RFC 4607:
>>
>> 4.2.  Requirements on the Host IP Module
>>
>>    An incoming datagram destined to an SSM address MUST be delivered by
>>    the IP module to all sockets that have indicated (via Subscribe) a
>>    desire to receive data that matches the datagram's source address,
>>    destination address, and arriving interface.  It MUST NOT be
>>    delivered to other sockets.
>>
> I'll let David respond more fully, since I'm not familiar with this RFC, but a
> quick read would suggest that (from the abstract), this only applies to a subset
> of addresses, which are not being used in the application in question here.
> From what I read, the RFC defines an extenstion to the sockets api which allows
> you to subscribe to a multicast group from a specific source, using one of the
> reserved muticast ranges provided in the abstract.  It appears that we support
> this RFC via the IP_ADD_SOURCE_MEMBERSHIP socket option.  Now, if we allow
> sockets that issue IP_ADD_SOURCE_MEMBERSHIP calls to receive datagrams from
> multicast addresses within the range defined by the rfc from other sources that
> they have not subscribed to, yes we have a bug, but thats not overly relevant I
> think to Christophs problem, since he's using the any-source model, and its
> corresponding addresses.  Switching to the specific-source model would solve his
> immeidate problem here that we've been debating, but would likely introduce a
> new set, in that he would then have to write his app to subscribe to the myrriad
> of sources that are sending to that multicast group.

The problem is that if an application follows an IP_ADD_MEMBERSHIP call with
an IP_BLOCK_SOURCE call, thus extending the exclude the list, we would still
deliver packets that don't match the multicast destination.  That violates the
above SSM requirement.  It appears to be an API bug that allows for a violation
of the protocol specification.

> 
>> Additionally, RFC 3678 describes IP_ADD_MEMBERSHIP as an 'any-source group'
>> and is allowed by the SSM spec.  This is also how it is implemented in the kernel.
>> However, we do not appear to perform the filtering required by the above quoted
>> section 4.2.  
> Very true, so we may have a bug in the SSM model, but again, thats not what
> Christoph is using, its the any-source model, using group address unrelated to
> the ssm RFC.
> 

See above.  IP_ADD_MEMBERSHIP is also part of the ssm model since it can be
followed with IP_BLOCK_SOURCE.  They have to work together, but the socket matching
code is ignoring it if it can't find the multicast in the socket's list.

-vlad

> In particular, if we fail to match the 'datagram's destination address',
>> we deliver the packet, which I believe is in violation of the "MUST NOT" above.
>>
> I think only if the SSM model is used via the socket extensions the RFC
> describes.  If Christophs app is subscribing via IP_ADD_SOURCE_MEMBERSHIP, then
> yes, we have a problem.  But everything I've read says he uses the standard, any-source
> IP_ADD_MEMBERSHIP option which I think makes assertions from RFC 4607 void.
> Christoph, are you using IP_ADD_SOURCE_MEMBERSHIP?
> 
> Neil
> 
>> I've CC'd Dave Stevens, since I'd like to hear his opinion regarding this text.
>>
>> Thanks
>> -vlad
>>
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Neil Horman April 15, 2009, 4:38 p.m. UTC | #9
On Wed, Apr 15, 2009 at 12:07:23PM -0400, Vlad Yasevich wrote:
> Neil Horman wrote:
> > On Wed, Apr 15, 2009 at 10:41:03AM -0400, Vlad Yasevich wrote:
> >> Neil Horman wrote:
> >>> On Wed, Apr 15, 2009 at 08:51:35AM -0400, Christoph Lameter wrote:
> >>>> On Wed, 15 Apr 2009, Neil Horman wrote:
> >>>>
> >>>>>> How are you going to do this?
> >>>>> I'm going to write each application to use 2 sockets, one bound to each
> >>>>> multicast group.  Thats the way it works now.  I think you missed the obvious in
> >>>>> your construction of this example.
> >>>> Ok it could be done with binding. But you would need 3 sockets. One per MC
> >>>> groups bound to a MC group each and then one for the replies (hmmm...
> >>>> looks like you could use SO_BINDTODEVICE on one socket to get around the
> >>> Depending on your setup, 2 is perfectly sufficient.  In fact 1 can be sufficient
> >>> if you want to filter in your application, but we've been over that.
> >>>
> >>>> third one --there is even an exception case for this in inet_bind causing
> >>>> more weird semantics-- but then the application needs to know the device
> >>>> name of the NIC, argh)
> >>> Of course it does, but thats zero incremental cost, since you need to know the
> >>> device name anyway, when specifying the ifindex to the join request.
> >>>
> >>>>>> Its trivial to do with this patch and one
> >>>>>> socket in each process listening to port 4711 that subscribes to the
> >>>>>> necessary multicast groups.
> >>>>> Its trivial without the patch as well.
> >>>> I do not see how you can justify making such a statement.
> >>>>
> >>> I find it justified because I don't see an application using 2 or 3 sockets and a poll or
> >>> select call as anything more than trivial.  If you find that to be non-trivial,
> >>> perhaps a refresher programming course might be in order for you?
> >>>
> >>>>> It boils down to this:  This is the way multicast subscriptions have worked in
> >>>>> bsd, linux, and presumably various other unix and non-unix operating systems for
> >>>>> lord only knows how long.  Provide some documentation that shows its in
> >>>>> violation of a newer standard, or that it is common practice to behave
> >>>>> differently on another OS (such that including this directive would make porting
> >>>>> easier).  As it stands currently, this patch only serves to create a crutch to
> >>>>> perpetuate misundersandings about how the behavior currently works.
> >>>> The way things work is counterintuitive and leads to weird code constructs
> >>>> with the application having to manage multiple sockets because weird
> >>>> semantics have developed over the years.
> >>> The way things work is counterintuitive to _you_ (is it was to me a few months
> >>> ago).  That asside, I came to understand how this actually works, how it has
> >>> worked for decades, and how programmers have successfully written applications
> >>> that use this model over that time period.  Can we modify the model?  Sure.
> >>> Should we?  I certainly don't see any need, given that it does little except
> >>> change the model.  For those who understand it, its compltely useless.  I'm
> >>> willing to concede that I'm wrong, but not without some modicum of evidence that
> >>> this change will benefit existing applications.  If some other operating system
> >>> adheres to the model you expect it to, perhaps this has legs, but I don't know
> >>> of any that do.  The current model, even if counter intuitive, is well defined,
> >>> well understood, and documented.  I fail to see how adding an alternate,
> >>> undocumented model (that may itself be counterintuitive to all the developers
> >>> who have developed under the current model) adds anything significant.
> >>>
> >>> Neil
> >> Hi Neil
> >>
> >> This has been somewhat bugging me for a while, so I went digging.
> >>
> >> Here is a rather pertinent text that points out that we "might" have a bug.
> >> RFC 4607:
> >>
> >> 4.2.  Requirements on the Host IP Module
> >>
> >>    An incoming datagram destined to an SSM address MUST be delivered by
> >>    the IP module to all sockets that have indicated (via Subscribe) a
> >>    desire to receive data that matches the datagram's source address,
> >>    destination address, and arriving interface.  It MUST NOT be
> >>    delivered to other sockets.
> >>
> > I'll let David respond more fully, since I'm not familiar with this RFC, but a
> > quick read would suggest that (from the abstract), this only applies to a subset
> > of addresses, which are not being used in the application in question here.
> > From what I read, the RFC defines an extenstion to the sockets api which allows
> > you to subscribe to a multicast group from a specific source, using one of the
> > reserved muticast ranges provided in the abstract.  It appears that we support
> > this RFC via the IP_ADD_SOURCE_MEMBERSHIP socket option.  Now, if we allow
> > sockets that issue IP_ADD_SOURCE_MEMBERSHIP calls to receive datagrams from
> > multicast addresses within the range defined by the rfc from other sources that
> > they have not subscribed to, yes we have a bug, but thats not overly relevant I
> > think to Christophs problem, since he's using the any-source model, and its
> > corresponding addresses.  Switching to the specific-source model would solve his
> > immeidate problem here that we've been debating, but would likely introduce a
> > new set, in that he would then have to write his app to subscribe to the myrriad
> > of sources that are sending to that multicast group.
> 
> The problem is that if an application follows an IP_ADD_MEMBERSHIP call with
> an IP_BLOCK_SOURCE call, thus extending the exclude the list, we would still
> deliver packets that don't match the multicast destination.  That violates the
> above SSM requirement.  It appears to be an API bug that allows for a violation
> of the protocol specification.
> 
Ok, but IP_BLOCK_SOURCE is part of the any-source api, not the SSM api (although
it allows it).  I agree what you've described is a bug, but its a bug against
the SSM RFC, not RFC 3678, which is what Christoph is using based on the
selection of his multicast address being outside the range of that defined by
SSM.

> > 
> >> Additionally, RFC 3678 describes IP_ADD_MEMBERSHIP as an 'any-source group'
> >> and is allowed by the SSM spec.  This is also how it is implemented in the kernel.
> >> However, we do not appear to perform the filtering required by the above quoted
> >> section 4.2.  
> > Very true, so we may have a bug in the SSM model, but again, thats not what
> > Christoph is using, its the any-source model, using group address unrelated to
> > the ssm RFC.
> > 
> 
> See above.  IP_ADD_MEMBERSHIP is also part of the ssm model since it can be
> followed with IP_BLOCK_SOURCE.  They have to work together, but the socket matching
> code is ignoring it if it can't find the multicast in the socket's list.
> 
Also see above, hes using multicast group 239.x.x.x.  SSM only encompases
addresses in the 232.0.0.1 to 232.255.255.255 range.  He's using ASM not SSM,
regardless of what SSM allows.  I agree it sounds like we have a bug in SSM
behavior, but its not overly relevant to this discussion (saving for the fact
that his new feature would inadvertantly fix the bug, in addition to altering
ASM behavior).  If we want to fix the SSM bug, thats great, lets fix it, but
lets not do it by introducing a new behavior to ASM.

And thats all moot anyway, becaues Christoph (unless I'm mistaken) is not, and
does not want to restrict source sending privlidges.  He wants to get data on
multicast groups from all/any source, he just doesn't want to get multicast data
from groups he didn't explicitly join in the app doing the receiving, which is
exactly what you can currently use bind for.

Neil

> -vlad
> 
> > In particular, if we fail to match the 'datagram's destination address',
> >> we deliver the packet, which I believe is in violation of the "MUST NOT" above.
> >>
> > I think only if the SSM model is used via the socket extensions the RFC
> > describes.  If Christophs app is subscribing via IP_ADD_SOURCE_MEMBERSHIP, then
> > yes, we have a problem.  But everything I've read says he uses the standard, any-source
> > IP_ADD_MEMBERSHIP option which I think makes assertions from RFC 4607 void.
> > Christoph, are you using IP_ADD_SOURCE_MEMBERSHIP?
> > 
> > Neil
> > 
> >> I've CC'd Dave Stevens, since I'd like to hear his opinion regarding this text.
> >>
> >> Thanks
> >> -vlad
> >>
> > --
> > To unsubscribe from this list: send the line "unsubscribe netdev" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > 
> 
> 
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Vlad Yasevich April 15, 2009, 5:19 p.m. UTC | #10
Neil Horman wrote:
find the multicast in the socket's list.
>>
> Also see above, hes using multicast group 239.x.x.x.  SSM only encompases
> addresses in the 232.0.0.1 to 232.255.255.255 range.  He's using ASM not SSM,
> regardless of what SSM allows.  I agree it sounds like we have a bug in SSM
> behavior, but its not overly relevant to this discussion (saving for the fact
> that his new feature would inadvertantly fix the bug, in addition to altering
> ASM behavior).  If we want to fix the SSM bug, thats great, lets fix it, but
> lets not do it by introducing a new behavior to ASM.

Sorry, but I don't buy it.  What we have is essentially "backward-brokeness".

Looking at BSD, which was the root of the original brokeness, they have it fixed.
The code will skip sockets that are not members of a particular group.  So, we
are trying really hard to stay bug-for-bug compatible with old implementations.

> 
> And thats all moot anyway, becaues Christoph (unless I'm mistaken) is not, and
> does not want to restrict source sending privlidges.  He wants to get data on
> multicast groups from all/any source, he just doesn't want to get multicast data
> from groups he didn't explicitly join in the app doing the receiving, which is
> exactly what you can currently use bind for.

Let's look at it the other way.  What is broken if we actually filter based on the
socket group membership?  The only applications that will be impacted are ones that
do not join groups themselves and expect to get multicast traffic.  Such applications
are broken to start with.

We already do group membership check for the socket. We simply incorrectly determine
that any socket that doesn't list a group.

What's worse is that if you have a socket that doesn't care about any mulicast
destinations (never did an ADD_MEMBERSHIP), it will still get multicast traffic if
it bound to that port.

We need to take into account the socket's multicast group list.

-vlad

> 
> Neil
> 
>> -vlad
>>
>>> In particular, if we fail to match the 'datagram's destination address',
>>>> we deliver the packet, which I believe is in violation of the "MUST NOT" above.
>>>>
>>> I think only if the SSM model is used via the socket extensions the RFC
>>> describes.  If Christophs app is subscribing via IP_ADD_SOURCE_MEMBERSHIP, then
>>> yes, we have a problem.  But everything I've read says he uses the standard, any-source
>>> IP_ADD_MEMBERSHIP option which I think makes assertions from RFC 4607 void.
>>> Christoph, are you using IP_ADD_SOURCE_MEMBERSHIP?
>>>
>>> Neil
>>>
>>>> I've CC'd Dave Stevens, since I'd like to hear his opinion regarding this text.
>>>>
>>>> Thanks
>>>> -vlad
>>>>
>>> --
>>> To unsubscribe from this list: send the line "unsubscribe netdev" in
>>> the body of a message to majordomo@vger.kernel.org
>>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>>>
>>
> 

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Neil Horman April 15, 2009, 5:53 p.m. UTC | #11
On Wed, Apr 15, 2009 at 01:19:41PM -0400, Vlad Yasevich wrote:
> Neil Horman wrote:
> find the multicast in the socket's list.
> >>
> > Also see above, hes using multicast group 239.x.x.x.  SSM only encompases
> > addresses in the 232.0.0.1 to 232.255.255.255 range.  He's using ASM not SSM,
> > regardless of what SSM allows.  I agree it sounds like we have a bug in SSM
> > behavior, but its not overly relevant to this discussion (saving for the fact
> > that his new feature would inadvertantly fix the bug, in addition to altering
> > ASM behavior).  If we want to fix the SSM bug, thats great, lets fix it, but
> > lets not do it by introducing a new behavior to ASM.
> 
> Sorry, but I don't buy it.  What we have is essentially "backward-brokeness".
> 
> Looking at BSD, which was the root of the original brokeness, they have it fixed.
> The code will skip sockets that are not members of a particular group.  So, we
> are trying really hard to stay bug-for-bug compatible with old implementations.
> 

Despite your assertions, its not broken just because you call it such.  Its
working as its been documented.  If BSD has changed, I'll go look, and as I've
said several times in this thread, if this makes porting from other os'es
easier, than this has legs.  You're the first to have pointed one out, so thank
you.  Regardless however, that doesn't make the current behavior broken.

> > 
> > And thats all moot anyway, becaues Christoph (unless I'm mistaken) is not, and
> > does not want to restrict source sending privlidges.  He wants to get data on
> > multicast groups from all/any source, he just doesn't want to get multicast data
> > from groups he didn't explicitly join in the app doing the receiving, which is
> > exactly what you can currently use bind for.
> 
> Let's look at it the other way.  What is broken if we actually filter based on the
> socket group membership?  The only applications that will be impacted are ones that
> do not join groups themselves and expect to get multicast traffic.  Such applications
> are broken to start with.
> 
I can easily envision on application which expects to get multicast traffic that
doesn't join a group within the context of its own process, specifically relying
on the behavior as its documented today.  Consider a data processing
application whos group management is segmented into a different utility.  This
is really the problem here though isn't it?  A proposal to change the 20 year
old behavior of multicast reception with no way to know how strongly
applications rely on this behavior and no documentation to support the assertion
that the current behavior is broken.

> We already do group membership check for the socket. We simply incorrectly determine
> that any socket that doesn't list a group.
> 
> What's worse is that if you have a socket that doesn't care about any mulicast
> destinations (never did an ADD_MEMBERSHIP), it will still get multicast traffic if
> it bound to that port.
> 
You assume thats true, but you really have no way of knowing thats the case.  I
can imagine plenty of uses for applications that anonymously receive multicast
datagrams.


I'll refer you again to this exact conversation months ago, when I was on the
opposite end of this, and shown to be wrong:
http://kerneltrap.org/mailarchive/linux-netdev/2008/7/11/2430904

Neil
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Christoph Lameter (Ampere) April 15, 2009, 7:17 p.m. UTC | #12
On Wed, 15 Apr 2009, Vlad Yasevich wrote:

> Looking at BSD, which was the root of the original brokeness, they have it fixed.
> The code will skip sockets that are not members of a particular group.  So, we
> are trying really hard to stay bug-for-bug compatible with old implementations.

Ahh interesting. David: Could you say something on this?

> What's worse is that if you have a socket that doesn't care about any mulicast
> destinations (never did an ADD_MEMBERSHIP), it will still get multicast traffic if
> it bound to that port.
>
> We need to take into account the socket's multicast group list.

Right. The fix is pretty simple too since the infrastructure has been
there since the IGMPv3 updates.

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Christoph Lameter (Ampere) April 15, 2009, 7:21 p.m. UTC | #13
On Wed, 15 Apr 2009, Neil Horman wrote:

> I can easily envision on application which expects to get multicast traffic that
> doesn't join a group within the context of its own process, specifically relying
> on the behavior as its documented today.  Consider a data processing
> application whos group management is segmented into a different utility.  This
> is really the problem here though isn't it?  A proposal to change the 20 year
> old behavior of multicast reception with no way to know how strongly
> applications rely on this behavior and no documentation to support the assertion
> that the current behavior is broken.

The "utility" must be a daemon that keeps the socket open. You are
talking about a sheperding process that first opens a socket and
then performs multicast groups. It then keeps the socket open
(otherwise would be unsubscribed) and starts other processes that then
open their own sockets and expect the subscriptions to work.

That does not look convincing. Can you cite a case of an
application actually depending on this behavior?

> I'll refer you again to this exact conversation months ago, when I was on the
> opposite end of this, and shown to be wrong:
> http://kerneltrap.org/mailarchive/linux-netdev/2008/7/11/2430904

Just you backing down does not mean that this is wrong. We have many
more factiods here now.

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Neil Horman April 15, 2009, 7:43 p.m. UTC | #14
On Wed, Apr 15, 2009 at 03:21:27PM -0400, Christoph Lameter wrote:
> On Wed, 15 Apr 2009, Neil Horman wrote:
> 
> > I can easily envision on application which expects to get multicast traffic that
> > doesn't join a group within the context of its own process, specifically relying
> > on the behavior as its documented today.  Consider a data processing
> > application whos group management is segmented into a different utility.  This
> > is really the problem here though isn't it?  A proposal to change the 20 year
> > old behavior of multicast reception with no way to know how strongly
> > applications rely on this behavior and no documentation to support the assertion
> > that the current behavior is broken.
> 
> The "utility" must be a daemon that keeps the socket open. You are
> talking about a sheperding process that first opens a socket and
> then performs multicast groups. It then keeps the socket open
> (otherwise would be unsubscribed) and starts other processes that then
> open their own sockets and expect the subscriptions to work.
> 
> That does not look convincing. Can you cite a case of an
> application actually depending on this behavior?
> 
No, of course not, since I'm just hypothesizing.  Of course that doesn't mean
they don't exist.  And by that token you can't predict what will happen to
applications that do rely (either explicitly or inadvertently) on the current
behavior.

None of which _really_ matters, anyway, they're applications, they can be fixed
to work with either.  The question really is, do we need to, and I think the
answer is no

> > I'll refer you again to this exact conversation months ago, when I was on the
> > opposite end of this, and shown to be wrong:
> > http://kerneltrap.org/mailarchive/linux-netdev/2008/7/11/2430904
> 
> Just you backing down does not mean that this is wrong. We have many
> more factiods here now.

No, it doesn't mean this is wrong, but it does mean David convinced me what we
have now is right.  I'm obviously not going to be able to pass that on to you,
so I'm done.  Perhaps he will pick this up, I've said my peace.

Neil

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Vlad Yasevich April 15, 2009, 9:06 p.m. UTC | #15
Christoph Lameter wrote:
> On Wed, 15 Apr 2009, Vlad Yasevich wrote:
> 
>> Looking at BSD, which was the root of the original brokeness, they have it fixed.
>> The code will skip sockets that are not members of a particular group.  So, we
>> are trying really hard to stay bug-for-bug compatible with old implementations.
> 
> Ahh interesting. David: Could you say something on this?

Just digging around some more, it appears that OpenSolaris also filters out
non-joined groups at the socket:

http://src.opensolaris.org/source/xref/onnv/onnv-gate/usr/src/uts/common/inet/ip/ip_multi.c#ilg_lookup_ill_withsrc

In that code, connp is essentially a socket and ilg is the membership list.
That function function called from conn_wantpacket(), which is in turn called
for every socket that matches the packet.

> 
>> What's worse is that if you have a socket that doesn't care about any mulicast
>> destinations (never did an ADD_MEMBERSHIP), it will still get multicast traffic if
>> it bound to that port.
>>
>> We need to take into account the socket's multicast group list.
> 
> Right. The fix is pretty simple too since the infrastructure has been
> there since the IGMPv3 updates.
> 

Right. since IGMPv3 introduced the concept of filtering.  It even states this
in RFC 3376:

     Filtering of packets based upon a socket's multicast reception
     state is a new feature of this service interface.  The previous
     service interface [RFC1112] described no filtering based upon
     multicast join state; rather, a join on a socket simply caused the
     host to join a group on the given interface, and packets destined
     for that group could be delivered to all sockets whether they had
     joined or not.


-vlad
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Stevens April 15, 2009, 9:42 p.m. UTC | #16
Vlad, Neil,
        [Sorry if I'm behind -- just saw this thread...]

> Here is a rather pertinent text that points out that we "might" have a 
bug.
> RFC 4607:
> 
> 4.2.  Requirements on the Host IP Module
> 
>    An incoming datagram destined to an SSM address MUST be delivered by
>    the IP module to all sockets that have indicated (via Subscribe) a
>    desire to receive data that matches the datagram's source address,
>    destination address, and arriving interface.  It MUST NOT be
>    delivered to other sockets.
> 
> 
> Additionally, RFC 3678 describes IP_ADD_MEMBERSHIP as an 'any-source 
group'
> and is allowed by the SSM spec.  This is also how it is implemented in 
the kernel.
> However, we do not appear to perform the filtering required by the above 
quoted
> section 4.2.  In particular, if we fail to match the 'datagram's 
destination address',
> we deliver the packet, which I believe is in violation of the "MUST NOT" 
above.

        "SSM" stands for "Source-Specific Multicast". "Any source" is 
explicitly
*not* source-specific. Nothing in RFC 4607 is intended to change legacy 
behavior
where there is no source filtering. If you do SSM on Linux, you will have 
the
group-join check.
        The legacy behavior you don't like was not a bug in BSD. It was 
intentional
and authored by Steve Deering, who is also the author of the original 
multicast
RFC. The intent was that multicast address socket behavior would be just 
like
unicast address behavior, with the exception that multicast addresses are
interface-specific. If you add a new unicast address to the machine, and
the binding on the socket doesn't restrict it, you will receive packets
from any of the addresses on the machine. That's what INADDR_ANY means. 
Your
multicast application may also receive unicast traffic on that port too.
        It does seem to cause a lot of confusion for people, though I'm 
not
sure why. *Any* multicast application can receive traffic not intended for
it because there is no exclusion on multicast senders. So, applications 
must
handle packets not for it, period. Unless you have an IANA-allocated 
multicast
address and port, someone else may use it for something else on the same
network, while still following the rules. And if you do have assigned 
addresses
and ports, someone may send you garbage, anyway.
        I'll read the rest of the thread and see if there's any more to 
respond
to... :-)

 +-DLS


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Stevens April 15, 2009, 10:16 p.m. UTC | #17
Actually, I think having an option for Solaris
compatibility might be a good idea, but I think
it should be per-socket.

I know of at least one application (a JVM) in
the wild that relies on the current behavior--
joins are done in one process for sockets in
another. It isn't necessarily obvious what
will break if you turn it off globally.

I tend to agree with Neil that it really shouldn't
be necessary, but there is no doubt it causes
great confusion to people.

Also, for the record, Linux doesn't support SSM
per RFC 4607. The filtering it requires applies
only to its address range and it explicitly states
the current Linux model as part of the reasoning
for it in the SSM address range only. That indicates
to me it is incorrect to filter all multicasts that
way, as Solaris does.

Doing something per-socket to express what you want
easily is fine with me and, as long as it defaults to
standard behavior, not a standards issue. Changing
all sockets on the machine from existing, correct
behavior I think is not appropriate.

                                                +-DLS

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Miller April 15, 2009, 11:45 p.m. UTC | #18
Please don't post references to OpenSolaris code as I've been advised
in the past that even just looking at the opensolaris tree might taint
us as Linux developers.

Thank you.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Vlad Yasevich April 16, 2009, 12:44 p.m. UTC | #19
David Miller wrote:
> Please don't post references to OpenSolaris code as I've been advised
> in the past that even just looking at the opensolaris tree might taint
> us as Linux developers.
> 
> Thank you.
> 


Sorry, didn't realize it.

-vlad
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Christoph Lameter (Ampere) April 16, 2009, 2:45 p.m. UTC | #20
On Wed, 15 Apr 2009, David Stevens wrote:

> I know of at least one application (a JVM) in
> the wild that relies on the current behavior--
> joins are done in one process for sockets in
> another. It isn't necessarily obvious what

That is sick. Relying on operations on one socket affecting a
socket in another processs.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

different multicast group.

Signed-off-by: Christoph Lameter <cl@linux.com>

---
 Documentation/networking/ip-sysctl.txt |   10 ++++++++++
 include/linux/igmp.h                   |    1 +
 include/linux/sysctl.h                 |    1 +
 net/ipv4/igmp.c                        |    6 +++---
 net/ipv4/sysctl_net_ipv4.c             |    8 ++++++++
 5 files changed, 23 insertions(+), 3 deletions(-)

Index: linux-2.6/net/ipv4/igmp.c
===================================================================
--- linux-2.6.orig/net/ipv4/igmp.c	2009-04-14 13:03:14.000000000 -0500
+++ linux-2.6/net/ipv4/igmp.c	2009-04-14 13:11:38.000000000 -0500
@@ -1419,7 +1419,7 @@  static struct in_device *ip_mc_find_dev(
  */
 int sysctl_igmp_max_memberships __read_mostly = IP_MAX_MEMBERSHIPS;
 int sysctl_igmp_max_msf __read_mostly = IP_MAX_MSF;
-
+int sysctl_igmp_mc_socket_based_filtering = 0;

 static int ip_mc_del1_src(struct ip_mc_list *pmc, int sfmode,
 	__be32 *psfsrc)
@@ -2187,7 +2187,7 @@  int ip_mc_sf_allow(struct sock *sk, __be
 	struct ip_sf_socklist *psl;
 	int i;

-	if (!ipv4_is_multicast(loc_addr))
+	if (ipv4_is_lbcast(loc_addr) || !ipv4_is_multicast(loc_addr))
 		return 1;

 	for (pmc=inet->mc_list; pmc; pmc=pmc->next) {
@@ -2196,7 +2196,7 @@  int ip_mc_sf_allow(struct sock *sk, __be
 			break;
 	}
 	if (!pmc)
-		return 1;
+		return !sysctl_igmp_mc_socket_based_filtering;
 	psl = pmc->sflist;
 	if (!psl)
 		return pmc->sfmode == MCAST_EXCLUDE;
Index: linux-2.6/include/linux/igmp.h
===================================================================
--- linux-2.6.orig/include/linux/igmp.h	2009-04-14 13:13:14.000000000 -0500
+++ linux-2.6/include/linux/igmp.h	2009-04-14 13:41:14.000000000 -0500
@@ -150,6 +150,7 @@  static inline struct igmpv3_query *

 extern int sysctl_igmp_max_memberships;
 extern int sysctl_igmp_max_msf;
+extern int sysctl_igmp_mc_socket_based_filtering;

 struct ip_sf_socklist
 {
Index: linux-2.6/include/linux/sysctl.h
===================================================================
--- linux-2.6.orig/include/linux/sysctl.h	2009-04-14 13:15:57.000000000 -0500
+++ linux-2.6/include/linux/sysctl.h	2009-04-14 13:16:49.000000000 -0500
@@ -435,6 +435,7 @@  enum
 	NET_TCP_ALLOWED_CONG_CONTROL=123,
 	NET_TCP_MAX_SSTHRESH=124,
 	NET_TCP_FRTO_RESPONSE=125,
+	NET_IPV4_IGMP_MC_SOCKET_BASED_FILTERING=126,
 };

 enum {
Index: linux-2.6/net/ipv4/sysctl_net_ipv4.c
===================================================================
--- linux-2.6.orig/net/ipv4/sysctl_net_ipv4.c	2009-04-14 13:13:53.000000000 -0500
+++ linux-2.6/net/ipv4/sysctl_net_ipv4.c	2009-04-14 13:15:44.000000000 -0500
@@ -408,6 +408,14 @@  static struct ctl_table ipv4_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec
 	},
+	{
+		.ctl_name	= NET_IPV4_IGMP_MC_SOCKET_BASED_FILTERING,
+		.procname	= "igmp_mc_socked_based_filtering",
+		.data		= &sysctl_igmp_mc_socket_based_filtering,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec
+	},

 #endif
 	{
Index: linux-2.6/Documentation/networking/ip-sysctl.txt
===================================================================
--- linux-2.6.orig/Documentation/networking/ip-sysctl.txt	2009-04-14 13:48:09.000000000 -0500
+++ linux-2.6/Documentation/networking/ip-sysctl.txt	2009-04-14 13:53:10.000000000 -0500
@@ -611,6 +611,16 @@  igmp_max_memberships - INTEGER
 	Change the maximum number of multicast groups we can subscribe to.
 	Default: 20

+igmp_mc_socket_based_filtering - INTEGER
+	Use the list of subscribed multicast addresses to filter the traffic
+	going to a multicast socket. If set to zero then multicast traffic
+        is forwarded to any socket subscribed to a port number ignoring the
+        list of multicast groups that a socket has been subscribed to. This mode
+        is the default since it has been done that way in the past.
+	If set to one then only multicast traffic of the multicast groups
+        that a socket has joined are forwarded to the socket.
+	Default: 0
+
 conf/interface/*  changes special settings per interface (where "interface" is
 		  the name of your network interface)
 conf/all/*	  is special, changes the settings for all interfaces