From patchwork Tue Jun 23 14:06:09 2009 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Timo Teras X-Patchwork-Id: 29058 X-Patchwork-Delegate: davem@davemloft.net Return-Path: X-Original-To: patchwork-incoming@bilbo.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from ozlabs.org (ozlabs.org [203.10.76.45]) (using TLSv1 with cipher DHE-RSA-AES256-SHA (256/256 bits)) (Client CN "mx.ozlabs.org", Issuer "CA Cert Signing Authority" (verified OK)) by bilbo.ozlabs.org (Postfix) with ESMTPS id A669AB70B1 for ; Wed, 24 Jun 2009 00:06:28 +1000 (EST) Received: by ozlabs.org (Postfix) id 8B905DDDF6; Wed, 24 Jun 2009 00:06:28 +1000 (EST) Delivered-To: patchwork-incoming@ozlabs.org Received: from vger.kernel.org (vger.kernel.org [209.132.176.167]) by ozlabs.org (Postfix) with ESMTP id D11D2DDD0C for ; Wed, 24 Jun 2009 00:06:27 +1000 (EST) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1759539AbZFWOGO (ORCPT ); Tue, 23 Jun 2009 10:06:14 -0400 Received: (majordomo@vger.kernel.org) by vger.kernel.org id S1759733AbZFWOGN (ORCPT ); Tue, 23 Jun 2009 10:06:13 -0400 Received: from ey-out-2122.google.com ([74.125.78.27]:5668 "EHLO ey-out-2122.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1759502AbZFWOGK (ORCPT ); Tue, 23 Jun 2009 10:06:10 -0400 Received: by ey-out-2122.google.com with SMTP id 9so10836eyd.37 for ; Tue, 23 Jun 2009 07:06:11 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=gamma; h=domainkey-signature:received:received:sender:message-id:date:from :user-agent:mime-version:to:subject:content-type; bh=J8N1bk7NGT7bzKXVoB1ItuJxFavNckeHmgFEMMPzQ4U=; b=ajdPu/eev4JJd0ZL9n9GWRRmGSinlSQiQh2HoCUfqg4Nb/pp39/QFOI22HZNEldlRK N1yBwRTPhsumbidEtVwQer07ecD0JvGUWZzJBKFRGJiMIGjoRNR7QU5JTrmJWMKkolW2 LLGOoZmi8297hveZ2yIxcjewTGMjlSbD23j6A= DomainKey-Signature: a=rsa-sha1; c=nofws; d=gmail.com; s=gamma; h=sender:message-id:date:from:user-agent:mime-version:to:subject :content-type; b=W3hsMqxfw+u4v6PXUyTFKw/T/Wb9dQ9oxKcnJm4tZG8iw152SuPCqqvKJPRCq3vxG7 wAd0j+DkxtpdABuTX9I6I1qxBhAg+e/Ts1Go4Sl5lf5r2nFjPFQyEagmS0hROZH4nD7p 09HIsFJ462if2n815IFVxURMR9PdnouY4gYg4= Received: by 10.210.116.16 with SMTP id o16mr6432836ebc.3.1245765971179; Tue, 23 Jun 2009 07:06:11 -0700 (PDT) Received: from ?10.252.5.10? (xdsl-83-150-94-239.nebulazone.fi [83.150.94.239]) by mx.google.com with ESMTPS id 7sm277439eyg.32.2009.06.23.07.06.09 (version=SSLv3 cipher=RC4-MD5); Tue, 23 Jun 2009 07:06:09 -0700 (PDT) Message-ID: <4A40E151.10007@iki.fi> Date: Tue, 23 Jun 2009 17:06:09 +0300 From: =?ISO-8859-1?Q?Timo_Ter=E4s?= User-Agent: Thunderbird 2.0.0.21 (X11/20090409) MIME-Version: 1.0 To: netdev@vger.kernel.org Subject: multicast for v2.6.29 Sender: netdev-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org diff --git a/include/linux/mroute.h b/include/linux/mroute.h index 8a45569..13500a3 100644 --- a/include/linux/mroute.h +++ b/include/linux/mroute.h @@ -33,7 +33,7 @@ #define SIOCGETSGCNT (SIOCPROTOPRIVATE+1) #define SIOCGETRPF (SIOCPROTOPRIVATE+2) -#define MAXVIFS 32 +#define MAXVIFS 256 typedef unsigned long vifbitmap_t; /* User mode code depends on this lot */ typedef unsigned short vifi_t; #define ALL_VIFS ((vifi_t)(-1)) @@ -41,7 +41,7 @@ typedef unsigned short vifi_t; /* * Same idea as select */ - + #define VIFM_SET(n,m) ((m)|=(1<<(n))) #define VIFM_CLR(n,m) ((m)&=~(1<<(n))) #define VIFM_ISSET(n,m) ((m)&(1<<(n))) @@ -53,7 +53,7 @@ typedef unsigned short vifi_t; * Passed by mrouted for an MRT_ADD_VIF - again we use the * mrouted 3.6 structures for compatibility */ - + struct vifctl { vifi_t vifc_vifi; /* Index of VIF */ unsigned char vifc_flags; /* VIFF_ flags */ @@ -66,11 +66,12 @@ struct vifctl { #define VIFF_TUNNEL 0x1 /* IPIP tunnel */ #define VIFF_SRCRT 0x2 /* NI */ #define VIFF_REGISTER 0x4 /* register vif */ +#define VIFF_NBMA 0x10 /* * Cache manipulation structures for mrouted and PIMd */ - + struct mfcctl { struct in_addr mfcc_origin; /* Origin of mcast */ @@ -83,10 +84,10 @@ struct mfcctl int mfcc_expire; }; -/* +/* * Group count retrieval for mrouted */ - + struct sioc_sg_req { struct in_addr src; @@ -113,7 +114,7 @@ struct sioc_vif_req * This is the format the mroute daemon expects to see IGMP control * data. Magically happens to be like an IP packet as per the original */ - + struct igmpmsg { __u32 unused1,unused2; @@ -190,7 +191,7 @@ struct vif_device #define VIFF_STATIC 0x8000 -struct mfc_cache +struct mfc_cache { struct mfc_cache *next; /* Next entry on cache line */ __be32 mfc_mcastgrp; /* Group the entry belongs to */ @@ -224,7 +225,7 @@ struct mfc_cache #define MFC_HASH(a,b) (((((__force u32)(__be32)a)>>24)^(((__force u32)(__be32)b)>>26))&(MFC_LINES-1)) #else #define MFC_HASH(a,b) ((((__force u32)(__be32)a)^(((__force u32)(__be32)b)>>2))&(MFC_LINES-1)) -#endif +#endif #endif diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 1466644..5adea03 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -116,6 +116,31 @@ static struct net_protocol pim_protocol; static struct timer_list ipmr_expire_timer; +static __be32 ipmr_get_skb_nbma(struct sk_buff *skb) +{ + union { + char addr[MAX_ADDR_LEN]; + __be32 inaddr; + } u; + + if (dev_parse_header(skb, u.addr) != 4) + return INADDR_ANY; + + return u.inaddr; +} + +static int ip_mr_match_vif_skb(struct vif_device *vif, struct sk_buff *skb) +{ + if (vif->dev != skb->dev) + return 0; + + if (vif->flags & VIFF_NBMA) + return ipmr_get_skb_nbma(skb) == vif->remote; + + return 1; +} + + /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */ static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v) @@ -468,6 +493,7 @@ static int vif_add(struct vifctl *vifc, int mrtsock) return err; } break; + case VIFF_NBMA: case 0: dev = ip_dev_find(&init_net, vifc->vifc_lcl_addr.s_addr); if (!dev) @@ -502,7 +528,7 @@ static int vif_add(struct vifctl *vifc, int mrtsock) v->pkt_in = 0; v->pkt_out = 0; v->link = dev->ifindex; - if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER)) + if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER|VIFF_NBMA)) v->link = dev->iflink; /* And finish update writing critical data */ @@ -1191,12 +1217,15 @@ static inline int ipmr_forward_finish(struct sk_buff *skb) { struct ip_options * opt = &(IPCB(skb)->opt); - IP_INC_STATS_BH(dev_net(skb->dst->dev), IPSTATS_MIB_OUTFORWDATAGRAMS); + IP_INC_STATS_BH(dev_net(skb->dev), IPSTATS_MIB_OUTFORWDATAGRAMS); if (unlikely(opt->optlen)) ip_forward_options(skb); - return dst_output(skb); + if (skb->dst != NULL) + return dst_output(skb); + else + return dev_queue_xmit(skb); } /* @@ -1208,7 +1237,8 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi) const struct iphdr *iph = ip_hdr(skb); struct vif_device *vif = &vif_table[vifi]; struct net_device *dev; - struct rtable *rt; + struct net_device *fromdev = skb->dev; + struct rtable *rt = NULL; int encap = 0; if (vif->dev == NULL) @@ -1236,6 +1266,19 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi) if (ip_route_output_key(&init_net, &rt, &fl)) goto out_free; encap = sizeof(struct iphdr); + dev = rt->u.dst.dev; + } else if (vif->flags&VIFF_NBMA) { + /* Fixme, we should take tunnel source address from the + * tunnel device binding if it exists */ + struct flowi fl = { .oif = vif->link, + .nl_u = { .ip4_u = + { .daddr = vif->remote, + .tos = RT_TOS(iph->tos) } }, + .proto = IPPROTO_GRE }; + if (ip_route_output_key(&init_net, &rt, &fl)) + goto out_free; + encap = LL_RESERVED_SPACE(rt->u.dst.dev); + dev = vif->dev; } else { struct flowi fl = { .oif = vif->link, .nl_u = { .ip4_u = @@ -1244,34 +1287,39 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi) .proto = IPPROTO_IPIP }; if (ip_route_output_key(&init_net, &rt, &fl)) goto out_free; + dev = rt->u.dst.dev; } - dev = rt->u.dst.dev; + if (!(vif->flags & VIFF_NBMA)) { + if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) { + /* Do not fragment multicasts. Alas, IPv4 does not + allow to send ICMP, so that packets will disappear + to blackhole. + */ - if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) { - /* Do not fragment multicasts. Alas, IPv4 does not - allow to send ICMP, so that packets will disappear - to blackhole. - */ - - IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS); - ip_rt_put(rt); - goto out_free; + IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS); + goto out_free_rt; + } } encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len; - if (skb_cow(skb, encap)) { - ip_rt_put(rt); - goto out_free; - } + if (skb_cow(skb, encap)) + goto out_free_rt; vif->pkt_out++; vif->bytes_out += skb->len; dst_release(skb->dst); - skb->dst = &rt->u.dst; + if (vif->flags & VIFF_NBMA) { + ip_rt_put(rt); + skb->dst = NULL; + rt = NULL; + } else { + skb->dst = &rt->u.dst; + } ip_decrease_ttl(ip_hdr(skb)); + skb->dev = dev; /* FIXME: forward and output firewalls used to be called here. * What do we do with netfilter? -- RR */ @@ -1280,6 +1328,10 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi) /* FIXME: extra output firewall step used to be here. --RR */ vif->dev->stats.tx_packets++; vif->dev->stats.tx_bytes += skb->len; + } else if (vif->flags & VIFF_NBMA) { + if (dev_hard_header(skb, dev, ntohs(skb->protocol), + &vif->remote, NULL, 4) < 0) + goto out_free_rt; } IPCB(skb)->flags |= IPSKB_FORWARDED; @@ -1295,20 +1347,29 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi) * not mrouter) cannot join to more than one interface - it will * result in receiving multiple packets. */ - NF_HOOK(PF_INET, NF_INET_FORWARD, skb, skb->dev, dev, + NF_HOOK(PF_INET, NF_INET_FORWARD, skb, fromdev, dev, ipmr_forward_finish); return; +out_free_rt: + if (rt != NULL) + ip_rt_put(rt); out_free: kfree_skb(skb); return; } -static int ipmr_find_vif(struct net_device *dev) +static int ipmr_find_vif(struct net_device *dev, __be32 nbma_origin) { int ct; for (ct=maxvif-1; ct>=0; ct--) { - if (vif_table[ct].dev == dev) + if (vif_table[ct].dev != dev) + continue; + + if (vif_table[ct].flags & VIFF_NBMA) { + if (vif_table[ct].remote == nbma_origin) + break; + } else if (nbma_origin == INADDR_ANY) break; } return ct; @@ -1328,7 +1389,7 @@ static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local /* * Wrong interface: drop packet and (maybe) send PIM assert. */ - if (vif_table[vif].dev != skb->dev) { + if (!ip_mr_match_vif_skb(&vif_table[vif], skb)) { int true_vifi; if (skb->rtable->fl.iif == 0) { @@ -1347,7 +1408,7 @@ static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local } cache->mfc_un.res.wrong_if++; - true_vifi = ipmr_find_vif(skb->dev); + true_vifi = ipmr_find_vif(skb->dev, ipmr_get_skb_nbma(skb)); if (true_vifi >= 0 && mroute_do_assert && /* pimsm uses asserts, when switching from RPT to SPT, @@ -1454,7 +1515,7 @@ int ip_mr_input(struct sk_buff *skb) skb = skb2; } - vif = ipmr_find_vif(skb->dev); + vif = ipmr_find_vif(skb->dev, ipmr_get_skb_nbma(skb)); if (vif >= 0) { int err = ipmr_cache_unresolved(vif, skb); read_unlock(&mrt_lock); @@ -1634,7 +1695,7 @@ int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait) } dev = skb->dev; - if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) { + if (dev == NULL || (vif = ipmr_find_vif(dev, INADDR_ANY)) < 0) { read_unlock(&mrt_lock); return -ENODEV; }