@@ -33,7 +33,7 @@
#define SIOCGETSGCNT (SIOCPROTOPRIVATE+1)
#define SIOCGETRPF (SIOCPROTOPRIVATE+2)
-#define MAXVIFS 32
+#define MAXVIFS 256
typedef unsigned long vifbitmap_t; /* User mode code depends on this lot */
typedef unsigned short vifi_t;
#define ALL_VIFS ((vifi_t)(-1))
@@ -41,7 +41,7 @@ typedef unsigned short vifi_t;
/*
* Same idea as select
*/
-
+
#define VIFM_SET(n,m) ((m)|=(1<<(n)))
#define VIFM_CLR(n,m) ((m)&=~(1<<(n)))
#define VIFM_ISSET(n,m) ((m)&(1<<(n)))
@@ -53,7 +53,7 @@ typedef unsigned short vifi_t;
* Passed by mrouted for an MRT_ADD_VIF - again we use the
* mrouted 3.6 structures for compatibility
*/
-
+
struct vifctl {
vifi_t vifc_vifi; /* Index of VIF */
unsigned char vifc_flags; /* VIFF_ flags */
@@ -66,11 +66,12 @@ struct vifctl {
#define VIFF_TUNNEL 0x1 /* IPIP tunnel */
#define VIFF_SRCRT 0x2 /* NI */
#define VIFF_REGISTER 0x4 /* register vif */
+#define VIFF_NBMA 0x10 /* NBMA vif: matched on device and remote address */
/*
* Cache manipulation structures for mrouted and PIMd
*/
-
+
struct mfcctl
{
struct in_addr mfcc_origin; /* Origin of mcast */
@@ -83,10 +84,10 @@ struct mfcctl
int mfcc_expire;
};
-/*
+/*
* Group count retrieval for mrouted
*/
-
+
struct sioc_sg_req
{
struct in_addr src;
@@ -113,7 +114,7 @@ struct sioc_vif_req
* This is the format the mroute daemon expects to see IGMP control
* data. Magically happens to be like an IP packet as per the original
*/
-
+
struct igmpmsg
{
__u32 unused1,unused2;
@@ -190,7 +191,7 @@ struct vif_device
#define VIFF_STATIC 0x8000
-struct mfc_cache
+struct mfc_cache
{
struct mfc_cache *next; /* Next entry on cache line */
__be32 mfc_mcastgrp; /* Group the entry belongs to */
@@ -224,7 +225,7 @@ struct mfc_cache
#define MFC_HASH(a,b) (((((__force u32)(__be32)a)>>24)^(((__force u32)(__be32)b)>>26))&(MFC_LINES-1))
#else
#define MFC_HASH(a,b) ((((__force u32)(__be32)a)^(((__force u32)(__be32)b)>>2))&(MFC_LINES-1))
-#endif
+#endif
#endif
@@ -116,6 +116,31 @@ static struct net_protocol pim_protocol;
static struct timer_list ipmr_expire_timer;
+/* NBMA peer of @skb per dev_parse_header(); INADDR_ANY unless 4 bytes long. */
+static __be32 ipmr_get_skb_nbma(struct sk_buff *skb)
+{
+	union {
+		unsigned char addr[MAX_ADDR_LEN]; /* matches haddr's type */
+		__be32 inaddr;
+	} u;
+
+	if (dev_parse_header(skb, u.addr) != sizeof(u.inaddr))
+		return INADDR_ANY;
+	return u.inaddr;
+}
+
+/* Nonzero iff @skb came in on @vif: same device and, for NBMA, same peer. */
+static int ip_mr_match_vif_skb(struct vif_device *vif, struct sk_buff *skb)
+{
+	if (skb->dev != vif->dev)
+		return 0;
+	if (!(vif->flags & VIFF_NBMA))
+		return 1;
+	/* NBMA vifs on one device are told apart by the sender's address. */
+	return vif->remote == ipmr_get_skb_nbma(skb);
+}
+
+
/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
@@ -468,6 +493,7 @@ static int vif_add(struct vifctl *vifc, int mrtsock)
return err;
}
break;
+ case VIFF_NBMA:
case 0:
dev = ip_dev_find(&init_net, vifc->vifc_lcl_addr.s_addr);
if (!dev)
@@ -502,7 +528,7 @@ static int vif_add(struct vifctl *vifc, int mrtsock)
v->pkt_in = 0;
v->pkt_out = 0;
v->link = dev->ifindex;
- if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
+ if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER|VIFF_NBMA))
v->link = dev->iflink;
/* And finish update writing critical data */
@@ -1191,12 +1217,15 @@ static inline int ipmr_forward_finish(struct sk_buff *skb)
{
struct ip_options * opt = &(IPCB(skb)->opt);
- IP_INC_STATS_BH(dev_net(skb->dst->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);
+ IP_INC_STATS_BH(dev_net(skb->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);
if (unlikely(opt->optlen))
ip_forward_options(skb);
- return dst_output(skb);
+ if (skb->dst != NULL)
+ return dst_output(skb);
+ else
+ return dev_queue_xmit(skb);
}
/*
@@ -1208,7 +1237,8 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
const struct iphdr *iph = ip_hdr(skb);
struct vif_device *vif = &vif_table[vifi];
struct net_device *dev;
- struct rtable *rt;
+ struct net_device *fromdev = skb->dev;
+ struct rtable *rt = NULL;
int encap = 0;
if (vif->dev == NULL)
@@ -1236,6 +1266,19 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
if (ip_route_output_key(&init_net, &rt, &fl))
goto out_free;
encap = sizeof(struct iphdr);
+ dev = rt->u.dst.dev;
+ } else if (vif->flags&VIFF_NBMA) {
+		/* FIXME: the tunnel source address should be taken from
+		 * the tunnel device binding, if one exists. */
+ struct flowi fl = { .oif = vif->link,
+ .nl_u = { .ip4_u =
+ { .daddr = vif->remote,
+ .tos = RT_TOS(iph->tos) } },
+ .proto = IPPROTO_GRE };
+ if (ip_route_output_key(&init_net, &rt, &fl))
+ goto out_free;
+ encap = LL_RESERVED_SPACE(rt->u.dst.dev);
+ dev = vif->dev;
} else {
struct flowi fl = { .oif = vif->link,
.nl_u = { .ip4_u =
@@ -1244,34 +1287,39 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
.proto = IPPROTO_IPIP };
if (ip_route_output_key(&init_net, &rt, &fl))
goto out_free;
+ dev = rt->u.dst.dev;
}
- dev = rt->u.dst.dev;
+ if (!(vif->flags & VIFF_NBMA)) {
+ if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
+ /* Do not fragment multicasts. Alas, IPv4 does not
+ allow to send ICMP, so that packets will disappear
+ to blackhole.
+ */
- if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
- /* Do not fragment multicasts. Alas, IPv4 does not
- allow to send ICMP, so that packets will disappear
- to blackhole.
- */
-
- IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
- ip_rt_put(rt);
- goto out_free;
+ IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
+ goto out_free_rt;
+ }
}
encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len;
- if (skb_cow(skb, encap)) {
- ip_rt_put(rt);
- goto out_free;
- }
+ if (skb_cow(skb, encap))
+ goto out_free_rt;
vif->pkt_out++;
vif->bytes_out += skb->len;
dst_release(skb->dst);
- skb->dst = &rt->u.dst;
+ if (vif->flags & VIFF_NBMA) {
+ ip_rt_put(rt);
+ skb->dst = NULL;
+ rt = NULL;
+ } else {
+ skb->dst = &rt->u.dst;
+ }
ip_decrease_ttl(ip_hdr(skb));
+ skb->dev = dev;
/* FIXME: forward and output firewalls used to be called here.
* What do we do with netfilter? -- RR */
@@ -1280,6 +1328,10 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
/* FIXME: extra output firewall step used to be here. --RR */
vif->dev->stats.tx_packets++;
vif->dev->stats.tx_bytes += skb->len;
+ } else if (vif->flags & VIFF_NBMA) {
+ if (dev_hard_header(skb, dev, ntohs(skb->protocol),
+ &vif->remote, NULL, 4) < 0)
+ goto out_free_rt;
}
IPCB(skb)->flags |= IPSKB_FORWARDED;
@@ -1295,20 +1347,29 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
* not mrouter) cannot join to more than one interface - it will
* result in receiving multiple packets.
*/
- NF_HOOK(PF_INET, NF_INET_FORWARD, skb, skb->dev, dev,
+ NF_HOOK(PF_INET, NF_INET_FORWARD, skb, fromdev, dev,
ipmr_forward_finish);
return;
+out_free_rt:
+ if (rt != NULL)
+ ip_rt_put(rt);
out_free:
kfree_skb(skb);
return;
}
-static int ipmr_find_vif(struct net_device *dev)
+static int ipmr_find_vif(struct net_device *dev, __be32 nbma_origin)
{
int ct;
for (ct=maxvif-1; ct>=0; ct--) {
- if (vif_table[ct].dev == dev)
+ if (vif_table[ct].dev != dev)
+ continue;
+
+ if (vif_table[ct].flags & VIFF_NBMA) {
+ if (vif_table[ct].remote == nbma_origin)
+ break;
+ } else if (nbma_origin == INADDR_ANY)
break;
}
return ct;
@@ -1328,7 +1389,7 @@ static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local
/*
* Wrong interface: drop packet and (maybe) send PIM assert.
*/
- if (vif_table[vif].dev != skb->dev) {
+ if (!ip_mr_match_vif_skb(&vif_table[vif], skb)) {
int true_vifi;
if (skb->rtable->fl.iif == 0) {
@@ -1347,7 +1408,7 @@ static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local
}
cache->mfc_un.res.wrong_if++;
- true_vifi = ipmr_find_vif(skb->dev);
+ true_vifi = ipmr_find_vif(skb->dev, ipmr_get_skb_nbma(skb));
if (true_vifi >= 0 && mroute_do_assert &&
/* pimsm uses asserts, when switching from RPT to SPT,
@@ -1454,7 +1515,7 @@ int ip_mr_input(struct sk_buff *skb)
skb = skb2;
}
- vif = ipmr_find_vif(skb->dev);
+ vif = ipmr_find_vif(skb->dev, ipmr_get_skb_nbma(skb));
if (vif >= 0) {
int err = ipmr_cache_unresolved(vif, skb);
read_unlock(&mrt_lock);
@@ -1634,7 +1695,7 @@ int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
}
dev = skb->dev;
- if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) {
+ if (dev == NULL || (vif = ipmr_find_vif(dev, INADDR_ANY)) < 0) {
read_unlock(&mrt_lock);
return -ENODEV;
}