Message ID | CAGsizz+x1GVNR9Pz=b7wO0ELx29=FMosfvhQWL5K26EpkwsUCg@mail.gmail.com |
---|---|
State | Rejected, archived |
Delegated to: | David Miller |
Headers | show |
Le lundi 16 janvier 2012 à 20:45 +0100, Štefan Gula a écrit : > From: Stefan Gula <steweg@gmail.com > > This patch is an extension for current Ethernet over GRE > implementation, which allows user to create virtual bridge (multipoint > VPN) and forward traffic based on Ethernet MAC address information in > it. It simulates the Bridge behavior learning mechanism, but instead > of learning port ID from which given MAC address comes, it learns IP > address of peer which encapsulated given packet. Multicast, Broadcast > and unknown-multicast traffic is send over network as multicast > encapsulated GRE packet, so one Ethernet multipoint GRE tunnel can be > represented as one single virtual switch on logical level and be also > represented as one multicast IPv4 address on network level. > > Signed-off-by: Stefan Gula <steweg@gmail.com> > > --- > > code was merged with latest bridge code and should be easily comparable > Please make sure it applies properly on net-next tree. That is mandatory. > diff -uprN -X linux-3.2.1-orig/Documentation/dontdiff > linux-3.2.1-orig/include/net/ipip.h linux-3.2.1-my/include/net/ipip.h > --- linux-3.2.1-orig/include/net/ipip.h 2012-01-12 20:42:45.000000000 +0100 > +++ linux-3.2.1-my/include/net/ipip.h 2012-01-16 11:17:01.000000000 +0100 > @@ -27,6 +27,14 @@ struct ip_tunnel { > __u32 o_seqno; /* The last output seqno */ > int hlen; /* Precalculated GRE header length */ > int mlink; > +#ifdef CONFIG_NET_IPGRE_BRIDGE > +#define GRETAP_BR_HASH_BITS 8 > +#define GRETAP_BR_HASH_SIZE (1 << GRETAP_BR_HASH_BITS) > + struct hlist_head hash[GRETAP_BR_HASH_SIZE]; > + spinlock_t hash_lock; > + unsigned long ageing_time; > + struct timer_list gc_timer; > +#endif > > struct ip_tunnel_parm parms; > > diff -uprN -X linux-3.2.1-orig/Documentation/dontdiff > linux-3.2.1-orig/net/ipv4/Kconfig linux-3.2.1-my/net/ipv4/Kconfig > --- linux-3.2.1-orig/net/ipv4/Kconfig 2012-01-12 20:42:45.000000000 +0100 > +++ linux-3.2.1-my/net/ipv4/Kconfig 2012-01-16 12:37:00.000000000 +0100 > @@ -211,6 +211,15 @@ config NET_IPGRE_BROADCAST > Network), but can be distributed all over the Internet. If you want > to do that, say Y here and to "IP multicast routing" below. > > +config NET_IPGRE_BRIDGE > + bool "IP: Ethernet over multipoint GRE over IP" > + depends on IP_MULTICAST && NET_IPGRE && NET_IPGRE_BROADCAST > + help > + Allows you to use multipoint GRE VPN as virtual switch and interconnect > + several L2 endpoints over L3 routed infrastructure. It is useful for > + creating multipoint L2 VPNs which can be later used inside bridge > + interfaces If you want to use. GRE multipoint L2 VPN feature say Y. > + > config IP_MROUTE > bool "IP: multicast routing" > depends on IP_MULTICAST > diff -uprN -X linux-3.2.1-orig/Documentation/dontdiff > linux-3.2.1-orig/net/ipv4/ip_gre.c linux-3.2.1-my/net/ipv4/ip_gre.c > --- linux-3.2.1-orig/net/ipv4/ip_gre.c 2012-01-12 20:42:45.000000000 +0100 > +++ linux-3.2.1-my/net/ipv4/ip_gre.c 2012-01-16 20:42:03.000000000 +0100 > @@ -52,6 +52,11 @@ > #include <net/ip6_route.h> > #endif > > +#ifdef CONFIG_NET_IPGRE_BRIDGE > +#include <linux/jhash.h> > +#include <asm/unaligned.h> > +#endif > + > /* > Problems & solutions > -------------------- > @@ -134,6 +139,191 @@ struct ipgre_net { > struct net_device *fb_tunnel_dev; > }; > > +#ifdef CONFIG_NET_IPGRE_BRIDGE > + /* > + * This part of code includes codes to enable L2 ethernet > + * switch virtualization over IP routed infrastructure with > + * utilization of multicast capable endpoint using Ethernet > + * over GRE > + * > + * Author: Stefan Gula > + * Signed-off-by: Stefan Gula <steweg@gmail.com> > + */ > +struct mac_addr { > + unsigned char addr[6]; Did I mention : ETH_ALEN ? > +}; > + > +struct ipgre_tap_bridge_entry { > + struct hlist_node hlist; > + u32 raddr; __be32 raddr ? > + struct mac_addr addr; > + struct net_device *dev; > + struct rcu_head rcu; > + unsigned long updated; > +}; > + > +static struct kmem_cache *ipgre_tap_bridge_cache __read_mostly; > +static u32 ipgre_salt __read_mostly; > + > +int __net_init ipgre_tap_bridge_init(void) > +{ > + ipgre_tap_bridge_cache = kmem_cache_create("ipgre_tap_bridge_cache", > + sizeof(struct ipgre_tap_bridge_entry), > + 0, > + SLAB_HWCACHE_ALIGN, NULL); > + if (!ipgre_tap_bridge_cache) > + return -ENOMEM; > + get_random_bytes(&ipgre_salt, sizeof(ipgre_salt)); > + return 0; > +} > + > +void ipgre_tap_bridge_fini(void) > +{ > + kmem_cache_destroy(ipgre_tap_bridge_cache); > +} > + > +static inline int ipgre_tap_bridge_hash(const unsigned char *mac) > +{ > + u32 key = get_unaligned((u32 *)(mac + 2)); > + return jhash_1word(key, ipgre_salt) & (GRETAP_BR_HASH_SIZE - 1); > +} > + > +static inline int ipgre_tap_bridge_has_expired(const struct ip_tunnel *tunnel, > + const struct ipgre_tap_bridge_entry *entry) > +{ > + return time_before_eq(entry->updated + tunnel->ageing_time, > + jiffies); > +} > + > +static void ipgre_tap_bridge_rcu_free(struct rcu_head *head) > +{ > + struct ipgre_tap_bridge_entry *entry > + = container_of(head, struct ipgre_tap_bridge_entry, rcu); > + kmem_cache_free(ipgre_tap_bridge_cache, entry); > +} > + > +static inline void ipgre_tap_bridge_delete(struct > ipgre_tap_bridge_entry *entry) > +{ > + hlist_del_rcu(&entry->hlist); > + call_rcu(&entry->rcu, ipgre_tap_bridge_rcu_free); > +} Did I mention : kfree_rcu() ? > + > + > + > +static struct ipgre_tap_bridge_entry *ipgre_tap_bridge_find( > + struct hlist_head *head, > + const unsigned char *addr) > +{ > + struct hlist_node *h; > + struct ipgre_tap_bridge_entry *entry; > + hlist_for_each_entry(entry, h, head, hlist) { > + if (!compare_ether_addr(entry->addr.addr, addr)) > + return entry; > + } > + return NULL; > +} > + > + > +static struct ipgre_tap_bridge_entry *ipgre_tap_bridge_find_rcu( > + struct hlist_head *head, > + const unsigned char *addr) > +{ > + struct hlist_node *h; > + struct ipgre_tap_bridge_entry *entry; > + hlist_for_each_entry_rcu(entry, h, head, hlist) { > + if (!compare_ether_addr(entry->addr.addr, addr)) > + return entry; > + } > + return NULL; > +} > + > +static struct ipgre_tap_bridge_entry *ipgre_tap_bridge_create( > + struct hlist_head *head, > + u32 source, __be32 source, > + const unsigned char *addr, struct net_device *dev) > +{ > + struct ipgre_tap_bridge_entry *entry; > + entry = kmem_cache_alloc(ipgre_tap_bridge_cache, GFP_ATOMIC); > + if (entry) { > + memcpy(entry->addr.addr, addr, ETH_ALEN); > + hlist_add_head_rcu(&entry->hlist, head); Thats bogus. You must init all entry fields _before_ inserting entry in hash table. > + entry->raddr = source; > + entry->dev = dev; > + entry->updated = jiffies; > + } > + return entry; > +} > + > +__be32 ipgre_tap_bridge_get_raddr(struct ip_tunnel *tunnel, > + const unsigned char *addr) > +{ > + struct ipgre_tap_bridge_entry *entry; > + entry = __ipgre_tap_bridge_get(tunnel, addr); > + if (entry == NULL) > + return 0; > + else > + return entry->raddr; > +} > + > +#endif > /* Tunnel hash table */ > > /* > @@ -563,6 +753,13 @@ static int ipgre_rcv(struct sk_buff *skb > int offset = 4; > __be16 gre_proto; > > +#ifdef CONFIG_NET_IPGRE_BRIDGE > + u32 orig_source; Please run sparse on your code... (make C=2 ...) orig_source is not u32, but __be32 > + struct hlist_head *head; > + struct ipgre_tap_bridge_entry *entry; > + struct ethhdr *tethhdr; > +#endif > + > if (!pskb_may_pull(skb, 16)) > goto drop_nolock; > > @@ -659,10 +856,39 @@ static int ipgre_rcv(struct sk_buff *skb > tunnel->dev->stats.rx_errors++; > goto drop; > } > - > +#ifdef CONFIG_NET_IPGRE_BRIDGE > + orig_source = iph->saddr; > +#endif > iph = ip_hdr(skb); > skb->protocol = eth_type_trans(skb, tunnel->dev); > skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); > +#ifdef CONFIG_NET_IPGRE_BRIDGE > + if (ipv4_is_multicast(tunnel->parms.iph.daddr)) { > + tethhdr = eth_hdr(skb); > + if (!is_multicast_ether_addr( > + tethhdr->h_source)) { > + head = &tunnel->hash[ > + ipgre_tap_bridge_hash( > + tethhdr->h_source)]; > + entry = ipgre_tap_bridge_find_rcu(head, > + tethhdr->h_source); > + if (likely(entry)) { > + entry->raddr = orig_source; > + entry->updated = jiffies; > + } else { > + spin_lock_bh(&tunnel->hash_lock); You dont need the _bh() variant here, since we run from softirq handler. > + if (!ipgre_tap_bridge_find(head, > + tethhdr->h_source)) > + ipgre_tap_bridge_create( > + head, > + orig_source, > + tethhdr->h_source, > + tunnel->dev); > + spin_unlock_bh(&tunnel->hash_lock); > + } > + } > + } > +#endif > } > > tstats = this_cpu_ptr(tunnel->dev->tstats); > @@ -716,7 +942,19 @@ static netdev_tx_t ipgre_tunnel_xmit(str > tiph = &tunnel->parms.iph; > } > > +#ifdef CONFIG_NET_IPGRE_BRIDGE > + rcu_read_lock(); > + if ((dev->type == ARPHRD_ETHER) && > + ipv4_is_multicast(tunnel->parms.iph.daddr)) > + dst = ipgre_tap_bridge_get_raddr(tunnel, > + ((struct ethhdr *)skb->data)->h_dest); > + rcu_read_unlock(); this rcu_read_lock()/rcu_read_unlock() pair should be done in ipgre_tap_bridge_get_raddr() instead... so we dont hit this for all packets. > + if (dst == 0) > + dst = tiph->daddr; > + if (dst == 0) { > +#else > if ((dst = tiph->daddr) == 0) { > +#endif > /* NBMA tunnel */ > General comment : It would be nice this stuff is installed on a new "ip tunnel add" option... Hash table is 2048 bytes long... and an "ip tunnel " option would permit to size the hash table eventually, instead of fixed 256 slots. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
At the risk of repeating myself, Linux GRE support already has provisions for multipoint tunnels. And unlike your code, those reuse the existing neighbor table infrastructure, including all of its user interface and introspection capabilities. It's actually slightly visible in your patch: On Mon, Jan 16, 2012 at 08:45:14PM +0100, Štefan Gula wrote: > +++ linux-3.2.1-my/net/ipv4/ip_gre.c 2012-01-16 20:42:03.000000000 +0100 > @@ -716,7 +942,19 @@ static netdev_tx_t ipgre_tunnel_xmit(str [...] > /* NBMA tunnel */ > > if (skb_dst(skb) == NULL) { -David -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Dňa 16. januára 2012 21:29, David Lamparter <equinox@diac24.net> napísal/a: > At the risk of repeating myself, Linux GRE support already has > provisions for multipoint tunnels. And unlike your code, those reuse the > existing neighbor table infrastructure, including all of its user > interface and introspection capabilities. > > It's actually slightly visible in your patch: > > On Mon, Jan 16, 2012 at 08:45:14PM +0100, Štefan Gula wrote: >> +++ linux-3.2.1-my/net/ipv4/ip_gre.c 2012-01-16 20:42:03.000000000 +0100 >> @@ -716,7 +942,19 @@ static netdev_tx_t ipgre_tunnel_xmit(str > [...] >> /* NBMA tunnel */ >> >> if (skb_dst(skb) == NULL) { > > > -David That code you are referring to is used only for routed traffic inside GRE - L3 traffic over L3 routed infrastructure. My patch is dealing with L2 traffic over L3 routed infrastructure - so the decision here is based on destination MAC addresses and not based on IPv4/IPv6 addresses.
On Mon, Jan 16, 2012 at 11:52:25PM +0100, Štefan Gula wrote: > Dňa 16. januára 2012 21:29, David Lamparter <equinox@diac24.net> napísal/a: > > At the risk of repeating myself, Linux GRE support already has > > provisions for multipoint tunnels. And unlike your code, those reuse the > > existing neighbor table infrastructure, including all of its user > > interface and introspection capabilities. > > > > It's actually slightly visible in your patch: > > > > On Mon, Jan 16, 2012 at 08:45:14PM +0100, Štefan Gula wrote: > >> +++ linux-3.2.1-my/net/ipv4/ip_gre.c 2012-01-16 20:42:03.000000000 +0100 > >> @@ -716,7 +942,19 @@ static netdev_tx_t ipgre_tunnel_xmit(str > > [...] > >> /* NBMA tunnel */ > >> > >> if (skb_dst(skb) == NULL) { > > > > > > -David > > That code you are referring to is used only for routed traffic inside > GRE - L3 traffic over L3 routed infrastructure. My patch is dealing > with L2 traffic over L3 routed infrastructure - so the decision here > is based on destination MAC addresses and not based on IPv4/IPv6 > addresses. Yes, it currently only does IPv4/IPv6 -> IPv4 through the neighbor table. That doesn't mean it can't be extended to handle ethernet addresses the same way. -David -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Dňa 17. januára 2012 10:50, David Lamparter <equinox@diac24.net> napísal/a: > On Mon, Jan 16, 2012 at 11:52:25PM +0100, Štefan Gula wrote: >> Dňa 16. januára 2012 21:29, David Lamparter <equinox@diac24.net> napísal/a: >> > At the risk of repeating myself, Linux GRE support already has >> > provisions for multipoint tunnels. And unlike your code, those reuse the >> > existing neighbor table infrastructure, including all of its user >> > interface and introspection capabilities. >> > >> > It's actually slightly visible in your patch: >> > >> > On Mon, Jan 16, 2012 at 08:45:14PM +0100, Štefan Gula wrote: >> >> +++ linux-3.2.1-my/net/ipv4/ip_gre.c 2012-01-16 20:42:03.000000000 +0100 >> >> @@ -716,7 +942,19 @@ static netdev_tx_t ipgre_tunnel_xmit(str >> > [...] >> >> /* NBMA tunnel */ >> >> >> >> if (skb_dst(skb) == NULL) { >> > >> > >> > -David >> >> That code you are referring to is used only for routed traffic inside >> GRE - L3 traffic over L3 routed infrastructure. My patch is dealing >> with L2 traffic over L3 routed infrastructure - so the decision here >> is based on destination MAC addresses and not based on IPv4/IPv6 >> addresses. > > Yes, it currently only does IPv4/IPv6 -> IPv4 through the neighbor > table. That doesn't mean it can't be extended to handle ethernet > addresses the same way. > > > -David Routing mechanisms and switching mechanisms works completely different, in switching you simply don't have anything like next-hop from routing, which can be resolved by utilizing modified ARP message and there is also absolutely no hierarchy in MAC address (like you have in routing table), so I seriously doubt that it can be done the same way, but I am opened to ideas here. So how would you do like to do that? -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Tue, Jan 17, 2012 at 10:56:51AM +0100, Štefan Gula wrote: > Dňa 17. januára 2012 10:50, David Lamparter <equinox@diac24.net> napísal/a: > > On Mon, Jan 16, 2012 at 11:52:25PM +0100, Štefan Gula wrote: > >> Dňa 16. januára 2012 21:29, David Lamparter <equinox@diac24.net> napísal/a: > >> > At the risk of repeating myself, Linux GRE support already has > >> > provisions for multipoint tunnels. And unlike your code, those reuse the > >> > existing neighbor table infrastructure, including all of its user > >> > interface and introspection capabilities. > >> > > >> > It's actually slightly visible in your patch: > >> > > >> > On Mon, Jan 16, 2012 at 08:45:14PM +0100, Štefan Gula wrote: > >> >> +++ linux-3.2.1-my/net/ipv4/ip_gre.c 2012-01-16 20:42:03.000000000 +0100 > >> >> @@ -716,7 +942,19 @@ static netdev_tx_t ipgre_tunnel_xmit(str > >> > [...] > >> >> /* NBMA tunnel */ > >> >> > >> >> if (skb_dst(skb) == NULL) { > >> > > >> > > >> > -David > >> > >> That code you are referring to is used only for routed traffic inside > >> GRE - L3 traffic over L3 routed infrastructure. My patch is dealing > >> with L2 traffic over L3 routed infrastructure - so the decision here > >> is based on destination MAC addresses and not based on IPv4/IPv6 > >> addresses. > > > > Yes, it currently only does IPv4/IPv6 -> IPv4 through the neighbor > > table. That doesn't mean it can't be extended to handle ethernet > > addresses the same way. > > Routing mechanisms and switching mechanisms works completely > different, in switching you simply don't have anything like next-hop > from routing, which can be resolved by utilizing modified ARP message > and there is also absolutely no hierarchy in MAC address (like you > have in routing table), so I seriously doubt that it can be done the > same way, but I am opened to ideas here. So how would you do like to > do that? NBMA GRE does not use routing mechanisms, it uses the neighbor table. The neighbor table does not use any hierarchy in its lookups. So you have the existing neighbor table use cases: - IPv4 -> MAC with in-kernel ARP - IPv6 -> MAC with in-kernel ND - IPv4 -> IPv4 on NBMA GRE devices (without in-kernel filling) - IPv6 -> IPv4 on NBMA GRE devices (without in-kernel filling) You could add to that: - MAC -> IPv4 on NBMA GRETAP devices -David -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Dňa 17. januára 2012 11:11, David Lamparter <equinox@diac24.net> napísal/a: > On Tue, Jan 17, 2012 at 10:56:51AM +0100, Štefan Gula wrote: >> Dňa 17. januára 2012 10:50, David Lamparter <equinox@diac24.net> napísal/a: >> > On Mon, Jan 16, 2012 at 11:52:25PM +0100, Štefan Gula wrote: >> >> Dňa 16. januára 2012 21:29, David Lamparter <equinox@diac24.net> napísal/a: >> >> > At the risk of repeating myself, Linux GRE support already has >> >> > provisions for multipoint tunnels. And unlike your code, those reuse the >> >> > existing neighbor table infrastructure, including all of its user >> >> > interface and introspection capabilities. >> >> > >> >> > It's actually slightly visible in your patch: >> >> > >> >> > On Mon, Jan 16, 2012 at 08:45:14PM +0100, Štefan Gula wrote: >> >> >> +++ linux-3.2.1-my/net/ipv4/ip_gre.c 2012-01-16 20:42:03.000000000 +0100 >> >> >> @@ -716,7 +942,19 @@ static netdev_tx_t ipgre_tunnel_xmit(str >> >> > [...] >> >> >> /* NBMA tunnel */ >> >> >> >> >> >> if (skb_dst(skb) == NULL) { >> >> > >> >> > >> >> > -David >> >> >> >> That code you are referring to is used only for routed traffic inside >> >> GRE - L3 traffic over L3 routed infrastructure. My patch is dealing >> >> with L2 traffic over L3 routed infrastructure - so the decision here >> >> is based on destination MAC addresses and not based on IPv4/IPv6 >> >> addresses. >> > >> > Yes, it currently only does IPv4/IPv6 -> IPv4 through the neighbor >> > table. That doesn't mean it can't be extended to handle ethernet >> > addresses the same way. >> >> Routing mechanisms and switching mechanisms works completely >> different, in switching you simply don't have anything like next-hop >> from routing, which can be resolved by utilizing modified ARP message >> and there is also absolutely no hierarchy in MAC address (like you >> have in routing table), so I seriously doubt that it can be done the >> same way, but I am opened to ideas here. So how would you do like to >> do that? > > NBMA GRE does not use routing mechanisms, it uses the neighbor table. > The neighbor table does not use any hierarchy in its lookups. > > So you have the existing neighbor table use cases: > - IPv4 -> MAC with in-kernel ARP > - IPv6 -> MAC with in-kernel ND > - IPv4 -> IPv4 on NBMA GRE devices (without in-kernel filling) > - IPv6 -> IPv4 on NBMA GRE devices (without in-kernel filling) > You could add to that: > - MAC -> IPv4 on NBMA GRETAP devices > > > -David Current status: - no general neighbor table exists (currently used build-in tables for forwarding mechanism ARP, ND, RT, RT6 or combinations to get the right information) Your proposal: - creating another kernel build-in table - MAC -> IPv4 on NBMA GRETAP device table doesn't currently exists at all in kernel - adjust/implement learning mechanism to populate the table - adjust/implement forwarding mechanisms to forward frames based on information from given table My patch: - creates table for holding MAC -> IPv4 on NBMA GRETAP device - adjust/implement learning mechanism (using bridge code logic) - adjust/implement forwarding mechanism (using bridge code logic) So I think we are talking about the same thing here, only you are talking in more general way, I am talking more about the actual implementation. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff -uprN -X linux-3.2.1-orig/Documentation/dontdiff linux-3.2.1-orig/include/net/ipip.h linux-3.2.1-my/include/net/ipip.h --- linux-3.2.1-orig/include/net/ipip.h 2012-01-12 20:42:45.000000000 +0100 +++ linux-3.2.1-my/include/net/ipip.h 2012-01-16 11:17:01.000000000 +0100 @@ -27,6 +27,14 @@ struct ip_tunnel { __u32 o_seqno; /* The last output seqno */ int hlen; /* Precalculated GRE header length */ int mlink; +#ifdef CONFIG_NET_IPGRE_BRIDGE +#define GRETAP_BR_HASH_BITS 8 +#define GRETAP_BR_HASH_SIZE (1 << GRETAP_BR_HASH_BITS) + struct hlist_head hash[GRETAP_BR_HASH_SIZE]; + spinlock_t hash_lock; + unsigned long ageing_time; + struct timer_list gc_timer; +#endif struct ip_tunnel_parm parms; diff -uprN -X linux-3.2.1-orig/Documentation/dontdiff linux-3.2.1-orig/net/ipv4/Kconfig linux-3.2.1-my/net/ipv4/Kconfig --- linux-3.2.1-orig/net/ipv4/Kconfig 2012-01-12 20:42:45.000000000 +0100 +++ linux-3.2.1-my/net/ipv4/Kconfig 2012-01-16 12:37:00.000000000 +0100 @@ -211,6 +211,15 @@ config NET_IPGRE_BROADCAST Network), but can be distributed all over the Internet. If you want to do that, say Y here and to "IP multicast routing" below. +config NET_IPGRE_BRIDGE + bool "IP: Ethernet over multipoint GRE over IP" + depends on IP_MULTICAST && NET_IPGRE && NET_IPGRE_BROADCAST + help + Allows you to use multipoint GRE VPN as virtual switch and interconnect + several L2 endpoints over L3 routed infrastructure. It is useful for + creating multipoint L2 VPNs which can be later used inside bridge + interfaces If you want to use. GRE multipoint L2 VPN feature say Y. + config IP_MROUTE bool "IP: multicast routing" depends on IP_MULTICAST diff -uprN -X linux-3.2.1-orig/Documentation/dontdiff linux-3.2.1-orig/net/ipv4/ip_gre.c linux-3.2.1-my/net/ipv4/ip_gre.c --- linux-3.2.1-orig/net/ipv4/ip_gre.c 2012-01-12 20:42:45.000000000 +0100 +++ linux-3.2.1-my/net/ipv4/ip_gre.c 2012-01-16 20:42:03.000000000 +0100 @@ -52,6 +52,11 @@ #include <net/ip6_route.h> #endif +#ifdef CONFIG_NET_IPGRE_BRIDGE +#include <linux/jhash.h> +#include <asm/unaligned.h> +#endif + /* Problems & solutions -------------------- @@ -134,6 +139,191 @@ struct ipgre_net { struct net_device *fb_tunnel_dev; }; +#ifdef CONFIG_NET_IPGRE_BRIDGE + /* + * This part of code includes codes to enable L2 ethernet + * switch virtualization over IP routed infrastructure with + * utilization of multicast capable endpoint using Ethernet + * over GRE + * + * Author: Stefan Gula + * Signed-off-by: Stefan Gula <steweg@gmail.com> + */ +struct mac_addr { + unsigned char addr[6]; +}; + +struct ipgre_tap_bridge_entry { + struct hlist_node hlist; + u32 raddr; + struct mac_addr addr; + struct net_device *dev; + struct rcu_head rcu; + unsigned long updated; +}; + +static struct kmem_cache *ipgre_tap_bridge_cache __read_mostly; +static u32 ipgre_salt __read_mostly; + +int __net_init ipgre_tap_bridge_init(void) +{ + ipgre_tap_bridge_cache = kmem_cache_create("ipgre_tap_bridge_cache", + sizeof(struct ipgre_tap_bridge_entry), + 0, + SLAB_HWCACHE_ALIGN, NULL); + if (!ipgre_tap_bridge_cache) + return -ENOMEM; + get_random_bytes(&ipgre_salt, sizeof(ipgre_salt)); + return 0; +} + +void ipgre_tap_bridge_fini(void) +{ + kmem_cache_destroy(ipgre_tap_bridge_cache); +} + +static inline int ipgre_tap_bridge_hash(const unsigned char *mac) +{ + u32 key = get_unaligned((u32 *)(mac + 2)); + return jhash_1word(key, ipgre_salt) & (GRETAP_BR_HASH_SIZE - 1); +} + +static inline int ipgre_tap_bridge_has_expired(const struct ip_tunnel *tunnel, + const struct ipgre_tap_bridge_entry *entry) +{ + return time_before_eq(entry->updated + tunnel->ageing_time, + jiffies); +} + +static void ipgre_tap_bridge_rcu_free(struct rcu_head *head) +{ + struct ipgre_tap_bridge_entry *entry + = container_of(head, struct ipgre_tap_bridge_entry, rcu); + kmem_cache_free(ipgre_tap_bridge_cache, entry); +} + +static inline void ipgre_tap_bridge_delete(struct ipgre_tap_bridge_entry *entry) +{ + hlist_del_rcu(&entry->hlist); + call_rcu(&entry->rcu, ipgre_tap_bridge_rcu_free); +} + +void ipgre_tap_bridge_cleanup(unsigned long _data) +{ + struct ip_tunnel *tunnel = (struct ip_tunnel *)_data; + unsigned long delay = tunnel->ageing_time; + unsigned long next_timer = jiffies + tunnel->ageing_time; + int i; + spin_lock(&tunnel->hash_lock); + for (i = 0; i < GRETAP_BR_HASH_SIZE; i++) { + struct ipgre_tap_bridge_entry *entry; + struct hlist_node *h, *n; + hlist_for_each_entry_safe(entry, h, n, + &tunnel->hash[i], hlist) + { + unsigned long this_timer; + this_timer = entry->updated + delay; + if (time_before_eq(this_timer, jiffies)) + ipgre_tap_bridge_delete(entry); + else if (time_before(this_timer, next_timer)) + next_timer = this_timer; + } + } + spin_unlock(&tunnel->hash_lock); + mod_timer(&tunnel->gc_timer, round_jiffies_up(next_timer)); +} + +void ipgre_tap_bridge_flush(struct ip_tunnel *tunnel) +{ + int i; + spin_lock_bh(&tunnel->hash_lock); + for (i = 0; i < GRETAP_BR_HASH_SIZE; i++) { + struct ipgre_tap_bridge_entry *entry; + struct hlist_node *h, *n; + hlist_for_each_entry_safe(entry, h, n, + &tunnel->hash[i], hlist) + { + ipgre_tap_bridge_delete(entry); + } + } + spin_unlock_bh(&tunnel->hash_lock); +} + +struct ipgre_tap_bridge_entry *__ipgre_tap_bridge_get(struct ip_tunnel *tunnel, + const unsigned char *addr) +{ + struct hlist_node *h; + struct ipgre_tap_bridge_entry *entry; + hlist_for_each_entry_rcu(entry, h, + &tunnel->hash[ipgre_tap_bridge_hash(addr)], hlist) + { + if (!compare_ether_addr(entry->addr.addr, addr)) { + if (unlikely(ipgre_tap_bridge_has_expired(tunnel, + entry))) + break; + return entry; + } + } + + return NULL; +} + +static struct ipgre_tap_bridge_entry *ipgre_tap_bridge_find( + struct hlist_head *head, + const unsigned char *addr) +{ + struct hlist_node *h; + struct ipgre_tap_bridge_entry *entry; + hlist_for_each_entry(entry, h, head, hlist) { + if (!compare_ether_addr(entry->addr.addr, addr)) + return entry; + } + return NULL; +} + + +static struct ipgre_tap_bridge_entry *ipgre_tap_bridge_find_rcu( + struct hlist_head *head, + const unsigned char *addr) +{ + struct hlist_node *h; + struct ipgre_tap_bridge_entry *entry; + hlist_for_each_entry_rcu(entry, h, head, hlist) { + if (!compare_ether_addr(entry->addr.addr, addr)) + return entry; + } + return NULL; +} + +static struct ipgre_tap_bridge_entry *ipgre_tap_bridge_create( + struct hlist_head *head, + u32 source, + const unsigned char *addr, struct net_device *dev) +{ + struct ipgre_tap_bridge_entry *entry; + entry = kmem_cache_alloc(ipgre_tap_bridge_cache, GFP_ATOMIC); + if (entry) { + memcpy(entry->addr.addr, addr, ETH_ALEN); + hlist_add_head_rcu(&entry->hlist, head); + entry->raddr = source; + entry->dev = dev; + entry->updated = jiffies; + } + return entry; +} + +__be32 ipgre_tap_bridge_get_raddr(struct ip_tunnel *tunnel, + const unsigned char *addr) +{ + struct ipgre_tap_bridge_entry *entry; + entry = __ipgre_tap_bridge_get(tunnel, addr); + if (entry == NULL) + return 0; + else + return entry->raddr; +} + +#endif /* Tunnel hash table */