From patchwork Mon Jan 16 19:45:14 2012 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: =?utf-8?q?=C5=A0tefan_Gula?= X-Patchwork-Id: 136357 X-Patchwork-Delegate: davem@davemloft.net Return-Path: X-Original-To: patchwork-incoming@ozlabs.org Delivered-To: patchwork-incoming@ozlabs.org Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 8D236B6F7E for ; Tue, 17 Jan 2012 06:46:05 +1100 (EST) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1756186Ab2APTph (ORCPT ); Mon, 16 Jan 2012 14:45:37 -0500 Received: from mail-pz0-f46.google.com ([209.85.210.46]:56905 "EHLO mail-pz0-f46.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1755890Ab2APTpf (ORCPT ); Mon, 16 Jan 2012 14:45:35 -0500 Received: by dake40 with SMTP id e40so231276dak.19 for ; Mon, 16 Jan 2012 11:45:35 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=gamma; h=mime-version:sender:from:date:x-google-sender-auth:message-id :subject:to:cc:content-type; bh=HBrbwIc1xCcDDEd3/Q3DRv/R6twklL5OchaY4uZuep8=; b=Jmx3MK2wvvHmq8sCLVePuYM2ssbV3Q8x3POxoG485hlJ7PMiAoboksI3S2vyCwM+ZU LZRbgUPx1bsb46R9fI2EdpBHip0YEtYOiEMxZiTCxPnMwSkbYdpbhat82XgnRiXbrF66 NmAs+EuZmXvGWJkyDTuzxXe64XeqVNPRYwBHs= Received: by 10.68.74.4 with SMTP id p4mr28320817pbv.123.1326743135133; Mon, 16 Jan 2012 11:45:35 -0800 (PST) MIME-Version: 1.0 Received: by 10.68.64.136 with HTTP; Mon, 16 Jan 2012 11:45:14 -0800 (PST) From: =?ISO-8859-2?Q?=A9tefan_Gula?= Date: Mon, 16 Jan 2012 20:45:14 +0100 X-Google-Sender-Auth: GLPZQq4YL4Jl1WjM-mj9TdJhgJg Message-ID: Subject: [patch v2, kernel version 3.2.1] net/ipv4/ip_gre: Ethernet multipoint GRE over IP To: Alexey Kuznetsov , "David S. Miller" , James Morris , Hideaki YOSHIFUJI , Patrick McHardy Cc: netdev@vger.kernel.org, linux-kernel@vger.kernel.org Sender: netdev-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org From: Stefan Gula --- code was merged with latest bridge code and should be easily comparable /* @@ -563,6 +753,13 @@ static int ipgre_rcv(struct sk_buff *skb int offset = 4; __be16 gre_proto; +#ifdef CONFIG_NET_IPGRE_BRIDGE + u32 orig_source; + struct hlist_head *head; + struct ipgre_tap_bridge_entry *entry; + struct ethhdr *tethhdr; +#endif + if (!pskb_may_pull(skb, 16)) goto drop_nolock; @@ -659,10 +856,39 @@ static int ipgre_rcv(struct sk_buff *skb tunnel->dev->stats.rx_errors++; goto drop; } - +#ifdef CONFIG_NET_IPGRE_BRIDGE + orig_source = iph->saddr; +#endif iph = ip_hdr(skb); skb->protocol = eth_type_trans(skb, tunnel->dev); skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); +#ifdef CONFIG_NET_IPGRE_BRIDGE + if (ipv4_is_multicast(tunnel->parms.iph.daddr)) { + tethhdr = eth_hdr(skb); + if (!is_multicast_ether_addr( + tethhdr->h_source)) { + head = &tunnel->hash[ + ipgre_tap_bridge_hash( + tethhdr->h_source)]; + entry = ipgre_tap_bridge_find_rcu(head, + tethhdr->h_source); + if (likely(entry)) { + entry->raddr = orig_source; + entry->updated = jiffies; + } else { + spin_lock_bh(&tunnel->hash_lock); + if (!ipgre_tap_bridge_find(head, + tethhdr->h_source)) + ipgre_tap_bridge_create( + head, + orig_source, + tethhdr->h_source, + tunnel->dev); + spin_unlock_bh(&tunnel->hash_lock); + } + } + } +#endif } tstats = this_cpu_ptr(tunnel->dev->tstats); @@ -716,7 +942,19 @@ static netdev_tx_t ipgre_tunnel_xmit(str tiph = &tunnel->parms.iph; } +#ifdef CONFIG_NET_IPGRE_BRIDGE + rcu_read_lock(); + if ((dev->type == ARPHRD_ETHER) && + ipv4_is_multicast(tunnel->parms.iph.daddr)) + dst = ipgre_tap_bridge_get_raddr(tunnel, + ((struct ethhdr *)skb->data)->h_dest); + rcu_read_unlock(); + if (dst == 0) + dst = tiph->daddr; + if (dst == 0) { +#else if ((dst = tiph->daddr) == 0) { +#endif /* NBMA tunnel */ if (skb_dst(skb) == NULL) { @@ -1209,6 +1447,16 @@ static int ipgre_open(struct net_device return -EADDRNOTAVAIL; t->mlink = dev->ifindex; ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr); +#ifdef CONFIG_NET_IPGRE_BRIDGE + if (t->dev->type == ARPHRD_ETHER) { + INIT_HLIST_HEAD(t->hash); + spin_lock_init(&t->hash_lock); + t->ageing_time = 300 * HZ; + setup_timer(&t->gc_timer, ipgre_tap_bridge_cleanup, + (unsigned long) t); + mod_timer(&t->gc_timer, jiffies + t->ageing_time); + } +#endif } return 0; } @@ -1219,6 +1467,12 @@ static int ipgre_close(struct net_device if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) { struct in_device *in_dev; +#ifdef CONFIG_NET_IPGRE_BRIDGE + if (t->dev->type == ARPHRD_ETHER) { + ipgre_tap_bridge_flush(t); + del_timer_sync(&t->gc_timer); + } +#endif in_dev = inetdev_by_index(dev_net(dev), t->mlink); if (in_dev) ip_mc_dec_group(in_dev, t->parms.iph.daddr); @@ -1341,6 +1595,12 @@ static int __net_init ipgre_init_net(str struct ipgre_net *ign = net_generic(net, ipgre_net_id); int err; +#ifdef CONFIG_NET_IPGRE_BRIDGE + err = ipgre_tap_bridge_init(); + if (err) + goto err_out; +#endif + ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0", ipgre_tunnel_setup); if (!ign->fb_tunnel_dev) { @@ -1362,6 +1622,10 @@ static int __net_init ipgre_init_net(str err_reg_dev: ipgre_dev_free(ign->fb_tunnel_dev); err_alloc_dev: +#ifdef CONFIG_NET_IPGRE_BRIDGE + ipgre_tap_bridge_fini(); +err_out: +#endif return err; } @@ -1375,6 +1639,9 @@ static void __net_exit ipgre_exit_net(st ipgre_destroy_tunnels(ign, &list); unregister_netdevice_many(&list); rtnl_unlock(); +#ifdef CONFIG_NET_IPGRE_BRIDGE + ipgre_tap_bridge_fini(); +#endif } static struct pernet_operations ipgre_net_ops = { -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html diff -uprN -X linux-3.2.1-orig/Documentation/dontdiff linux-3.2.1-orig/include/net/ipip.h linux-3.2.1-my/include/net/ipip.h --- linux-3.2.1-orig/include/net/ipip.h 2012-01-12 20:42:45.000000000 +0100 +++ linux-3.2.1-my/include/net/ipip.h 2012-01-16 11:17:01.000000000 +0100 @@ -27,6 +27,14 @@ struct ip_tunnel { __u32 o_seqno; /* The last output seqno */ int hlen; /* Precalculated GRE header length */ int mlink; +#ifdef CONFIG_NET_IPGRE_BRIDGE +#define GRETAP_BR_HASH_BITS 8 +#define GRETAP_BR_HASH_SIZE (1 << GRETAP_BR_HASH_BITS) + struct hlist_head hash[GRETAP_BR_HASH_SIZE]; + spinlock_t hash_lock; + unsigned long ageing_time; + struct timer_list gc_timer; +#endif struct ip_tunnel_parm parms; diff -uprN -X linux-3.2.1-orig/Documentation/dontdiff linux-3.2.1-orig/net/ipv4/Kconfig linux-3.2.1-my/net/ipv4/Kconfig --- linux-3.2.1-orig/net/ipv4/Kconfig 2012-01-12 20:42:45.000000000 +0100 +++ linux-3.2.1-my/net/ipv4/Kconfig 2012-01-16 12:37:00.000000000 +0100 @@ -211,6 +211,15 @@ config NET_IPGRE_BROADCAST Network), but can be distributed all over the Internet. If you want to do that, say Y here and to "IP multicast routing" below. +config NET_IPGRE_BRIDGE + bool "IP: Ethernet over multipoint GRE over IP" + depends on IP_MULTICAST && NET_IPGRE && NET_IPGRE_BROADCAST + help + Allows you to use multipoint GRE VPN as virtual switch and interconnect + several L2 endpoints over L3 routed infrastructure. It is useful for + creating multipoint L2 VPNs which can be later used inside bridge + interfaces If you want to use. GRE multipoint L2 VPN feature say Y. + config IP_MROUTE bool "IP: multicast routing" depends on IP_MULTICAST diff -uprN -X linux-3.2.1-orig/Documentation/dontdiff linux-3.2.1-orig/net/ipv4/ip_gre.c linux-3.2.1-my/net/ipv4/ip_gre.c --- linux-3.2.1-orig/net/ipv4/ip_gre.c 2012-01-12 20:42:45.000000000 +0100 +++ linux-3.2.1-my/net/ipv4/ip_gre.c 2012-01-16 20:42:03.000000000 +0100 @@ -52,6 +52,11 @@ #include #endif +#ifdef CONFIG_NET_IPGRE_BRIDGE +#include +#include +#endif + /* Problems & solutions -------------------- @@ -134,6 +139,191 @@ struct ipgre_net { struct net_device *fb_tunnel_dev; }; +#ifdef CONFIG_NET_IPGRE_BRIDGE + /* + * This part of code includes codes to enable L2 ethernet + * switch virtualization over IP routed infrastructure with + * utilization of multicast capable endpoint using Ethernet + * over GRE + * + * Author: Stefan Gula + * Signed-off-by: Stefan Gula + */ +struct mac_addr { + unsigned char addr[6]; +}; + +struct ipgre_tap_bridge_entry { + struct hlist_node hlist; + u32 raddr; + struct mac_addr addr; + struct net_device *dev; + struct rcu_head rcu; + unsigned long updated; +}; + +static struct kmem_cache *ipgre_tap_bridge_cache __read_mostly; +static u32 ipgre_salt __read_mostly; + +int __net_init ipgre_tap_bridge_init(void) +{ + ipgre_tap_bridge_cache = kmem_cache_create("ipgre_tap_bridge_cache", + sizeof(struct ipgre_tap_bridge_entry), + 0, + SLAB_HWCACHE_ALIGN, NULL); + if (!ipgre_tap_bridge_cache) + return -ENOMEM; + get_random_bytes(&ipgre_salt, sizeof(ipgre_salt)); + return 0; +} + +void ipgre_tap_bridge_fini(void) +{ + kmem_cache_destroy(ipgre_tap_bridge_cache); +} + +static inline int ipgre_tap_bridge_hash(const unsigned char *mac) +{ + u32 key = get_unaligned((u32 *)(mac + 2)); + return jhash_1word(key, ipgre_salt) & (GRETAP_BR_HASH_SIZE - 1); +} + +static inline int ipgre_tap_bridge_has_expired(const struct ip_tunnel *tunnel, + const struct ipgre_tap_bridge_entry *entry) +{ + return time_before_eq(entry->updated + tunnel->ageing_time, + jiffies); +} + +static void ipgre_tap_bridge_rcu_free(struct rcu_head *head) +{ + struct ipgre_tap_bridge_entry *entry + = container_of(head, struct ipgre_tap_bridge_entry, rcu); + kmem_cache_free(ipgre_tap_bridge_cache, entry); +} + +static inline void ipgre_tap_bridge_delete(struct ipgre_tap_bridge_entry *entry) +{ + hlist_del_rcu(&entry->hlist); + call_rcu(&entry->rcu, ipgre_tap_bridge_rcu_free); +} + +void ipgre_tap_bridge_cleanup(unsigned long _data) +{ + struct ip_tunnel *tunnel = (struct ip_tunnel *)_data; + unsigned long delay = tunnel->ageing_time; + unsigned long next_timer = jiffies + tunnel->ageing_time; + int i; + spin_lock(&tunnel->hash_lock); + for (i = 0; i < GRETAP_BR_HASH_SIZE; i++) { + struct ipgre_tap_bridge_entry *entry; + struct hlist_node *h, *n; + hlist_for_each_entry_safe(entry, h, n, + &tunnel->hash[i], hlist) + { + unsigned long this_timer; + this_timer = entry->updated + delay; + if (time_before_eq(this_timer, jiffies)) + ipgre_tap_bridge_delete(entry); + else if (time_before(this_timer, next_timer)) + next_timer = this_timer; + } + } + spin_unlock(&tunnel->hash_lock); + mod_timer(&tunnel->gc_timer, round_jiffies_up(next_timer)); +} + +void ipgre_tap_bridge_flush(struct ip_tunnel *tunnel) +{ + int i; + spin_lock_bh(&tunnel->hash_lock); + for (i = 0; i < GRETAP_BR_HASH_SIZE; i++) { + struct ipgre_tap_bridge_entry *entry; + struct hlist_node *h, *n; + hlist_for_each_entry_safe(entry, h, n, + &tunnel->hash[i], hlist) + { + ipgre_tap_bridge_delete(entry); + } + } + spin_unlock_bh(&tunnel->hash_lock); +} + +struct ipgre_tap_bridge_entry *__ipgre_tap_bridge_get(struct ip_tunnel *tunnel, + const unsigned char *addr) +{ + struct hlist_node *h; + struct ipgre_tap_bridge_entry *entry; + hlist_for_each_entry_rcu(entry, h, + &tunnel->hash[ipgre_tap_bridge_hash(addr)], hlist) + { + if (!compare_ether_addr(entry->addr.addr, addr)) { + if (unlikely(ipgre_tap_bridge_has_expired(tunnel, + entry))) + break; + return entry; + } + } + + return NULL; +} + +static struct ipgre_tap_bridge_entry *ipgre_tap_bridge_find( + struct hlist_head *head, + const unsigned char *addr) +{ + struct hlist_node *h; + struct ipgre_tap_bridge_entry *entry; + hlist_for_each_entry(entry, h, head, hlist) { + if (!compare_ether_addr(entry->addr.addr, addr)) + return entry; + } + return NULL; +} + + +static struct ipgre_tap_bridge_entry *ipgre_tap_bridge_find_rcu( + struct hlist_head *head, + const unsigned char *addr) +{ + struct hlist_node *h; + struct ipgre_tap_bridge_entry *entry; + hlist_for_each_entry_rcu(entry, h, head, hlist) { + if (!compare_ether_addr(entry->addr.addr, addr)) + return entry; + } + return NULL; +} + +static struct ipgre_tap_bridge_entry *ipgre_tap_bridge_create( + struct hlist_head *head, + u32 source, + const unsigned char *addr, struct net_device *dev) +{ + struct ipgre_tap_bridge_entry *entry; + entry = kmem_cache_alloc(ipgre_tap_bridge_cache, GFP_ATOMIC); + if (entry) { + memcpy(entry->addr.addr, addr, ETH_ALEN); + hlist_add_head_rcu(&entry->hlist, head); + entry->raddr = source; + entry->dev = dev; + entry->updated = jiffies; + } + return entry; +} + +__be32 ipgre_tap_bridge_get_raddr(struct ip_tunnel *tunnel, + const unsigned char *addr) +{ + struct ipgre_tap_bridge_entry *entry; + entry = __ipgre_tap_bridge_get(tunnel, addr); + if (entry == NULL) + return 0; + else + return entry->raddr; +} + +#endif /* Tunnel hash table */