@@ -13,6 +13,7 @@
#include <linux/slab.h>
#include <linux/udp.h>
#include <linux/igmp.h>
+#include <linux/inetdevice.h>
#include <linux/if_ether.h>
#include <linux/ethtool.h>
#include <net/arp.h>
@@ -71,6 +72,167 @@ static inline bool vxlan_collect_metadata(struct vxlan_sock *vs)
ip_tunnel_collect_metadata();
}
+static struct ip_fan_map *vxlan_fan_find_map(struct vxlan_dev *vxlan, __be32 daddr)
+{
+ struct ip_fan_map *fan_map;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(fan_map, &vxlan->fan.fan_maps, list) {
+ if (fan_map->overlay ==
+ (daddr & inet_make_mask(fan_map->overlay_prefix))) {
+ rcu_read_unlock();
+ return fan_map;
+ }
+ }
+ rcu_read_unlock();
+
+ return NULL;
+}
+
+static void vxlan_fan_flush_map(struct vxlan_dev *vxlan)
+{
+ struct ip_fan_map *fan_map;
+
+ list_for_each_entry_rcu(fan_map, &vxlan->fan.fan_maps, list) {
+ list_del_rcu(&fan_map->list);
+ kfree_rcu(fan_map, rcu);
+ }
+}
+
+static int vxlan_fan_del_map(struct vxlan_dev *vxlan, __be32 overlay)
+{
+ struct ip_fan_map *fan_map;
+
+ fan_map = vxlan_fan_find_map(vxlan, overlay);
+ if (!fan_map)
+ return -ENOENT;
+
+ list_del_rcu(&fan_map->list);
+ kfree_rcu(fan_map, rcu);
+
+ return 0;
+}
+
+static int vxlan_fan_add_map(struct vxlan_dev *vxlan, struct ifla_fan_map *map)
+{
+ __be32 overlay_mask, underlay_mask;
+ struct ip_fan_map *fan_map;
+
+ overlay_mask = inet_make_mask(map->overlay_prefix);
+ underlay_mask = inet_make_mask(map->underlay_prefix);
+
+ netdev_dbg(vxlan->dev, "vfam: map: o %x/%d u %x/%d om %x um %x\n",
+ map->overlay, map->overlay_prefix,
+ map->underlay, map->underlay_prefix,
+ overlay_mask, underlay_mask);
+
+ if ((map->overlay & ~overlay_mask) || (map->underlay & ~underlay_mask))
+ return -EINVAL;
+
+ if (!(map->overlay & overlay_mask) && (map->underlay & underlay_mask))
+ return -EINVAL;
+
+ /* Special case: overlay 0 and underlay 0: flush all mappings */
+ if (!map->overlay && !map->underlay) {
+ vxlan_fan_flush_map(vxlan);
+ return 0;
+ }
+
+ /* Special case: overlay set and underlay 0: clear map for overlay */
+ if (!map->underlay)
+ return vxlan_fan_del_map(vxlan, map->overlay);
+
+ if (vxlan_fan_find_map(vxlan, map->overlay))
+ return -EEXIST;
+
+ fan_map = kmalloc(sizeof(*fan_map), GFP_KERNEL);
+ fan_map->underlay = map->underlay;
+ fan_map->overlay = map->overlay;
+ fan_map->underlay_prefix = map->underlay_prefix;
+ fan_map->overlay_mask = ntohl(overlay_mask);
+ fan_map->overlay_prefix = map->overlay_prefix;
+
+ list_add_tail_rcu(&fan_map->list, &vxlan->fan.fan_maps);
+
+ return 0;
+}
+
+static int vxlan_parse_fan_map(struct nlattr *data[], struct vxlan_dev *vxlan)
+{
+ struct ifla_fan_map *map;
+ struct nlattr *attr;
+ int rem, rv;
+
+ nla_for_each_nested(attr, data[IFLA_IPTUN_FAN_MAP], rem) {
+ map = nla_data(attr);
+ rv = vxlan_fan_add_map(vxlan, map);
+ if (rv)
+ return rv;
+ }
+
+ return 0;
+}
+
+static int vxlan_fan_build_rdst(struct vxlan_dev *vxlan, struct sk_buff *skb,
+ struct vxlan_rdst *fan_rdst)
+{
+ struct ip_fan_map *f_map;
+ union vxlan_addr *va;
+ u32 daddr, underlay;
+ struct arphdr *arp;
+ void *arp_ptr;
+ struct ethhdr *eth;
+ struct iphdr *iph;
+
+ eth = eth_hdr(skb);
+ switch (eth->h_proto) {
+ case htons(ETH_P_IP):
+ iph = ip_hdr(skb);
+ if (!iph)
+ return -EINVAL;
+ daddr = iph->daddr;
+ break;
+ case htons(ETH_P_ARP):
+ arp = arp_hdr(skb);
+ if (!arp)
+ return -EINVAL;
+ arp_ptr = arp + 1;
+ netdev_dbg(vxlan->dev,
+ "vfbr: arp sha %pM sip %pI4 tha %pM tip %pI4\n",
+ arp_ptr, arp_ptr + skb->dev->addr_len,
+ arp_ptr + skb->dev->addr_len + 4,
+ arp_ptr + (skb->dev->addr_len * 2) + 4);
+ arp_ptr += (skb->dev->addr_len * 2) + 4;
+ memcpy(&daddr, arp_ptr, 4);
+ break;
+ default:
+ netdev_dbg(vxlan->dev, "vfbr: unknown eth p %x\n", eth->h_proto);
+ return -EINVAL;
+ }
+
+ f_map = vxlan_fan_find_map(vxlan, daddr);
+ if (!f_map)
+ return -EINVAL;
+
+ daddr = ntohl(daddr);
+ underlay = ntohl(f_map->underlay);
+ if (!underlay)
+ return -EINVAL;
+
+ memset(fan_rdst, 0, sizeof(*fan_rdst));
+ va = &fan_rdst->remote_ip;
+ va->sa.sa_family = AF_INET;
+ fan_rdst->remote_vni = vxlan->default_dst.remote_vni;
+ va->sin.sin_addr.s_addr = htonl(underlay |
+ ((daddr & ~f_map->overlay_mask) >>
+ (32 - f_map->overlay_prefix -
+ (32 - f_map->underlay_prefix))));
+ netdev_dbg(vxlan->dev, "vfbr: daddr %x ul %x dst %x\n",
+ daddr, underlay, va->sin.sin_addr.s_addr);
+
+ return 0;
+}
+
/* Find VXLAN socket based on network namespace, address family, UDP port,
* enabled unshareable flags and socket device binding (see l3mdev with
* non-default VRF).
@@ -2433,6 +2595,13 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
goto tx_error;
}
+ if (fan_has_map(&vxlan->fan) && rt->rt_flags & RTCF_LOCAL) {
+ netdev_dbg(dev, "discard fan to localhost %pI4\n",
+ &rdst->remote_ip.sin.sin_addr.s_addr);
+ ip_rt_put(rt);
+ goto tx_free;
+ }
+
if (!info) {
/* Bypass encapsulation if the destination is local */
err = encap_bypass_if_local(skb, dev, vxlan, AF_INET,
@@ -2569,6 +2738,7 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
dst_release(ndst);
dev->stats.tx_errors++;
vxlan_vnifilter_count(vxlan, vni, NULL, VXLAN_VNI_STATS_TX_ERRORS, 0);
+tx_free:
kfree_skb(skb);
}
@@ -2716,6 +2886,20 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
rcu_read_unlock();
}
+ if (fan_has_map(&vxlan->fan)) {
+ struct vxlan_rdst fan_rdst;
+
+ netdev_dbg(vxlan->dev, "vxlan_xmit p %x d %pM\n",
+ eth->h_proto, eth->h_dest);
+ if (vxlan_fan_build_rdst(vxlan, skb, &fan_rdst)) {
+ dev->stats.tx_dropped++;
+ kfree_skb(skb);
+ return NETDEV_TX_OK;
+ }
+ vxlan_xmit_one(skb, dev, vni, &fan_rdst, 0);
+ return NETDEV_TX_OK;
+ }
+
eth = eth_hdr(skb);
f = vxlan_find_mac(vxlan, eth->h_dest, vni);
did_rsc = false;
@@ -3325,6 +3509,8 @@ static void vxlan_setup(struct net_device *dev)
spin_lock_init(&vxlan->hash_lock[h]);
INIT_HLIST_HEAD(&vxlan->fdb_head[h]);
}
+
+ INIT_LIST_HEAD(&vxlan->fan.fan_maps);
}
static void vxlan_ether_setup(struct net_device *dev)
@@ -4055,6 +4241,12 @@ static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[],
conf->remote_ip.sa.sa_family = AF_INET6;
}
+ if (data[IFLA_VXLAN_FAN_MAP]) {
+ err = vxlan_parse_fan_map(data, vxlan);
+ if (err)
+ return err;
+ }
+
if (data[IFLA_VXLAN_LOCAL]) {
if (changelink && (conf->saddr.sa.sa_family != AF_INET)) {
NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_LOCAL], "New local address family does not match old");
@@ -4440,6 +4632,7 @@ static size_t vxlan_get_size(const struct net_device *dev)
nla_total_size(0) + /* IFLA_VXLAN_GPE */
nla_total_size(0) + /* IFLA_VXLAN_REMCSUM_NOPARTIAL */
nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_VNIFILTER */
+ nla_total_size(sizeof(struct ip_fan_map) * 256) +
0;
}
@@ -4486,6 +4679,26 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
}
}
+ if (fan_has_map(&vxlan->fan)) {
+ struct nlattr *fan_nest;
+ struct ip_fan_map *fan_map;
+
+ fan_nest = nla_nest_start(skb, IFLA_VXLAN_FAN_MAP);
+ if (!fan_nest)
+ goto nla_put_failure;
+ list_for_each_entry_rcu(fan_map, &vxlan->fan.fan_maps, list) {
+ struct ifla_fan_map map;
+
+ map.underlay = fan_map->underlay;
+ map.underlay_prefix = fan_map->underlay_prefix;
+ map.overlay = fan_map->overlay;
+ map.overlay_prefix = fan_map->overlay_prefix;
+ if (nla_put(skb, IFLA_FAN_MAPPING, sizeof(map), &map))
+ goto nla_put_failure;
+ }
+ nla_nest_end(skb, fan_nest);
+ }
+
if (nla_put_u8(skb, IFLA_VXLAN_TTL, vxlan->cfg.ttl) ||
nla_put_u8(skb, IFLA_VXLAN_TTL_INHERIT,
!!(vxlan->cfg.flags & VXLAN_F_TTL_INHERIT)) ||
@@ -4826,6 +5039,22 @@ static __net_init int vxlan_init_net(struct net *net)
NULL);
}
+#ifdef CONFIG_SYSCTL
+static struct ctl_table_header *vxlan_fan_header;
+static unsigned int vxlan_fan_version = 4;
+
+static struct ctl_table vxlan_fan_sysctls[] = {
+ {
+ .procname = "vxlan",
+ .data = &vxlan_fan_version,
+ .maxlen = sizeof(vxlan_fan_version),
+ .mode = 0444,
+ .proc_handler = proc_dointvec,
+ },
+ {},
+};
+#endif /* CONFIG_SYSCTL */
+
static void vxlan_destroy_tunnels(struct net *net, struct list_head *head)
{
struct vxlan_net *vn = net_generic(net, vxlan_net_id);
@@ -4903,7 +5132,20 @@ static int __init vxlan_init_module(void)
vxlan_vnifilter_init();
+#ifdef CONFIG_SYSCTL
+ vxlan_fan_header = register_net_sysctl(&init_net, "net/fan",
+ vxlan_fan_sysctls);
+ if (!vxlan_fan_header) {
+ rc = -ENOMEM;
+ goto sysctl_failed;
+ }
+#endif /* CONFIG_SYSCTL */
+
return 0;
+#ifdef CONFIG_SYSCTL
+sysctl_failed:
+ rtnl_link_unregister(&vxlan_link_ops);
+#endif /* CONFIG_SYSCTL */
out4:
unregister_switchdev_notifier(&vxlan_switchdev_notifier_block);
out3:
@@ -4917,6 +5159,9 @@ late_initcall(vxlan_init_module);
static void __exit vxlan_cleanup_module(void)
{
+#ifdef CONFIG_SYSCTL
+ unregister_net_sysctl_table(vxlan_fan_header);
+#endif /* CONFIG_SYSCTL */
vxlan_vnifilter_uninit();
rtnl_link_unregister(&vxlan_link_ops);
unregister_switchdev_notifier(&vxlan_switchdev_notifier_block);
@@ -118,9 +118,18 @@ struct metadata_dst;
*/
#define FAN_OVERLAY_CNT 256
+struct ip_fan_map {
+ __be32 underlay;
+ __be32 overlay;
+ u16 underlay_prefix;
+ u16 overlay_prefix;
+ u32 overlay_mask;
+ struct list_head list;
+ struct rcu_head rcu;
+};
+
struct ip_tunnel_fan {
-/* u32 __rcu *map;*/
- u32 map[FAN_OVERLAY_CNT];
+ struct list_head fan_maps;
};
struct ip_tunnel {
@@ -170,6 +179,11 @@ struct ip_tunnel {
bool ignore_df;
};
+static inline int fan_has_map(const struct ip_tunnel_fan *fan)
+{
+ return !list_empty(&fan->fan_maps);
+}
+
struct tnl_ptk_info {
__be16 flags;
__be16 proto;
@@ -294,6 +294,8 @@ struct vxlan_dev {
struct net *net; /* netns for packet i/o */
struct vxlan_rdst default_dst; /* default destination */
+ struct ip_tunnel_fan fan;
+
struct timer_list age_timer;
spinlock_t hash_lock[FDB_HASH_SIZE];
unsigned int addrcnt;
@@ -1378,6 +1378,7 @@ enum {
IFLA_VXLAN_VNIFILTER, /* only applicable with COLLECT_METADATA mode */
IFLA_VXLAN_LOCALBYPASS,
IFLA_VXLAN_LABEL_POLICY, /* IPv6 flow label policy; ifla_vxlan_label_policy */
+ IFLA_VXLAN_FAN_MAP = 33,
__IFLA_VXLAN_MAX
};
#define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1)
@@ -186,8 +186,6 @@ enum {
(TUNNEL_GENEVE_OPT | TUNNEL_VXLAN_OPT | TUNNEL_ERSPAN_OPT | \
TUNNEL_GTP_OPT)
-#define TUNNEL_FAN __cpu_to_be16(0x8000)
-
enum {
IFLA_FAN_UNSPEC,
IFLA_FAN_MAPPING,
@@ -196,7 +194,7 @@ enum {
#define IFLA_FAN_MAX (__IFLA_FAN_MAX - 1)
-struct ip_tunnel_fan_map {
+struct ifla_fan_map {
__be32 underlay;
__be32 overlay;
__u16 underlay_prefix;
@@ -1227,11 +1227,6 @@ int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
}
EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
-static int ip_tunnel_is_fan(struct ip_tunnel *tunnel)
-{
- return tunnel->parms.i_flags & TUNNEL_FAN;
-}
-
int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
struct ip_tunnel_parm *p, __u32 fwmark)
{
@@ -1241,7 +1236,7 @@ int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
if (dev == itn->fb_tunnel_dev)
- return ip_tunnel_is_fan(tunnel) ? 0 : -EINVAL;
+ return fan_has_map(&tunnel->fan) ? 0 : -EINVAL;
t = ip_tunnel_find(itn, p, dev->type);
@@ -102,6 +102,7 @@
#include <linux/netfilter_ipv4.h>
#include <linux/if_ether.h>
#include <linux/inetdevice.h>
+#include <linux/rculist.h>
#include <net/sock.h>
#include <net/ip.h>
@@ -268,37 +269,144 @@ static int mplsip_rcv(struct sk_buff *skb)
}
#endif
-static int ipip_tunnel_is_fan(struct ip_tunnel *tunnel)
+static struct ip_fan_map *ipip_fan_find_map(struct ip_tunnel *t, __be32 daddr)
{
- return tunnel->parms.i_flags & TUNNEL_FAN;
+ struct ip_fan_map *fan_map;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(fan_map, &t->fan.fan_maps, list) {
+ if (fan_map->overlay ==
+ (daddr & inet_make_mask(fan_map->overlay_prefix))) {
+ rcu_read_unlock();
+ return fan_map;
+ }
+ }
+ rcu_read_unlock();
+
+ return NULL;
}
-/*
- * Determine fan tunnel endpoint to send packet to, based on the inner IP
- * address. For an overlay (inner) address Y.A.B.C, the transformation is
- * F.G.A.B, where "F" and "G" are the first two octets of the underlay
- * network (the network portion of a /16), "A" and "B" are the low order
- * two octets of the underlay network host (the host portion of a /16),
- * and "Y" is a configured first octet of the overlay network.
+/* Determine fan tunnel endpoint to send packet to, based on the inner IP
+ * address.
+ *
+ * Given a /8 overlay and /16 underlay, for an overlay (inner) address
+ * Y.A.B.C, the transformation is F.G.A.B, where "F" and "G" are the first
+ * two octets of the underlay network (the network portion of a /16), "A"
+ * and "B" are the low order two octets of the underlay network host (the
+ * host portion of a /16), and "Y" is a configured first octet of the
+ * overlay network.
+ *
+ * E.g., underlay host 10.88.3.4/16 with an overlay of 99.0.0.0/8 would
+ * host overlay subnet 99.3.4.0/24. An overlay network datagram from
+ * 99.3.4.5 to 99.6.7.8, would be directed to underlay host 10.88.6.7,
+ * which hosts overlay network subnet 99.6.7.0/24. This transformation is
+ * described in detail further below.
+ *
+ * Using netmasks for the overlay and underlay other than /8 and /16, as
+ * shown above, can yield larger (or smaller) overlay subnets, with the
+ * trade-off of allowing fewer (or more) underlay hosts to participate.
+ *
+ * The size of each overlay network subnet is defined by the total of the
+ * network mask of the overlay plus the size of host portion of the
+ * underlay network. In the above example, /8 + /16 = /24.
+ *
+ * E.g., consider underlay host 10.99.238.5/20 and overlay 99.0.0.0/8. In
+ * this case, the network portion of the underlay is 10.99.224.0/20, and
+ * the host portion is 0.0.14.5 (12 bits). To determine the overlay
+ * network subnet, the 12 bits of host portion are left shifted 12 bits
+ * (/20 - /8) and ORed with the overlay subnet prefix. This yields an
+ * overlay subnet of 99.224.80/20, composed of 8 bits overlay, followed by
+ * 12 bits underlay. This yields 12 bits in the overlay network portion,
+ * allowing for 4094 addresses in each overlay network subnet. The
+ * trade-off is that fewer hosts may participate in the underlay network,
+ * as its host address size has shrunk from 16 bits (65534 addresses) in
+ * the first example to 12 bits (4094 addresses) here.
+ *
+ * For fewer hosts per overlay subnet (permitting a larger number of
+ * underlay hosts to participate), the underlay netmask may be made
+ * smaller.
+ *
+ * E.g., underlay host 10.111.1.2/12 (network 10.96.0.0/12, host portion
+ * is 0.15.1.2, 20 bits) with an overlay of 33.0.0.0/8 would left shift
+ * the 20 bits of host by 4 (so that it's highest order bit is adjacent to
+ * the lowest order bit of the /8 overlay). This yields an overlay subnet
+ * of 33.240.16.32/28 (8 bits overlay, 20 bits from the host portion of
+ * the underlay). This provides more addresses for the underlay network
+ * (approximately 2^20), but each host's segment of the overlay provides
+ * only 4 bits of addresses (14 usable).
+ *
+ * It is also possible to adjust the overlay subnet.
+ *
+ * For an overlay of 240.0.0.0/5 and underlay of 10.88.0.0/20, consider
+ * underlay host 10.88.129.2; the 12 bits of host, 0.0.1.2, are left
+ * shifted 15 bits (/20 - /5), yielding an overlay network of
+ * 240.129.0.0/17. An underlay host of 10.88.244.215 would yield an
+ * overlay network of 242.107.128.0/17.
+ *
+ * For an overlay of 100.64.0.0/10 and underlay of 10.224.220.0/24, for
+ * underlay host 10.224.220.10, the underlay host portion (.10) is left
+ * shifted 14 bits, yielding an overlay network subnet of 100.66.128.0/18.
+ * This would permit 254 addresses on the underlay, with each overlay
+ * segment providing approximately 2^14 - 2 addresses (16382).
+ *
+ * For packets being encapsulated, the overlay network destination IP
+ * address is deconstructed into its overlay and underlay-derived
+ * portions. The underlay portion (determined by the overlay mask and
+ * overlay subnet mask) is right shifted according to the size of the
+ * underlay network mask. This value is then ORed with the network
+ * portion of the underlay network to produce the underlay network
+ * destination for the encapsulated datagram.
+ *
+ * For example, using the initial example of underlay 10.88.3.4/16 and
+ * overlay 99.0.0.0/8, with underlay host 10.88.3.4/16 providing overlay
+ * subnet 99.3.4.0/24 with specfic host 99.3.4.5. A datagram from
+ * 99.3.4.5 to 99.6.7.8 would first have the underlay host derived portion
+ * of the address extracted. This is a number of bits equal to underlay
+ * network host portion. In the destination address, the highest order of
+ * these bits is one bit lower than the lowest order bit from the overlay
+ * network mask.
*
- * E.g., underlay host 10.88.3.4 with an overlay of 99 would host overlay
- * subnet 99.3.4.0/24. An overlay network datagram from 99.3.4.5 to
- * 99.6.7.8, would be directed to underlay host 10.88.6.7, which hosts
- * overlay network 99.6.7.0/24.
+ * Using the sample value, 99.6.7.8, the overlay mask is /8, and the
+ * underlay mask is /16 (leaving 16 bits for the host portion). The bits
+ * to be shifted are the middle two octets, 0.6.7.0, as this is 99.6.7.8
+ * ANDed with the mask 0x00ffff00 (which is 16 bits, the highest order of
+ * which is 1 bit lower than the lowest order overlay address bit).
+ *
+ * These octets, 0.6.7.0, are then right shifted 8 bits, yielding 0.0.6.7.
+ * This value is then ORed with the underlay network portion,
+ * 10.88.0.0/16, providing 10.88.6.7 as the final underlay destination for
+ * the encapuslated datagram.
+ *
+ * Another transform using the final example: overlay 100.64.0.0/10 and
+ * underlay 10.224.220.0/24. Consider overlay address 100.66.128.1
+ * sending a datagram to 100.66.200.5. In this case, 8 bits (the host
+ * portion size of 10.224.220.0/24) beginning after the 100.64/10 overlay
+ * prefix are masked off, yielding 0.2.192.0. This is right shifted 14
+ * (32 - 10 - (32 - 24), i.e., the number of bits between the overlay
+ * network portion and the underlay host portion) bits, yielding 0.0.0.11.
+ * This is ORed with the underlay network portion, 10.224.220.0/24, giving
+ * the underlay destination of 10.224.220.11 for overlay destination
+ * 100.66.200.5.
*/
static int ipip_build_fan_iphdr(struct ip_tunnel *tunnel, struct sk_buff *skb, struct iphdr *iph)
{
- unsigned int overlay;
+ struct ip_fan_map *f_map;
u32 daddr, underlay;
+ f_map = ipip_fan_find_map(tunnel, ip_hdr(skb)->daddr);
+ if (!f_map)
+ return -ENOENT;
+
daddr = ntohl(ip_hdr(skb)->daddr);
- overlay = daddr >> 24;
- underlay = tunnel->fan.map[overlay];
+ underlay = ntohl(f_map->underlay);
if (!underlay)
return -EINVAL;
*iph = tunnel->parms.iph;
- iph->daddr = htonl(underlay | ((daddr >> 8) & 0x0000ffff));
+ iph->daddr = htonl(underlay |
+ ((daddr & ~f_map->overlay_mask) >>
+ (32 - f_map->overlay_prefix -
+ (32 - f_map->underlay_prefix))));
return 0;
}
@@ -336,7 +444,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb,
if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP4))
goto tx_error;
- if (ipip_tunnel_is_fan(tunnel)) {
+ if (fan_has_map(&tunnel->fan)) {
if (ipip_build_fan_iphdr(tunnel, skb, &fiph))
goto tx_error;
tiph = &fiph;
@@ -407,6 +515,8 @@ static const struct net_device_ops ipip_netdev_ops = {
static void ipip_tunnel_setup(struct net_device *dev)
{
+ struct ip_tunnel *t = netdev_priv(dev);
+
dev->netdev_ops = &ipip_netdev_ops;
dev->header_ops = &ip_tunnel_header_ops;
@@ -419,6 +529,7 @@ static void ipip_tunnel_setup(struct net_device *dev)
dev->features |= IPIP_FEATURES;
dev->hw_features |= IPIP_FEATURES;
ip_tunnel_setup(dev, ipip_net_id);
+ INIT_LIST_HEAD(&t->fan.fan_maps);
}
static int ipip_tunnel_init(struct net_device *dev)
@@ -471,41 +582,65 @@ static void ipip_netlink_parms(struct nlattr *data[],
*fwmark = nla_get_u32(data[IFLA_IPTUN_FWMARK]);
}
-static void ipip_fan_free_map(struct ip_tunnel *t)
+static void ipip_fan_flush_map(struct ip_tunnel *t)
{
- memset(&t->fan.map, 0, sizeof(t->fan.map));
+ struct ip_fan_map *fan_map;
+
+ list_for_each_entry_rcu(fan_map, &t->fan.fan_maps, list) {
+ list_del_rcu(&fan_map->list);
+ kfree_rcu(fan_map, rcu);
+ }
}
-static int ipip_fan_set_map(struct ip_tunnel *t, struct ip_tunnel_fan_map *map)
+static int ipip_fan_del_map(struct ip_tunnel *t, __be32 overlay)
{
- u32 overlay, overlay_mask, underlay, underlay_mask;
+ struct ip_fan_map *fan_map;
- if ((map->underlay_prefix && map->underlay_prefix != 16) ||
- (map->overlay_prefix && map->overlay_prefix != 8))
- return -EINVAL;
+ fan_map = ipip_fan_find_map(t, overlay);
+ if (!fan_map)
+ return -ENOENT;
- overlay = ntohl(map->overlay);
- overlay_mask = ntohl(inet_make_mask(map->overlay_prefix));
+ list_del_rcu(&fan_map->list);
+ kfree_rcu(fan_map, rcu);
- underlay = ntohl(map->underlay);
- underlay_mask = ntohl(inet_make_mask(map->underlay_prefix));
+ return 0;
+}
- if ((overlay & ~overlay_mask) || (underlay & ~underlay_mask))
- return -EINVAL;
+static int ipip_fan_add_map(struct ip_tunnel *t, struct ifla_fan_map *map)
+{
+ __be32 overlay_mask, underlay_mask;
+ struct ip_fan_map *fan_map;
- if (!(overlay & overlay_mask) && (underlay & underlay_mask))
+ overlay_mask = inet_make_mask(map->overlay_prefix);
+ underlay_mask = inet_make_mask(map->underlay_prefix);
+
+ if ((map->overlay & ~overlay_mask) || (map->underlay & ~underlay_mask))
return -EINVAL;
- t->parms.i_flags |= TUNNEL_FAN;
+ if (!(map->overlay & overlay_mask) && (map->underlay & underlay_mask))
+ return -EINVAL;
- /* Special case: overlay 0 and underlay 0 clears all mappings */
- if (!overlay && !underlay) {
- ipip_fan_free_map(t);
+ /* Special case: overlay 0 and underlay 0: flush all mappings */
+ if (!map->overlay && !map->underlay) {
+ ipip_fan_flush_map(t);
return 0;
}
+
+ /* Special case: overlay set and underlay 0: clear map for overlay */
+ if (!map->underlay)
+ return ipip_fan_del_map(t, map->overlay);
+
+ if (ipip_fan_find_map(t, map->overlay))
+ return -EEXIST;
+
+ fan_map = kmalloc(sizeof(*fan_map), GFP_KERNEL);
+ fan_map->underlay = map->underlay;
+ fan_map->overlay = map->overlay;
+ fan_map->underlay_prefix = map->underlay_prefix;
+ fan_map->overlay_mask = ntohl(overlay_mask);
+ fan_map->overlay_prefix = map->overlay_prefix;
- overlay >>= (32 - map->overlay_prefix);
- t->fan.map[overlay] = underlay;
+ list_add_tail_rcu(&fan_map->list, &t->fan.fan_maps);
return 0;
}
@@ -513,7 +648,7 @@ static int ipip_fan_set_map(struct ip_tunnel *t, struct ip_tunnel_fan_map *map)
static int ipip_netlink_fan(struct nlattr *data[], struct ip_tunnel *t,
struct ip_tunnel_parm *parms)
{
- struct ip_tunnel_fan_map *map;
+ struct ifla_fan_map *map;
struct nlattr *attr;
int rem, rv;
@@ -525,7 +660,7 @@ static int ipip_netlink_fan(struct nlattr *data[], struct ip_tunnel *t,
nla_for_each_nested(attr, data[IFLA_IPTUN_FAN_MAP], rem) {
map = nla_data(attr);
- rv = ipip_fan_set_map(t, map);
+ rv = ipip_fan_add_map(t, map);
if (rv)
return rv;
}
@@ -619,7 +754,7 @@ static size_t ipip_get_size(const struct net_device *dev)
/* IFLA_IPTUN_FWMARK */
nla_total_size(4) +
/* IFLA_IPTUN_FAN_MAP */
- nla_total_size(sizeof(struct ip_tunnel_fan_map)) * 256 +
+ nla_total_size(sizeof(struct ifla_fan_map)) * 256 +
0;
}
@@ -652,25 +787,22 @@ static int ipip_fill_info(struct sk_buff *skb, const struct net_device *dev)
if (tunnel->collect_md)
if (nla_put_flag(skb, IFLA_IPTUN_COLLECT_METADATA))
goto nla_put_failure;
- if (tunnel->parms.i_flags & TUNNEL_FAN) {
+ if (fan_has_map(&tunnel->fan)) {
struct nlattr *fan_nest;
- int i;
+ struct ip_fan_map *fan_map;
fan_nest = nla_nest_start(skb, IFLA_IPTUN_FAN_MAP);
if (!fan_nest)
goto nla_put_failure;
- for (i = 0; i < 256; i++) {
- if (tunnel->fan.map[i]) {
- struct ip_tunnel_fan_map map;
-
- map.underlay = htonl(tunnel->fan.map[i]);
- map.underlay_prefix = 16;
- map.overlay = htonl(i << 24);
- map.overlay_prefix = 8;
- if (nla_put(skb, IFLA_FAN_MAPPING,
- sizeof(map), &map))
- goto nla_put_failure;
- }
+ list_for_each_entry_rcu(fan_map, &tunnel->fan.fan_maps, list) {
+ struct ifla_fan_map map;
+
+ map.underlay = fan_map->underlay;
+ map.underlay_prefix = fan_map->underlay_prefix;
+ map.overlay = fan_map->overlay;
+ map.overlay_prefix = fan_map->overlay_prefix;
+ if (nla_put(skb, IFLA_FAN_MAPPING, sizeof(map), &map))
+ goto nla_put_failure;
}
nla_nest_end(skb, fan_nest);
}