From patchwork Wed Aug 16 17:02:00 2017 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: David Lamparter X-Patchwork-Id: 802143 X-Patchwork-Delegate: davem@davemloft.net Return-Path: X-Original-To: patchwork-incoming@ozlabs.org Delivered-To: patchwork-incoming@ozlabs.org Authentication-Results: ozlabs.org; spf=none (mailfrom) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=netdev-owner@vger.kernel.org; receiver=) Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 3xXbJ12gn2z9t4P for ; Thu, 17 Aug 2017 03:02:41 +1000 (AEST) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1752490AbdHPRCa (ORCPT ); Wed, 16 Aug 2017 13:02:30 -0400 Received: from eidolon.nox.tf ([185.142.180.128]:43712 "EHLO eidolon.nox.tf" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1752376AbdHPRC1 (ORCPT ); Wed, 16 Aug 2017 13:02:27 -0400 Received: from equinox by eidolon.nox.tf with local (Exim 4.89) (envelope-from ) id 1di1i5-001v34-2V; Wed, 16 Aug 2017 19:02:25 +0200 From: David Lamparter To: netdev@vger.kernel.org Cc: amine.kherbouche@6wind.com, roopa@cumulusnetworks.com, David Lamparter Subject: [PATCH 4/6] mpls: VPLS support Date: Wed, 16 Aug 2017 19:02:00 +0200 Message-Id: <20170816170202.456851-5-equinox@diac24.net> X-Mailer: git-send-email 2.13.0 In-Reply-To: <20170816170202.456851-1-equinox@diac24.net> References: <20170816170202.456851-1-equinox@diac24.net> Sender: netdev-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org [work-in-progress, works but needs changes] [v2: refactored lots of things, e.g. 
dst_metadata, no more genetlink] Signed-off-by: David Lamparter --- include/net/dst_metadata.h | 21 ++ include/net/vpls.h | 8 + net/mpls/Kconfig | 11 ++ net/mpls/Makefile | 1 + net/mpls/vpls.c | 469 +++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 510 insertions(+) create mode 100644 include/net/vpls.h create mode 100644 net/mpls/vpls.c diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h index 8858dc441458..aeee4ce3b654 100644 --- a/include/net/dst_metadata.h +++ b/include/net/dst_metadata.h @@ -3,11 +3,13 @@ #include #include +#include #include enum metadata_type { METADATA_IP_TUNNEL, METADATA_HW_PORT_MUX, + METADATA_VPLS, }; struct hw_port_info { @@ -21,6 +23,7 @@ struct metadata_dst { union { struct ip_tunnel_info tun_info; struct hw_port_info port_info; + struct vpls_info vpls_info; } u; }; @@ -49,6 +52,15 @@ static inline struct ip_tunnel_info *skb_tunnel_info(struct sk_buff *skb) return NULL; } +static inline struct vpls_info *skb_vpls_info(struct sk_buff *skb) +{/* Return the VPLS info carried by the skb metadata dst, or NULL when there is no metadata dst or its type is not METADATA_VPLS. */ + struct metadata_dst *md_dst = skb_metadata_dst(skb); + if (md_dst && md_dst->type == METADATA_VPLS) + return &md_dst->u.vpls_info; + return NULL; +} + + static inline bool skb_valid_dst(const struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); @@ -73,6 +85,9 @@ static inline int dst_metadata_cmp(const struct dst_entry *dst_a, case METADATA_HW_PORT_MUX: return memcmp(&a->u.port_info, &b->u.port_info, sizeof(a->u.port_info)); + case METADATA_VPLS: + return memcmp(&a->u.vpls_info, &b->u.vpls_info, + sizeof(a->u.vpls_info)); case METADATA_IP_TUNNEL: return memcmp(&a->u.tun_info, &b->u.tun_info, sizeof(a->u.tun_info) + @@ -218,4 +233,10 @@ static inline struct metadata_dst *ipv6_tun_rx_dst(struct sk_buff *skb, 0, ip6_flowlabel(ip6h), flags, tunnel_id, md_size); } +/* Allocate a METADATA_VPLS metadata dst through metadata_dst_alloc() with GFP_ATOMIC; may return NULL. */ +static inline struct metadata_dst *vpls_rx_dst(void) +{ + return metadata_dst_alloc(0, METADATA_VPLS, GFP_ATOMIC); +} + #endif /* __NET_DST_METADATA_H */ diff --git a/include/net/vpls.h
b/include/net/vpls.h new file mode 100644 index 000000000000..b261e2d97734 --- /dev/null +++ b/include/net/vpls.h @@ -0,0 +1,8 @@ +#ifndef __NET_VPLS_H +#define __NET_VPLS_H 1 + +struct vpls_info { + u32 pw_label;/* pseudowire label; vpls_rcv() fills it from the decoded MPLS label */ +}; + +#endif /* __NET_VPLS_H */ diff --git a/net/mpls/Kconfig b/net/mpls/Kconfig index 5c467ef97311..c15ba73efb34 100644 --- a/net/mpls/Kconfig +++ b/net/mpls/Kconfig @@ -27,6 +27,17 @@ config MPLS_ROUTING ---help--- Add support for forwarding of mpls packets. +config MPLS_VPLS + bool "VPLS support" + default y + depends on MPLS_ROUTING && BRIDGE_NETFILTER=n + ---help--- + Add support for de-&encapsulating VPLS. Not compatible with + bridge netfilter due to the latter stomping over VPLS' dst metadata. + +comment "disable 'Bridged IP/ARP packets filtering' for VPLS support" + depends on BRIDGE_NETFILTER + config MPLS_IPTUNNEL tristate "MPLS: IP over MPLS tunnel support" depends on LWTUNNEL && MPLS_ROUTING diff --git a/net/mpls/Makefile b/net/mpls/Makefile index 9ca923625016..3c028600a980 100644 --- a/net/mpls/Makefile +++ b/net/mpls/Makefile @@ -6,3 +6,4 @@ obj-$(CONFIG_MPLS_ROUTING) += mpls_router.o obj-$(CONFIG_MPLS_IPTUNNEL) += mpls_iptunnel.o mpls_router-y := af_mpls.o +mpls_router-$(CONFIG_MPLS_VPLS) += vpls.o diff --git a/net/mpls/vpls.c b/net/mpls/vpls.c new file mode 100644 index 000000000000..28ac810da6e9 --- /dev/null +++ b/net/mpls/vpls.c @@ -0,0 +1,469 @@ +/* + * net/mpls/vpls.c + * + * Copyright (C) 2016 David Lamparter + * + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "internal.h" + +#define DRV_NAME "vpls" + +#define MIN_MTU 68 /* Min L3 MTU */ +#define MAX_MTU 65535 /* Max L3 MTU (arbitrary) */ +/* Array of pseudowire labels attached to one VPLS device; replaced as a whole and freed through kfree_rcu(). */ +struct vpls_wirelist { + struct rcu_head rcu; + size_t count; + unsigned wires[0]; +}; +/* Per-device private data: the netns used for MPLS route lookups and the current wire list. */ +struct vpls_priv { + struct net *encap_net; + struct vpls_wirelist __rcu *wires; +}; +/* Transmit skb on one pseudowire: look up the MPLS route for label wire in encap_net, require that it is bound to dev, then pass the skb to mpls_rt_xmit() with bos=1 and ttl=255. Returns -ENOENT or -EINVAL, without having touched the skb, when the lookup or the ownership check fails. */ +static int vpls_xmit_wire(struct sk_buff *skb, struct net_device
*dev, + struct vpls_priv *vpls, u32 wire) +{ + struct mpls_route *rt; + struct mpls_entry_decoded dec; + + dec.bos = 1; + dec.ttl = 255; + + rt = mpls_route_input_rcu(vpls->encap_net, wire); + if (!rt) + return -ENOENT; + if (rt->rt_vpls_dev != dev) + return -EINVAL; + + return mpls_rt_xmit(skb, rt, dec); +} +/* ndo_start_xmit handler. An skb that carries VPLS metadata goes out on that pseudowire only; any other skb is cloned onto every wire in the list and the original is then released. On success tx_packets and tx_bytes are bumped once per frame; on failure tx_errors is bumped and the skb is freed. */ +static netdev_tx_t vpls_xmit(struct sk_buff *skb, struct net_device *dev) +{ + int err = -EINVAL, ok_count = 0; + struct vpls_priv *priv = netdev_priv(dev); + struct vpls_info *vi; + struct pcpu_sw_netstats *stats; + size_t len = skb->len; + + rcu_read_lock(); + vi = skb_vpls_info(skb); + + skb_orphan(skb); + skb_forward_csum(skb); + + if (vi) { + err = vpls_xmit_wire(skb, dev, priv, vi->pw_label); + if (err) + goto out_err; + } else { + struct sk_buff *cloned; + struct vpls_wirelist *wl; + size_t i; + + wl = rcu_dereference(priv->wires); + if (wl->count == 0) { + dev->stats.tx_carrier_errors++; + goto out_err; + } + + for (i = 0; i < wl->count; i++) { + cloned = skb_clone(skb, GFP_ATOMIC);/* GFP_ATOMIC: we are inside rcu_read_lock() on the transmit path and must not sleep */ + if (!cloned || vpls_xmit_wire(cloned, dev, priv, wl->wires[i]))/* a failed clone counts as a failed wire; consume_skb(NULL) is a no-op */ + consume_skb(cloned); + else + ok_count++; + } + if (!ok_count) + goto out_err; + + consume_skb(skb); + } + + stats = this_cpu_ptr(dev->tstats); + u64_stats_update_begin(&stats->syncp); + stats->tx_packets++; + stats->tx_bytes += len; + u64_stats_update_end(&stats->syncp); + + rcu_read_unlock(); + return 0; + +out_err: + dev->stats.tx_errors++; + + kfree_skb(skb);/* error path: kfree_skb() so the drop is visible to drop tracing */ + rcu_read_unlock(); + return err; +} +/* Receive path for a labelled packet whose route is bound to a VPLS device. Requires the decoded label to be bottom of stack, strips the shim header, stores the label in a METADATA_VPLS dst on the skb and feeds the inner Ethernet frame to netif_rx() on the VPLS device. Returns 0 on delivery and NET_RX_DROP after freeing the skb on any failure. */ +int vpls_rcv(struct sk_buff *skb, struct net_device *in_dev, + struct packet_type *pt, struct mpls_route *rt, + struct mpls_shim_hdr *hdr, struct net_device *orig_dev) +{ + struct net_device *dev = rt->rt_vpls_dev; + struct mpls_entry_decoded dec; + struct metadata_dst *md_dst; + struct pcpu_sw_netstats *stats; + + if (!dev) + goto drop_nodev; + + dec = mpls_entry_decode(hdr); + if (!dec.bos) { + dev->stats.rx_frame_errors++; + goto drop; + } + + skb_pull(skb, sizeof(*hdr)); + + if (unlikely(!pskb_may_pull(skb,
ETH_HLEN))) { + dev->stats.rx_length_errors++; + goto drop; + } + + md_dst = vpls_rx_dst(); + if (unlikely(!md_dst)) { + netdev_err(dev, "failed to allocate dst metadata\n"); + goto drop; + } + md_dst->u.vpls_info.pw_label = dec.label; + + skb->dev = dev; + + skb_reset_mac_header(skb); + skb->protocol = eth_type_trans(skb, dev); + skb->ip_summed = CHECKSUM_NONE; + skb->pkt_type = PACKET_HOST; + + skb_clear_hash(skb); + skb->vlan_tci = 0; + skb_set_queue_mapping(skb, 0); + skb_scrub_packet(skb, !net_eq(dev_net(in_dev), dev_net(dev))); + + skb_reset_network_header(skb); + skb_probe_transport_header(skb, 0); + + skb_dst_drop(skb); + skb_dst_set(skb, &md_dst->dst); + + stats = this_cpu_ptr(dev->tstats); + u64_stats_update_begin(&stats->syncp); + stats->rx_packets++; + stats->rx_bytes += skb->len; + u64_stats_update_end(&stats->syncp); + + netif_rx(skb); + return 0; + +drop: + dev->stats.rx_errors++; +drop_nodev: + kfree_skb(skb); + return NET_RX_DROP; +} +/* Route change notification for label, called with RTNL held. Removes label from the wire list of the device bound to rt_old and/or appends it to the list of the device bound to rt_new; nothing happens when both routes name the same device. Each change publishes a freshly allocated list with rcu_assign_pointer() and frees the old one with kfree_rcu(). Carrier goes off when a list becomes empty and on when it gets its first entry. Allocation failures are logged and leave the list unchanged. */ +void vpls_label_update(unsigned label, struct mpls_route *rt_old, + struct mpls_route *rt_new) +{ + struct vpls_priv *priv; + struct vpls_wirelist *wl, *wl_new; + size_t i; + + ASSERT_RTNL(); + + if (rt_old && rt_new && rt_old->rt_vpls_dev == rt_new->rt_vpls_dev) + return; + + if (rt_old && rt_old->rt_vpls_dev) { + priv = netdev_priv(rt_old->rt_vpls_dev); + wl = rtnl_dereference(priv->wires);/* update side: protected by RTNL, not by rcu_read_lock() */ + + for (i = 0; i < wl->count; i++) + if (wl->wires[i] == label) + break; + + if (i == wl->count) { + netdev_err(rt_old->rt_vpls_dev, + "can't find pseudowire to remove!\n"); + goto update_new; + } + + wl_new = kmalloc(sizeof(*wl) + + (wl->count - 1) * sizeof(wl->wires[0]), + GFP_ATOMIC); + if (!wl_new) { + netdev_err(rt_old->rt_vpls_dev, + "out of memory for pseudowire delete!\n"); + goto update_new; + } + + wl_new->count = wl->count - 1; + memcpy(wl_new->wires, wl->wires, i * sizeof(wl->wires[0])); + memcpy(wl_new->wires + i, wl->wires + i + 1, + (wl->count - i - 1) * sizeof(wl->wires[0])); + + rcu_assign_pointer(priv->wires,
wl_new); + kfree_rcu(wl, rcu); + + if (wl_new->count == 0) + netif_carrier_off(rt_old->rt_vpls_dev); + } + +update_new: + if (rt_new && rt_new->rt_vpls_dev) { + priv = netdev_priv(rt_new->rt_vpls_dev); + wl = rtnl_dereference(priv->wires);/* update side: protected by RTNL, not by rcu_read_lock() */ + + wl_new = kmalloc(sizeof(*wl) + + (wl->count + 1) * sizeof(wl->wires[0]), + GFP_ATOMIC); + if (!wl_new) { + netdev_err(rt_new->rt_vpls_dev, + "out of memory for pseudowire add!\n"); + return; + } + wl_new->count = wl->count + 1; + memcpy(wl_new->wires, wl->wires, + wl->count * sizeof(wl->wires[0])); + wl_new->wires[wl->count] = label; + + rcu_assign_pointer(priv->wires, wl_new); + kfree_rcu(wl, rcu); + + if (wl_new->count == 1) + netif_carrier_on(rt_new->rt_vpls_dev); + } +} + +/* fake multicast ability */ +static void vpls_set_multicast_list(struct net_device *dev) +{ +} +/* ndo_open (called with RTNL held): raise carrier only if at least one pseudowire is attached. */ +static int vpls_open(struct net_device *dev) +{ + struct vpls_priv *priv = netdev_priv(dev); + struct vpls_wirelist *wl; + + wl = rtnl_dereference(priv->wires);/* no rcu_read_lock() here; the list only changes under RTNL */ + if (wl->count > 0) + netif_carrier_on(dev); + + return 0; +} +/* ndo_stop: drop carrier. */ +static int vpls_close(struct net_device *dev) +{ + netif_carrier_off(dev); + return 0; +} +/* Nonzero when new_mtu lies within MIN_MTU..MAX_MTU inclusive. */ +static int is_valid_vpls_mtu(int new_mtu) +{ + return new_mtu >= MIN_MTU && new_mtu <= MAX_MTU; +} +/* ndo_change_mtu: accept only values that pass is_valid_vpls_mtu(). */ +static int vpls_change_mtu(struct net_device *dev, int new_mtu) +{ + if (!is_valid_vpls_mtu(new_mtu)) + return -EINVAL; + dev->mtu = new_mtu; + return 0; +} +/* ndo_init: allocate the empty wire list and the per-cpu stats; returns -ENOMEM if either allocation fails. */ +static int vpls_dev_init(struct net_device *dev) +{ + struct vpls_priv *priv = netdev_priv(dev); + priv->wires = kzalloc(sizeof(struct vpls_wirelist), GFP_KERNEL); + if (!priv->wires) + return -ENOMEM; + + dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); + if (!dev->tstats) { + kfree(priv->wires); + return -ENOMEM; + } + + return 0; +} +/* priv_destructor: release the per-cpu stats, the wire list and the reference on encap_net. */ +static void vpls_dev_free(struct net_device *dev) +{ + struct vpls_priv *priv = netdev_priv(dev); + + free_percpu(dev->tstats); + + if (priv->wires) + kfree(priv->wires); + + if (priv->encap_net) + put_net(priv->encap_net); + +
/* deliberately no free_netdev() here: the core frees the device itself because vpls_setup() sets needs_free_netdev, and it also runs this destructor when register_netdevice() fails */ +} + +static const struct net_device_ops vpls_netdev_ops = { + .ndo_init = vpls_dev_init, + .ndo_open = vpls_open, + .ndo_stop = vpls_close, + .ndo_start_xmit = vpls_xmit, + .ndo_change_mtu = vpls_change_mtu, + .ndo_get_stats64 = ip_tunnel_get_stats64, + .ndo_set_rx_mode = vpls_set_multicast_list, + .ndo_set_mac_address = eth_mac_addr, + .ndo_features_check = passthru_features_check, +}; +/* Nonzero when dev is driven by this file, judged by its netdev_ops pointer. */ +int is_vpls_dev(struct net_device *dev) +{ + return dev->netdev_ops == &vpls_netdev_ops; +} + +#define VPLS_FEATURES (NETIF_F_SG | NETIF_F_FRAGLIST | \ + NETIF_F_HW_CSUM | NETIF_F_RXCSUM | NETIF_F_HIGHDMA) +/* rtnl_link_ops setup hook: Ethernet defaults, no tx queue, lockless tx, keep the skb dst so vpls_xmit() can read the VPLS metadata, and let the core free the device after vpls_dev_free() has run. */ +static void vpls_setup(struct net_device *dev) +{ + ether_setup(dev); + + dev->priv_flags &= ~IFF_TX_SKB_SHARING; + dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; + dev->priv_flags |= IFF_NO_QUEUE; + + dev->netdev_ops = &vpls_netdev_ops; + dev->features |= NETIF_F_LLTX; + dev->features |= VPLS_FEATURES; + dev->vlan_features = dev->features; + dev->priv_destructor = vpls_dev_free; + dev->needs_free_netdev = true; + dev->hw_features = VPLS_FEATURES; + dev->hw_enc_features = VPLS_FEATURES; + + netif_keep_dst(dev); +} + +/* + * netlink interface + */ +/* Netlink validation: IFLA_ADDRESS, if given, must have length ETH_ALEN and pass is_valid_ether_addr(); IFLA_MTU, if given, must pass is_valid_vpls_mtu(). */ +static int vpls_validate(struct nlattr *tb[], struct nlattr *data[], + struct netlink_ext_ack *extack) +{ + if (tb[IFLA_ADDRESS]) { + if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) { + NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_ADDRESS], + "Invalid Ethernet address length"); + return -EINVAL; + } + if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) { + NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_ADDRESS], + "Invalid Ethernet address"); + return -EADDRNOTAVAIL; + } + } + if (tb[IFLA_MTU]) { + if (!is_valid_vpls_mtu(nla_get_u32(tb[IFLA_MTU]))) { + NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_MTU], + "Invalid MTU"); + return -EINVAL; + } + } + return 0; +} + +static struct rtnl_link_ops vpls_link_ops; +/* rtnl newlink hook: pick a random MAC unless IFLA_ADDRESS was supplied, name the device from IFLA_IFNAME or the vpls%d template, register it, take a reference on src_net for later route lookups and start with carrier off. */ +static int vpls_newlink(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[], + struct netlink_ext_ack *extack) +{ + int err; + struct
vpls_priv *priv = netdev_priv(dev); + + if (tb[IFLA_ADDRESS] == NULL) + eth_hw_addr_random(dev); + + if (tb[IFLA_IFNAME]) + nla_strlcpy(dev->name, tb[IFLA_IFNAME], IFNAMSIZ); + else + snprintf(dev->name, IFNAMSIZ, DRV_NAME "%%d"); + + err = register_netdevice(dev); + if (err < 0) + goto err; + priv->encap_net = get_net(src_net); + + netif_carrier_off(dev); + return 0; + +err: + return err; +} +/* rtnl dellink hook: queue the device for unregistration. */ +static void vpls_dellink(struct net_device *dev, struct list_head *head) +{ + unregister_netdevice_queue(dev, head); +} + + +static struct rtnl_link_ops vpls_link_ops = { + .kind = DRV_NAME, + .priv_size = sizeof(struct vpls_priv), + .setup = vpls_setup, + .validate = vpls_validate, + .newlink = vpls_newlink, + .dellink = vpls_dellink, +}; + +/* + * init/fini + */ +/* Register the vpls rtnl link type; returns the result of rtnl_link_register(). */ +__init int vpls_init(void) +{ + int ret; + + ret = rtnl_link_register(&vpls_link_ops); + if (ret) + goto out; + + return 0; + +out: + return ret; +} +/* Unregister the vpls rtnl link type. */ +__exit void vpls_exit(void) +{ + rtnl_link_unregister(&vpls_link_ops); +} + +#if 0 +/* not currently available as a separate module... */ + +module_init(vpls_init); +module_exit(vpls_exit); + +MODULE_DESCRIPTION("Virtual Private LAN Service"); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS_RTNL_LINK(DRV_NAME); +#endif