From patchwork Tue Apr 24 14:39:16 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Toshiaki Makita X-Patchwork-Id: 903534 X-Patchwork-Delegate: davem@davemloft.net Return-Path: X-Original-To: patchwork-incoming-netdev@ozlabs.org Delivered-To: patchwork-incoming-netdev@ozlabs.org Authentication-Results: ozlabs.org; spf=none (mailfrom) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=netdev-owner@vger.kernel.org; receiver=) Authentication-Results: ozlabs.org; dmarc=pass (p=none dis=none) header.from=gmail.com Authentication-Results: ozlabs.org; dkim=pass (2048-bit key; unprotected) header.d=gmail.com header.i=@gmail.com header.b="cGjz0ADV"; dkim-atps=neutral Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 40VmFQ180jz9ry1 for ; Wed, 25 Apr 2018 00:39:54 +1000 (AEST) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1754196AbeDXOju (ORCPT ); Tue, 24 Apr 2018 10:39:50 -0400 Received: from mail-pg0-f66.google.com ([74.125.83.66]:43469 "EHLO mail-pg0-f66.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1753690AbeDXOjo (ORCPT ); Tue, 24 Apr 2018 10:39:44 -0400 Received: by mail-pg0-f66.google.com with SMTP id f132so11024302pgc.10 for ; Tue, 24 Apr 2018 07:39:44 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20161025; h=from:to:cc:subject:date:message-id:in-reply-to:references; bh=94RuyL3125mHKsDk71P2MTaFHguqhHT8JxkiDaO3RGA=; b=cGjz0ADVBos9aGJmkLMZGawY3z48YAq+hCn9dLoDz0sRv1DyiO2UUu79uOyvmsB0Eu pMRXgmPsWR7XbNur2LUz1tb8bn8RPKbB1LYX4+hDGq5SxpzjOnW4a+huY+WvrS9t72fU nwzUDVPavR/5Q6ptGY+h8i8YtmhXZEh8drEw1fEQESeKnt3ExtbGoej91T86+wULQdft 5R7SKrcZUpSwiSx6xUfH4R3k3a4d6AAmwucc849jQP7LETjY2PM5LEGq3ecDRHdHqXbm kkJNgaLS/0XknP3T38BkoloMsXN6dmJeCQrxGOJLgYQjUJvHi9JAhAruJXQpjOzAJQhH hBXw== X-Google-DKIM-Signature: v=1; a=rsa-sha256; 
c=relaxed/relaxed; d=1e100.net; s=20161025; h=x-gm-message-state:from:to:cc:subject:date:message-id:in-reply-to :references; bh=94RuyL3125mHKsDk71P2MTaFHguqhHT8JxkiDaO3RGA=; b=GoIbTSnGKH7eE2FcUkza02YRDR+9l6j/A6paIMx/2cXLjIlhn7+gB5Iz2c1kE20162 wcT+7txcj8CmLCo+m5+bV9RtKDGrGo2xH4yMV/K+5nbOQJeO1ldrBgKQvGQ3ppUb3Sup u+StwAw6z1IzLUWHGfvAPR7A6Mz2ZMRGI9WSAQvBl8ttD5p8wh5P8FvgJ7l1DhiNerwl vQHSHzUfJHGN0hgDnxFCtG1BVjqnE1OrfBytEbtc8IKjpXM4SMgZuFC2R2XLkOz3YnM2 ZuZ74ROStLVWhKWkFXXABofl1ZKmb/CiooBe1ZD/JwtU3XK/iDliS302cPZk6Bul25CM vNQg== X-Gm-Message-State: ALQs6tAAZkjXLleFpAAcCG+Z/Rp4y3MAeGULQjpzGU3vkm4jCEc6cphG HdFU753+p8Uqh67bn8068svTzqczqtU= X-Google-Smtp-Source: AIpwx4/VrwKc06xrPsOoFrqgIXFKhGIBMmhQRZyBDUuT/EPi6xbtUyz3Lv4mqX5VXRAb2UrXbtGRqQ== X-Received: by 10.99.95.210 with SMTP id t201mr20967289pgb.315.1524580783886; Tue, 24 Apr 2018 07:39:43 -0700 (PDT) Received: from localhost.localdomain (i121-115-166-6.s42.a013.ap.plala.or.jp. [121.115.166.6]) by smtp.gmail.com with ESMTPSA id o64sm28179970pfb.62.2018.04.24.07.39.42 (version=TLS1_2 cipher=ECDHE-RSA-CHACHA20-POLY1305 bits=256/256); Tue, 24 Apr 2018 07:39:43 -0700 (PDT) From: Toshiaki Makita To: netdev@vger.kernel.org Cc: Toshiaki Makita Subject: [PATCH RFC 2/9] veth: Add driver XDP Date: Tue, 24 Apr 2018 23:39:16 +0900 Message-Id: <20180424143923.26519-3-toshiaki.makita1@gmail.com> X-Mailer: git-send-email 2.14.3 In-Reply-To: <20180424143923.26519-1-toshiaki.makita1@gmail.com> References: <20180424143923.26519-1-toshiaki.makita1@gmail.com> Sender: netdev-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org From: Toshiaki Makita This is a basic implementation of veth driver XDP. Incoming packets are sent from the peer veth device in the form of skbs, so this generally does the same thing as generic XDP. By itself this is not very useful, but it is a starting point for implementing other useful veth XDP features such as TX and REDIRECT. 
Signed-off-by: Toshiaki Makita --- drivers/net/veth.c | 210 +++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 205 insertions(+), 5 deletions(-) diff --git a/drivers/net/veth.c b/drivers/net/veth.c index a69ad39ee57e..9c4197306716 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -19,10 +19,15 @@ #include #include #include +#include +#include +#include #define DRV_NAME "veth" #define DRV_VERSION "1.0" +#define VETH_XDP_HEADROOM (XDP_PACKET_HEADROOM + NET_IP_ALIGN) + struct pcpu_vstats { u64 packets; u64 bytes; @@ -30,9 +35,11 @@ struct pcpu_vstats { }; struct veth_priv { + struct bpf_prog __rcu *xdp_prog; struct net_device __rcu *peer; atomic64_t dropped; unsigned requested_headroom; + struct xdp_rxq_info xdp_rxq; }; /* @@ -98,6 +105,25 @@ static const struct ethtool_ops veth_ethtool_ops = { .get_link_ksettings = veth_get_link_ksettings, }; +/* general routines */ + +static struct sk_buff *veth_xdp_rcv_skb(struct net_device *dev, + struct sk_buff *skb); + +static int veth_xdp_rx(struct net_device *dev, struct sk_buff *skb) +{ + skb = veth_xdp_rcv_skb(dev, skb); + if (!skb) + return NET_RX_DROP; + + return netif_rx(skb); +} + +static int veth_forward_skb(struct net_device *dev, struct sk_buff *skb) +{ + return __dev_forward_skb(dev, skb) ?: veth_xdp_rx(dev, skb); +} + static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev) { struct veth_priv *priv = netdev_priv(dev); @@ -111,7 +137,7 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev) goto drop; } - if (likely(dev_forward_skb(rcv, skb) == NET_RX_SUCCESS)) { + if (likely(veth_forward_skb(rcv, skb) == NET_RX_SUCCESS)) { struct pcpu_vstats *stats = this_cpu_ptr(dev->vstats); u64_stats_update_begin(&stats->syncp); @@ -126,10 +152,6 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_OK; } -/* - * general routines - */ - static u64 veth_stats_one(struct pcpu_vstats *result, struct net_device *dev) { struct 
veth_priv *priv = netdev_priv(dev); @@ -179,19 +201,152 @@ static void veth_set_multicast_list(struct net_device *dev) { } +static struct sk_buff *veth_build_skb(void *head, int headroom, int len, + int buflen) +{ + struct sk_buff *skb; + + if (!buflen) { + buflen = SKB_DATA_ALIGN(headroom + len) + + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + } + skb = build_skb(head, buflen); + if (!skb) + return NULL; + + skb_reserve(skb, headroom); + skb_put(skb, len); + + return skb; +} + +static struct sk_buff *veth_xdp_rcv_skb(struct net_device *dev, + struct sk_buff *skb) +{ + struct veth_priv *priv = netdev_priv(dev); + u32 pktlen, headroom, act, metalen; + int size, mac_len, delta, off; + struct bpf_prog *xdp_prog; + struct xdp_buff xdp; + void *orig_data; + + rcu_read_lock(); + xdp_prog = rcu_dereference(priv->xdp_prog); + if (!xdp_prog) { + rcu_read_unlock(); + goto out; + } + + mac_len = skb->data - skb_mac_header(skb); + pktlen = skb->len + mac_len; + size = SKB_DATA_ALIGN(VETH_XDP_HEADROOM + pktlen) + + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + if (size > PAGE_SIZE) + goto drop; + + headroom = skb_headroom(skb) - mac_len; + if (skb_shared(skb) || skb_head_is_locked(skb) || + skb_is_nonlinear(skb) || headroom < XDP_PACKET_HEADROOM) { + struct sk_buff *nskb; + void *head, *start; + struct page *page; + int head_off; + + page = alloc_page(GFP_ATOMIC); + if (!page) + goto drop; + + head = page_address(page); + start = head + VETH_XDP_HEADROOM; + if (skb_copy_bits(skb, -mac_len, start, pktlen)) { + page_frag_free(head); + goto drop; + } + + nskb = veth_build_skb(head, + VETH_XDP_HEADROOM + mac_len, skb->len, + PAGE_SIZE); + if (!nskb) { + page_frag_free(head); + goto drop; + } + + skb_copy_header(nskb, skb); + head_off = skb_headroom(nskb) - skb_headroom(skb); + skb_headers_offset_update(nskb, head_off); + dev_consume_skb_any(skb); + skb = nskb; + } + + xdp.data_hard_start = skb->head; + xdp.data = skb_mac_header(skb); + xdp.data_end = xdp.data + pktlen; + 
xdp.data_meta = xdp.data; + xdp.rxq = &priv->xdp_rxq; + orig_data = xdp.data; + + act = bpf_prog_run_xdp(xdp_prog, &xdp); + + switch (act) { + case XDP_PASS: + break; + default: + bpf_warn_invalid_xdp_action(act); + case XDP_ABORTED: + trace_xdp_exception(dev, xdp_prog, act); + case XDP_DROP: + goto drop; + } + rcu_read_unlock(); + + delta = orig_data - xdp.data; + off = mac_len + delta; + if (off > 0) + __skb_push(skb, off); + else if (off < 0) + __skb_pull(skb, -off); + skb->mac_header -= delta; + skb->protocol = eth_type_trans(skb, dev); + + metalen = xdp.data - xdp.data_meta; + if (metalen) + skb_metadata_set(skb, metalen); +out: + return skb; +drop: + rcu_read_unlock(); + dev_kfree_skb_any(skb); + return NULL; +} + static int veth_open(struct net_device *dev) { struct veth_priv *priv = netdev_priv(dev); struct net_device *peer = rtnl_dereference(priv->peer); + int err; if (!peer) return -ENOTCONN; + err = xdp_rxq_info_reg(&priv->xdp_rxq, dev, 0); + if (err < 0) + return err; + + err = xdp_rxq_info_reg_mem_model(&priv->xdp_rxq, + MEM_TYPE_PAGE_SHARED, NULL); + if (err < 0) + goto err_reg_mem; + if (peer->flags & IFF_UP) { netif_carrier_on(dev); netif_carrier_on(peer); } + return 0; +err_reg_mem: + xdp_rxq_info_unreg(&priv->xdp_rxq); + + return err; } static int veth_close(struct net_device *dev) @@ -203,6 +358,8 @@ static int veth_close(struct net_device *dev) if (peer) netif_carrier_off(peer); + xdp_rxq_info_unreg(&priv->xdp_rxq); + return 0; } @@ -276,6 +433,48 @@ static void veth_set_rx_headroom(struct net_device *dev, int new_hr) rcu_read_unlock(); } +static int veth_xdp_set(struct net_device *dev, struct bpf_prog *prog, + struct netlink_ext_ack *extack) +{ + struct veth_priv *priv = netdev_priv(dev); + struct bpf_prog *old_prog; + + old_prog = rtnl_dereference(priv->xdp_prog); + + rcu_assign_pointer(priv->xdp_prog, prog); + + if (old_prog) + bpf_prog_put(old_prog); + + return 0; +} + +static u32 veth_xdp_query(struct net_device *dev) +{ + struct veth_priv 
*priv = netdev_priv(dev); + const struct bpf_prog *xdp_prog; + + xdp_prog = rtnl_dereference(priv->xdp_prog); + if (xdp_prog) + return xdp_prog->aux->id; + + return 0; +} + +static int veth_xdp(struct net_device *dev, struct netdev_bpf *xdp) +{ + switch (xdp->command) { + case XDP_SETUP_PROG: + return veth_xdp_set(dev, xdp->prog, xdp->extack); + case XDP_QUERY_PROG: + xdp->prog_id = veth_xdp_query(dev); + xdp->prog_attached = !!xdp->prog_id; + return 0; + default: + return -EINVAL; + } +} + static const struct net_device_ops veth_netdev_ops = { .ndo_init = veth_dev_init, .ndo_open = veth_open, @@ -290,6 +489,7 @@ static const struct net_device_ops veth_netdev_ops = { .ndo_get_iflink = veth_get_iflink, .ndo_features_check = passthru_features_check, .ndo_set_rx_headroom = veth_set_rx_headroom, + .ndo_bpf = veth_xdp, }; #define VETH_FEATURES (NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HW_CSUM | \