@@ -19,10 +19,15 @@
#include <net/xfrm.h>
#include <linux/veth.h>
#include <linux/module.h>
+#include <linux/bpf.h>
+#include <linux/filter.h>
+#include <linux/bpf_trace.h>
#define DRV_NAME "veth"
#define DRV_VERSION "1.0"
+#define VETH_XDP_HEADROOM (XDP_PACKET_HEADROOM + NET_IP_ALIGN)
+
struct pcpu_vstats {
u64 packets;
u64 bytes;
@@ -30,9 +35,11 @@ struct pcpu_vstats {
};
struct veth_priv {
+ struct bpf_prog __rcu *xdp_prog;
struct net_device __rcu *peer;
atomic64_t dropped;
unsigned requested_headroom;
+ struct xdp_rxq_info xdp_rxq;
};
/*
@@ -98,6 +105,25 @@ static const struct ethtool_ops veth_ethtool_ops = {
.get_link_ksettings = veth_get_link_ksettings,
};
+/* general routines */
+
+static struct sk_buff *veth_xdp_rcv_skb(struct net_device *dev,
+ struct sk_buff *skb);
+
+static int veth_xdp_rx(struct net_device *dev, struct sk_buff *skb)
+{
+ skb = veth_xdp_rcv_skb(dev, skb);
+ if (!skb)
+ return NET_RX_DROP;
+
+ return netif_rx(skb);
+}
+
+static int veth_forward_skb(struct net_device *dev, struct sk_buff *skb)
+{
+ return __dev_forward_skb(dev, skb) ?: veth_xdp_rx(dev, skb);
+}
+
static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct veth_priv *priv = netdev_priv(dev);
@@ -111,7 +137,7 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
goto drop;
}
- if (likely(dev_forward_skb(rcv, skb) == NET_RX_SUCCESS)) {
+ if (likely(veth_forward_skb(rcv, skb) == NET_RX_SUCCESS)) {
struct pcpu_vstats *stats = this_cpu_ptr(dev->vstats);
u64_stats_update_begin(&stats->syncp);
@@ -126,10 +152,6 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
return NETDEV_TX_OK;
}
-/*
- * general routines
- */
-
static u64 veth_stats_one(struct pcpu_vstats *result, struct net_device *dev)
{
struct veth_priv *priv = netdev_priv(dev);
@@ -179,19 +201,152 @@ static void veth_set_multicast_list(struct net_device *dev)
{
}
+static struct sk_buff *veth_build_skb(void *head, int headroom, int len,
+ int buflen)
+{
+ struct sk_buff *skb;
+
+ if (!buflen) {
+ buflen = SKB_DATA_ALIGN(headroom + len) +
+ SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+ }
+ skb = build_skb(head, buflen);
+ if (!skb)
+ return NULL;
+
+ skb_reserve(skb, headroom);
+ skb_put(skb, len);
+
+ return skb;
+}
+
+static struct sk_buff *veth_xdp_rcv_skb(struct net_device *dev,
+ struct sk_buff *skb)
+{
+ struct veth_priv *priv = netdev_priv(dev);
+ u32 pktlen, headroom, act, metalen;
+ int size, mac_len, delta, off;
+ struct bpf_prog *xdp_prog;
+ struct xdp_buff xdp;
+ void *orig_data;
+
+ rcu_read_lock();
+ xdp_prog = rcu_dereference(priv->xdp_prog);
+ if (!xdp_prog) {
+ rcu_read_unlock();
+ goto out;
+ }
+
+ mac_len = skb->data - skb_mac_header(skb);
+ pktlen = skb->len + mac_len;
+ size = SKB_DATA_ALIGN(VETH_XDP_HEADROOM + pktlen) +
+ SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+ if (size > PAGE_SIZE)
+ goto drop;
+
+ headroom = skb_headroom(skb) - mac_len;
+ if (skb_shared(skb) || skb_head_is_locked(skb) ||
+ skb_is_nonlinear(skb) || headroom < XDP_PACKET_HEADROOM) {
+ struct sk_buff *nskb;
+ void *head, *start;
+ struct page *page;
+ int head_off;
+
+ page = alloc_page(GFP_ATOMIC);
+ if (!page)
+ goto drop;
+
+ head = page_address(page);
+ start = head + VETH_XDP_HEADROOM;
+ if (skb_copy_bits(skb, -mac_len, start, pktlen)) {
+ page_frag_free(head);
+ goto drop;
+ }
+
+ nskb = veth_build_skb(head,
+ VETH_XDP_HEADROOM + mac_len, skb->len,
+ PAGE_SIZE);
+ if (!nskb) {
+ page_frag_free(head);
+ goto drop;
+ }
+
+ skb_copy_header(nskb, skb);
+ head_off = skb_headroom(nskb) - skb_headroom(skb);
+ skb_headers_offset_update(nskb, head_off);
+ dev_consume_skb_any(skb);
+ skb = nskb;
+ }
+
+ xdp.data_hard_start = skb->head;
+ xdp.data = skb_mac_header(skb);
+ xdp.data_end = xdp.data + pktlen;
+ xdp.data_meta = xdp.data;
+ xdp.rxq = &priv->xdp_rxq;
+ orig_data = xdp.data;
+
+ act = bpf_prog_run_xdp(xdp_prog, &xdp);
+
+ switch (act) {
+ case XDP_PASS:
+ break;
+ default:
+ bpf_warn_invalid_xdp_action(act);
+ case XDP_ABORTED:
+ trace_xdp_exception(dev, xdp_prog, act);
+ case XDP_DROP:
+ goto drop;
+ }
+ rcu_read_unlock();
+
+ delta = orig_data - xdp.data;
+ off = mac_len + delta;
+ if (off > 0)
+ __skb_push(skb, off);
+ else if (off < 0)
+ __skb_pull(skb, -off);
+ skb->mac_header -= delta;
+ skb->protocol = eth_type_trans(skb, dev);
+
+ metalen = xdp.data - xdp.data_meta;
+ if (metalen)
+ skb_metadata_set(skb, metalen);
+out:
+ return skb;
+drop:
+ rcu_read_unlock();
+ dev_kfree_skb_any(skb);
+ return NULL;
+}
+
static int veth_open(struct net_device *dev)
{
struct veth_priv *priv = netdev_priv(dev);
struct net_device *peer = rtnl_dereference(priv->peer);
+ int err;
if (!peer)
return -ENOTCONN;
+ err = xdp_rxq_info_reg(&priv->xdp_rxq, dev, 0);
+ if (err < 0)
+ return err;
+
+ err = xdp_rxq_info_reg_mem_model(&priv->xdp_rxq,
+ MEM_TYPE_PAGE_SHARED, NULL);
+ if (err < 0)
+ goto err_reg_mem;
+
if (peer->flags & IFF_UP) {
netif_carrier_on(dev);
netif_carrier_on(peer);
}
+
return 0;
+err_reg_mem:
+ xdp_rxq_info_unreg(&priv->xdp_rxq);
+
+ return err;
}
static int veth_close(struct net_device *dev)
@@ -203,6 +358,8 @@ static int veth_close(struct net_device *dev)
if (peer)
netif_carrier_off(peer);
+ xdp_rxq_info_unreg(&priv->xdp_rxq);
+
return 0;
}
@@ -276,6 +433,48 @@ static void veth_set_rx_headroom(struct net_device *dev, int new_hr)
rcu_read_unlock();
}
+static int veth_xdp_set(struct net_device *dev, struct bpf_prog *prog,
+ struct netlink_ext_ack *extack)
+{
+ struct veth_priv *priv = netdev_priv(dev);
+ struct bpf_prog *old_prog;
+
+ old_prog = rtnl_dereference(priv->xdp_prog);
+
+ rcu_assign_pointer(priv->xdp_prog, prog);
+
+ if (old_prog)
+ bpf_prog_put(old_prog);
+
+ return 0;
+}
+
+static u32 veth_xdp_query(struct net_device *dev)
+{
+ struct veth_priv *priv = netdev_priv(dev);
+ const struct bpf_prog *xdp_prog;
+
+ xdp_prog = rtnl_dereference(priv->xdp_prog);
+ if (xdp_prog)
+ return xdp_prog->aux->id;
+
+ return 0;
+}
+
+static int veth_xdp(struct net_device *dev, struct netdev_bpf *xdp)
+{
+ switch (xdp->command) {
+ case XDP_SETUP_PROG:
+ return veth_xdp_set(dev, xdp->prog, xdp->extack);
+ case XDP_QUERY_PROG:
+ xdp->prog_id = veth_xdp_query(dev);
+ xdp->prog_attached = !!xdp->prog_id;
+ return 0;
+ default:
+ return -EINVAL;
+ }
+}
+
static const struct net_device_ops veth_netdev_ops = {
.ndo_init = veth_dev_init,
.ndo_open = veth_open,
@@ -290,6 +489,7 @@ static const struct net_device_ops veth_netdev_ops = {
.ndo_get_iflink = veth_get_iflink,
.ndo_features_check = passthru_features_check,
.ndo_set_rx_headroom = veth_set_rx_headroom,
+ .ndo_bpf = veth_xdp,
};
#define VETH_FEATURES (NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HW_CSUM | \