From patchwork Thu Feb 28 13:25:59 2019
Subject: [PATCH net-next v2 1/3] xdp: Refactor devmap code in preparation
 for subsequent additions
From: Toke Høiland-Jørgensen
To: David Miller
Cc: netdev@vger.kernel.org, Jesper Dangaard Brouer, Daniel Borkmann,
 Alexei Starovoitov, Jakub Kicinski
Date: Thu, 28 Feb 2019 14:25:59 +0100
Message-ID: <155136035963.3381.14966629345493976311.stgit@alrua-x1>
In-Reply-To: <155136028377.3381.2072266362746015640.stgit@alrua-x1>
References: <155136028377.3381.2072266362746015640.stgit@alrua-x1>

The subsequent commits introducing default maps and a hash-based ifindex
devmap require a bit of refactoring of the devmap code. Perform this
refactoring first so the subsequent commits become easier to read.
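As an illustration of the intent (a rough sketch, not part of this patch),
a later map type could reuse the dev_map_init_map() helper introduced
below; dev_map_hash_alloc and the check_memlock=false call are
hypothetical stand-ins for what the follow-up commits actually add:

    static struct bpf_map *dev_map_hash_alloc(union bpf_attr *attr)
    {
            struct bpf_dtab *dtab;
            int err;

            dtab = kzalloc(sizeof(*dtab), GFP_USER);
            if (!dtab)
                    return ERR_PTR(-ENOMEM);

            /* Shared init path; passing check_memlock=false would skip
             * the memlock precharge for callers that account the memory
             * elsewhere.
             */
            err = dev_map_init_map(dtab, attr, false);
            if (err) {
                    kfree(dtab);
                    return ERR_PTR(err);
            }

            return &dtab->map;
    }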
Signed-off-by: Toke Høiland-Jørgensen
---
 kernel/bpf/devmap.c |  177 +++++++++++++++++++++++++++++++--------------------
 1 file changed, 109 insertions(+), 68 deletions(-)

diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index 191b79948424..1037fc08c504 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -75,6 +75,7 @@ struct bpf_dtab {
 	struct bpf_dtab_netdev **netdev_map;
 	unsigned long __percpu *flush_needed;
 	struct list_head list;
+	struct rcu_head rcu;
 };
 
 static DEFINE_SPINLOCK(dev_map_lock);
@@ -85,23 +86,11 @@ static u64 dev_map_bitmap_size(const union bpf_attr *attr)
 	return BITS_TO_LONGS((u64) attr->max_entries) * sizeof(unsigned long);
 }
 
-static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
+static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr,
+			    bool check_memlock)
 {
-	struct bpf_dtab *dtab;
-	int err = -EINVAL;
 	u64 cost;
-
-	if (!capable(CAP_NET_ADMIN))
-		return ERR_PTR(-EPERM);
-
-	/* check sanity of attributes */
-	if (attr->max_entries == 0 || attr->key_size != 4 ||
-	    attr->value_size != 4 || attr->map_flags & ~DEV_CREATE_FLAG_MASK)
-		return ERR_PTR(-EINVAL);
-
-	dtab = kzalloc(sizeof(*dtab), GFP_USER);
-	if (!dtab)
-		return ERR_PTR(-ENOMEM);
+	int err;
 
 	bpf_map_init_from_attr(&dtab->map, attr);
 
@@ -109,60 +98,72 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
 	cost = (u64) dtab->map.max_entries * sizeof(struct bpf_dtab_netdev *);
 	cost += dev_map_bitmap_size(attr) * num_possible_cpus();
 	if (cost >= U32_MAX - PAGE_SIZE)
-		goto free_dtab;
+		return -EINVAL;
 
 	dtab->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
 
-	/* if map size is larger than memlock limit, reject it early */
-	err = bpf_map_precharge_memlock(dtab->map.pages);
-	if (err)
-		goto free_dtab;
-
-	err = -ENOMEM;
+	if (check_memlock) {
+		/* if map size is larger than memlock limit, reject it early */
+		err = bpf_map_precharge_memlock(dtab->map.pages);
+		if (err)
+			return -EINVAL;
+	}
 
 	/* A per cpu bitfield with a bit per possible net device */
 	dtab->flush_needed = __alloc_percpu_gfp(dev_map_bitmap_size(attr),
 						__alignof__(unsigned long),
 						GFP_KERNEL | __GFP_NOWARN);
 	if (!dtab->flush_needed)
-		goto free_dtab;
+		goto err_alloc;
 
 	dtab->netdev_map = bpf_map_area_alloc(dtab->map.max_entries *
 					      sizeof(struct bpf_dtab_netdev *),
 					      dtab->map.numa_node);
 	if (!dtab->netdev_map)
-		goto free_dtab;
+		goto err_map;
 
-	spin_lock(&dev_map_lock);
-	list_add_tail_rcu(&dtab->list, &dev_map_list);
-	spin_unlock(&dev_map_lock);
+	return 0;
 
-	return &dtab->map;
-free_dtab:
+err_map:
 	free_percpu(dtab->flush_needed);
-	kfree(dtab);
-	return ERR_PTR(err);
+err_alloc:
+	return -ENOMEM;
 }
 
-static void dev_map_free(struct bpf_map *map)
+static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
 {
-	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
-	int i, cpu;
+	struct bpf_dtab *dtab;
+	int err;
 
-	/* At this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0,
-	 * so the programs (can be more than one that used this map) were
-	 * disconnected from events. Wait for outstanding critical sections in
-	 * these programs to complete. The rcu critical section only guarantees
-	 * no further reads against netdev_map. It does __not__ ensure pending
-	 * flush operations (if any) are complete.
-	 */
+	if (!capable(CAP_NET_ADMIN))
+		return ERR_PTR(-EPERM);
+
+	/* check sanity of attributes */
+	if (attr->max_entries == 0 || attr->key_size != 4 ||
+	    attr->value_size != 4 || attr->map_flags & ~DEV_CREATE_FLAG_MASK)
+		return ERR_PTR(-EINVAL);
+
+	dtab = kzalloc(sizeof(*dtab), GFP_USER);
+	if (!dtab)
+		return ERR_PTR(-ENOMEM);
+
+	err = dev_map_init_map(dtab, attr, true);
+	if (err) {
+		kfree(dtab);
+		return ERR_PTR(err);
+	}
 
 	spin_lock(&dev_map_lock);
-	list_del_rcu(&dtab->list);
+	list_add_tail_rcu(&dtab->list, &dev_map_list);
 	spin_unlock(&dev_map_lock);
 
-	bpf_clear_redirect_map(map);
-	synchronize_rcu();
+	return &dtab->map;
+}
+
+static void __dev_map_free(struct rcu_head *rcu)
+{
+	struct bpf_dtab *dtab = container_of(rcu, struct bpf_dtab, rcu);
+	int i, cpu;
 
 	/* To ensure all pending flush operations have completed wait for flush
 	 * bitmap to indicate all flush_needed bits to be zero on _all_ cpus.
@@ -192,6 +193,26 @@ static void dev_map_free(struct bpf_map *map)
 	kfree(dtab);
 }
 
+static void dev_map_free(struct bpf_map *map)
+{
+	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
+
+	/* At this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0,
+	 * so the programs (can be more than one that used this map) were
+	 * disconnected from events. Wait for outstanding critical sections in
+	 * these programs to complete. The rcu critical section only guarantees
+	 * no further reads against netdev_map. It does __not__ ensure pending
+	 * flush operations (if any) are complete.
+	 */
+
+	spin_lock(&dev_map_lock);
+	list_del_rcu(&dtab->list);
+	spin_unlock(&dev_map_lock);
+
+	bpf_clear_redirect_map(map);
+	call_rcu(&dtab->rcu, __dev_map_free);
+}
+
 static int dev_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
 {
 	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
@@ -429,12 +450,42 @@ static int dev_map_delete_elem(struct bpf_map *map, void *key)
 	return 0;
 }
 
-static int dev_map_update_elem(struct bpf_map *map, void *key, void *value,
-			       u64 map_flags)
+static struct bpf_dtab_netdev *__dev_map_alloc_node(struct net *net,
+						    struct bpf_dtab *dtab,
+						    u32 ifindex,
+						    unsigned int bit)
 {
-	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
-	struct net *net = current->nsproxy->net_ns;
 	gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN;
+	struct bpf_dtab_netdev *dev;
+
+	dev = kmalloc_node(sizeof(*dev), gfp, dtab->map.numa_node);
+	if (!dev)
+		return ERR_PTR(-ENOMEM);
+
+	dev->bulkq = __alloc_percpu_gfp(sizeof(*dev->bulkq),
+					sizeof(void *), gfp);
+	if (!dev->bulkq) {
+		kfree(dev);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	dev->dev = dev_get_by_index(net, ifindex);
+	if (!dev->dev) {
+		free_percpu(dev->bulkq);
+		kfree(dev);
+		return ERR_PTR(-EINVAL);
+	}
+
+	dev->bit = bit;
+	dev->dtab = dtab;
+
+	return dev;
+}
+
+static int __dev_map_update_elem(struct net *net, struct bpf_map *map,
+				 void *key, void *value, u64 map_flags)
+{
+	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
 	struct bpf_dtab_netdev *dev, *old_dev;
 	u32 i = *(u32 *)key;
 	u32 ifindex = *(u32 *)value;
@@ -449,26 +500,9 @@ static int dev_map_update_elem(struct bpf_map *map, void *key, void *value,
 	if (!ifindex) {
 		dev = NULL;
 	} else {
-		dev = kmalloc_node(sizeof(*dev), gfp, map->numa_node);
-		if (!dev)
-			return -ENOMEM;
-
-		dev->bulkq = __alloc_percpu_gfp(sizeof(*dev->bulkq),
-						sizeof(void *), gfp);
-		if (!dev->bulkq) {
-			kfree(dev);
-			return -ENOMEM;
-		}
-
-		dev->dev = dev_get_by_index(net, ifindex);
-		if (!dev->dev) {
-			free_percpu(dev->bulkq);
-			kfree(dev);
-			return -EINVAL;
-		}
-
-		dev->bit = i;
-		dev->dtab = dtab;
+		dev = __dev_map_alloc_node(net, dtab, ifindex, i);
+		if (IS_ERR(dev))
+			return PTR_ERR(dev);
 	}
 
 	/* Use call_rcu() here to ensure rcu critical sections have completed
@@ -482,6 +516,13 @@ static int dev_map_update_elem(struct bpf_map *map, void *key, void *value,
 	return 0;
 }
 
+static int dev_map_update_elem(struct bpf_map *map, void *key, void *value,
+			       u64 map_flags)
+{
+	return __dev_map_update_elem(current->nsproxy->net_ns,
+				     map, key, value, map_flags);
+}
+
 const struct bpf_map_ops dev_map_ops = {
 	.map_alloc = dev_map_alloc,
 	.map_free = dev_map_free,
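As background on the free-path change above: dev_map_free() now
unpublishes the map and defers the teardown through call_rcu() instead of
blocking in synchronize_rcu(). A minimal sketch of that deferred-free
pattern follows (not part of the patch; struct foo is a stand-in payload
type, and kernel context is assumed):

    #include <linux/rcupdate.h>
    #include <linux/slab.h>

    struct foo {
            struct rcu_head rcu;
            /* ... payload accessed by readers under rcu_read_lock() ... */
    };

    /* RCU callback: invoked only after every RCU read-side critical
     * section that could still see the object has completed.
     */
    static void __foo_free(struct rcu_head *rcu)
    {
            struct foo *f = container_of(rcu, struct foo, rcu);

            kfree(f);
    }

    static void foo_free(struct foo *f)
    {
            /* Unpublish the object first (e.g. list_del_rcu()), then
             * defer the actual free; unlike synchronize_rcu(), call_rcu()
             * returns immediately rather than waiting a grace period.
             */
            call_rcu(&f->rcu, __foo_free);
    }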