From patchwork Thu Dec 12 23:30:48 2019
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: Andrey Ignatov <rdna@fb.com>
X-Patchwork-Id: 1208895
X-Patchwork-Delegate: bpf@iogearbox.net
Return-Path: <bpf-owner@vger.kernel.org>
X-Original-To: incoming-bpf@patchwork.ozlabs.org
Delivered-To: patchwork-incoming-bpf@bilbo.ozlabs.org
Authentication-Results: ozlabs.org; spf=none (no SPF record)
	smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67;
	helo=vger.kernel.org; envelope-from=bpf-owner@vger.kernel.org;
	receiver=<UNKNOWN>)
Authentication-Results: ozlabs.org;
	dmarc=pass (p=none dis=none) header.from=fb.com
Authentication-Results: ozlabs.org; dkim=pass (1024-bit key;
	unprotected) header.d=fb.com header.i=@fb.com header.b="bMdPAXgq";
	dkim-atps=neutral
Received: from vger.kernel.org (vger.kernel.org [209.132.180.67])
	by ozlabs.org (Postfix) with ESMTP id 47Yqns2drQz9sPT
	for <incoming-bpf@patchwork.ozlabs.org>;
	Fri, 13 Dec 2019 10:32:01 +1100 (AEDT)
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
	id S1731107AbfLLXcA (ORCPT
	<rfc822;incoming-bpf@patchwork.ozlabs.org>);
	Thu, 12 Dec 2019 18:32:00 -0500
Received: from mx0a-00082601.pphosted.com ([67.231.145.42]:2118 "EHLO
	mx0a-00082601.pphosted.com" rhost-flags-OK-OK-OK-OK)
	by vger.kernel.org with ESMTP id S1730934AbfLLXcA (ORCPT
	<rfc822;bpf@vger.kernel.org>); Thu, 12 Dec 2019 18:32:00 -0500
Received: from pps.filterd (m0109333.ppops.net [127.0.0.1])
	by mx0a-00082601.pphosted.com (8.16.0.42/8.16.0.42) with SMTP id
	xBCNVjSg027907
	for <bpf@vger.kernel.org>; Thu, 12 Dec 2019 15:31:59 -0800
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=fb.com;
	h=from : to : cc : subject
	: date : message-id : in-reply-to : references : mime-version :
	content-type; s=facebook;
	bh=A3sa158hA6zbREWxvxveyq8Gd95yJYYiwnsYK6hgXOc=;
	b=bMdPAXgqqXtfCDUQVAaOPUQ0yDYjhR8Muln85TEXZJ3GDbfnxHmwrJW5b5wkwON5H5wO
	W3SEcG7Sjj4+HcRxQYiNwpgvRku9kulldy/Sivqf1zvWF9HBXVFHDfKvn12xycto+wWt
	V+aanFxtVOR0G4VJcTC8qIMhs6Dk1GmVZ3w=
Received: from maileast.thefacebook.com ([163.114.130.16])
	by mx0a-00082601.pphosted.com with ESMTP id 2wu87qp3n6-14
	(version=TLSv1.2 cipher=ECDHE-RSA-AES128-GCM-SHA256 bits=128
	verify=NOT)
	for <bpf@vger.kernel.org>; Thu, 12 Dec 2019 15:31:59 -0800
Received: from intmgw002.41.prn1.facebook.com (2620:10d:c0a8:1b::d) by
	mail.thefacebook.com (2620:10d:c0a8:82::c) with Microsoft SMTP Server
	(version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256) id
	15.1.1713.5; Thu, 12 Dec 2019 15:31:10 -0800
Received: by dev082.prn2.facebook.com (Postfix, from userid 572249)
	id EE8BE3712A1F; Thu, 12 Dec 2019 15:31:07 -0800 (PST)
Smtp-Origin-Hostprefix: dev
From: Andrey Ignatov <rdna@fb.com>
Smtp-Origin-Hostname: dev082.prn2.facebook.com
To: <bpf@vger.kernel.org>
CC: Andrey Ignatov <rdna@fb.com>, <ast@kernel.org>,
	<daniel@iogearbox.net>, <kafai@fb.com>, <andriin@fb.com>,
	<kernel-team@fb.com>
Smtp-Origin-Cluster: prn2c23
Subject: [PATCH v2 bpf-next 1/6] bpf: Simplify __cgroup_bpf_attach
Date: Thu, 12 Dec 2019 15:30:48 -0800
Message-ID: 
 <25b5a158a89c561088e7106bbc1319220be80ead.1576193131.git.rdna@fb.com>
X-Mailer: git-send-email 2.17.1
In-Reply-To: <cover.1576193131.git.rdna@fb.com>
References: <cover.1576193131.git.rdna@fb.com>
X-FB-Internal: Safe
MIME-Version: 1.0
X-Proofpoint-Virus-Version: vendor=fsecure engine=2.50.10434:6.0.95, 18.0.572
	definitions=2019-12-12_08:2019-12-12,
	2019-12-12 signatures=0
X-Proofpoint-Spam-Details: rule=fb_default_notspam policy=fb_default score=0
	bulkscore=0
	priorityscore=1501 clxscore=1015 impostorscore=0 malwarescore=0
	spamscore=0 mlxscore=0 suspectscore=38 phishscore=0
	lowpriorityscore=0
	mlxlogscore=830 adultscore=0 classifier=spam adjust=0 reason=mlx
	scancount=1 engine=8.12.0-1910280000 definitions=main-1912120181
X-FB-Internal: deliver
Sender: bpf-owner@vger.kernel.org
Precedence: bulk
List-ID: <bpf.vger.kernel.org>
X-Mailing-List: bpf@vger.kernel.org

__cgroup_bpf_attach has a lot of identical code to handle two scenarios:
BPF_F_ALLOW_MULTI is set and unset.

Simplify it by splitting the two main steps:

* First, the decision is made whether a new bpf_prog_list entry should
  be allocated or existing entry should be reused for the new program.
  This decision is saved in replace_pl pointer;

* Next, replace_pl pointer is used to handle both possible states of
  BPF_F_ALLOW_MULTI flag (set / unset) instead of doing similar work for
  them separately.

This splitting, in turn, allows to make further simplifications:

* The check for attaching same program twice in BPF_F_ALLOW_MULTI mode
  can be done before allocating cgroup storage, so that if user tries to
  attach same program twice no alloc/free happens as it was before;

* pl_was_allocated becomes redundant so it's removed.

Signed-off-by: Andrey Ignatov <rdna@fb.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
---
 kernel/bpf/cgroup.c | 62 +++++++++++++++++----------------------------
 1 file changed, 23 insertions(+), 39 deletions(-)
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 9f90d3c92bda..e8cbdd1be687 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -295,9 +295,8 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
 	struct bpf_prog *old_prog = NULL;
 	struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE],
 		*old_storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {NULL};
+	struct bpf_prog_list *pl, *replace_pl = NULL;
 	enum bpf_cgroup_storage_type stype;
-	struct bpf_prog_list *pl;
-	bool pl_was_allocated;
 	int err;
 
 	if ((flags & BPF_F_ALLOW_OVERRIDE) && (flags & BPF_F_ALLOW_MULTI))
@@ -317,6 +316,16 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
 	if (prog_list_length(progs) >= BPF_CGROUP_MAX_PROGS)
 		return -E2BIG;
 
+	if (flags & BPF_F_ALLOW_MULTI) {
+		list_for_each_entry(pl, progs, node) {
+			if (pl->prog == prog)
+				/* disallow attaching the same prog twice */
+				return -EINVAL;
+		}
+	} else if (!list_empty(progs)) {
+		replace_pl = list_first_entry(progs, typeof(*pl), node);
+	}
+
 	for_each_cgroup_storage_type(stype) {
 		storage[stype] = bpf_cgroup_storage_alloc(prog, stype);
 		if (IS_ERR(storage[stype])) {
@@ -327,52 +336,27 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
 		}
 	}
 
-	if (flags & BPF_F_ALLOW_MULTI) {
-		list_for_each_entry(pl, progs, node) {
-			if (pl->prog == prog) {
-				/* disallow attaching the same prog twice */
-				for_each_cgroup_storage_type(stype)
-					bpf_cgroup_storage_free(storage[stype]);
-				return -EINVAL;
-			}
+	if (replace_pl) {
+		pl = replace_pl;
+		old_prog = pl->prog;
+		for_each_cgroup_storage_type(stype) {
+			old_storage[stype] = pl->storage[stype];
+			bpf_cgroup_storage_unlink(old_storage[stype]);
 		}
-
+	} else {
 		pl = kmalloc(sizeof(*pl), GFP_KERNEL);
 		if (!pl) {
 			for_each_cgroup_storage_type(stype)
 				bpf_cgroup_storage_free(storage[stype]);
 			return -ENOMEM;
 		}
-
-		pl_was_allocated = true;
-		pl->prog = prog;
-		for_each_cgroup_storage_type(stype)
-			pl->storage[stype] = storage[stype];
 		list_add_tail(&pl->node, progs);
-	} else {
-		if (list_empty(progs)) {
-			pl = kmalloc(sizeof(*pl), GFP_KERNEL);
-			if (!pl) {
-				for_each_cgroup_storage_type(stype)
-					bpf_cgroup_storage_free(storage[stype]);
-				return -ENOMEM;
-			}
-			pl_was_allocated = true;
-			list_add_tail(&pl->node, progs);
-		} else {
-			pl = list_first_entry(progs, typeof(*pl), node);
-			old_prog = pl->prog;
-			for_each_cgroup_storage_type(stype) {
-				old_storage[stype] = pl->storage[stype];
-				bpf_cgroup_storage_unlink(old_storage[stype]);
-			}
-			pl_was_allocated = false;
-		}
-		pl->prog = prog;
-		for_each_cgroup_storage_type(stype)
-			pl->storage[stype] = storage[stype];
 	}
 
+	pl->prog = prog;
+	for_each_cgroup_storage_type(stype)
+		pl->storage[stype] = storage[stype];
+
 	cgrp->bpf.flags[type] = flags;
 
 	err = update_effective_progs(cgrp, type);
@@ -401,7 +385,7 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
 		pl->storage[stype] = old_storage[stype];
 		bpf_cgroup_storage_link(old_storage[stype], cgrp, type);
 	}
-	if (pl_was_allocated) {
+	if (!replace_pl) {
 		list_del(&pl->node);
 		kfree(pl);
 	}

From patchwork Thu Dec 12 23:30:49 2019
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: Andrey Ignatov <rdna@fb.com>
X-Patchwork-Id: 1208890
X-Patchwork-Delegate: bpf@iogearbox.net
Return-Path: <bpf-owner@vger.kernel.org>
X-Original-To: incoming-bpf@patchwork.ozlabs.org
Delivered-To: patchwork-incoming-bpf@bilbo.ozlabs.org
Authentication-Results: ozlabs.org; spf=none (no SPF record)
	smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67;
	helo=vger.kernel.org; envelope-from=bpf-owner@vger.kernel.org;
	receiver=<UNKNOWN>)
Authentication-Results: ozlabs.org;
	dmarc=pass (p=none dis=none) header.from=fb.com
Authentication-Results: ozlabs.org; dkim=pass (1024-bit key;
	unprotected) header.d=fb.com header.i=@fb.com header.b="PXXS8JDI";
	dkim-atps=neutral
Received: from vger.kernel.org (vger.kernel.org [209.132.180.67])
	by ozlabs.org (Postfix) with ESMTP id 47Yqn24G0Nz9sPT
	for <incoming-bpf@patchwork.ozlabs.org>;
	Fri, 13 Dec 2019 10:31:18 +1100 (AEDT)
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
	id S1730868AbfLLXbR (ORCPT
	<rfc822;incoming-bpf@patchwork.ozlabs.org>);
	Thu, 12 Dec 2019 18:31:17 -0500
Received: from mx0a-00082601.pphosted.com ([67.231.145.42]:6928 "EHLO
	mx0a-00082601.pphosted.com" rhost-flags-OK-OK-OK-OK)
	by vger.kernel.org with ESMTP id S1726427AbfLLXbR (ORCPT
	<rfc822;bpf@vger.kernel.org>); Thu, 12 Dec 2019 18:31:17 -0500
Received: from pps.filterd (m0044010.ppops.net [127.0.0.1])
	by mx0a-00082601.pphosted.com (8.16.0.42/8.16.0.42) with SMTP id
	xBCNVDOY000909
	for <bpf@vger.kernel.org>; Thu, 12 Dec 2019 15:31:16 -0800
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=fb.com;
	h=from : to : cc : subject
	: date : message-id : in-reply-to : references : mime-version :
	content-type; s=facebook;
	bh=2j1bjPUzrEYgDVZgAt65XwSniFueFlCVFDpnkzSnY0A=;
	b=PXXS8JDIVlsrN3IsD6JVx9zDgFnZnTaAF/7NlGaUcqedX2QZ3Hmqb1csk/aBTRRSQdX6
	nxkw9h77mRfo5kWWknjPptqr9c9w49+tHpFcwctT1iqIyqYRBqlb6CO07rFLU81D+48k
	YIDMvSRD/APVzh1iXcaJkj2iod9ek1QFy/U=
Received: from maileast.thefacebook.com ([163.114.130.16])
	by mx0a-00082601.pphosted.com with ESMTP id 2wusrmstyv-3
	(version=TLSv1.2 cipher=ECDHE-RSA-AES128-GCM-SHA256 bits=128
	verify=NOT)
	for <bpf@vger.kernel.org>; Thu, 12 Dec 2019 15:31:16 -0800
Received: from intmgw002.41.prn1.facebook.com (2620:10d:c0a8:1b::d) by
	mail.thefacebook.com (2620:10d:c0a8:82::f) with Microsoft SMTP Server
	(version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256) id
	15.1.1713.5; Thu, 12 Dec 2019 15:31:12 -0800
Received: by dev082.prn2.facebook.com (Postfix, from userid 572249)
	id 038773712A1F; Thu, 12 Dec 2019 15:31:10 -0800 (PST)
Smtp-Origin-Hostprefix: dev
From: Andrey Ignatov <rdna@fb.com>
Smtp-Origin-Hostname: dev082.prn2.facebook.com
To: <bpf@vger.kernel.org>
CC: Andrey Ignatov <rdna@fb.com>, <ast@kernel.org>,
	<daniel@iogearbox.net>, <kafai@fb.com>, <andriin@fb.com>,
	<kernel-team@fb.com>
Smtp-Origin-Cluster: prn2c23
Subject: [PATCH v2 bpf-next 2/6] bpf: Remove unused new_flags in
	hierarchy_allows_attach()
Date: Thu, 12 Dec 2019 15:30:49 -0800
Message-ID: 
 <678caf2c5f76fb9988d32c26a6587769677a648c.1576193131.git.rdna@fb.com>
X-Mailer: git-send-email 2.17.1
In-Reply-To: <cover.1576193131.git.rdna@fb.com>
References: <cover.1576193131.git.rdna@fb.com>
X-FB-Internal: Safe
MIME-Version: 1.0
X-Proofpoint-Virus-Version: vendor=fsecure engine=2.50.10434:6.0.95, 18.0.572
	definitions=2019-12-12_08:2019-12-12,
	2019-12-12 signatures=0
X-Proofpoint-Spam-Details: rule=fb_default_notspam policy=fb_default score=0
	malwarescore=0 mlxscore=0
	lowpriorityscore=0 bulkscore=0 phishscore=0 spamscore=0
	priorityscore=1501
	suspectscore=13 impostorscore=0 adultscore=0 mlxlogscore=471
	clxscore=1015
	classifier=spam adjust=0 reason=mlx scancount=1
	engine=8.12.0-1910280000
	definitions=main-1912120181
X-FB-Internal: deliver
Sender: bpf-owner@vger.kernel.org
Precedence: bulk
List-ID: <bpf.vger.kernel.org>
X-Mailing-List: bpf@vger.kernel.org

new_flags is unused, remove it.

Signed-off-by: Andrey Ignatov <rdna@fb.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
---
 kernel/bpf/cgroup.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index e8cbdd1be687..283efe3ce052 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -103,8 +103,7 @@ static u32 prog_list_length(struct list_head *head)
  * if parent has overridable or multi-prog, allow attaching
  */
 static bool hierarchy_allows_attach(struct cgroup *cgrp,
-				    enum bpf_attach_type type,
-				    u32 new_flags)
+				    enum bpf_attach_type type)
 {
 	struct cgroup *p;
 
@@ -303,7 +302,7 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
 		/* invalid combination */
 		return -EINVAL;
 
-	if (!hierarchy_allows_attach(cgrp, type, flags))
+	if (!hierarchy_allows_attach(cgrp, type))
 		return -EPERM;
 
 	if (!list_empty(progs) && cgrp->bpf.flags[type] != flags)

From patchwork Thu Dec 12 23:30:50 2019
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: Andrey Ignatov <rdna@fb.com>
X-Patchwork-Id: 1208893
X-Patchwork-Delegate: bpf@iogearbox.net
Return-Path: <bpf-owner@vger.kernel.org>
X-Original-To: incoming-bpf@patchwork.ozlabs.org
Delivered-To: patchwork-incoming-bpf@bilbo.ozlabs.org
Authentication-Results: ozlabs.org; spf=none (no SPF record)
	smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67;
	helo=vger.kernel.org; envelope-from=bpf-owner@vger.kernel.org;
	receiver=<UNKNOWN>)
Authentication-Results: ozlabs.org;
	dmarc=pass (p=none dis=none) header.from=fb.com
Authentication-Results: ozlabs.org; dkim=pass (1024-bit key;
	unprotected) header.d=fb.com header.i=@fb.com header.b="LJLEpXV8";
	dkim-atps=neutral
Received: from vger.kernel.org (vger.kernel.org [209.132.180.67])
	by ozlabs.org (Postfix) with ESMTP id 47Yqn7696Gz9sPT
	for <incoming-bpf@patchwork.ozlabs.org>;
	Fri, 13 Dec 2019 10:31:23 +1100 (AEDT)
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
	id S1731165AbfLLXbX (ORCPT
	<rfc822;incoming-bpf@patchwork.ozlabs.org>);
	Thu, 12 Dec 2019 18:31:23 -0500
Received: from mx0a-00082601.pphosted.com ([67.231.145.42]:47344 "EHLO
	mx0a-00082601.pphosted.com" rhost-flags-OK-OK-OK-OK)
	by vger.kernel.org with ESMTP id S1726427AbfLLXbX (ORCPT
	<rfc822;bpf@vger.kernel.org>); Thu, 12 Dec 2019 18:31:23 -0500
Received: from pps.filterd (m0044010.ppops.net [127.0.0.1])
	by mx0a-00082601.pphosted.com (8.16.0.42/8.16.0.42) with SMTP id
	xBCNVD1X000900
	for <bpf@vger.kernel.org>; Thu, 12 Dec 2019 15:31:22 -0800
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=fb.com;
	h=from : to : cc : subject
	: date : message-id : in-reply-to : references : mime-version :
	content-type; s=facebook;
	bh=vYkw2CaehPercPO0LUyVpfbZ0pTiw+awCqqktRJDb08=;
	b=LJLEpXV8uXy4Vf8XL3+9Aoim2W/rlej4wCSecq9Ku7AnR2lZg7QvNJwL0KePJXUx2C4X
	IMog9Djdd5sSjmQtAQJIUK/nRv5+mOaPAdpbabH6wH/CmNihPE1nZ9W672V849y5EMrT
	5PPxNVDGH7nCbIdcP96d9ivuFr7xM18IJKw=
Received: from maileast.thefacebook.com ([163.114.130.16])
	by mx0a-00082601.pphosted.com with ESMTP id 2wusrmstxs-12
	(version=TLSv1.2 cipher=ECDHE-RSA-AES128-GCM-SHA256 bits=128
	verify=NOT)
	for <bpf@vger.kernel.org>; Thu, 12 Dec 2019 15:31:21 -0800
Received: from intmgw002.41.prn1.facebook.com (2620:10d:c0a8:1b::d) by
	mail.thefacebook.com (2620:10d:c0a8:82::e) with Microsoft SMTP Server
	(version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256) id
	15.1.1713.5; Thu, 12 Dec 2019 15:31:16 -0800
Received: by dev082.prn2.facebook.com (Postfix, from userid 572249)
	id 167593712A1F; Thu, 12 Dec 2019 15:31:14 -0800 (PST)
Smtp-Origin-Hostprefix: dev
From: Andrey Ignatov <rdna@fb.com>
Smtp-Origin-Hostname: dev082.prn2.facebook.com
To: <bpf@vger.kernel.org>
CC: Andrey Ignatov <rdna@fb.com>, <ast@kernel.org>,
	<daniel@iogearbox.net>, <kafai@fb.com>, <andriin@fb.com>,
	<kernel-team@fb.com>
Smtp-Origin-Cluster: prn2c23
Subject: [PATCH v2 bpf-next 3/6] bpf: Support replacing cgroup-bpf program
	in MULTI mode
Date: Thu, 12 Dec 2019 15:30:50 -0800
Message-ID: 
 <eecde103d36610e8f5b1b615dda19eda6897b13f.1576193131.git.rdna@fb.com>
X-Mailer: git-send-email 2.17.1
In-Reply-To: <cover.1576193131.git.rdna@fb.com>
References: <cover.1576193131.git.rdna@fb.com>
X-FB-Internal: Safe
MIME-Version: 1.0
X-Proofpoint-Virus-Version: vendor=fsecure engine=2.50.10434:6.0.95, 18.0.572
	definitions=2019-12-12_08:2019-12-12,
	2019-12-12 signatures=0
X-Proofpoint-Spam-Details: rule=fb_default_notspam policy=fb_default score=0
	malwarescore=0 mlxscore=0
	lowpriorityscore=0 bulkscore=0 phishscore=0 spamscore=0
	priorityscore=1501
	suspectscore=38 impostorscore=0 adultscore=0 mlxlogscore=999
	clxscore=1015
	classifier=spam adjust=0 reason=mlx scancount=1
	engine=8.12.0-1910280000
	definitions=main-1912120181
X-FB-Internal: deliver
Sender: bpf-owner@vger.kernel.org
Precedence: bulk
List-ID: <bpf.vger.kernel.org>
X-Mailing-List: bpf@vger.kernel.org

The common use-case in production is to have multiple cgroup-bpf
programs per attach type that cover multiple use-cases. Such programs
are attached with BPF_F_ALLOW_MULTI and can be maintained by different
people.

Order of programs usually matters, for example imagine two egress
programs: the first one drops packets and the second one counts packets.
If they're swapped the result of counting program will be different.

It brings operational challenges with updating cgroup-bpf program(s)
attached with BPF_F_ALLOW_MULTI since there is no way to replace a
program:

* One way to update is to detach all programs first and then attach the
  new version(s) again in the right order. This introduces an
  interruption in the work a program is doing and may not be acceptable
  (e.g. if it's egress firewall);

* Another way is attach the new version of a program first and only then
  detach the old version. This introduces the time interval when two
  versions of same program are working, what may not be acceptable if a
  program is not idempotent. It also imposes additional burden on
  program developers to make sure that two versions of their program can
  co-exist.

Solve the problem by introducing a "replace" mode in BPF_PROG_ATTACH
command for cgroup-bpf programs being attached with BPF_F_ALLOW_MULTI
flag. This mode is enabled by newly introduced BPF_F_REPLACE attach flag
and bpf_attr.replace_bpf_fd attribute to pass fd of the old program to
replace

That way user can replace any program among those attached with
BPF_F_ALLOW_MULTI flag without the problems described above.

Details of the new API:

* If BPF_F_REPLACE is set but replace_bpf_fd doesn't have valid
  descriptor of BPF program, BPF_PROG_ATTACH will return corresponding
  error (EINVAL or EBADF).

* If replace_bpf_fd has valid descriptor of BPF program but such a
  program is not attached to specified cgroup, BPF_PROG_ATTACH will
  return ENOENT.

BPF_F_REPLACE is introduced to make the user intent clear, since
replace_bpf_fd alone can't be used for this (its default value, 0, is a
valid fd). BPF_F_REPLACE also makes it possible to extend the API in the
future (e.g. add BPF_F_BEFORE and BPF_F_AFTER if needed).

Signed-off-by: Andrey Ignatov <rdna@fb.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
---
 include/linux/bpf-cgroup.h     |  4 +++-
 include/uapi/linux/bpf.h       | 10 ++++++++++
 kernel/bpf/cgroup.c            | 30 ++++++++++++++++++++++++++----
 kernel/bpf/syscall.c           |  4 ++--
 kernel/cgroup/cgroup.c         |  5 +++--
 tools/include/uapi/linux/bpf.h | 10 ++++++++++
 6 files changed, 54 insertions(+), 9 deletions(-)

diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index 169fd25f6bc2..18f6a6da7c3c 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -85,6 +85,7 @@ int cgroup_bpf_inherit(struct cgroup *cgrp);
 void cgroup_bpf_offline(struct cgroup *cgrp);
 
 int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
+			struct bpf_prog *replace_prog,
 			enum bpf_attach_type type, u32 flags);
 int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
 			enum bpf_attach_type type);
@@ -93,7 +94,8 @@ int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
 
 /* Wrapper for __cgroup_bpf_*() protected by cgroup_mutex */
 int cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
-		      enum bpf_attach_type type, u32 flags);
+		      struct bpf_prog *replace_prog, enum bpf_attach_type type,
+		      u32 flags);
 int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
 		      enum bpf_attach_type type, u32 flags);
 int cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index dbbcf0b02970..7df436da542d 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -231,6 +231,11 @@ enum bpf_attach_type {
  * When children program makes decision (like picking TCP CA or sock bind)
  * parent program has a chance to override it.
  *
+ * With BPF_F_ALLOW_MULTI a new program is added to the end of the list of
+ * programs for a cgroup. Though it's possible to replace an old program at
+ * any position by also specifying BPF_F_REPLACE flag and position itself in
+ * replace_bpf_fd attribute. Old program at this position will be released.
+ *
  * A cgroup with MULTI or OVERRIDE flag allows any attach flags in sub-cgroups.
  * A cgroup with NONE doesn't allow any programs in sub-cgroups.
  * Ex1:
@@ -249,6 +254,7 @@ enum bpf_attach_type {
  */
 #define BPF_F_ALLOW_OVERRIDE	(1U << 0)
 #define BPF_F_ALLOW_MULTI	(1U << 1)
+#define BPF_F_REPLACE		(1U << 2)
 
 /* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the
  * verifier will perform strict alignment checking as if the kernel
@@ -442,6 +448,10 @@ union bpf_attr {
 		__u32		attach_bpf_fd;	/* eBPF program to attach */
 		__u32		attach_type;
 		__u32		attach_flags;
+		__u32		replace_bpf_fd;	/* previously attached eBPF
+						 * program to replace if
+						 * BPF_F_REPLACE is used
+						 */
 	};
 
 	struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 283efe3ce052..45346c79613a 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -282,14 +282,17 @@ static int update_effective_progs(struct cgroup *cgrp,
  *                         propagate the change to descendants
  * @cgrp: The cgroup which descendants to traverse
  * @prog: A program to attach
+ * @replace_prog: Previously attached program to replace if BPF_F_REPLACE is set
  * @type: Type of attach operation
  * @flags: Option flags
  *
  * Must be called with cgroup_mutex held.
  */
 int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
+			struct bpf_prog *replace_prog,
 			enum bpf_attach_type type, u32 flags)
 {
+	u32 saved_flags = (flags & (BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI));
 	struct list_head *progs = &cgrp->bpf.progs[type];
 	struct bpf_prog *old_prog = NULL;
 	struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE],
@@ -298,14 +301,15 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
 	enum bpf_cgroup_storage_type stype;
 	int err;
 
-	if ((flags & BPF_F_ALLOW_OVERRIDE) && (flags & BPF_F_ALLOW_MULTI))
+	if (((flags & BPF_F_ALLOW_OVERRIDE) && (flags & BPF_F_ALLOW_MULTI)) ||
+	    ((flags & BPF_F_REPLACE) && !(flags & BPF_F_ALLOW_MULTI)))
 		/* invalid combination */
 		return -EINVAL;
 
 	if (!hierarchy_allows_attach(cgrp, type))
 		return -EPERM;
 
-	if (!list_empty(progs) && cgrp->bpf.flags[type] != flags)
+	if (!list_empty(progs) && cgrp->bpf.flags[type] != saved_flags)
 		/* Disallow attaching non-overridable on top
 		 * of existing overridable in this cgroup.
 		 * Disallow attaching multi-prog if overridable or none
@@ -320,7 +324,12 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
 			if (pl->prog == prog)
 				/* disallow attaching the same prog twice */
 				return -EINVAL;
+			if (pl->prog == replace_prog)
+				replace_pl = pl;
 		}
+		if ((flags & BPF_F_REPLACE) && !replace_pl)
+			/* prog to replace not found for cgroup */
+			return -ENOENT;
 	} else if (!list_empty(progs)) {
 		replace_pl = list_first_entry(progs, typeof(*pl), node);
 	}
@@ -356,7 +365,7 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
 	for_each_cgroup_storage_type(stype)
 		pl->storage[stype] = storage[stype];
 
-	cgrp->bpf.flags[type] = flags;
+	cgrp->bpf.flags[type] = saved_flags;
 
 	err = update_effective_progs(cgrp, type);
 	if (err)
@@ -522,6 +531,7 @@ int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
 int cgroup_bpf_prog_attach(const union bpf_attr *attr,
 			   enum bpf_prog_type ptype, struct bpf_prog *prog)
 {
+	struct bpf_prog *replace_prog = NULL;
 	struct cgroup *cgrp;
 	int ret;
 
@@ -529,8 +539,20 @@ int cgroup_bpf_prog_attach(const union bpf_attr *attr,
 	if (IS_ERR(cgrp))
 		return PTR_ERR(cgrp);
 
-	ret = cgroup_bpf_attach(cgrp, prog, attr->attach_type,
+	if ((attr->attach_flags & BPF_F_ALLOW_MULTI) &&
+	    (attr->attach_flags & BPF_F_REPLACE)) {
+		replace_prog = bpf_prog_get_type(attr->replace_bpf_fd, ptype);
+		if (IS_ERR(replace_prog)) {
+			cgroup_put(cgrp);
+			return PTR_ERR(replace_prog);
+		}
+	}
+
+	ret = cgroup_bpf_attach(cgrp, prog, replace_prog, attr->attach_type,
 				attr->attach_flags);
+
+	if (replace_prog)
+		bpf_prog_put(replace_prog);
 	cgroup_put(cgrp);
 	return ret;
 }
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 66b90eaf99fe..65c6d4b3231e 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2073,10 +2073,10 @@ static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog,
 	}
 }
 
-#define BPF_PROG_ATTACH_LAST_FIELD attach_flags
+#define BPF_PROG_ATTACH_LAST_FIELD replace_bpf_fd
 
 #define BPF_F_ATTACH_MASK \
-	(BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI)
+	(BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI | BPF_F_REPLACE)
 
 static int bpf_prog_attach(const union bpf_attr *attr)
 {
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 735af8f15f95..725365df066d 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -6288,12 +6288,13 @@ void cgroup_sk_free(struct sock_cgroup_data *skcd)
 
 #ifdef CONFIG_CGROUP_BPF
 int cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
-		      enum bpf_attach_type type, u32 flags)
+		      struct bpf_prog *replace_prog, enum bpf_attach_type type,
+		      u32 flags)
 {
 	int ret;
 
 	mutex_lock(&cgroup_mutex);
-	ret = __cgroup_bpf_attach(cgrp, prog, type, flags);
+	ret = __cgroup_bpf_attach(cgrp, prog, replace_prog, type, flags);
 	mutex_unlock(&cgroup_mutex);
 	return ret;
 }
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index dbbcf0b02970..7df436da542d 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -231,6 +231,11 @@ enum bpf_attach_type {
  * When children program makes decision (like picking TCP CA or sock bind)
  * parent program has a chance to override it.
  *
+ * With BPF_F_ALLOW_MULTI a new program is added to the end of the list of
+ * programs for a cgroup. Though it's possible to replace an old program at
+ * any position by also specifying BPF_F_REPLACE flag and position itself in
+ * replace_bpf_fd attribute. Old program at this position will be released.
+ *
  * A cgroup with MULTI or OVERRIDE flag allows any attach flags in sub-cgroups.
  * A cgroup with NONE doesn't allow any programs in sub-cgroups.
  * Ex1:
@@ -249,6 +254,7 @@ enum bpf_attach_type {
  */
 #define BPF_F_ALLOW_OVERRIDE	(1U << 0)
 #define BPF_F_ALLOW_MULTI	(1U << 1)
+#define BPF_F_REPLACE		(1U << 2)
 
 /* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the
  * verifier will perform strict alignment checking as if the kernel
@@ -442,6 +448,10 @@ union bpf_attr {
 		__u32		attach_bpf_fd;	/* eBPF program to attach */
 		__u32		attach_type;
 		__u32		attach_flags;
+		__u32		replace_bpf_fd;	/* previously attached eBPF
+						 * program to replace if
+						 * BPF_F_REPLACE is used
+						 */
 	};
 
 	struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */

From patchwork Thu Dec 12 23:30:51 2019
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: Andrey Ignatov <rdna@fb.com>
X-Patchwork-Id: 1208891
X-Patchwork-Delegate: bpf@iogearbox.net
Return-Path: <bpf-owner@vger.kernel.org>
X-Original-To: incoming-bpf@patchwork.ozlabs.org
Delivered-To: patchwork-incoming-bpf@bilbo.ozlabs.org
Authentication-Results: ozlabs.org; spf=none (no SPF record)
	smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67;
	helo=vger.kernel.org; envelope-from=bpf-owner@vger.kernel.org;
	receiver=<UNKNOWN>)
Authentication-Results: ozlabs.org;
	dmarc=pass (p=none dis=none) header.from=fb.com
Authentication-Results: ozlabs.org; dkim=pass (1024-bit key;
	unprotected) header.d=fb.com header.i=@fb.com header.b="Fy6eWIZG";
	dkim-atps=neutral
Received: from vger.kernel.org (vger.kernel.org [209.132.180.67])
	by ozlabs.org (Postfix) with ESMTP id 47Yqn56B1pz9sPT
	for <incoming-bpf@patchwork.ozlabs.org>;
	Fri, 13 Dec 2019 10:31:21 +1100 (AEDT)
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
	id S1730979AbfLLXbV (ORCPT
	<rfc822;incoming-bpf@patchwork.ozlabs.org>);
	Thu, 12 Dec 2019 18:31:21 -0500
Received: from mx0b-00082601.pphosted.com ([67.231.153.30]:32978 "EHLO
	mx0b-00082601.pphosted.com" rhost-flags-OK-OK-OK-OK)
	by vger.kernel.org with ESMTP id S1726427AbfLLXbV (ORCPT
	<rfc822;bpf@vger.kernel.org>); Thu, 12 Dec 2019 18:31:21 -0500
Received: from pps.filterd (m0109332.ppops.net [127.0.0.1])
	by mx0a-00082601.pphosted.com (8.16.0.42/8.16.0.42) with SMTP id
	xBCNUfot024272
	for <bpf@vger.kernel.org>; Thu, 12 Dec 2019 15:31:20 -0800
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=fb.com;
	h=from : to : cc : subject
	: date : message-id : in-reply-to : references : mime-version :
	content-type; s=facebook;
	bh=exfKRwHGEpDTARVMgsME3iP36reIzun2TE76MQAKTOc=;
	b=Fy6eWIZGbBJpDAdXSun0pIJYDWkdtnGO50fk23WzfYZEDNhe4H3RiQdF/AjgglHAKbEF
	QW11cjjkCm1REP20oENUexzZJR5GrZbqiJWj9yOW2buCw7gGIKQOJO9RqVjGr8o5TaF6
	BJK3jotP8hGmzA5g3Xh+9lGSNfv2C5d1q7U=
Received: from mail.thefacebook.com (mailout.thefacebook.com [199.201.64.23])
	by mx0a-00082601.pphosted.com with ESMTP id 2wu2gf7uqd-3
	(version=TLSv1.2 cipher=ECDHE-RSA-AES256-SHA384 bits=256 verify=NOT)
	for <bpf@vger.kernel.org>; Thu, 12 Dec 2019 15:31:19 -0800
Received: from intmgw004.06.prn3.facebook.com (2620:10d:c081:10::13) by
	mail.thefacebook.com (2620:10d:c081:35::128) with Microsoft SMTP
	Server
	(version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA) id
	15.1.1713.5; Thu, 12 Dec 2019 15:31:17 -0800
Received: by dev082.prn2.facebook.com (Postfix, from userid 572249)
	id F09AC3712A1F; Thu, 12 Dec 2019 15:31:16 -0800 (PST)
Smtp-Origin-Hostprefix: dev
From: Andrey Ignatov <rdna@fb.com>
Smtp-Origin-Hostname: dev082.prn2.facebook.com
To: <bpf@vger.kernel.org>
CC: Andrey Ignatov <rdna@fb.com>, <ast@kernel.org>,
	<daniel@iogearbox.net>, <kafai@fb.com>, <andriin@fb.com>,
	<kernel-team@fb.com>
Smtp-Origin-Cluster: prn2c23
Subject: [PATCH v2 bpf-next 4/6] libbpf: Make DECLARE_LIBBPF_OPTS available
	in bpf.h
Date: Thu, 12 Dec 2019 15:30:51 -0800
Message-ID: 
 <db7f94fd1735a28b6729a7f908c1a219af046dec.1576193131.git.rdna@fb.com>
X-Mailer: git-send-email 2.17.1
In-Reply-To: <cover.1576193131.git.rdna@fb.com>
References: <cover.1576193131.git.rdna@fb.com>
X-FB-Internal: Safe
MIME-Version: 1.0
X-Proofpoint-Virus-Version: vendor=fsecure engine=2.50.10434:6.0.95, 18.0.572
	definitions=2019-12-12_08:2019-12-12,
	2019-12-12 signatures=0
X-Proofpoint-Spam-Details: rule=fb_default_notspam policy=fb_default score=0
	mlxscore=0 adultscore=0
	bulkscore=0 suspectscore=13 priorityscore=1501 impostorscore=0
	malwarescore=0 lowpriorityscore=0 spamscore=0 clxscore=1015
	mlxlogscore=889 phishscore=0 classifier=spam adjust=0 reason=mlx
	scancount=1 engine=8.12.0-1910280000 definitions=main-1912120181
X-FB-Internal: deliver
Sender: bpf-owner@vger.kernel.org
Precedence: bulk
List-ID: <bpf.vger.kernel.org>
X-Mailing-List: bpf@vger.kernel.org

DECLARE_LIBBPF_OPTS is the way to use option structures in a backward
compatible and extensible way. It's available only in libbpf.h.  Though
public interfaces in bpf.h have the same requirement  to accept options
in a way that can be simply extended w/o introducing new xattr-functions
every time.

libbpf.c depends on bpf.h, hence to share the macros a couple of options
exist:
* either a common header should be introduced;
* or the macro can be moved to bpf.h and used by libbpf.h from there;

The former seems to be an overkill, so do the latter:
* move DECLARE_LIBBPF_OPTS from libbpf.h to bpf.h;
* move `#include "bpf.h"` from libbpf.c to libbpf.h not to break users
  of the macro who already include only libbpf.h.

That makes the macro available to use in bpf.{h,c} and should not break
those who already use it.

Signed-off-by: Andrey Ignatov <rdna@fb.com>
Acked-by: Andrii Nakryiko <andriin@fb.com>
---
 tools/lib/bpf/bpf.h    | 22 ++++++++++++++++++++++
 tools/lib/bpf/libbpf.c |  1 -
 tools/lib/bpf/libbpf.h | 24 ++----------------------
 3 files changed, 24 insertions(+), 23 deletions(-)

diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 3c791fa8e68e..5cfe6e0a1aef 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -36,6 +36,28 @@ extern "C" {
 #define LIBBPF_API __attribute__((visibility("default")))
 #endif
 
+/* Helper macro to declare and initialize libbpf options struct
+ *
+ * This dance with uninitialized declaration, followed by memset to zero,
+ * followed by assignment using compound literal syntax is done to preserve
+ * ability to use a nice struct field initialization syntax and **hopefully**
+ * have all the padding bytes initialized to zero. It's not guaranteed though,
+ * when copying literal, that compiler won't copy garbage in literal's padding
+ * bytes, but that's the best way I've found and it seems to work in practice.
+ *
+ * Macro declares opts struct of given type and name, zero-initializes,
+ * including any extra padding, it with memset() and then assigns initial
+ * values provided by users in struct initializer-syntax as varargs.
+ */
+#define DECLARE_LIBBPF_OPTS(TYPE, NAME, ...)				    \
+	struct TYPE NAME = ({						    \
+		memset(&NAME, 0, sizeof(struct TYPE));			    \
+		(struct TYPE) {						    \
+			.sz = sizeof(struct TYPE),			    \
+			__VA_ARGS__					    \
+		};							    \
+	})
+
 struct bpf_create_map_attr {
 	const char *name;
 	enum bpf_map_type map_type;
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 920d4e06a5f9..9d788c1b9874 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -46,7 +46,6 @@
 #include <gelf.h>
 
 #include "libbpf.h"
-#include "bpf.h"
 #include "btf.h"
 #include "str_error.h"
 #include "libbpf_internal.h"
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 0dbf4bfba0c4..03189880d99e 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -17,6 +17,8 @@
 #include <sys/types.h>  // for size_t
 #include <linux/bpf.h>
 
+#include "bpf.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -67,28 +69,6 @@ struct bpf_object_open_attr {
 	enum bpf_prog_type prog_type;
 };
 
-/* Helper macro to declare and initialize libbpf options struct
- *
- * This dance with uninitialized declaration, followed by memset to zero,
- * followed by assignment using compound literal syntax is done to preserve
- * ability to use a nice struct field initialization syntax and **hopefully**
- * have all the padding bytes initialized to zero. It's not guaranteed though,
- * when copying literal, that compiler won't copy garbage in literal's padding
- * bytes, but that's the best way I've found and it seems to work in practice.
- *
- * Macro declares opts struct of given type and name, zero-initializes,
- * including any extra padding, it with memset() and then assigns initial
- * values provided by users in struct initializer-syntax as varargs.
- */
-#define DECLARE_LIBBPF_OPTS(TYPE, NAME, ...)				    \
-	struct TYPE NAME = ({ 						    \
-		memset(&NAME, 0, sizeof(struct TYPE));			    \
-		(struct TYPE) {						    \
-			.sz = sizeof(struct TYPE),			    \
-			__VA_ARGS__					    \
-		};							    \
-	})
-
 struct bpf_object_open_opts {
 	/* size of this struct, for forward/backward compatiblity */
 	size_t sz;

From patchwork Thu Dec 12 23:30:52 2019
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: Andrey Ignatov <rdna@fb.com>
X-Patchwork-Id: 1208894
X-Patchwork-Delegate: bpf@iogearbox.net
Return-Path: <bpf-owner@vger.kernel.org>
X-Original-To: incoming-bpf@patchwork.ozlabs.org
Delivered-To: patchwork-incoming-bpf@bilbo.ozlabs.org
Authentication-Results: ozlabs.org; spf=none (no SPF record)
	smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67;
	helo=vger.kernel.org; envelope-from=bpf-owner@vger.kernel.org;
	receiver=<UNKNOWN>)
Authentication-Results: ozlabs.org;
	dmarc=pass (p=none dis=none) header.from=fb.com
Authentication-Results: ozlabs.org; dkim=pass (1024-bit key;
	unprotected) header.d=fb.com header.i=@fb.com header.b="PY331Nh8";
	dkim-atps=neutral
Received: from vger.kernel.org (vger.kernel.org [209.132.180.67])
	by ozlabs.org (Postfix) with ESMTP id 47Yqn84Jq0z9sPL
	for <incoming-bpf@patchwork.ozlabs.org>;
	Fri, 13 Dec 2019 10:31:24 +1100 (AEDT)
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
	id S1726427AbfLLXbY (ORCPT
	<rfc822;incoming-bpf@patchwork.ozlabs.org>);
	Thu, 12 Dec 2019 18:31:24 -0500
Received: from mx0b-00082601.pphosted.com ([67.231.153.30]:51780 "EHLO
	mx0a-00082601.pphosted.com" rhost-flags-OK-OK-OK-FAIL)
	by vger.kernel.org with ESMTP id S1730934AbfLLXbY (ORCPT
	<rfc822;bpf@vger.kernel.org>); Thu, 12 Dec 2019 18:31:24 -0500
Received: from pps.filterd (m0089730.ppops.net [127.0.0.1])
	by m0089730.ppops.net (8.16.0.42/8.16.0.42) with SMTP id
	xBCNUoa7019162
	for <bpf@vger.kernel.org>; Thu, 12 Dec 2019 15:31:22 -0800
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=fb.com;
	h=from : to : cc : subject
	: date : message-id : in-reply-to : references : mime-version :
	content-type; s=facebook;
	bh=Xh6BKSTnCMYVO8T/wonHy5xbs5J2/Ki4m6r3HeUBtUs=;
	b=PY331Nh8gOlBxIeSAuNTK9v2clAVJklBkXr+UrcBe1P4pyMGPCDQL5fPBCF8YyX2+6EN
	PocVS14I9eJfm9TO5FH3loGJ0QHssju1gNGn1+RyepEMvCYRz+dkH5DBAPH7qe0ocF8z
	H51kw5fm9hV/FNqNdrzW2VEk8sbdb9NhGJk=
Received: from mail.thefacebook.com (mailout.thefacebook.com [199.201.64.23])
	by m0089730.ppops.net with ESMTP id 2wuke1kpks-2
	(version=TLSv1.2 cipher=ECDHE-RSA-AES256-SHA384 bits=256 verify=NOT)
	for <bpf@vger.kernel.org>; Thu, 12 Dec 2019 15:31:22 -0800
Received: from intmgw001.06.prn3.facebook.com (2620:10d:c081:10::13) by
	mail.thefacebook.com (2620:10d:c081:35::126) with Microsoft SMTP
	Server
	(version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA) id
	15.1.1713.5; Thu, 12 Dec 2019 15:31:21 -0800
Received: by dev082.prn2.facebook.com (Postfix, from userid 572249)
	id 99CD73712A1F; Thu, 12 Dec 2019 15:31:20 -0800 (PST)
Smtp-Origin-Hostprefix: dev
From: Andrey Ignatov <rdna@fb.com>
Smtp-Origin-Hostname: dev082.prn2.facebook.com
To: <bpf@vger.kernel.org>
CC: Andrey Ignatov <rdna@fb.com>, <ast@kernel.org>,
	<daniel@iogearbox.net>, <kafai@fb.com>, <andriin@fb.com>,
	<kernel-team@fb.com>
Smtp-Origin-Cluster: prn2c23
Subject: [PATCH v2 bpf-next 5/6] libbpf: Introduce bpf_prog_attach_xattr
Date: Thu, 12 Dec 2019 15:30:52 -0800
Message-ID: 
 <364944f93a1d77eab769eeba79bb74122a688338.1576193131.git.rdna@fb.com>
X-Mailer: git-send-email 2.17.1
In-Reply-To: <cover.1576193131.git.rdna@fb.com>
References: <cover.1576193131.git.rdna@fb.com>
X-FB-Internal: Safe
MIME-Version: 1.0
X-Proofpoint-Virus-Version: vendor=fsecure engine=2.50.10434:6.0.95, 18.0.572
	definitions=2019-12-12_08:2019-12-12,
	2019-12-12 signatures=0
X-Proofpoint-Spam-Details: rule=fb_default_notspam policy=fb_default score=0
	impostorscore=0
	malwarescore=0 suspectscore=13 adultscore=0 clxscore=1015 spamscore=0
	mlxscore=0 bulkscore=0 lowpriorityscore=0 mlxlogscore=355
	priorityscore=1501 phishscore=0 classifier=spam adjust=0 reason=mlx
	scancount=1 engine=8.12.0-1910280000 definitions=main-1912120181
X-FB-Internal: deliver
Sender: bpf-owner@vger.kernel.org
Precedence: bulk
List-ID: <bpf.vger.kernel.org>
X-Mailing-List: bpf@vger.kernel.org

Introduce a new bpf_prog_attach_xattr function that accepts an
extendable struct bpf_prog_attach_opts and supports passing a new
attribute to BPF_PROG_ATTACH command: replace_prog_fd that is fd of
previously attached cgroup-bpf program to replace if recently introduced
BPF_F_REPLACE flag is used.

The new function is named to be consistent with other xattr-functions
(bpf_prog_test_run_xattr, bpf_create_map_xattr, bpf_load_program_xattr).

The struct bpf_prog_attach_opts is supposed to be used with
DECLARE_LIBBPF_OPTS framework.

The opts argument is used directly in bpf_prog_attach_xattr
implementation since at the time of adding all fields already exist in
the kernel. New fields, if added, will need to be used via OPTS_* macros
from libbpf_internal.h.

Signed-off-by: Andrey Ignatov <rdna@fb.com>
---
 tools/lib/bpf/bpf.c      | 21 +++++++++++++++++----
 tools/lib/bpf/bpf.h      | 12 ++++++++++++
 tools/lib/bpf/libbpf.map |  2 ++
 3 files changed, 31 insertions(+), 4 deletions(-)

diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index 98596e15390f..9f4e42abd185 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -466,14 +466,27 @@ int bpf_obj_get(const char *pathname)
 
 int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type,
 		    unsigned int flags)
+{
+	DECLARE_LIBBPF_OPTS(bpf_prog_attach_opts, opts,
+		.target_fd = target_fd,
+		.prog_fd = prog_fd,
+		.type = type,
+		.flags = flags,
+	);
+
+	return bpf_prog_attach_xattr(&opts);
+}
+
+int bpf_prog_attach_xattr(const struct bpf_prog_attach_opts *opts)
 {
 	union bpf_attr attr;
 
 	memset(&attr, 0, sizeof(attr));
-	attr.target_fd	   = target_fd;
-	attr.attach_bpf_fd = prog_fd;
-	attr.attach_type   = type;
-	attr.attach_flags  = flags;
+	attr.target_fd	   = opts->target_fd;
+	attr.attach_bpf_fd = opts->prog_fd;
+	attr.attach_type   = opts->type;
+	attr.attach_flags  = opts->flags;
+	attr.replace_bpf_fd = opts->replace_prog_fd;
 
 	return sys_bpf(BPF_PROG_ATTACH, &attr, sizeof(attr));
 }
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 5cfe6e0a1aef..5b5f9b374074 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -150,8 +150,20 @@ LIBBPF_API int bpf_map_get_next_key(int fd, const void *key, void *next_key);
 LIBBPF_API int bpf_map_freeze(int fd);
 LIBBPF_API int bpf_obj_pin(int fd, const char *pathname);
 LIBBPF_API int bpf_obj_get(const char *pathname);
+
+struct bpf_prog_attach_opts {
+	size_t sz; /* size of this struct for forward/backward compatibility */
+	int target_fd;
+	int prog_fd;
+	enum bpf_attach_type type;
+	unsigned int flags;
+	int replace_prog_fd;
+};
+#define bpf_prog_attach_opts__last_field replace_prog_fd
+
 LIBBPF_API int bpf_prog_attach(int prog_fd, int attachable_fd,
 			       enum bpf_attach_type type, unsigned int flags);
+LIBBPF_API int bpf_prog_attach_xattr(const struct bpf_prog_attach_opts *opts);
 LIBBPF_API int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type);
 LIBBPF_API int bpf_prog_detach2(int prog_fd, int attachable_fd,
 				enum bpf_attach_type type);
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 495df575f87f..42b065454031 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -210,4 +210,6 @@ LIBBPF_0.0.6 {
 } LIBBPF_0.0.5;
 
 LIBBPF_0.0.7 {
+	global:
+		bpf_prog_attach_xattr;
 } LIBBPF_0.0.6;

From patchwork Thu Dec 12 23:30:53 2019
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: Andrey Ignatov <rdna@fb.com>
X-Patchwork-Id: 1208896
X-Patchwork-Delegate: bpf@iogearbox.net
Return-Path: <bpf-owner@vger.kernel.org>
X-Original-To: incoming-bpf@patchwork.ozlabs.org
Delivered-To: patchwork-incoming-bpf@bilbo.ozlabs.org
Authentication-Results: ozlabs.org; spf=none (no SPF record)
	smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67;
	helo=vger.kernel.org; envelope-from=bpf-owner@vger.kernel.org;
	receiver=<UNKNOWN>)
Authentication-Results: ozlabs.org;
	dmarc=pass (p=none dis=none) header.from=fb.com
Authentication-Results: ozlabs.org; dkim=pass (1024-bit key;
	unprotected) header.d=fb.com header.i=@fb.com header.b="OAxdsuc/";
	dkim-atps=neutral
Received: from vger.kernel.org (vger.kernel.org [209.132.180.67])
	by ozlabs.org (Postfix) with ESMTP id 47Yqp53wHlz9sPL
	for <incoming-bpf@patchwork.ozlabs.org>;
	Fri, 13 Dec 2019 10:32:13 +1100 (AEDT)
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
	id S1731158AbfLLXcN (ORCPT
	<rfc822;incoming-bpf@patchwork.ozlabs.org>);
	Thu, 12 Dec 2019 18:32:13 -0500
Received: from mx0a-00082601.pphosted.com ([67.231.145.42]:29990 "EHLO
	mx0a-00082601.pphosted.com" rhost-flags-OK-OK-OK-OK)
	by vger.kernel.org with ESMTP id S1730934AbfLLXcN (ORCPT
	<rfc822;bpf@vger.kernel.org>); Thu, 12 Dec 2019 18:32:13 -0500
Received: from pps.filterd (m0044012.ppops.net [127.0.0.1])
	by mx0a-00082601.pphosted.com (8.16.0.42/8.16.0.42) with SMTP id
	xBCNW6YG014899
	for <bpf@vger.kernel.org>; Thu, 12 Dec 2019 15:32:12 -0800
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=fb.com;
	h=from : to : cc : subject
	: date : message-id : in-reply-to : references : mime-version :
	content-type; s=facebook;
	bh=OcW1omHt06g7weuVljtwWO3YOx5fc0PEP7yb6YDWVAk=;
	b=OAxdsuc/1A7waqEzR+kvXtKfuQVDuMNASUqIwAJm8llSxiw8dnZY+vwUrt2+wBnVW/u+
	f9gf/j/vm26zo7nmmvUaU2XIdPrZPniOSc7/hTT8cz4IdwHttAQS3yqutBxWes1SBWAg
	0pqbzMDt45dY1mThOobd0MKY4Re2hGDBzWQ=
Received: from maileast.thefacebook.com ([163.114.130.16])
	by mx0a-00082601.pphosted.com with ESMTP id 2wub46dape-10
	(version=TLSv1.2 cipher=ECDHE-RSA-AES128-GCM-SHA256 bits=128
	verify=NOT)
	for <bpf@vger.kernel.org>; Thu, 12 Dec 2019 15:32:11 -0800
Received: from intmgw002.41.prn1.facebook.com (2620:10d:c0a8:1b::d) by
	mail.thefacebook.com (2620:10d:c0a8:83::7) with Microsoft SMTP Server
	(version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256) id
	15.1.1713.5; Thu, 12 Dec 2019 15:31:26 -0800
Received: by dev082.prn2.facebook.com (Postfix, from userid 572249)
	id 619D43712A1F; Thu, 12 Dec 2019 15:31:25 -0800 (PST)
Smtp-Origin-Hostprefix: dev
From: Andrey Ignatov <rdna@fb.com>
Smtp-Origin-Hostname: dev082.prn2.facebook.com
To: <bpf@vger.kernel.org>
CC: Andrey Ignatov <rdna@fb.com>, <ast@kernel.org>,
	<daniel@iogearbox.net>, <kafai@fb.com>, <andriin@fb.com>,
	<kernel-team@fb.com>
Smtp-Origin-Cluster: prn2c23
Subject: [PATCH v2 bpf-next 6/6] selftests/bpf: Cover BPF_F_REPLACE in
	test_cgroup_attach
Date: Thu, 12 Dec 2019 15:30:53 -0800
Message-ID: 
 <bc55a274ea572d237bd091819f38502fa837abb5.1576193131.git.rdna@fb.com>
X-Mailer: git-send-email 2.17.1
In-Reply-To: <cover.1576193131.git.rdna@fb.com>
References: <cover.1576193131.git.rdna@fb.com>
X-FB-Internal: Safe
MIME-Version: 1.0
X-Proofpoint-Virus-Version: vendor=fsecure engine=2.50.10434:6.0.95, 18.0.572
	definitions=2019-12-12_08:2019-12-12,
	2019-12-12 signatures=0
X-Proofpoint-Spam-Details: rule=fb_default_notspam policy=fb_default score=0
	impostorscore=0
	bulkscore=0 phishscore=0 adultscore=0 priorityscore=1501
	mlxlogscore=390
	lowpriorityscore=0 suspectscore=13 spamscore=0 mlxscore=0
	clxscore=1015
	malwarescore=0 classifier=spam adjust=0 reason=mlx scancount=1
	engine=8.12.0-1910280000 definitions=main-1912120181
X-FB-Internal: deliver
Sender: bpf-owner@vger.kernel.org
Precedence: bulk
List-ID: <bpf.vger.kernel.org>
X-Mailing-List: bpf@vger.kernel.org

Test replacement of a cgroup-bpf program attached with BPF_F_ALLOW_MULTI
and possible failure modes: invalid combination of flags, invalid
replace_bpf_fd, replacing a non-attachd to specified cgroup program.

Example of program replacing:

  # gdb -q ./test_cgroup_attach
  Reading symbols from /data/users/rdna/bin/test_cgroup_attach...done.
  ...
  Breakpoint 1, test_multiprog () at test_cgroup_attach.c:443
  443     test_cgroup_attach.c: No such file or directory.
  (gdb)
  [2]+  Stopped                 gdb -q ./test_cgroup_attach
  # bpftool c s /mnt/cgroup2/cgroup-test-work-dir/cg1
  ID       AttachType      AttachFlags     Name
  35       egress          multi
  36       egress          multi
  # fg gdb -q ./test_cgroup_attach
  c
  Continuing.
  Detaching after fork from child process 361.

  Breakpoint 2, test_multiprog () at test_cgroup_attach.c:454
  454     in test_cgroup_attach.c
  (gdb)
  [2]+  Stopped                 gdb -q ./test_cgroup_attach
  # bpftool c s /mnt/cgroup2/cgroup-test-work-dir/cg1
  ID       AttachType      AttachFlags     Name
  41       egress          multi
  36       egress          multi

Signed-off-by: Andrey Ignatov <rdna@fb.com>
---
 .../selftests/bpf/test_cgroup_attach.c        | 62 +++++++++++++++++--
 1 file changed, 57 insertions(+), 5 deletions(-)

diff --git a/tools/testing/selftests/bpf/test_cgroup_attach.c b/tools/testing/selftests/bpf/test_cgroup_attach.c
index 7671909ee1cb..6c7971ffe683 100644
--- a/tools/testing/selftests/bpf/test_cgroup_attach.c
+++ b/tools/testing/selftests/bpf/test_cgroup_attach.c
@@ -250,7 +250,7 @@ static int prog_load_cnt(int verdict, int val)
 		BPF_LD_MAP_FD(BPF_REG_1, map_fd),
 		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
 		BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
-		BPF_MOV64_IMM(BPF_REG_1, val), /* r1 = 1 */
+		BPF_MOV64_IMM(BPF_REG_1, val), /* r1 = val */
 		BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_0, BPF_REG_1, 0, 0), /* xadd r0 += r1 */
 
 		BPF_LD_MAP_FD(BPF_REG_1, cgroup_storage_fd),
@@ -290,11 +290,11 @@ static int test_multiprog(void)
 {
 	__u32 prog_ids[4], prog_cnt = 0, attach_flags, saved_prog_id;
 	int cg1 = 0, cg2 = 0, cg3 = 0, cg4 = 0, cg5 = 0, key = 0;
-	int drop_prog, allow_prog[6] = {}, rc = 0;
+	int drop_prog, allow_prog[7] = {}, rc = 0;
 	unsigned long long value;
 	int i = 0;
 
-	for (i = 0; i < 6; i++) {
+	for (i = 0; i < ARRAY_SIZE(allow_prog); i++) {
 		allow_prog[i] = prog_load_cnt(1, 1 << i);
 		if (!allow_prog[i])
 			goto err;
@@ -400,6 +400,58 @@ static int test_multiprog(void)
 	assert(bpf_map_lookup_elem(map_fd, &key, &value) == 0);
 	assert(value == 1 + 2 + 8 + 16);
 
+	/* invalid input */
+
+	DECLARE_LIBBPF_OPTS(bpf_prog_attach_opts, attach_opts,
+		.target_fd		= cg1,
+		.prog_fd		= allow_prog[6],
+		.replace_prog_fd	= allow_prog[0],
+		.type			= BPF_CGROUP_INET_EGRESS,
+		.flags			= BPF_F_ALLOW_MULTI | BPF_F_REPLACE,
+	);
+
+	attach_opts.flags = BPF_F_ALLOW_OVERRIDE | BPF_F_REPLACE;
+	if (!bpf_prog_attach_xattr(&attach_opts)) {
+		log_err("Unexpected success with OVERRIDE | REPLACE");
+		goto err;
+	}
+	assert(errno == EINVAL);
+
+	attach_opts.flags = BPF_F_REPLACE;
+	if (!bpf_prog_attach_xattr(&attach_opts)) {
+		log_err("Unexpected success with REPLACE alone");
+		goto err;
+	}
+	assert(errno == EINVAL);
+	attach_opts.flags = BPF_F_ALLOW_MULTI | BPF_F_REPLACE;
+
+	attach_opts.replace_prog_fd = -1;
+	if (!bpf_prog_attach_xattr(&attach_opts)) {
+		log_err("Unexpected success with bad replace fd");
+		goto err;
+	}
+	assert(errno == EBADF);
+
+	/* replacing a program that is not attached to cgroup should fail  */
+	attach_opts.replace_prog_fd = allow_prog[3];
+	if (!bpf_prog_attach_xattr(&attach_opts)) {
+		log_err("Unexpected success: replace not-attached prog on cg1");
+		goto err;
+	}
+	assert(errno == ENOENT);
+	attach_opts.replace_prog_fd = allow_prog[0];
+
+	/* replace 1st from the top program */
+	if (bpf_prog_attach_xattr(&attach_opts)) {
+		log_err("Replace prog1 with prog7 on cg1");
+		goto err;
+	}
+	value = 0;
+	assert(bpf_map_update_elem(map_fd, &key, &value, 0) == 0);
+	assert(system(PING_CMD) == 0);
+	assert(bpf_map_lookup_elem(map_fd, &key, &value) == 0);
+	assert(value == 64 + 2 + 8 + 16);
+
 	/* detach 3rd from bottom program and ping again */
 	errno = 0;
 	if (!bpf_prog_detach2(0, cg3, BPF_CGROUP_INET_EGRESS)) {
@@ -414,7 +466,7 @@ static int test_multiprog(void)
 	assert(bpf_map_update_elem(map_fd, &key, &value, 0) == 0);
 	assert(system(PING_CMD) == 0);
 	assert(bpf_map_lookup_elem(map_fd, &key, &value) == 0);
-	assert(value == 1 + 2 + 16);
+	assert(value == 64 + 2 + 16);
 
 	/* detach 2nd from bottom program and ping again */
 	if (bpf_prog_detach2(-1, cg4, BPF_CGROUP_INET_EGRESS)) {
@@ -425,7 +477,7 @@ static int test_multiprog(void)
 	assert(bpf_map_update_elem(map_fd, &key, &value, 0) == 0);
 	assert(system(PING_CMD) == 0);
 	assert(bpf_map_lookup_elem(map_fd, &key, &value) == 0);
-	assert(value == 1 + 2 + 4);
+	assert(value == 64 + 2 + 4);
 
 	prog_cnt = 4;
 	assert(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, BPF_F_QUERY_EFFECTIVE,