Message ID | 20190528182946.3633-3-sdf@google.com |
---|---|
State | Superseded |
Headers | show |
Series | [bpf-next,v3,1/4] bpf: remove __rcu annotations from bpf_prog_array | expand |
On Tue, May 28, 2019 at 11:29:45AM -0700, Stanislav Fomichev wrote: > Now that we don't have __rcu markers on the bpf_prog_array helpers, > let's use proper rcu_dereference_protected to obtain array pointer > under mutex. > > We also don't need __rcu annotations on cgroup_bpf.inactive since > it's not read/updated concurrently. > > v3: > * amend cgroup_rcu_dereference to include percpu_ref_is_dying; > cgroup_bpf is now reference counted and we don't hold cgroup_mutex > anymore in cgroup_bpf_release > > v2: > * replace xchg with rcu_swap_protected > > Cc: Roman Gushchin <guro@fb.com> > Signed-off-by: Stanislav Fomichev <sdf@google.com> > --- > include/linux/bpf-cgroup.h | 2 +- > kernel/bpf/cgroup.c | 32 +++++++++++++++++++++----------- > 2 files changed, 22 insertions(+), 12 deletions(-) > > diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h > index 9f100fc422c3..b631ee75762d 100644 > --- a/include/linux/bpf-cgroup.h > +++ b/include/linux/bpf-cgroup.h > @@ -72,7 +72,7 @@ struct cgroup_bpf { > u32 flags[MAX_BPF_ATTACH_TYPE]; > > /* temp storage for effective prog array used by prog_attach/detach */ > - struct bpf_prog_array __rcu *inactive; > + struct bpf_prog_array *inactive; > > /* reference counter used to detach bpf programs after cgroup removal */ > struct percpu_ref refcnt; > diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c > index d995edbe816d..118b70175dd9 100644 > --- a/kernel/bpf/cgroup.c > +++ b/kernel/bpf/cgroup.c > @@ -22,6 +22,13 @@ > DEFINE_STATIC_KEY_FALSE(cgroup_bpf_enabled_key); > EXPORT_SYMBOL(cgroup_bpf_enabled_key); > > +#define cgroup_rcu_dereference(cgrp, p) \ > + rcu_dereference_protected(p, lockdep_is_held(&cgroup_mutex) || \ > + percpu_ref_is_dying(&cgrp->bpf.refcnt)) > + > +#define cgroup_rcu_swap(rcu_ptr, ptr) \ > + rcu_swap_protected(rcu_ptr, ptr, lockdep_is_held(&cgroup_mutex)) > + > void cgroup_bpf_offline(struct cgroup *cgrp) > { > cgroup_get(cgrp); > @@ -38,6 +45,7 @@ static void cgroup_bpf_release(struct 
work_struct *work) > struct cgroup *cgrp = container_of(work, struct cgroup, > bpf.release_work); > enum bpf_cgroup_storage_type stype; > + struct bpf_prog_array *old_array; > unsigned int type; > > for (type = 0; type < ARRAY_SIZE(cgrp->bpf.progs); type++) { > @@ -54,7 +62,9 @@ static void cgroup_bpf_release(struct work_struct *work) > kfree(pl); > static_branch_dec(&cgroup_bpf_enabled_key); > } > - bpf_prog_array_free(cgrp->bpf.effective[type]); > + old_array = cgroup_rcu_dereference(cgrp, > + cgrp->bpf.effective[type]); > + bpf_prog_array_free(old_array); > } > > percpu_ref_exit(&cgrp->bpf.refcnt); > @@ -126,7 +136,7 @@ static bool hierarchy_allows_attach(struct cgroup *cgrp, > */ > static int compute_effective_progs(struct cgroup *cgrp, > enum bpf_attach_type type, > - struct bpf_prog_array __rcu **array) > + struct bpf_prog_array **array) > { > enum bpf_cgroup_storage_type stype; > struct bpf_prog_array *progs; > @@ -164,17 +174,15 @@ static int compute_effective_progs(struct cgroup *cgrp, > } > } while ((p = cgroup_parent(p))); > > - rcu_assign_pointer(*array, progs); > + *array = progs; > return 0; > } > > static void activate_effective_progs(struct cgroup *cgrp, > enum bpf_attach_type type, > - struct bpf_prog_array __rcu *array) > + struct bpf_prog_array *old_array) > { > - struct bpf_prog_array __rcu *old_array; > - > - old_array = xchg(&cgrp->bpf.effective[type], array); > + cgroup_rcu_swap(cgrp->bpf.effective[type], old_array); > /* free prog array after grace period, since __cgroup_bpf_run_*() > * might be still walking the array > */ > @@ -191,7 +199,7 @@ int cgroup_bpf_inherit(struct cgroup *cgrp) > * that array below is variable length > */ > #define NR ARRAY_SIZE(cgrp->bpf.effective) > - struct bpf_prog_array __rcu *arrays[NR] = {}; > + struct bpf_prog_array *arrays[NR] = {}; > int ret, i; > > ret = percpu_ref_init(&cgrp->bpf.refcnt, cgroup_bpf_release_fn, 0, > @@ -477,10 +485,13 @@ int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr 
*attr, > enum bpf_attach_type type = attr->query.attach_type; > struct list_head *progs = &cgrp->bpf.progs[type]; > u32 flags = cgrp->bpf.flags[type]; > + struct bpf_prog_array *effective; > int cnt, ret = 0, i; > > + effective = cgroup_rcu_dereference(cgrp, cgrp->bpf.effective[type]); > + > if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE) > - cnt = bpf_prog_array_length(cgrp->bpf.effective[type]); > + cnt = bpf_prog_array_length(effective); > else > cnt = prog_list_length(progs); > > @@ -497,8 +508,7 @@ int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, > } > > if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE) { > - return bpf_prog_array_copy_to_user(cgrp->bpf.effective[type], > - prog_ids, cnt); > + return bpf_prog_array_copy_to_user(effective, prog_ids, cnt); > } else { > struct bpf_prog_list *pl; > u32 id; > -- > 2.22.0.rc1.257.g3120a18244-goog > Acked-by: Roman Gushchin <guro@fb.com> Thanks!
On Tue, May 28, 2019 at 11:29:45AM -0700, Stanislav Fomichev wrote: > Now that we don't have __rcu markers on the bpf_prog_array helpers, > let's use proper rcu_dereference_protected to obtain array pointer > under mutex. > > We also don't need __rcu annotations on cgroup_bpf.inactive since > it's not read/updated concurrently. > > v3: > * amend cgroup_rcu_dereference to include percpu_ref_is_dying; > cgroup_bpf is now reference counted and we don't hold cgroup_mutex > anymore in cgroup_bpf_release > > v2: > * replace xchg with rcu_swap_protected > > Cc: Roman Gushchin <guro@fb.com> > Signed-off-by: Stanislav Fomichev <sdf@google.com> > --- > include/linux/bpf-cgroup.h | 2 +- > kernel/bpf/cgroup.c | 32 +++++++++++++++++++++----------- > 2 files changed, 22 insertions(+), 12 deletions(-) > > diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h > index 9f100fc422c3..b631ee75762d 100644 > --- a/include/linux/bpf-cgroup.h > +++ b/include/linux/bpf-cgroup.h > @@ -72,7 +72,7 @@ struct cgroup_bpf { > u32 flags[MAX_BPF_ATTACH_TYPE]; > > /* temp storage for effective prog array used by prog_attach/detach */ > - struct bpf_prog_array __rcu *inactive; > + struct bpf_prog_array *inactive; > > /* reference counter used to detach bpf programs after cgroup removal */ > struct percpu_ref refcnt; > diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c > index d995edbe816d..118b70175dd9 100644 > --- a/kernel/bpf/cgroup.c > +++ b/kernel/bpf/cgroup.c > @@ -22,6 +22,13 @@ > DEFINE_STATIC_KEY_FALSE(cgroup_bpf_enabled_key); > EXPORT_SYMBOL(cgroup_bpf_enabled_key); > > +#define cgroup_rcu_dereference(cgrp, p) \ > + rcu_dereference_protected(p, lockdep_is_held(&cgroup_mutex) || \ > + percpu_ref_is_dying(&cgrp->bpf.refcnt)) Some comments why percpu_ref_is_dying(&cgrp->bpf.refcnt) is enough here will be appreciated. Thanks!
On 05/28, Roman Gushchin wrote: > On Tue, May 28, 2019 at 11:29:45AM -0700, Stanislav Fomichev wrote: > > Now that we don't have __rcu markers on the bpf_prog_array helpers, > > let's use proper rcu_dereference_protected to obtain array pointer > > under mutex. > > > > We also don't need __rcu annotations on cgroup_bpf.inactive since > > it's not read/updated concurrently. > > > > v3: > > * amend cgroup_rcu_dereference to include percpu_ref_is_dying; > > cgroup_bpf is now reference counted and we don't hold cgroup_mutex > > anymore in cgroup_bpf_release > > > > v2: > > * replace xchg with rcu_swap_protected > > > > Cc: Roman Gushchin <guro@fb.com> > > Signed-off-by: Stanislav Fomichev <sdf@google.com> > > --- > > include/linux/bpf-cgroup.h | 2 +- > > kernel/bpf/cgroup.c | 32 +++++++++++++++++++++----------- > > 2 files changed, 22 insertions(+), 12 deletions(-) > > > > diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h > > index 9f100fc422c3..b631ee75762d 100644 > > --- a/include/linux/bpf-cgroup.h > > +++ b/include/linux/bpf-cgroup.h > > @@ -72,7 +72,7 @@ struct cgroup_bpf { > > u32 flags[MAX_BPF_ATTACH_TYPE]; > > > > /* temp storage for effective prog array used by prog_attach/detach */ > > - struct bpf_prog_array __rcu *inactive; > > + struct bpf_prog_array *inactive; > > > > /* reference counter used to detach bpf programs after cgroup removal */ > > struct percpu_ref refcnt; > > diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c > > index d995edbe816d..118b70175dd9 100644 > > --- a/kernel/bpf/cgroup.c > > +++ b/kernel/bpf/cgroup.c > > @@ -22,6 +22,13 @@ > > DEFINE_STATIC_KEY_FALSE(cgroup_bpf_enabled_key); > > EXPORT_SYMBOL(cgroup_bpf_enabled_key); > > > > +#define cgroup_rcu_dereference(cgrp, p) \ > > + rcu_dereference_protected(p, lockdep_is_held(&cgroup_mutex) || \ > > + percpu_ref_is_dying(&cgrp->bpf.refcnt)) > > Some comments why percpu_ref_is_dying(&cgrp->bpf.refcnt) is enough here will > be appreciated. 
I was actually debating whether to just use raw rcu_dereference_protected(p, lockdep_is_held()) in __cgroup_bpf_query and rcu_dereference_protected(p, percpu_ref_is_dying()) in cgroup_bpf_release instead of having a cgroup_rcu_dereference that covers both cases. Maybe that would make it clearer (and wouldn't require any comment)?
On Tue, May 28, 2019 at 01:16:46PM -0700, Stanislav Fomichev wrote: > On 05/28, Roman Gushchin wrote: > > On Tue, May 28, 2019 at 11:29:45AM -0700, Stanislav Fomichev wrote: > > > Now that we don't have __rcu markers on the bpf_prog_array helpers, > > > let's use proper rcu_dereference_protected to obtain array pointer > > > under mutex. > > > > > > We also don't need __rcu annotations on cgroup_bpf.inactive since > > > it's not read/updated concurrently. > > > > > > v3: > > > * amend cgroup_rcu_dereference to include percpu_ref_is_dying; > > > cgroup_bpf is now reference counted and we don't hold cgroup_mutex > > > anymore in cgroup_bpf_release > > > > > > v2: > > > * replace xchg with rcu_swap_protected > > > > > > Cc: Roman Gushchin <guro@fb.com> > > > Signed-off-by: Stanislav Fomichev <sdf@google.com> > > > --- > > > include/linux/bpf-cgroup.h | 2 +- > > > kernel/bpf/cgroup.c | 32 +++++++++++++++++++++----------- > > > 2 files changed, 22 insertions(+), 12 deletions(-) > > > > > > diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h > > > index 9f100fc422c3..b631ee75762d 100644 > > > --- a/include/linux/bpf-cgroup.h > > > +++ b/include/linux/bpf-cgroup.h > > > @@ -72,7 +72,7 @@ struct cgroup_bpf { > > > u32 flags[MAX_BPF_ATTACH_TYPE]; > > > > > > /* temp storage for effective prog array used by prog_attach/detach */ > > > - struct bpf_prog_array __rcu *inactive; > > > + struct bpf_prog_array *inactive; > > > > > > /* reference counter used to detach bpf programs after cgroup removal */ > > > struct percpu_ref refcnt; > > > diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c > > > index d995edbe816d..118b70175dd9 100644 > > > --- a/kernel/bpf/cgroup.c > > > +++ b/kernel/bpf/cgroup.c > > > @@ -22,6 +22,13 @@ > > > DEFINE_STATIC_KEY_FALSE(cgroup_bpf_enabled_key); > > > EXPORT_SYMBOL(cgroup_bpf_enabled_key); > > > > > > +#define cgroup_rcu_dereference(cgrp, p) \ > > > + rcu_dereference_protected(p, lockdep_is_held(&cgroup_mutex) || \ > > > + 
percpu_ref_is_dying(&cgrp->bpf.refcnt)) > > > > Some comments why percpu_ref_is_dying(&cgrp->bpf.refcnt) is enough here will > > be appreciated. > I was actually debating whether to just use raw > rcu_dereference_protected(p, lockdep_is_held()) in __cgroup_bpf_query and > rcu_dereference_protected(p, percpu_ref_is_dying()) in cgroup_bpf_release > instead of having a cgroup_rcu_dereference which covers both cases. > > Maybe that should make it more clear (and doesn't require any comment)? Yeah, this makes total sense to me.
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 9f100fc422c3..b631ee75762d 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -72,7 +72,7 @@ struct cgroup_bpf { u32 flags[MAX_BPF_ATTACH_TYPE]; /* temp storage for effective prog array used by prog_attach/detach */ - struct bpf_prog_array __rcu *inactive; + struct bpf_prog_array *inactive; /* reference counter used to detach bpf programs after cgroup removal */ struct percpu_ref refcnt; diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index d995edbe816d..118b70175dd9 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -22,6 +22,13 @@ DEFINE_STATIC_KEY_FALSE(cgroup_bpf_enabled_key); EXPORT_SYMBOL(cgroup_bpf_enabled_key); +#define cgroup_rcu_dereference(cgrp, p) \ + rcu_dereference_protected(p, lockdep_is_held(&cgroup_mutex) || \ + percpu_ref_is_dying(&cgrp->bpf.refcnt)) + +#define cgroup_rcu_swap(rcu_ptr, ptr) \ + rcu_swap_protected(rcu_ptr, ptr, lockdep_is_held(&cgroup_mutex)) + void cgroup_bpf_offline(struct cgroup *cgrp) { cgroup_get(cgrp); @@ -38,6 +45,7 @@ static void cgroup_bpf_release(struct work_struct *work) struct cgroup *cgrp = container_of(work, struct cgroup, bpf.release_work); enum bpf_cgroup_storage_type stype; + struct bpf_prog_array *old_array; unsigned int type; for (type = 0; type < ARRAY_SIZE(cgrp->bpf.progs); type++) { @@ -54,7 +62,9 @@ static void cgroup_bpf_release(struct work_struct *work) kfree(pl); static_branch_dec(&cgroup_bpf_enabled_key); } - bpf_prog_array_free(cgrp->bpf.effective[type]); + old_array = cgroup_rcu_dereference(cgrp, + cgrp->bpf.effective[type]); + bpf_prog_array_free(old_array); } percpu_ref_exit(&cgrp->bpf.refcnt); @@ -126,7 +136,7 @@ static bool hierarchy_allows_attach(struct cgroup *cgrp, */ static int compute_effective_progs(struct cgroup *cgrp, enum bpf_attach_type type, - struct bpf_prog_array __rcu **array) + struct bpf_prog_array **array) { enum bpf_cgroup_storage_type stype; struct bpf_prog_array 
*progs; @@ -164,17 +174,15 @@ static int compute_effective_progs(struct cgroup *cgrp, } } while ((p = cgroup_parent(p))); - rcu_assign_pointer(*array, progs); + *array = progs; return 0; } static void activate_effective_progs(struct cgroup *cgrp, enum bpf_attach_type type, - struct bpf_prog_array __rcu *array) + struct bpf_prog_array *old_array) { - struct bpf_prog_array __rcu *old_array; - - old_array = xchg(&cgrp->bpf.effective[type], array); + cgroup_rcu_swap(cgrp->bpf.effective[type], old_array); /* free prog array after grace period, since __cgroup_bpf_run_*() * might be still walking the array */ @@ -191,7 +199,7 @@ int cgroup_bpf_inherit(struct cgroup *cgrp) * that array below is variable length */ #define NR ARRAY_SIZE(cgrp->bpf.effective) - struct bpf_prog_array __rcu *arrays[NR] = {}; + struct bpf_prog_array *arrays[NR] = {}; int ret, i; ret = percpu_ref_init(&cgrp->bpf.refcnt, cgroup_bpf_release_fn, 0, @@ -477,10 +485,13 @@ int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, enum bpf_attach_type type = attr->query.attach_type; struct list_head *progs = &cgrp->bpf.progs[type]; u32 flags = cgrp->bpf.flags[type]; + struct bpf_prog_array *effective; int cnt, ret = 0, i; + effective = cgroup_rcu_dereference(cgrp, cgrp->bpf.effective[type]); + if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE) - cnt = bpf_prog_array_length(cgrp->bpf.effective[type]); + cnt = bpf_prog_array_length(effective); else cnt = prog_list_length(progs); @@ -497,8 +508,7 @@ int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, } if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE) { - return bpf_prog_array_copy_to_user(cgrp->bpf.effective[type], - prog_ids, cnt); + return bpf_prog_array_copy_to_user(effective, prog_ids, cnt); } else { struct bpf_prog_list *pl; u32 id;
Now that we don't have __rcu markers on the bpf_prog_array helpers, let's use proper rcu_dereference_protected to obtain array pointer under mutex. We also don't need __rcu annotations on cgroup_bpf.inactive since it's not read/updated concurrently. v3: * amend cgroup_rcu_dereference to include percpu_ref_is_dying; cgroup_bpf is now reference counted and we don't hold cgroup_mutex anymore in cgroup_bpf_release v2: * replace xchg with rcu_swap_protected Cc: Roman Gushchin <guro@fb.com> Signed-off-by: Stanislav Fomichev <sdf@google.com> --- include/linux/bpf-cgroup.h | 2 +- kernel/bpf/cgroup.c | 32 +++++++++++++++++++++----------- 2 files changed, 22 insertions(+), 12 deletions(-)