diff mbox series

[bpf-next,2/3] bpf: sk_msg helpers for probe_* and *current_task*

Message ID 158939787911.17281.887645911866087465.stgit@john-Precision-5820-Tower
State Changes Requested
Delegated to: BPF Maintainers
Headers show
Series bpf: Add sk_msg helpers | expand

Commit Message

John Fastabend May 13, 2020, 7:24 p.m. UTC
Often it is useful when applying policy to know something about the
task. If the administrator has CAP_SYS_ADMIN rights then they can
use kprobe + sk_msg and link the two programs together to accomplish
this. However, this is a bit clunky and also means we have to call
sk_msg program and kprobe program when we could just use a single
program and avoid passing metadata through sk_msg/skb, socket, etc.

To accomplish this add probe_* helpers to sk_msg programs guarded
by a CAP_SYS_ADMIN check. New supported helpers are the following,

 BPF_FUNC_get_current_task
 BPF_FUNC_current_task_under_cgroup
 BPF_FUNC_probe_read_user
 BPF_FUNC_probe_read_kernel
 BPF_FUNC_probe_read
 BPF_FUNC_probe_read_user_str
 BPF_FUNC_probe_read_kernel_str
 BPF_FUNC_probe_read_str

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
---
 kernel/trace/bpf_trace.c |   16 ++++++++--------
 net/core/filter.c        |   34 ++++++++++++++++++++++++++++++++++
 2 files changed, 42 insertions(+), 8 deletions(-)

Comments

Yonghong Song May 14, 2020, 7:21 a.m. UTC | #1
On 5/13/20 12:24 PM, John Fastabend wrote:
> Often it is useful when applying policy to know something about the
> task. If the administrator has CAP_SYS_ADMIN rights then they can
> use kprobe + sk_msg and link the two programs together to accomplish
> this. However, this is a bit clunky and also means we have to call
> sk_msg program and kprobe program when we could just use a single
> program and avoid passing metadata through sk_msg/skb, socket, etc.
> 
> To accomplish this add probe_* helpers to sk_msg programs guarded
> by a CAP_SYS_ADMIN check. New supported helpers are the following,
> 
>   BPF_FUNC_get_current_task
>   BPF_FUNC_current_task_under_cgroup
>   BPF_FUNC_probe_read_user
>   BPF_FUNC_probe_read_kernel
>   BPF_FUNC_probe_read
>   BPF_FUNC_probe_read_user_str
>   BPF_FUNC_probe_read_kernel_str
>   BPF_FUNC_probe_read_str

I think this is a good idea. But this will require the bpf program
to be GPLed; that will probably be okay. For capabilities, it is
CAP_SYS_ADMIN now; in the future, it may be CAP_PERFMON.

Also, do we want to remove BPF_FUNC_probe_read and
BPF_FUNC_probe_read_str from the list? Since we
introduce helpers to new program types, we can deprecate
these two helpers right away.

The new helpers will be subject to new security lockdown
rules, which may have an impact on networking bpf programs
on particular setups.

> 
> Signed-off-by: John Fastabend <john.fastabend@gmail.com>
> ---
>   kernel/trace/bpf_trace.c |   16 ++++++++--------
>   net/core/filter.c        |   34 ++++++++++++++++++++++++++++++++++
>   2 files changed, 42 insertions(+), 8 deletions(-)
> 
> diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
> index d961428..abe6721 100644
> --- a/kernel/trace/bpf_trace.c
> +++ b/kernel/trace/bpf_trace.c
> @@ -147,7 +147,7 @@ BPF_CALL_3(bpf_probe_read_user, void *, dst, u32, size,
>   	return ret;
>   }
>   
> -static const struct bpf_func_proto bpf_probe_read_user_proto = {
> +const struct bpf_func_proto bpf_probe_read_user_proto = {
>   	.func		= bpf_probe_read_user,
>   	.gpl_only	= true,
>   	.ret_type	= RET_INTEGER,
> @@ -167,7 +167,7 @@ BPF_CALL_3(bpf_probe_read_user_str, void *, dst, u32, size,
>   	return ret;
>   }
>   
> -static const struct bpf_func_proto bpf_probe_read_user_str_proto = {
> +const struct bpf_func_proto bpf_probe_read_user_str_proto = {
>   	.func		= bpf_probe_read_user_str,
>   	.gpl_only	= true,
>   	.ret_type	= RET_INTEGER,
> @@ -198,7 +198,7 @@ BPF_CALL_3(bpf_probe_read_kernel, void *, dst, u32, size,
>   	return bpf_probe_read_kernel_common(dst, size, unsafe_ptr, false);
>   }
>   
> -static const struct bpf_func_proto bpf_probe_read_kernel_proto = {
> +const struct bpf_func_proto bpf_probe_read_kernel_proto = {
>   	.func		= bpf_probe_read_kernel,
>   	.gpl_only	= true,
>   	.ret_type	= RET_INTEGER,
> @@ -213,7 +213,7 @@ BPF_CALL_3(bpf_probe_read_compat, void *, dst, u32, size,
>   	return bpf_probe_read_kernel_common(dst, size, unsafe_ptr, true);
>   }
>   
> -static const struct bpf_func_proto bpf_probe_read_compat_proto = {
> +const struct bpf_func_proto bpf_probe_read_compat_proto = {
>   	.func		= bpf_probe_read_compat,
>   	.gpl_only	= true,
>   	.ret_type	= RET_INTEGER,
> @@ -253,7 +253,7 @@ BPF_CALL_3(bpf_probe_read_kernel_str, void *, dst, u32, size,
>   	return bpf_probe_read_kernel_str_common(dst, size, unsafe_ptr, false);
>   }
>   
> -static const struct bpf_func_proto bpf_probe_read_kernel_str_proto = {
> +const struct bpf_func_proto bpf_probe_read_kernel_str_proto = {
>   	.func		= bpf_probe_read_kernel_str,
>   	.gpl_only	= true,
>   	.ret_type	= RET_INTEGER,
> @@ -268,7 +268,7 @@ BPF_CALL_3(bpf_probe_read_compat_str, void *, dst, u32, size,
>   	return bpf_probe_read_kernel_str_common(dst, size, unsafe_ptr, true);
>   }
>   
> -static const struct bpf_func_proto bpf_probe_read_compat_str_proto = {
> +const struct bpf_func_proto bpf_probe_read_compat_str_proto = {
>   	.func		= bpf_probe_read_compat_str,
>   	.gpl_only	= true,
>   	.ret_type	= RET_INTEGER,
> @@ -874,7 +874,7 @@ BPF_CALL_0(bpf_get_current_task)
>   	return (long) current;
>   }
>   
> -static const struct bpf_func_proto bpf_get_current_task_proto = {
> +const struct bpf_func_proto bpf_get_current_task_proto = {
>   	.func		= bpf_get_current_task,
>   	.gpl_only	= true,
>   	.ret_type	= RET_INTEGER,
> @@ -895,7 +895,7 @@ BPF_CALL_2(bpf_current_task_under_cgroup, struct bpf_map *, map, u32, idx)
>   	return task_under_cgroup_hierarchy(current, cgrp);
>   }
>   
> -static const struct bpf_func_proto bpf_current_task_under_cgroup_proto = {
> +const struct bpf_func_proto bpf_current_task_under_cgroup_proto = {
>   	.func           = bpf_current_task_under_cgroup,
>   	.gpl_only       = false,
>   	.ret_type       = RET_INTEGER,
> diff --git a/net/core/filter.c b/net/core/filter.c
> index 45b4a16..d1c4739 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -6362,6 +6362,15 @@ sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
>   const struct bpf_func_proto bpf_msg_redirect_map_proto __weak;
>   const struct bpf_func_proto bpf_msg_redirect_hash_proto __weak;
>   
> +const struct bpf_func_proto bpf_current_task_under_cgroup_proto __weak;
> +const struct bpf_func_proto bpf_get_current_task_proto __weak;
> +const struct bpf_func_proto bpf_probe_read_user_proto __weak;
> +const struct bpf_func_proto bpf_probe_read_user_str_proto __weak;
> +const struct bpf_func_proto bpf_probe_read_kernel_proto __weak;
> +const struct bpf_func_proto bpf_probe_read_kernel_str_proto __weak;
> +const struct bpf_func_proto bpf_probe_read_compat_proto __weak;
> +const struct bpf_func_proto bpf_probe_read_compat_str_proto __weak;
> +
>   static const struct bpf_func_proto *
>   sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
>   {
> @@ -6397,6 +6406,31 @@ sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
>   		return &bpf_get_cgroup_classid_curr_proto;
>   #endif
>   	default:
> +		break;
> +	}
> +
> +	if (!capable(CAP_SYS_ADMIN))
> +		return bpf_base_func_proto(func_id);
> +
> +	/* All helpers below are for CAP_SYS_ADMIN only */
> +	switch (func_id) {
> +	case BPF_FUNC_get_current_task:
> +		return &bpf_get_current_task_proto;
> +	case BPF_FUNC_current_task_under_cgroup:
> +		return &bpf_current_task_under_cgroup_proto;
> +	case BPF_FUNC_probe_read_user:
> +		return &bpf_probe_read_user_proto;
> +	case BPF_FUNC_probe_read_kernel:
> +		return &bpf_probe_read_kernel_proto;
> +	case BPF_FUNC_probe_read:
> +		return &bpf_probe_read_compat_proto;
> +	case BPF_FUNC_probe_read_user_str:
> +		return &bpf_probe_read_user_str_proto;
> +	case BPF_FUNC_probe_read_kernel_str:
> +		return &bpf_probe_read_kernel_str_proto;
> +	case BPF_FUNC_probe_read_str:
> +		return &bpf_probe_read_compat_str_proto;
> +	default:
>   		return bpf_base_func_proto(func_id);

If we can get a consensus here, I think we can even fold all
these bpf helpers (get_current_task, ..., probe_read_kernel_str)
into bpf_base_func_proto, so any bpf program type, including
other networking types, can use them.
Any concerns?

>   	}
>   }
>
Daniel Borkmann May 14, 2020, 8:02 a.m. UTC | #2
On 5/13/20 9:24 PM, John Fastabend wrote:
> Often it is useful when applying policy to know something about the
> task. If the administrator has CAP_SYS_ADMIN rights then they can
> use kprobe + sk_msg and link the two programs together to accomplish
> this. However, this is a bit clunky and also means we have to call
> sk_msg program and kprobe program when we could just use a single
> program and avoid passing metadata through sk_msg/skb, socket, etc.
> 
> To accomplish this add probe_* helpers to sk_msg programs guarded
> by a CAP_SYS_ADMIN check. New supported helpers are the following,
> 
>   BPF_FUNC_get_current_task
>   BPF_FUNC_current_task_under_cgroup
>   BPF_FUNC_probe_read_user
>   BPF_FUNC_probe_read_kernel
>   BPF_FUNC_probe_read
>   BPF_FUNC_probe_read_user_str
>   BPF_FUNC_probe_read_kernel_str
>   BPF_FUNC_probe_read_str

Given the current discussion in the other thread with Linus et al, please
don't add more users for BPF_FUNC_probe_read and BPF_FUNC_probe_read_str
as I'm cooking up a patch to disable them on non-x86, and cleanups from
Christoph would make them less efficient than the *_user/_kernel{,_str}()
versions anyway, so let's only add the latter.

Thanks,
Daniel
John Fastabend May 14, 2020, 1:30 p.m. UTC | #3
Yonghong Song wrote:
> 
> 
> On 5/13/20 12:24 PM, John Fastabend wrote:
> > Often it is useful when applying policy to know something about the
> > task. If the administrator has CAP_SYS_ADMIN rights then they can
> > use kprobe + sk_msg and link the two programs together to accomplish
> > this. However, this is a bit clunky and also means we have to call
> > sk_msg program and kprobe program when we could just use a single
> > program and avoid passing metadata through sk_msg/skb, socket, etc.
> > 
> > To accomplish this add probe_* helpers to sk_msg programs guarded
> > by a CAP_SYS_ADMIN check. New supported helpers are the following,
> > 
> >   BPF_FUNC_get_current_task
> >   BPF_FUNC_current_task_under_cgroup
> >   BPF_FUNC_probe_read_user
> >   BPF_FUNC_probe_read_kernel
> >   BPF_FUNC_probe_read
> >   BPF_FUNC_probe_read_user_str
> >   BPF_FUNC_probe_read_kernel_str
> >   BPF_FUNC_probe_read_str
> 
> I think this is a good idea. But this will require bpf program
> to be GPLed, probably it will be okay. Currently, for capabilities,
> it is CAP_SYS_ADMIN now, in the future, it may be CAP_PERFMON.

Right.

> 
> Also, do we want to remove BPF_FUNC_probe_read and
> BPF_FUNC_probe_read_str from the list? Since we
> introduce helpers to new program types, we can deprecate
> these two helpers right away.

Removed, Daniel had the same comment.

> 
> The new helpers will be subject to new security lockdown
> rules which may have impact on networking bpf programs
> on particular setup.

But only if these helpers are used. If not, everything should
be the same, I think.

> 
> > 
> > Signed-off-by: John Fastabend <john.fastabend@gmail.com>
> > ---

[...]

> > @@ -6397,6 +6406,31 @@ sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
> >   		return &bpf_get_cgroup_classid_curr_proto;
> >   #endif
> >   	default:
> > +		break;
> > +	}
> > +
> > +	if (!capable(CAP_SYS_ADMIN))
> > +		return bpf_base_func_proto(func_id);
> > +
> > +	/* All helpers below are for CAP_SYS_ADMIN only */
> > +	switch (func_id) {
> > +	case BPF_FUNC_get_current_task:
> > +		return &bpf_get_current_task_proto;
> > +	case BPF_FUNC_current_task_under_cgroup:
> > +		return &bpf_current_task_under_cgroup_proto;
> > +	case BPF_FUNC_probe_read_user:
> > +		return &bpf_probe_read_user_proto;
> > +	case BPF_FUNC_probe_read_kernel:
> > +		return &bpf_probe_read_kernel_proto;
> > +	case BPF_FUNC_probe_read:
> > +		return &bpf_probe_read_compat_proto;
> > +	case BPF_FUNC_probe_read_user_str:
> > +		return &bpf_probe_read_user_str_proto;
> > +	case BPF_FUNC_probe_read_kernel_str:
> > +		return &bpf_probe_read_kernel_str_proto;
> > +	case BPF_FUNC_probe_read_str:
> > +		return &bpf_probe_read_compat_str_proto;
> > +	default:
> >   		return bpf_base_func_proto(func_id);
> 
> If we can get a consensus here, I think we can even folding all
> these bpf helpers (get_current_task, ..., probe_read_kernel_str)
> to bpf_base_func_proto, so any bpf program types including
> other networking types can use them.
> Any concerns?
> 

Nothing comes to mind. I'm OK with moving them into base if folks
agree it's useful there. I was putting them where I have a known
use case at the moment, but it doesn't bother me to make them more
widely available.
diff mbox series

Patch

diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index d961428..abe6721 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -147,7 +147,7 @@  BPF_CALL_3(bpf_probe_read_user, void *, dst, u32, size,
 	return ret;
 }
 
-static const struct bpf_func_proto bpf_probe_read_user_proto = {
+const struct bpf_func_proto bpf_probe_read_user_proto = {
 	.func		= bpf_probe_read_user,
 	.gpl_only	= true,
 	.ret_type	= RET_INTEGER,
@@ -167,7 +167,7 @@  BPF_CALL_3(bpf_probe_read_user_str, void *, dst, u32, size,
 	return ret;
 }
 
-static const struct bpf_func_proto bpf_probe_read_user_str_proto = {
+const struct bpf_func_proto bpf_probe_read_user_str_proto = {
 	.func		= bpf_probe_read_user_str,
 	.gpl_only	= true,
 	.ret_type	= RET_INTEGER,
@@ -198,7 +198,7 @@  BPF_CALL_3(bpf_probe_read_kernel, void *, dst, u32, size,
 	return bpf_probe_read_kernel_common(dst, size, unsafe_ptr, false);
 }
 
-static const struct bpf_func_proto bpf_probe_read_kernel_proto = {
+const struct bpf_func_proto bpf_probe_read_kernel_proto = {
 	.func		= bpf_probe_read_kernel,
 	.gpl_only	= true,
 	.ret_type	= RET_INTEGER,
@@ -213,7 +213,7 @@  BPF_CALL_3(bpf_probe_read_compat, void *, dst, u32, size,
 	return bpf_probe_read_kernel_common(dst, size, unsafe_ptr, true);
 }
 
-static const struct bpf_func_proto bpf_probe_read_compat_proto = {
+const struct bpf_func_proto bpf_probe_read_compat_proto = {
 	.func		= bpf_probe_read_compat,
 	.gpl_only	= true,
 	.ret_type	= RET_INTEGER,
@@ -253,7 +253,7 @@  BPF_CALL_3(bpf_probe_read_kernel_str, void *, dst, u32, size,
 	return bpf_probe_read_kernel_str_common(dst, size, unsafe_ptr, false);
 }
 
-static const struct bpf_func_proto bpf_probe_read_kernel_str_proto = {
+const struct bpf_func_proto bpf_probe_read_kernel_str_proto = {
 	.func		= bpf_probe_read_kernel_str,
 	.gpl_only	= true,
 	.ret_type	= RET_INTEGER,
@@ -268,7 +268,7 @@  BPF_CALL_3(bpf_probe_read_compat_str, void *, dst, u32, size,
 	return bpf_probe_read_kernel_str_common(dst, size, unsafe_ptr, true);
 }
 
-static const struct bpf_func_proto bpf_probe_read_compat_str_proto = {
+const struct bpf_func_proto bpf_probe_read_compat_str_proto = {
 	.func		= bpf_probe_read_compat_str,
 	.gpl_only	= true,
 	.ret_type	= RET_INTEGER,
@@ -874,7 +874,7 @@  BPF_CALL_0(bpf_get_current_task)
 	return (long) current;
 }
 
-static const struct bpf_func_proto bpf_get_current_task_proto = {
+const struct bpf_func_proto bpf_get_current_task_proto = {
 	.func		= bpf_get_current_task,
 	.gpl_only	= true,
 	.ret_type	= RET_INTEGER,
@@ -895,7 +895,7 @@  BPF_CALL_2(bpf_current_task_under_cgroup, struct bpf_map *, map, u32, idx)
 	return task_under_cgroup_hierarchy(current, cgrp);
 }
 
-static const struct bpf_func_proto bpf_current_task_under_cgroup_proto = {
+const struct bpf_func_proto bpf_current_task_under_cgroup_proto = {
 	.func           = bpf_current_task_under_cgroup,
 	.gpl_only       = false,
 	.ret_type       = RET_INTEGER,
diff --git a/net/core/filter.c b/net/core/filter.c
index 45b4a16..d1c4739 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -6362,6 +6362,15 @@  sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 const struct bpf_func_proto bpf_msg_redirect_map_proto __weak;
 const struct bpf_func_proto bpf_msg_redirect_hash_proto __weak;
 
+const struct bpf_func_proto bpf_current_task_under_cgroup_proto __weak;
+const struct bpf_func_proto bpf_get_current_task_proto __weak;
+const struct bpf_func_proto bpf_probe_read_user_proto __weak;
+const struct bpf_func_proto bpf_probe_read_user_str_proto __weak;
+const struct bpf_func_proto bpf_probe_read_kernel_proto __weak;
+const struct bpf_func_proto bpf_probe_read_kernel_str_proto __weak;
+const struct bpf_func_proto bpf_probe_read_compat_proto __weak;
+const struct bpf_func_proto bpf_probe_read_compat_str_proto __weak;
+
 static const struct bpf_func_proto *
 sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 {
@@ -6397,6 +6406,31 @@  sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_get_cgroup_classid_curr_proto;
 #endif
 	default:
+		break;
+	}
+
+	if (!capable(CAP_SYS_ADMIN))
+		return bpf_base_func_proto(func_id);
+
+	/* All helpers below are for CAP_SYS_ADMIN only */
+	switch (func_id) {
+	case BPF_FUNC_get_current_task:
+		return &bpf_get_current_task_proto;
+	case BPF_FUNC_current_task_under_cgroup:
+		return &bpf_current_task_under_cgroup_proto;
+	case BPF_FUNC_probe_read_user:
+		return &bpf_probe_read_user_proto;
+	case BPF_FUNC_probe_read_kernel:
+		return &bpf_probe_read_kernel_proto;
+	case BPF_FUNC_probe_read:
+		return &bpf_probe_read_compat_proto;
+	case BPF_FUNC_probe_read_user_str:
+		return &bpf_probe_read_user_str_proto;
+	case BPF_FUNC_probe_read_kernel_str:
+		return &bpf_probe_read_kernel_str_proto;
+	case BPF_FUNC_probe_read_str:
+		return &bpf_probe_read_compat_str_proto;
+	default:
 		return bpf_base_func_proto(func_id);
 	}
 }