diff mbox series

[bpf-next,04/11] bpf: Add PTR_TO_SOCKET verifier type

Message ID 20180912003640.28316-5-joe@wand.net.nz
State Changes Requested, archived
Delegated to: BPF Maintainers
Headers show
Series Add socket lookup support | expand

Commit Message

Joe Stringer Sept. 12, 2018, 12:36 a.m. UTC
Teach the verifier a little bit about a new type of pointer, a
PTR_TO_SOCKET. This pointer type is accessed from BPF through the
'struct bpf_sock' structure.

Signed-off-by: Joe Stringer <joe@wand.net.nz>
---
 include/linux/bpf.h          |  17 +++++
 include/linux/bpf_verifier.h |   2 +
 kernel/bpf/verifier.c        | 125 ++++++++++++++++++++++++++++++-----
 net/core/filter.c            |  30 +++++----
 4 files changed, 147 insertions(+), 27 deletions(-)

Comments

Alexei Starovoitov Sept. 12, 2018, 10:50 p.m. UTC | #1
On Tue, Sep 11, 2018 at 05:36:33PM -0700, Joe Stringer wrote:
> Teach the verifier a little bit about a new type of pointer, a
> PTR_TO_SOCKET. This pointer type is accessed from BPF through the
> 'struct bpf_sock' structure.
> 
> Signed-off-by: Joe Stringer <joe@wand.net.nz>
> ---
>  include/linux/bpf.h          |  17 +++++
>  include/linux/bpf_verifier.h |   2 +
>  kernel/bpf/verifier.c        | 125 ++++++++++++++++++++++++++++++-----
>  net/core/filter.c            |  30 +++++----
>  4 files changed, 147 insertions(+), 27 deletions(-)
> 
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index 523481a3471b..6ec93f3d66dd 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -154,6 +154,7 @@ enum bpf_arg_type {
>  
>  	ARG_PTR_TO_CTX,		/* pointer to context */
>  	ARG_ANYTHING,		/* any (initialized) argument is ok */
> +	ARG_PTR_TO_SOCKET,	/* pointer to bpf_sock */
>  };
>  
>  /* type of values returned from helper functions */
> @@ -162,6 +163,7 @@ enum bpf_return_type {
>  	RET_VOID,			/* function doesn't return anything */
>  	RET_PTR_TO_MAP_VALUE,		/* returns a pointer to map elem value */
>  	RET_PTR_TO_MAP_VALUE_OR_NULL,	/* returns a pointer to map elem value or NULL */
> +	RET_PTR_TO_SOCKET_OR_NULL,	/* returns a pointer to a socket or NULL */
>  };
>  
>  /* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs
> @@ -212,6 +214,8 @@ enum bpf_reg_type {
>  	PTR_TO_PACKET_META,	 /* skb->data - meta_len */
>  	PTR_TO_PACKET,		 /* reg points to skb->data */
>  	PTR_TO_PACKET_END,	 /* skb->data + headlen */
> +	PTR_TO_SOCKET,		 /* reg points to struct bpf_sock */
> +	PTR_TO_SOCKET_OR_NULL,	 /* reg points to struct bpf_sock or NULL */
>  };
>  
>  /* The information passed from prog-specific *_is_valid_access
> @@ -334,6 +338,11 @@ const struct bpf_func_proto *bpf_get_trace_printk_proto(void);
>  
>  typedef unsigned long (*bpf_ctx_copy_t)(void *dst, const void *src,
>  					unsigned long off, unsigned long len);
> +typedef u32 (*bpf_convert_ctx_access_t)(enum bpf_access_type type,
> +					const struct bpf_insn *src,
> +					struct bpf_insn *dst,
> +					struct bpf_prog *prog,
> +					u32 *target_size);
>  
>  u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
>  		     void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy);
> @@ -827,4 +836,12 @@ extern const struct bpf_func_proto bpf_get_local_storage_proto;
>  void bpf_user_rnd_init_once(void);
>  u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
>  
> +bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type,
> +			      struct bpf_insn_access_aux *info);
> +u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
> +			        const struct bpf_insn *si,
> +			        struct bpf_insn *insn_buf,
> +			        struct bpf_prog *prog,
> +			        u32 *target_size);
> +
>  #endif /* _LINUX_BPF_H */
> diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
> index af262b97f586..23a2b17bfd75 100644
> --- a/include/linux/bpf_verifier.h
> +++ b/include/linux/bpf_verifier.h
> @@ -58,6 +58,8 @@ struct bpf_reg_state {
>  	 * offset, so they can share range knowledge.
>  	 * For PTR_TO_MAP_VALUE_OR_NULL this is used to share which map value we
>  	 * came from, when one is tested for != NULL.
> +	 * For PTR_TO_SOCKET this is used to share which pointers retain the
> +	 * same reference to the socket, to determine proper reference freeing.
>  	 */
>  	u32 id;
>  	/* For scalar types (SCALAR_VALUE), this represents our knowledge of
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index f2357c8c90de..111e031cf65d 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -80,8 +80,8 @@ static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
>   * (like pointer plus pointer becomes SCALAR_VALUE type)
>   *
>   * When verifier sees load or store instructions the type of base register
> - * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK. These are three pointer
> - * types recognized by check_mem_access() function.
> + * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK, PTR_TO_SOCKET. These are
> + * four pointer types recognized by check_mem_access() function.
>   *
>   * PTR_TO_MAP_VALUE means that this register is pointing to 'map element value'
>   * and the range of [ptr, ptr + map's value_size) is accessible.
> @@ -266,6 +266,8 @@ static const char * const reg_type_str[] = {
>  	[PTR_TO_PACKET]		= "pkt",
>  	[PTR_TO_PACKET_META]	= "pkt_meta",
>  	[PTR_TO_PACKET_END]	= "pkt_end",
> +	[PTR_TO_SOCKET]		= "sock",
> +	[PTR_TO_SOCKET_OR_NULL] = "sock_or_null",
>  };
>  
>  static char slot_type_char[] = {
> @@ -971,6 +973,8 @@ static bool is_spillable_regtype(enum bpf_reg_type type)
>  	case PTR_TO_PACKET_META:
>  	case PTR_TO_PACKET_END:
>  	case CONST_PTR_TO_MAP:
> +	case PTR_TO_SOCKET:
> +	case PTR_TO_SOCKET_OR_NULL:
>  		return true;
>  	default:
>  		return false;
> @@ -1326,6 +1330,28 @@ static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off,
>  	return -EACCES;
>  }
>  
> +static int check_sock_access(struct bpf_verifier_env *env, u32 regno, int off,
> +			     int size, enum bpf_access_type t)
> +{
> +	struct bpf_reg_state *regs = cur_regs(env);
> +	struct bpf_reg_state *reg = &regs[regno];
> +	struct bpf_insn_access_aux info;
> +
> +	if (reg->smin_value < 0) {
> +		verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
> +			regno);
> +		return -EACCES;
> +	}
> +
> +	if (!bpf_sock_is_valid_access(off, size, t, &info)) {
> +		verbose(env, "invalid bpf_sock_ops access off=%d size=%d\n",
> +			off, size);
> +		return -EACCES;
> +	}
> +
> +	return 0;
> +}
> +
>  static bool __is_pointer_value(bool allow_ptr_leaks,
>  			       const struct bpf_reg_state *reg)
>  {
> @@ -1441,6 +1467,9 @@ static int check_ptr_alignment(struct bpf_verifier_env *env,
>  		 */
>  		strict = true;
>  		break;
> +	case PTR_TO_SOCKET:
> +		pointer_desc = "sock ";
> +		break;
>  	default:
>  		break;
>  	}
> @@ -1697,6 +1726,16 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
>  		err = check_packet_access(env, regno, off, size, false);
>  		if (!err && t == BPF_READ && value_regno >= 0)
>  			mark_reg_unknown(env, regs, value_regno);
> +
> +	} else if (reg->type == PTR_TO_SOCKET) {
> +		if (t == BPF_WRITE) {
> +			verbose(env, "cannot write into socket\n");
> +			return -EACCES;
> +		}
> +		err = check_sock_access(env, regno, off, size, t);
> +		if (!err && value_regno >= 0)
> +			mark_reg_unknown(env, regs, value_regno);
> +
>  	} else {
>  		verbose(env, "R%d invalid mem access '%s'\n", regno,
>  			reg_type_str[reg->type]);
> @@ -1918,6 +1957,10 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
>  		err = check_ctx_reg(env, reg, regno);
>  		if (err < 0)
>  			return err;
> +	} else if (arg_type == ARG_PTR_TO_SOCKET) {
> +		expected_type = PTR_TO_SOCKET;
> +		if (type != expected_type)
> +			goto err_type;
>  	} else if (arg_type_is_mem_ptr(arg_type)) {
>  		expected_type = PTR_TO_STACK;
>  		/* One exception here. In case function allows for NULL to be
> @@ -2511,6 +2554,10 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
>  		}
>  		regs[BPF_REG_0].map_ptr = meta.map_ptr;
>  		regs[BPF_REG_0].id = ++env->id_gen;
> +	} else if (fn->ret_type == RET_PTR_TO_SOCKET_OR_NULL) {
> +		mark_reg_known_zero(env, regs, BPF_REG_0);
> +		regs[BPF_REG_0].type = PTR_TO_SOCKET_OR_NULL;
> +		regs[BPF_REG_0].id = ++env->id_gen;
>  	} else {
>  		verbose(env, "unknown return type %d of func %s#%d\n",
>  			fn->ret_type, func_id_name(func_id), func_id);
> @@ -2648,6 +2695,8 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
>  		return -EACCES;
>  	case CONST_PTR_TO_MAP:
>  	case PTR_TO_PACKET_END:
> +	case PTR_TO_SOCKET:
> +	case PTR_TO_SOCKET_OR_NULL:
>  		verbose(env, "R%d pointer arithmetic on %s prohibited\n",
>  			dst, reg_type_str[ptr_reg->type]);
>  		return -EACCES;
> @@ -3595,6 +3644,8 @@ static void mark_ptr_or_null_reg(struct bpf_reg_state *reg, u32 id,
>  			} else {
>  				reg->type = PTR_TO_MAP_VALUE;
>  			}
> +		} else if (reg->type == PTR_TO_SOCKET_OR_NULL) {
> +			reg->type = PTR_TO_SOCKET;
>  		}
>  		/* We don't need id from this point onwards anymore, thus we
>  		 * should better reset it, so that state pruning has chances
> @@ -4369,6 +4420,8 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
>  	case PTR_TO_CTX:
>  	case CONST_PTR_TO_MAP:
>  	case PTR_TO_PACKET_END:
> +	case PTR_TO_SOCKET:
> +	case PTR_TO_SOCKET_OR_NULL:
>  		/* Only valid matches are exact, which memcmp() above
>  		 * would have accepted
>  		 */
> @@ -4646,6 +4699,37 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
>  	return 0;
>  }
>  
> +/* Return true if it's OK to have the same insn return a different type. */
> +static bool reg_type_mismatch_ok(enum bpf_reg_type type)
> +{
> +	switch (type) {
> +	case PTR_TO_CTX:
> +	case PTR_TO_SOCKET:
> +	case PTR_TO_SOCKET_OR_NULL:
> +		return false;
> +	default:
> +		return true;
> +	}
> +}
> +
> +/* If an instruction was previously used with particular pointer types, then we
> + * need to be careful to avoid cases such as the below, where it may be ok
> + * for one branch accessing the pointer, but not ok for the other branch:
> + *
> + * R1 = sock_ptr
> + * goto X;
> + * ...
> + * R1 = some_other_valid_ptr;
> + * goto X;
> + * ...
> + * R2 = *(u32 *)(R1 + 0);
> + */
> +static bool reg_type_mismatch(enum bpf_reg_type src, enum bpf_reg_type prev)
> +{
> +	return src != prev && (!reg_type_mismatch_ok(src) ||
> +			       !reg_type_mismatch_ok(prev));
> +}
> +
>  static int do_check(struct bpf_verifier_env *env)
>  {
>  	struct bpf_verifier_state *state;
> @@ -4778,9 +4862,7 @@ static int do_check(struct bpf_verifier_env *env)
>  				 */
>  				*prev_src_type = src_reg_type;
>  
> -			} else if (src_reg_type != *prev_src_type &&
> -				   (src_reg_type == PTR_TO_CTX ||
> -				    *prev_src_type == PTR_TO_CTX)) {
> +			} else if (reg_type_mismatch(src_reg_type, *prev_src_type)) {
>  				/* ABuser program is trying to use the same insn
>  				 * dst_reg = *(u32*) (src_reg + off)
>  				 * with different pointer types:
> @@ -4826,8 +4908,8 @@ static int do_check(struct bpf_verifier_env *env)
>  			if (*prev_dst_type == NOT_INIT) {
>  				*prev_dst_type = dst_reg_type;
>  			} else if (dst_reg_type != *prev_dst_type &&
> -				   (dst_reg_type == PTR_TO_CTX ||
> -				    *prev_dst_type == PTR_TO_CTX)) {
> +				   (!reg_type_mismatch_ok(dst_reg_type) ||
> +				    !reg_type_mismatch_ok(*prev_dst_type))) {

reg_type_mismatch() could have been used here as well ?

>  				verbose(env, "same insn cannot be used with different pointers\n");
>  				return -EINVAL;
>  			}
> @@ -5244,10 +5326,14 @@ static void sanitize_dead_code(struct bpf_verifier_env *env)
>  	}
>  }
>  
> -/* convert load instructions that access fields of 'struct __sk_buff'
> - * into sequence of instructions that access fields of 'struct sk_buff'
> +/* convert load instructions that access fields of a context type into a
> + * sequence of instructions that access fields of the underlying structure:
> + *     struct __sk_buff    -> struct sk_buff
> + *     struct bpf_sock_ops -> struct sock
>   */
> -static int convert_ctx_accesses(struct bpf_verifier_env *env)
> +static int convert_ctx_accesses(struct bpf_verifier_env *env,
> +				bpf_convert_ctx_access_t convert_ctx_access,
> +				enum bpf_reg_type ctx_type)
>  {
>  	const struct bpf_verifier_ops *ops = env->ops;
>  	int i, cnt, size, ctx_field_size, delta = 0;
> @@ -5274,12 +5360,14 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
>  		}
>  	}
>  
> -	if (!ops->convert_ctx_access || bpf_prog_is_dev_bound(env->prog->aux))
> +	if (!convert_ctx_access || bpf_prog_is_dev_bound(env->prog->aux))
>  		return 0;
>  
>  	insn = env->prog->insnsi + delta;
>  
>  	for (i = 0; i < insn_cnt; i++, insn++) {
> +		enum bpf_reg_type ptr_type;
> +
>  		if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) ||
>  		    insn->code == (BPF_LDX | BPF_MEM | BPF_H) ||
>  		    insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
> @@ -5321,7 +5409,8 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
>  			continue;
>  		}
>  
> -		if (env->insn_aux_data[i + delta].ptr_type != PTR_TO_CTX)
> +		ptr_type = env->insn_aux_data[i + delta].ptr_type;
> +		if (ptr_type != ctx_type)
>  			continue;
>  
>  		ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size;
> @@ -5354,8 +5443,8 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
>  		}
>  
>  		target_size = 0;
> -		cnt = ops->convert_ctx_access(type, insn, insn_buf, env->prog,
> -					      &target_size);
> +		cnt = convert_ctx_access(type, insn, insn_buf, env->prog,
> +					 &target_size);
>  		if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf) ||
>  		    (ctx_field_size && !target_size)) {
>  			verbose(env, "bpf verifier is misconfigured\n");
> @@ -5899,7 +5988,13 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
>  
>  	if (ret == 0)
>  		/* program is valid, convert *(u32*)(ctx + off) accesses */
> -		ret = convert_ctx_accesses(env);
> +		ret = convert_ctx_accesses(env, env->ops->convert_ctx_access,
> +					   PTR_TO_CTX);
> +
> +	if (ret == 0)
> +		/* Convert *(u32*)(sock_ops + off) accesses */
> +		ret = convert_ctx_accesses(env, bpf_sock_convert_ctx_access,
> +					   PTR_TO_SOCKET);

can it be done in single pass instead?
env->insn_aux_data tells us what kind of conversion needs to happen.
while progs are small, I guess, it's ok, but would like to see a follow up
patch to this.
Joe Stringer Sept. 13, 2018, 6:59 p.m. UTC | #2
On Wed, 12 Sep 2018 at 15:50, Alexei Starovoitov
<alexei.starovoitov@gmail.com> wrote:
>
> On Tue, Sep 11, 2018 at 05:36:33PM -0700, Joe Stringer wrote:
> > ...
> > +static bool reg_type_mismatch(enum bpf_reg_type src, enum bpf_reg_type prev)
> > +{
> > +     return src != prev && (!reg_type_mismatch_ok(src) ||
> > +                            !reg_type_mismatch_ok(prev));
> > +}
> > +
> >  static int do_check(struct bpf_verifier_env *env)
> >  {
> >       struct bpf_verifier_state *state;
> > @@ -4778,9 +4862,7 @@ static int do_check(struct bpf_verifier_env *env)
> >                                */
> >                               *prev_src_type = src_reg_type;
> >
> > -                     } else if (src_reg_type != *prev_src_type &&
> > -                                (src_reg_type == PTR_TO_CTX ||
> > -                                 *prev_src_type == PTR_TO_CTX)) {
> > +                     } else if (reg_type_mismatch(src_reg_type, *prev_src_type)) {
> >                               /* ABuser program is trying to use the same insn
> >                                * dst_reg = *(u32*) (src_reg + off)
> >                                * with different pointer types:
> > @@ -4826,8 +4908,8 @@ static int do_check(struct bpf_verifier_env *env)
> >                       if (*prev_dst_type == NOT_INIT) {
> >                               *prev_dst_type = dst_reg_type;
> >                       } else if (dst_reg_type != *prev_dst_type &&
> > -                                (dst_reg_type == PTR_TO_CTX ||
> > -                                 *prev_dst_type == PTR_TO_CTX)) {
> > +                                (!reg_type_mismatch_ok(dst_reg_type) ||
> > +                                 !reg_type_mismatch_ok(*prev_dst_type))) {
>
> reg_type_mismatch() could have been used here as well ?

Missed that before, will fix.

> >                               verbose(env, "same insn cannot be used with different pointers\n");
> >                               return -EINVAL;
> >                       }
> > @@ -5244,10 +5326,14 @@ static void sanitize_dead_code(struct bpf_verifier_env *env)
> >       }
> >  }
> >
> > -/* convert load instructions that access fields of 'struct __sk_buff'
> > - * into sequence of instructions that access fields of 'struct sk_buff'
> > +/* convert load instructions that access fields of a context type into a
> > + * sequence of instructions that access fields of the underlying structure:
> > + *     struct __sk_buff    -> struct sk_buff
> > + *     struct bpf_sock_ops -> struct sock
> >   */
> > -static int convert_ctx_accesses(struct bpf_verifier_env *env)
> > +static int convert_ctx_accesses(struct bpf_verifier_env *env,
> > +                             bpf_convert_ctx_access_t convert_ctx_access,
> > +                             enum bpf_reg_type ctx_type)
> >  {
> >       const struct bpf_verifier_ops *ops = env->ops;
> >       int i, cnt, size, ctx_field_size, delta = 0;
> > @@ -5274,12 +5360,14 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
> >               }
> >       }
> >
> > -     if (!ops->convert_ctx_access || bpf_prog_is_dev_bound(env->prog->aux))
> > +     if (!convert_ctx_access || bpf_prog_is_dev_bound(env->prog->aux))
> >               return 0;
> >
> >       insn = env->prog->insnsi + delta;
> >
> >       for (i = 0; i < insn_cnt; i++, insn++) {
> > +             enum bpf_reg_type ptr_type;
> > +
> >               if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) ||
> >                   insn->code == (BPF_LDX | BPF_MEM | BPF_H) ||
> >                   insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
> > @@ -5321,7 +5409,8 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
> >                       continue;
> >               }
> >
> > -             if (env->insn_aux_data[i + delta].ptr_type != PTR_TO_CTX)
> > +             ptr_type = env->insn_aux_data[i + delta].ptr_type;
> > +             if (ptr_type != ctx_type)
> >                       continue;
> >
> >               ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size;
> > @@ -5354,8 +5443,8 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
> >               }
> >
> >               target_size = 0;
> > -             cnt = ops->convert_ctx_access(type, insn, insn_buf, env->prog,
> > -                                           &target_size);
> > +             cnt = convert_ctx_access(type, insn, insn_buf, env->prog,
> > +                                      &target_size);
> >               if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf) ||
> >                   (ctx_field_size && !target_size)) {
> >                       verbose(env, "bpf verifier is misconfigured\n");
> > @@ -5899,7 +5988,13 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
> >
> >       if (ret == 0)
> >               /* program is valid, convert *(u32*)(ctx + off) accesses */
> > -             ret = convert_ctx_accesses(env);
> > +             ret = convert_ctx_accesses(env, env->ops->convert_ctx_access,
> > +                                        PTR_TO_CTX);
> > +
> > +     if (ret == 0)
> > +             /* Convert *(u32*)(sock_ops + off) accesses */
> > +             ret = convert_ctx_accesses(env, bpf_sock_convert_ctx_access,
> > +                                        PTR_TO_SOCKET);
>
> can it be done in single pass instead?
> env->insn_aux_data tells us what kind of conversion needs to happen.
> while progs are small, I guess, it's ok, but would like to see a follow up
> patch to this.

Yeah, good point - upon review it seems like this would be a simple
change (incremental):

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index db05f78bfc6e..7fa1b695a2a1 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -5329,9 +5329,7 @@ static void sanitize_dead_code(struct
bpf_verifier_env *env)
 *     struct __sk_buff    -> struct sk_buff
 *     struct bpf_sock_ops -> struct sock
 */
-static int convert_ctx_accesses(struct bpf_verifier_env *env,
-                               bpf_convert_ctx_access_t convert_ctx_access,
-                               enum bpf_reg_type ctx_type)
+static int convert_ctx_accesses(struct bpf_verifier_env *env)
{
       const struct bpf_verifier_ops *ops = env->ops;
       int i, cnt, size, ctx_field_size, delta = 0;
@@ -5358,13 +5356,13 @@ static int convert_ctx_accesses(struct
bpf_verifier_env *env,
               }
       }

-       if (!convert_ctx_access || bpf_prog_is_dev_bound(env->prog->aux))
+       if (bpf_prog_is_dev_bound(env->prog->aux))
               return 0;

       insn = env->prog->insnsi + delta;

       for (i = 0; i < insn_cnt; i++, insn++) {
-               enum bpf_reg_type ptr_type;
+               bpf_convert_ctx_access_t convert_ctx_access;

               if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) ||
                   insn->code == (BPF_LDX | BPF_MEM | BPF_H) ||
@@ -5407,9 +5405,18 @@ static int convert_ctx_accesses(struct
bpf_verifier_env *env,
                       continue;
               }

-               ptr_type = env->insn_aux_data[i + delta].ptr_type;
-               if (ptr_type != ctx_type)
+               switch (env->insn_aux_data[i + delta].ptr_type) {
+               case PTR_TO_CTX:
+                       if (!ops->convert_ctx_access)
+                               continue;
+                       convert_ctx_access = ops->convert_ctx_access;
+                       break;
+               case PTR_TO_SOCKET:
+                       convert_ctx_access = bpf_sock_convert_ctx_access;
+                       break;
+               default:
                       continue;
+               }

               ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size;
               size = BPF_LDST_BYTES(insn);
@@ -5986,13 +5993,7 @@ int bpf_check(struct bpf_prog **prog, union
bpf_attr *attr)

       if (ret == 0)
               /* program is valid, convert *(u32*)(ctx + off) accesses */
-               ret = convert_ctx_accesses(env, env->ops->convert_ctx_access,
-                                          PTR_TO_CTX);
-
-       if (ret == 0)
-               /* Convert *(u32*)(sock_ops + off) accesses */
-               ret = convert_ctx_accesses(env, bpf_sock_convert_ctx_access,
-                                          PTR_TO_SOCKET);
+               ret = convert_ctx_accesses(env);

       if (ret == 0)
               ret = fixup_bpf_calls(env);
diff mbox series

Patch

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 523481a3471b..6ec93f3d66dd 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -154,6 +154,7 @@  enum bpf_arg_type {
 
 	ARG_PTR_TO_CTX,		/* pointer to context */
 	ARG_ANYTHING,		/* any (initialized) argument is ok */
+	ARG_PTR_TO_SOCKET,	/* pointer to bpf_sock */
 };
 
 /* type of values returned from helper functions */
@@ -162,6 +163,7 @@  enum bpf_return_type {
 	RET_VOID,			/* function doesn't return anything */
 	RET_PTR_TO_MAP_VALUE,		/* returns a pointer to map elem value */
 	RET_PTR_TO_MAP_VALUE_OR_NULL,	/* returns a pointer to map elem value or NULL */
+	RET_PTR_TO_SOCKET_OR_NULL,	/* returns a pointer to a socket or NULL */
 };
 
 /* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs
@@ -212,6 +214,8 @@  enum bpf_reg_type {
 	PTR_TO_PACKET_META,	 /* skb->data - meta_len */
 	PTR_TO_PACKET,		 /* reg points to skb->data */
 	PTR_TO_PACKET_END,	 /* skb->data + headlen */
+	PTR_TO_SOCKET,		 /* reg points to struct bpf_sock */
+	PTR_TO_SOCKET_OR_NULL,	 /* reg points to struct bpf_sock or NULL */
 };
 
 /* The information passed from prog-specific *_is_valid_access
@@ -334,6 +338,11 @@  const struct bpf_func_proto *bpf_get_trace_printk_proto(void);
 
 typedef unsigned long (*bpf_ctx_copy_t)(void *dst, const void *src,
 					unsigned long off, unsigned long len);
+typedef u32 (*bpf_convert_ctx_access_t)(enum bpf_access_type type,
+					const struct bpf_insn *src,
+					struct bpf_insn *dst,
+					struct bpf_prog *prog,
+					u32 *target_size);
 
 u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
 		     void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy);
@@ -827,4 +836,12 @@  extern const struct bpf_func_proto bpf_get_local_storage_proto;
 void bpf_user_rnd_init_once(void);
 u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
 
+bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type,
+			      struct bpf_insn_access_aux *info);
+u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
+			        const struct bpf_insn *si,
+			        struct bpf_insn *insn_buf,
+			        struct bpf_prog *prog,
+			        u32 *target_size);
+
 #endif /* _LINUX_BPF_H */
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index af262b97f586..23a2b17bfd75 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -58,6 +58,8 @@  struct bpf_reg_state {
 	 * offset, so they can share range knowledge.
 	 * For PTR_TO_MAP_VALUE_OR_NULL this is used to share which map value we
 	 * came from, when one is tested for != NULL.
+	 * For PTR_TO_SOCKET this is used to share which pointers retain the
+	 * same reference to the socket, to determine proper reference freeing.
 	 */
 	u32 id;
 	/* For scalar types (SCALAR_VALUE), this represents our knowledge of
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index f2357c8c90de..111e031cf65d 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -80,8 +80,8 @@  static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
  * (like pointer plus pointer becomes SCALAR_VALUE type)
  *
  * When verifier sees load or store instructions the type of base register
- * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK. These are three pointer
- * types recognized by check_mem_access() function.
+ * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK, PTR_TO_SOCKET. These are
+ * four pointer types recognized by check_mem_access() function.
  *
  * PTR_TO_MAP_VALUE means that this register is pointing to 'map element value'
  * and the range of [ptr, ptr + map's value_size) is accessible.
@@ -266,6 +266,8 @@  static const char * const reg_type_str[] = {
 	[PTR_TO_PACKET]		= "pkt",
 	[PTR_TO_PACKET_META]	= "pkt_meta",
 	[PTR_TO_PACKET_END]	= "pkt_end",
+	[PTR_TO_SOCKET]		= "sock",
+	[PTR_TO_SOCKET_OR_NULL] = "sock_or_null",
 };
 
 static char slot_type_char[] = {
@@ -971,6 +973,8 @@  static bool is_spillable_regtype(enum bpf_reg_type type)
 	case PTR_TO_PACKET_META:
 	case PTR_TO_PACKET_END:
 	case CONST_PTR_TO_MAP:
+	case PTR_TO_SOCKET:
+	case PTR_TO_SOCKET_OR_NULL:
 		return true;
 	default:
 		return false;
@@ -1326,6 +1330,28 @@  static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off,
 	return -EACCES;
 }
 
+static int check_sock_access(struct bpf_verifier_env *env, u32 regno, int off,
+			     int size, enum bpf_access_type t)
+{
+	struct bpf_reg_state *regs = cur_regs(env);
+	struct bpf_reg_state *reg = &regs[regno];
+	struct bpf_insn_access_aux info;
+
+	if (reg->smin_value < 0) {
+		verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
+			regno);
+		return -EACCES;
+	}
+
+	if (!bpf_sock_is_valid_access(off, size, t, &info)) {
+		verbose(env, "invalid bpf_sock_ops access off=%d size=%d\n",
+			off, size);
+		return -EACCES;
+	}
+
+	return 0;
+}
+
 static bool __is_pointer_value(bool allow_ptr_leaks,
 			       const struct bpf_reg_state *reg)
 {
@@ -1441,6 +1467,9 @@  static int check_ptr_alignment(struct bpf_verifier_env *env,
 		 */
 		strict = true;
 		break;
+	case PTR_TO_SOCKET:
+		pointer_desc = "sock ";
+		break;
 	default:
 		break;
 	}
@@ -1697,6 +1726,16 @@  static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
 		err = check_packet_access(env, regno, off, size, false);
 		if (!err && t == BPF_READ && value_regno >= 0)
 			mark_reg_unknown(env, regs, value_regno);
+
+	} else if (reg->type == PTR_TO_SOCKET) {
+		if (t == BPF_WRITE) {
+			verbose(env, "cannot write into socket\n");
+			return -EACCES;
+		}
+		err = check_sock_access(env, regno, off, size, t);
+		if (!err && value_regno >= 0)
+			mark_reg_unknown(env, regs, value_regno);
+
 	} else {
 		verbose(env, "R%d invalid mem access '%s'\n", regno,
 			reg_type_str[reg->type]);
@@ -1918,6 +1957,10 @@  static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
 		err = check_ctx_reg(env, reg, regno);
 		if (err < 0)
 			return err;
+	} else if (arg_type == ARG_PTR_TO_SOCKET) {
+		expected_type = PTR_TO_SOCKET;
+		if (type != expected_type)
+			goto err_type;
 	} else if (arg_type_is_mem_ptr(arg_type)) {
 		expected_type = PTR_TO_STACK;
 		/* One exception here. In case function allows for NULL to be
@@ -2511,6 +2554,10 @@  static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
 		}
 		regs[BPF_REG_0].map_ptr = meta.map_ptr;
 		regs[BPF_REG_0].id = ++env->id_gen;
+	} else if (fn->ret_type == RET_PTR_TO_SOCKET_OR_NULL) {
+		mark_reg_known_zero(env, regs, BPF_REG_0);
+		regs[BPF_REG_0].type = PTR_TO_SOCKET_OR_NULL;
+		regs[BPF_REG_0].id = ++env->id_gen;
 	} else {
 		verbose(env, "unknown return type %d of func %s#%d\n",
 			fn->ret_type, func_id_name(func_id), func_id);
@@ -2648,6 +2695,8 @@  static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
 		return -EACCES;
 	case CONST_PTR_TO_MAP:
 	case PTR_TO_PACKET_END:
+	case PTR_TO_SOCKET:
+	case PTR_TO_SOCKET_OR_NULL:
 		verbose(env, "R%d pointer arithmetic on %s prohibited\n",
 			dst, reg_type_str[ptr_reg->type]);
 		return -EACCES;
@@ -3595,6 +3644,8 @@  static void mark_ptr_or_null_reg(struct bpf_reg_state *reg, u32 id,
 			} else {
 				reg->type = PTR_TO_MAP_VALUE;
 			}
+		} else if (reg->type == PTR_TO_SOCKET_OR_NULL) {
+			reg->type = PTR_TO_SOCKET;
 		}
 		/* We don't need id from this point onwards anymore, thus we
 		 * should better reset it, so that state pruning has chances
@@ -4369,6 +4420,8 @@  static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
 	case PTR_TO_CTX:
 	case CONST_PTR_TO_MAP:
 	case PTR_TO_PACKET_END:
+	case PTR_TO_SOCKET:
+	case PTR_TO_SOCKET_OR_NULL:
 		/* Only valid matches are exact, which memcmp() above
 		 * would have accepted
 		 */
@@ -4646,6 +4699,37 @@  static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
 	return 0;
 }
 
+/* Return true if it's OK to have the same insn return a different type. */
+static bool reg_type_mismatch_ok(enum bpf_reg_type type)
+{
+	switch (type) {
+	case PTR_TO_CTX:
+	case PTR_TO_SOCKET:
+	case PTR_TO_SOCKET_OR_NULL:
+		return false;
+	default:
+		return true;
+	}
+}
+
+/* If an instruction was previously used with particular pointer types, then we
+ * need to be careful to avoid cases such as the below, where it may be ok
+ * for one branch accessing the pointer, but not ok for the other branch:
+ *
+ * R1 = sock_ptr
+ * goto X;
+ * ...
+ * R1 = some_other_valid_ptr;
+ * goto X;
+ * ...
+ * R2 = *(u32 *)(R1 + 0);
+ */
+static bool reg_type_mismatch(enum bpf_reg_type src, enum bpf_reg_type prev)
+{
+	return src != prev && (!reg_type_mismatch_ok(src) ||
+			       !reg_type_mismatch_ok(prev));
+}
+
 static int do_check(struct bpf_verifier_env *env)
 {
 	struct bpf_verifier_state *state;
@@ -4778,9 +4862,7 @@  static int do_check(struct bpf_verifier_env *env)
 				 */
 				*prev_src_type = src_reg_type;
 
-			} else if (src_reg_type != *prev_src_type &&
-				   (src_reg_type == PTR_TO_CTX ||
-				    *prev_src_type == PTR_TO_CTX)) {
+			} else if (reg_type_mismatch(src_reg_type, *prev_src_type)) {
 				/* ABuser program is trying to use the same insn
 				 * dst_reg = *(u32*) (src_reg + off)
 				 * with different pointer types:
@@ -4826,8 +4908,8 @@  static int do_check(struct bpf_verifier_env *env)
 			if (*prev_dst_type == NOT_INIT) {
 				*prev_dst_type = dst_reg_type;
 			} else if (dst_reg_type != *prev_dst_type &&
-				   (dst_reg_type == PTR_TO_CTX ||
-				    *prev_dst_type == PTR_TO_CTX)) {
+				   (!reg_type_mismatch_ok(dst_reg_type) ||
+				    !reg_type_mismatch_ok(*prev_dst_type))) {
 				verbose(env, "same insn cannot be used with different pointers\n");
 				return -EINVAL;
 			}
@@ -5244,10 +5326,14 @@  static void sanitize_dead_code(struct bpf_verifier_env *env)
 	}
 }
 
-/* convert load instructions that access fields of 'struct __sk_buff'
- * into sequence of instructions that access fields of 'struct sk_buff'
+/* convert load instructions that access fields of a context type into a
+ * sequence of instructions that access fields of the underlying structure:
+ *     struct __sk_buff    -> struct sk_buff
+ *     struct bpf_sock_ops -> struct sock
  */
-static int convert_ctx_accesses(struct bpf_verifier_env *env)
+static int convert_ctx_accesses(struct bpf_verifier_env *env,
+				bpf_convert_ctx_access_t convert_ctx_access,
+				enum bpf_reg_type ctx_type)
 {
 	const struct bpf_verifier_ops *ops = env->ops;
 	int i, cnt, size, ctx_field_size, delta = 0;
@@ -5274,12 +5360,14 @@  static int convert_ctx_accesses(struct bpf_verifier_env *env)
 		}
 	}
 
-	if (!ops->convert_ctx_access || bpf_prog_is_dev_bound(env->prog->aux))
+	if (!convert_ctx_access || bpf_prog_is_dev_bound(env->prog->aux))
 		return 0;
 
 	insn = env->prog->insnsi + delta;
 
 	for (i = 0; i < insn_cnt; i++, insn++) {
+		enum bpf_reg_type ptr_type;
+
 		if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) ||
 		    insn->code == (BPF_LDX | BPF_MEM | BPF_H) ||
 		    insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
@@ -5321,7 +5409,8 @@  static int convert_ctx_accesses(struct bpf_verifier_env *env)
 			continue;
 		}
 
-		if (env->insn_aux_data[i + delta].ptr_type != PTR_TO_CTX)
+		ptr_type = env->insn_aux_data[i + delta].ptr_type;
+		if (ptr_type != ctx_type)
 			continue;
 
 		ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size;
@@ -5354,8 +5443,8 @@  static int convert_ctx_accesses(struct bpf_verifier_env *env)
 		}
 
 		target_size = 0;
-		cnt = ops->convert_ctx_access(type, insn, insn_buf, env->prog,
-					      &target_size);
+		cnt = convert_ctx_access(type, insn, insn_buf, env->prog,
+					 &target_size);
 		if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf) ||
 		    (ctx_field_size && !target_size)) {
 			verbose(env, "bpf verifier is misconfigured\n");
@@ -5899,7 +5988,13 @@  int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
 
 	if (ret == 0)
 		/* program is valid, convert *(u32*)(ctx + off) accesses */
-		ret = convert_ctx_accesses(env);
+		ret = convert_ctx_accesses(env, env->ops->convert_ctx_access,
+					   PTR_TO_CTX);
+
+	if (ret == 0)
+		/* Convert *(u32*)(sock_ops + off) accesses */
+		ret = convert_ctx_accesses(env, bpf_sock_convert_ctx_access,
+					   PTR_TO_SOCKET);
 
 	if (ret == 0)
 		ret = fixup_bpf_calls(env);
diff --git a/net/core/filter.c b/net/core/filter.c
index 8cb242b4400f..23e6e5202138 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -5375,23 +5375,29 @@  static bool __sock_filter_check_size(int off, int size,
 	return size == size_default;
 }
 
-static bool sock_filter_is_valid_access(int off, int size,
-					enum bpf_access_type type,
-					const struct bpf_prog *prog,
-					struct bpf_insn_access_aux *info)
+bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type,
+			      struct bpf_insn_access_aux *info)
 {
 	if (off < 0 || off >= sizeof(struct bpf_sock))
 		return false;
 	if (off % size != 0)
 		return false;
-	if (!__sock_filter_check_attach_type(off, type,
-					     prog->expected_attach_type))
-		return false;
 	if (!__sock_filter_check_size(off, size, info))
 		return false;
 	return true;
 }
 
+static bool sock_filter_is_valid_access(int off, int size,
+					enum bpf_access_type type,
+					const struct bpf_prog *prog,
+					struct bpf_insn_access_aux *info)
+{
+	if (!__sock_filter_check_attach_type(off, type,
+					     prog->expected_attach_type))
+		return false;
+	return bpf_sock_is_valid_access(off, size, type, info);
+}
+
 static int bpf_unclone_prologue(struct bpf_insn *insn_buf, bool direct_write,
 				const struct bpf_prog *prog, int drop_verdict)
 {
@@ -6059,10 +6065,10 @@  static u32 bpf_convert_ctx_access(enum bpf_access_type type,
 	return insn - insn_buf;
 }
 
-static u32 sock_filter_convert_ctx_access(enum bpf_access_type type,
-					  const struct bpf_insn *si,
-					  struct bpf_insn *insn_buf,
-					  struct bpf_prog *prog, u32 *target_size)
+u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
+				const struct bpf_insn *si,
+				struct bpf_insn *insn_buf,
+				struct bpf_prog *prog, u32 *target_size)
 {
 	struct bpf_insn *insn = insn_buf;
 	int off;
@@ -6974,7 +6980,7 @@  const struct bpf_prog_ops lwt_seg6local_prog_ops = {
 const struct bpf_verifier_ops cg_sock_verifier_ops = {
 	.get_func_proto		= sock_filter_func_proto,
 	.is_valid_access	= sock_filter_is_valid_access,
-	.convert_ctx_access	= sock_filter_convert_ctx_access,
+	.convert_ctx_access	= bpf_sock_convert_ctx_access,
 };
 
 const struct bpf_prog_ops cg_sock_prog_ops = {